This commit is contained in:
DELL 2026-02-12 09:46:39 +08:00
commit 953fdc81dc
20 changed files with 1317 additions and 2843 deletions

1
.gitignore vendored
View File

@ -64,3 +64,4 @@ target/
node_modules/
.arts/
.jlsp/
*.iml

View File

@ -1,13 +1,241 @@
<?xml version="1.0" encoding="UTF-8"?>
<module version="4">
<module org.jetbrains.idea.maven.project.MavenProjectsManager.isMavenModule="true" type="JAVA_MODULE" version="4">
<component name="FacetManager">
<facet type="web" name="Web">
<configuration>
<webroots />
</configuration>
</facet>
<facet type="jpa" name="JPA">
<configuration>
<setting name="validation-enabled" value="true" />
<setting name="provider-name" value="Hibernate" />
<datasource-mapping />
<datasource-mapping>
<factory-entry name="entityManagerFactory" />
</datasource-mapping>
<naming-strategy-map />
</configuration>
</facet>
<facet type="Spring" name="Spring">
<configuration />
</facet>
</component>
<component name="NewModuleRootManager" LANGUAGE_LEVEL="JDK_1_8">
<output url="file://$MODULE_DIR$/target/classes" />
<output-test url="file://$MODULE_DIR$/target/test-classes" />
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/src/main/java" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/src/main/resources" type="java-resource" />
<excludeFolder url="file://$MODULE_DIR$/target" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="library" name="Maven: org.jetbrains:annotations:26.0.2-1" level="project" />
<orderEntry type="library" name="Maven: org.apache.commons:commons-compress:1.18" level="project" />
<orderEntry type="library" name="Maven: com.alibaba:fastjson:1.2.75" level="project" />
<orderEntry type="library" name="Maven: org.projectlombok:lombok:1.18.16" level="project" />
<orderEntry type="library" name="Maven: org.springframework.boot:spring-boot-starter-web:2.4.1" level="project" />
<orderEntry type="library" name="Maven: org.springframework.boot:spring-boot-starter:2.4.1" level="project" />
<orderEntry type="library" name="Maven: org.springframework.boot:spring-boot-starter-logging:2.4.1" level="project" />
<orderEntry type="library" name="Maven: ch.qos.logback:logback-classic:1.2.3" level="project" />
<orderEntry type="library" name="Maven: ch.qos.logback:logback-core:1.2.3" level="project" />
<orderEntry type="library" name="Maven: org.apache.logging.log4j:log4j-to-slf4j:2.13.3" level="project" />
<orderEntry type="library" name="Maven: org.slf4j:jul-to-slf4j:1.7.30" level="project" />
<orderEntry type="library" name="Maven: jakarta.annotation:jakarta.annotation-api:1.3.5" level="project" />
<orderEntry type="library" name="Maven: org.yaml:snakeyaml:1.27" level="project" />
<orderEntry type="library" name="Maven: org.springframework.boot:spring-boot-starter-json:2.4.1" level="project" />
<orderEntry type="library" name="Maven: com.fasterxml.jackson.datatype:jackson-datatype-jdk8:2.11.3" level="project" />
<orderEntry type="library" name="Maven: com.fasterxml.jackson.datatype:jackson-datatype-jsr310:2.11.3" level="project" />
<orderEntry type="library" name="Maven: com.fasterxml.jackson.module:jackson-module-parameter-names:2.11.3" level="project" />
<orderEntry type="library" name="Maven: org.springframework.boot:spring-boot-starter-tomcat:2.4.1" level="project" />
<orderEntry type="library" name="Maven: org.apache.tomcat.embed:tomcat-embed-core:9.0.41" level="project" />
<orderEntry type="library" name="Maven: org.glassfish:jakarta.el:3.0.3" level="project" />
<orderEntry type="library" name="Maven: org.apache.tomcat.embed:tomcat-embed-websocket:9.0.41" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-web:5.3.2" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-beans:5.3.2" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-webmvc:5.3.2" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-aop:5.3.2" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-expression:5.3.2" level="project" />
<orderEntry type="library" name="Maven: org.springframework.boot:spring-boot-starter-data-elasticsearch:2.4.1" level="project" />
<orderEntry type="library" name="Maven: org.springframework.data:spring-data-elasticsearch:4.1.2" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-tx:5.3.2" level="project" />
<orderEntry type="library" name="Maven: org.springframework.data:spring-data-commons:2.4.2" level="project" />
<orderEntry type="library" name="Maven: org.elasticsearch.plugin:transport-netty4-client:7.7.0" level="project" />
<orderEntry type="library" name="Maven: io.netty:netty-buffer:4.1.55.Final" level="project" />
<orderEntry type="library" name="Maven: io.netty:netty-codec:4.1.55.Final" level="project" />
<orderEntry type="library" name="Maven: io.netty:netty-codec-http:4.1.55.Final" level="project" />
<orderEntry type="library" name="Maven: io.netty:netty-common:4.1.55.Final" level="project" />
<orderEntry type="library" name="Maven: io.netty:netty-handler:4.1.55.Final" level="project" />
<orderEntry type="library" name="Maven: io.netty:netty-resolver:4.1.55.Final" level="project" />
<orderEntry type="library" name="Maven: io.netty:netty-transport:4.1.55.Final" level="project" />
<orderEntry type="library" name="Maven: org.elasticsearch.client:elasticsearch-rest-high-level-client:7.7.0" level="project" />
<orderEntry type="library" name="Maven: org.elasticsearch:elasticsearch:7.7.0" level="project" />
<orderEntry type="library" name="Maven: org.elasticsearch:elasticsearch-core:7.7.0" level="project" />
<orderEntry type="library" name="Maven: org.elasticsearch:elasticsearch-secure-sm:7.7.0" level="project" />
<orderEntry type="library" name="Maven: org.elasticsearch:elasticsearch-x-content:7.7.0" level="project" />
<orderEntry type="library" name="Maven: com.fasterxml.jackson.dataformat:jackson-dataformat-smile:2.11.3" level="project" />
<orderEntry type="library" name="Maven: com.fasterxml.jackson.dataformat:jackson-dataformat-yaml:2.11.3" level="project" />
<orderEntry type="library" name="Maven: com.fasterxml.jackson.dataformat:jackson-dataformat-cbor:2.11.3" level="project" />
<orderEntry type="library" name="Maven: org.elasticsearch:elasticsearch-geo:7.7.0" level="project" />
<orderEntry type="library" name="Maven: org.apache.lucene:lucene-core:8.5.1" level="project" />
<orderEntry type="library" name="Maven: org.apache.lucene:lucene-analyzers-common:8.5.1" level="project" />
<orderEntry type="library" name="Maven: org.apache.lucene:lucene-backward-codecs:8.5.1" level="project" />
<orderEntry type="library" name="Maven: org.apache.lucene:lucene-grouping:8.5.1" level="project" />
<orderEntry type="library" name="Maven: org.apache.lucene:lucene-highlighter:8.5.1" level="project" />
<orderEntry type="library" name="Maven: org.apache.lucene:lucene-join:8.5.1" level="project" />
<orderEntry type="library" name="Maven: org.apache.lucene:lucene-memory:8.5.1" level="project" />
<orderEntry type="library" name="Maven: org.apache.lucene:lucene-misc:8.5.1" level="project" />
<orderEntry type="library" name="Maven: org.apache.lucene:lucene-queries:8.5.1" level="project" />
<orderEntry type="library" name="Maven: org.apache.lucene:lucene-queryparser:8.5.1" level="project" />
<orderEntry type="library" name="Maven: org.apache.lucene:lucene-sandbox:8.5.1" level="project" />
<orderEntry type="library" name="Maven: org.apache.lucene:lucene-spatial-extras:8.5.1" level="project" />
<orderEntry type="library" name="Maven: org.apache.lucene:lucene-spatial3d:8.5.1" level="project" />
<orderEntry type="library" name="Maven: org.apache.lucene:lucene-suggest:8.5.1" level="project" />
<orderEntry type="library" name="Maven: org.elasticsearch:elasticsearch-cli:7.7.0" level="project" />
<orderEntry type="library" name="Maven: com.carrotsearch:hppc:0.8.1" level="project" />
<orderEntry type="library" name="Maven: joda-time:joda-time:2.10.4" level="project" />
<orderEntry type="library" name="Maven: com.tdunning:t-digest:3.2" level="project" />
<orderEntry type="library" name="Maven: org.hdrhistogram:HdrHistogram:2.1.9" level="project" />
<orderEntry type="library" name="Maven: org.elasticsearch:jna:4.5.1" level="project" />
<orderEntry type="library" name="Maven: org.elasticsearch.client:elasticsearch-rest-client:7.7.0" level="project" />
<orderEntry type="library" name="Maven: org.apache.httpcomponents:httpclient:4.5.13" level="project" />
<orderEntry type="library" name="Maven: org.apache.httpcomponents:httpcore:4.4.14" level="project" />
<orderEntry type="library" name="Maven: org.apache.httpcomponents:httpasyncclient:4.1.4" level="project" />
<orderEntry type="library" name="Maven: org.apache.httpcomponents:httpcore-nio:4.4.14" level="project" />
<orderEntry type="library" name="Maven: org.elasticsearch.plugin:mapper-extras-client:7.7.0" level="project" />
<orderEntry type="library" name="Maven: org.elasticsearch.plugin:parent-join-client:7.7.0" level="project" />
<orderEntry type="library" name="Maven: org.elasticsearch.plugin:aggs-matrix-stats-client:7.7.0" level="project" />
<orderEntry type="library" name="Maven: org.elasticsearch.plugin:rank-eval-client:7.7.0" level="project" />
<orderEntry type="library" name="Maven: org.elasticsearch.plugin:lang-mustache-client:7.7.0" level="project" />
<orderEntry type="library" name="Maven: com.github.spullara.mustache.java:compiler:0.9.6" level="project" />
<orderEntry type="library" name="Maven: com.fasterxml.jackson.core:jackson-core:2.11.3" level="project" />
<orderEntry type="library" name="Maven: org.slf4j:slf4j-api:1.7.30" level="project" />
<orderEntry type="library" name="Maven: org.springframework.cloud:spring-cloud-stream:3.0.7.RELEASE" level="project" />
<orderEntry type="library" name="Maven: org.springframework.boot:spring-boot-starter-validation:2.4.1" level="project" />
<orderEntry type="library" name="Maven: org.hibernate.validator:hibernate-validator:6.1.6.Final" level="project" />
<orderEntry type="library" name="Maven: jakarta.validation:jakarta.validation-api:2.0.2" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-messaging:5.3.2" level="project" />
<orderEntry type="library" name="Maven: org.springframework.integration:spring-integration-core:5.4.2" level="project" />
<orderEntry type="library" name="Maven: io.projectreactor:reactor-core:3.4.1" level="project" />
<orderEntry type="library" name="Maven: org.reactivestreams:reactive-streams:1.0.3" level="project" />
<orderEntry type="library" name="Maven: org.springframework.integration:spring-integration-jmx:5.4.2" level="project" />
<orderEntry type="library" name="Maven: org.springframework.retry:spring-retry:1.3.0" level="project" />
<orderEntry type="library" name="Maven: javax.annotation:javax.annotation-api:1.3.2" level="project" />
<orderEntry type="library" name="Maven: org.springframework.cloud:spring-cloud-function-context:3.0.9.RELEASE" level="project" />
<orderEntry type="library" name="Maven: net.jodah:typetools:0.6.2" level="project" />
<orderEntry type="library" name="Maven: org.springframework.cloud:spring-cloud-function-core:3.0.9.RELEASE" level="project" />
<orderEntry type="library" name="Maven: org.springframework.cloud:spring-cloud-stream-binder-kafka:3.0.7.RELEASE" level="project" />
<orderEntry type="library" name="Maven: org.springframework.cloud:spring-cloud-stream-binder-kafka-core:3.0.7.RELEASE" level="project" />
<orderEntry type="library" name="Maven: org.springframework.integration:spring-integration-kafka:5.4.2" level="project" />
<orderEntry type="library" name="Maven: org.apache.kafka:kafka-clients:2.6.0" level="project" />
<orderEntry type="library" name="Maven: com.github.luben:zstd-jni:1.4.4-7" level="project" />
<orderEntry type="library" name="Maven: org.lz4:lz4-java:1.7.1" level="project" />
<orderEntry type="library" name="Maven: org.xerial.snappy:snappy-java:1.1.7.3" level="project" />
<orderEntry type="library" name="Maven: org.springframework.kafka:spring-kafka:2.6.4" level="project" />
<orderEntry type="library" name="Maven: com.google.protobuf:protobuf-java:3.11.4" level="project" />
<orderEntry type="library" name="Maven: com.squareup.okhttp3:okhttp:4.8.1" level="project" />
<orderEntry type="library" name="Maven: com.squareup.okio:okio:2.7.0" level="project" />
<orderEntry type="library" name="Maven: org.jetbrains.kotlin:kotlin-stdlib-common:1.4.21" level="project" />
<orderEntry type="library" name="Maven: org.jetbrains.kotlin:kotlin-stdlib:1.4.21" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.springframework.boot:spring-boot-starter-test:2.4.1" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.springframework.boot:spring-boot-test:2.4.1" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.springframework.boot:spring-boot-test-autoconfigure:2.4.1" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: com.jayway.jsonpath:json-path:2.4.0" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: net.minidev:json-smart:2.3" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: net.minidev:accessors-smart:1.2" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.ow2.asm:asm:5.0.4" level="project" />
<orderEntry type="library" name="Maven: jakarta.xml.bind:jakarta.xml.bind-api:2.3.3" level="project" />
<orderEntry type="library" name="Maven: jakarta.activation:jakarta.activation-api:1.2.2" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.assertj:assertj-core:3.18.1" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.hamcrest:hamcrest:2.2" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.junit.jupiter:junit-jupiter:5.7.0" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.junit.jupiter:junit-jupiter-params:5.7.0" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.junit.jupiter:junit-jupiter-engine:5.7.0" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.junit.platform:junit-platform-engine:1.7.0" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.mockito:mockito-core:3.6.28" level="project" />
<orderEntry type="library" name="Maven: net.bytebuddy:byte-buddy:1.10.18" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: net.bytebuddy:byte-buddy-agent:1.10.18" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.objenesis:objenesis:3.1" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.mockito:mockito-junit-jupiter:3.6.28" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.skyscreamer:jsonassert:1.5.0" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: com.vaadin.external.google:android-json:0.0.20131108.vaadin1" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-core:5.3.2" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-jcl:5.3.2" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.springframework:spring-test:5.3.2" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.xmlunit:xmlunit-core:2.7.0" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.springframework.cloud:spring-cloud-stream-test-support:3.0.7.RELEASE" level="project" />
<orderEntry type="library" name="Maven: org.springframework.boot:spring-boot-autoconfigure:2.4.1" level="project" />
<orderEntry type="library" name="Maven: com.fasterxml.jackson.core:jackson-databind:2.11.3" level="project" />
<orderEntry type="library" name="Maven: com.fasterxml.jackson.core:jackson-annotations:2.11.3" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.springframework.kafka:spring-kafka-test:2.6.4" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-context:5.3.2" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.apache.kafka:kafka-clients:test:2.6.0" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.apache.kafka:kafka-streams:2.6.0" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.apache.kafka:connect-json:2.6.0" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.apache.kafka:connect-api:2.6.0" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.rocksdb:rocksdbjni:5.18.4" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.apache.kafka:kafka-streams-test-utils:2.6.0" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.apache.kafka:kafka_2.13:2.6.0" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: com.fasterxml.jackson.module:jackson-module-scala_2.13:2.11.3" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: com.fasterxml.jackson.module:jackson-module-paranamer:2.11.3" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: com.thoughtworks.paranamer:paranamer:2.8" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: com.fasterxml.jackson.dataformat:jackson-dataformat-csv:2.11.3" level="project" />
<orderEntry type="library" name="Maven: net.sf.jopt-simple:jopt-simple:5.0.4" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: com.yammer.metrics:metrics-core:2.2.0" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.scala-lang.modules:scala-collection-compat_2.13:2.1.6" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.scala-lang.modules:scala-java8-compat_2.13:0.9.1" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.scala-lang:scala-library:2.13.2" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.scala-lang:scala-reflect:2.13.2" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: com.typesafe.scala-logging:scala-logging_2.13:3.9.2" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.apache.zookeeper:zookeeper:3.5.8" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.apache.zookeeper:zookeeper-jute:3.5.8" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.apache.yetus:audience-annotations:0.5.0" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: io.netty:netty-transport-native-epoll:4.1.55.Final" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: io.netty:netty-transport-native-unix-common:4.1.55.Final" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: commons-cli:commons-cli:1.4" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.apache.kafka:kafka_2.13:test:2.6.0" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.junit.jupiter:junit-jupiter-api:5.7.0" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.apiguardian:apiguardian-api:1.1.0" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.opentest4j:opentest4j:1.2.0" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.junit.platform:junit-platform-commons:1.7.0" level="project" />
<orderEntry type="library" name="Maven: org.springframework.boot:spring-boot-devtools:2.4.1" level="project" />
<orderEntry type="library" name="Maven: org.springframework.boot:spring-boot:2.4.1" level="project" />
<orderEntry type="library" scope="RUNTIME" name="Maven: mysql:mysql-connector-java:8.0.22" level="project" />
<orderEntry type="library" name="Maven: commons-net:commons-net:3.10.0" level="project" />
<orderEntry type="library" name="Maven: org.springframework.boot:spring-boot-starter-data-jpa:2.4.1" level="project" />
<orderEntry type="library" name="Maven: org.springframework.boot:spring-boot-starter-aop:2.4.1" level="project" />
<orderEntry type="library" name="Maven: org.aspectj:aspectjweaver:1.9.6" level="project" />
<orderEntry type="library" name="Maven: org.springframework.boot:spring-boot-starter-jdbc:2.4.1" level="project" />
<orderEntry type="library" name="Maven: com.zaxxer:HikariCP:3.4.5" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-jdbc:5.3.2" level="project" />
<orderEntry type="library" name="Maven: jakarta.transaction:jakarta.transaction-api:1.3.3" level="project" />
<orderEntry type="library" name="Maven: jakarta.persistence:jakarta.persistence-api:2.2.3" level="project" />
<orderEntry type="library" name="Maven: org.hibernate:hibernate-core:5.4.25.Final" level="project" />
<orderEntry type="library" name="Maven: org.jboss.logging:jboss-logging:3.4.1.Final" level="project" />
<orderEntry type="library" name="Maven: org.javassist:javassist:3.27.0-GA" level="project" />
<orderEntry type="library" name="Maven: antlr:antlr:2.7.7" level="project" />
<orderEntry type="library" name="Maven: org.jboss:jandex:2.1.3.Final" level="project" />
<orderEntry type="library" name="Maven: com.fasterxml:classmate:1.5.1" level="project" />
<orderEntry type="library" name="Maven: org.dom4j:dom4j:2.1.3" level="project" />
<orderEntry type="library" name="Maven: org.hibernate.common:hibernate-commons-annotations:5.1.2.Final" level="project" />
<orderEntry type="library" name="Maven: org.glassfish.jaxb:jaxb-runtime:2.3.3" level="project" />
<orderEntry type="library" name="Maven: org.glassfish.jaxb:txw2:2.3.3" level="project" />
<orderEntry type="library" name="Maven: com.sun.istack:istack-commons-runtime:3.0.11" level="project" />
<orderEntry type="library" scope="RUNTIME" name="Maven: com.sun.activation:jakarta.activation:1.2.2" level="project" />
<orderEntry type="library" name="Maven: org.springframework.data:spring-data-jpa:2.4.2" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-orm:5.3.2" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-aspects:5.3.2" level="project" />
<orderEntry type="library" name="Maven: org.apache.poi:poi:5.2.4" level="project" />
<orderEntry type="library" name="Maven: commons-codec:commons-codec:1.15" level="project" />
<orderEntry type="library" name="Maven: org.apache.commons:commons-collections4:4.4" level="project" />
<orderEntry type="library" name="Maven: org.apache.commons:commons-math3:3.6.1" level="project" />
<orderEntry type="library" name="Maven: commons-io:commons-io:2.13.0" level="project" />
<orderEntry type="library" name="Maven: com.zaxxer:SparseBitSet:1.3" level="project" />
<orderEntry type="library" name="Maven: org.apache.logging.log4j:log4j-api:2.13.3" level="project" />
<orderEntry type="library" name="Maven: org.apache.poi:poi-ooxml:5.2.4" level="project" />
<orderEntry type="library" name="Maven: org.apache.poi:poi-ooxml-lite:5.2.4" level="project" />
<orderEntry type="library" name="Maven: org.apache.xmlbeans:xmlbeans:5.1.1" level="project" />
<orderEntry type="library" name="Maven: com.github.virtuald:curvesapi:1.08" level="project" />
<orderEntry type="library" name="Maven: com.jcraft:jsch:0.1.55" level="project" />
</component>
</module>

View File

@ -121,6 +121,11 @@
<artifactId>poi-ooxml</artifactId>
<version>5.2.4</version>
</dependency>
<!-- JSch SSH/SFTP client library. NOTE(review): 0.1.55 appears to be the last release published under com.jcraft; confirm whether a maintained fork (e.g. com.github.mwiede:jsch) should be used instead. -->
<dependency>
<groupId>com.jcraft</groupId>
<artifactId>jsch</artifactId>
<version>0.1.55</version> <!-- 0.1.55 or later is recommended -->
</dependency>
</dependencies>

View File

@ -2,9 +2,8 @@ package com.jsc.dsp.controller;
import com.alibaba.fastjson.JSONObject;
import com.jsc.dsp.model.ReturnT;
import com.jsc.dsp.utils.AutoExportAndUpload;
import com.jsc.dsp.utils.ExportAndUploadUtils;
import com.jsc.dsp.utils.DatabaseConnector;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestBody;
import org.springframework.web.bind.annotation.RequestMapping;
@ -20,7 +19,7 @@ public class ExportController {
DatabaseConnector databaseConnector;
@Resource
AutoExportAndUpload autoExportAndUpload;
ExportAndUploadUtils exportAndUploadUtils;
@PostMapping("/exportExcel")
public ReturnT<String> exportExcel(@RequestBody JSONObject object) {
@ -33,25 +32,37 @@ public class ExportController {
}
}
/**
 * Handles POST /triggerTask: starts the export-and-upload job on a new thread
 * and returns immediately.
 *
 * The success response only means the thread was started. The catch block
 * covers failures while creating or starting the thread; an exception raised
 * later inside the job occurs on the other thread and never reaches it.
 *
 * @return a ReturnT built with 200 and empty strings, or with 500 and the
 *         exception message
 */
@PostMapping("/triggerTask")
public ReturnT<String> triggerTask() {
    try {
        Runnable exportJob = () -> autoExportAndUpload.exportDataAndUpload();
        Thread worker = new Thread(exportJob);
        worker.start();
        return new ReturnT<>(200, "", "");
    } catch (Exception ex) {
        return new ReturnT<>(500, ex.getMessage(), "");
    }
}
/**
 * Handles POST /exportTwitterExcel.
 * Reads "startTime" from the JSON request body and passes it to the Twitter
 * export calls below. Returns a ReturnT built with 200 and empty strings on
 * success, or with 500 and the exception message when an exception is thrown
 * on the request thread.
 */
@PostMapping("/exportTwitterExcel")
public ReturnT<String> triggerTwitterTask(@RequestBody JSONObject object) {
try {
// NOTE(review): startTime is used as-is; no null or format check is visible here.
String startTime = object.getString("startTime");
new Thread(() -> autoExportAndUpload.exportTwitterDataAndUpload(startTime)).start();
databaseConnector.twitterToXlsx(startTime);
return new ReturnT<>(200, "", "");
} catch (Exception e) {
return new ReturnT<>(500, e.getMessage(), "");
}
}
/**
 * Handles POST /triggerTask: starts the news export-and-upload job on a new
 * thread and returns immediately.
 *
 * The success response only means the thread was started. The catch block
 * covers failures while creating or starting the thread; an exception raised
 * later inside the job occurs on the other thread and never reaches it.
 *
 * @return a ReturnT built with 200 and empty strings, or with 500 and the
 *         exception message
 */
@PostMapping("/triggerTask")
public ReturnT<String> triggerTask() {
    try {
        Runnable newsExportJob = () -> exportAndUploadUtils.exportNewsDataAndUpload();
        Thread worker = new Thread(newsExportJob);
        worker.start();
        return new ReturnT<>(200, "", "");
    } catch (Exception ex) {
        return new ReturnT<>(500, ex.getMessage(), "");
    }
}
/**
 * Handles POST /triggerTwitterTask: starts the Twitter export-and-upload job
 * on a new thread and returns immediately.
 *
 * The success response only means the thread was started. The catch block
 * covers failures while creating or starting the thread; an exception raised
 * later inside the job occurs on the other thread and never reaches it.
 *
 * @return a ReturnT built with 200 and empty strings, or with 500 and the
 *         exception message
 */
@PostMapping("/triggerTwitterTask")
public ReturnT<String> triggerTwitterTask() {
    try {
        Runnable twitterExportJob = () -> exportAndUploadUtils.exportTwitterDataAndUpload();
        Thread worker = new Thread(twitterExportJob);
        worker.start();
        return new ReturnT<>(200, "", "");
    } catch (Exception ex) {
        return new ReturnT<>(500, ex.getMessage(), "");
    }
}
}

View File

@ -1,50 +1,31 @@
package com.jsc.dsp.task;
import com.jsc.dsp.service.ConfigService;
import com.jsc.dsp.utils.AutoExportAndUpload;
import com.jsc.dsp.utils.DatabaseConnector;
import com.jsc.dsp.utils.FTPConnector;
import com.jsc.dsp.utils.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.jsc.dsp.utils.ExportAndUploadUtils;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;
import javax.annotation.Resource;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.nio.file.attribute.BasicFileAttributes;
import java.nio.file.attribute.FileTime;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.Comparator;
import java.util.Date;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
/**
 * Scheduled entry points for the export-and-upload jobs.
 * The bean is registered only when switch.auto-export-and-upload is "true"
 * or the property is missing (matchIfMissing = true).
 */
@Component
@ConditionalOnProperty(name = "switch.auto-export-and-upload", havingValue = "true", matchIfMissing = true)
public class AutoUpload {
@Resource
AutoExportAndUpload autoExportAndUploadComponent;
ExportAndUploadUtils exportAndUploadUtils;
// Injected from custom.ftpUploadPath; none of the methods shown in this class read it.
@Value("${custom.ftpUploadPath}")
String ftpUploadPath;
// The cron expression comes from configuration; the method only delegates to the export helper.
@Scheduled(cron = "${custom.exportTaskSchedule}")
@Scheduled(cron = "${custom.exportNewsTaskSchedule}")
public void exportNewsDataAndUpload() {
autoExportAndUploadComponent.exportDataAndUpload();
exportAndUploadUtils.exportNewsDataAndUpload();
}
// Same delegation for the Twitter export, driven by custom.exportTwitterTaskSchedule.
@Scheduled(cron = "${custom.exportTwitterTaskSchedule}")
public void exportTwitterDataAndUpload() {
exportAndUploadUtils.exportTwitterDataAndUpload();
}
}

View File

@ -26,7 +26,9 @@ import java.nio.file.Paths;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.List;
import com.fasterxml.jackson.databind.JsonNode;
import java.util.ArrayList;
@Service
@ -40,8 +42,11 @@ public class DatabaseConnector {
@Resource
EsDataTwitterRepository esDataTwitterRepository;
@Value("${custom.excelOutputPath}")
String excelOutputPath;
@Value("${custom.newsExcelOutputPath}")
String newsExcelOutputPath;
@Value("${custom.twitterExcelOutputPath}")
String twitterExcelOutputPath;
private static final ObjectMapper objectMapper = new ObjectMapper();
@ -77,7 +82,7 @@ public class DatabaseConnector {
public void exportToXlsx(String startTime) {
try {
Path dirPath = Paths.get(excelOutputPath);
Path dirPath = Paths.get(newsExcelOutputPath);
if (!Files.exists(dirPath)) {
Files.createDirectories(dirPath);
}
@ -133,8 +138,16 @@ public class DatabaseConnector {
row.createCell(9).setCellValue(item.getEsLoadtime());
row.createCell(10).setCellValue(item.getEsSitename());
row.createCell(11).setCellValue(item.getEsSrcname());
row.createCell(12).setCellValue(item.getEsUrlcontent());
row.createCell(13).setCellValue(item.getEsUrlcontentTranslate());
// Excel rejects cell text longer than 32,767 characters, so long bodies are
// capped at 30,000 characters before being written.
// The null checks matter: calling length() on a missing value would throw,
// while setCellValue(String) itself accepts null and leaves the cell blank.
if (item.getEsUrlcontent() != null && item.getEsUrlcontent().length() > 30000) {
    row.createCell(12).setCellValue(item.getEsUrlcontent().substring(0, 30000));
} else {
    row.createCell(12).setCellValue(item.getEsUrlcontent());
}
if (item.getEsUrlcontentTranslate() != null && item.getEsUrlcontentTranslate().length() > 30000) {
    row.createCell(13).setCellValue(item.getEsUrlcontentTranslate().substring(0, 30000));
} else {
    row.createCell(13).setCellValue(item.getEsUrlcontentTranslate());
}
row.createCell(14).setCellValue(item.getEsUrlimage());
row.createCell(15).setCellValue(item.getEsUrlname());
row.createCell(16).setCellValue(item.getEsUrltime());
@ -169,7 +182,7 @@ public class DatabaseConnector {
public void twitterToXlsx(String startTime) {
try {
Path dirPath = Paths.get(excelOutputPath);
Path dirPath = Paths.get(twitterExcelOutputPath);
if (!Files.exists(dirPath)) {
Files.createDirectories(dirPath);
}
@ -314,7 +327,6 @@ public class DatabaseConnector {
}
public String extractFilenamesFromJsonArray(String jsonStr) {
if (jsonStr == null || jsonStr.trim().isEmpty()) {
return "";

View File

@ -4,15 +4,10 @@ import com.jsc.dsp.service.ConfigService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;
import javax.annotation.Resource;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.*;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
@ -21,15 +16,19 @@ import java.nio.file.attribute.BasicFileAttributes;
import java.nio.file.attribute.FileTime;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;
import java.util.Comparator;
import java.util.Date;
import java.util.List;
import java.util.stream.Collectors;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
@Component
public class AutoExportAndUpload {
public class ExportAndUploadUtils {
@Resource
DatabaseConnector databaseConnector;
@ -37,6 +36,9 @@ public class AutoExportAndUpload {
@Resource
FTPConnector ftpConnector;
@Resource
SFTPConnector sftpConnector;
@Resource
ConfigService configService;
@ -46,8 +48,11 @@ public class AutoExportAndUpload {
private static final SimpleDateFormat sdf = new SimpleDateFormat(DATE_FORMAT);
@Value("${custom.excelOutputPath}")
String excelOutputPath;
@Value("${custom.newsExcelOutputPath}")
String newsExcelOutputPath;
@Value("${custom.twitterExcelOutputPath}")
String twitterExcelOutputPath;
@Value("${custom.backupFilePath}")
String backupFilePath;
@ -61,7 +66,7 @@ public class AutoExportAndUpload {
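/**
* Data export task, intended to run at 8:00 on Mondays and Fridays (the actual schedule is set by the caller, not by this method).
*/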
public void exportDataAndUpload() {
public void exportNewsDataAndUpload() {
logger.info("开始导出excel和pdf数据...");
String lastLoadTime = configService.getConfigValueByName("last_loadtime");
String currentLoadTime = StringUtils.DateToString(new Date());
@ -72,21 +77,21 @@ public class AutoExportAndUpload {
String zipFileName = "data_news-" + timestamp + "-001.zip";
String zipFileFullName = backupFilePath + File.separator + zipFileName;
String remoteZipPath = ftpUploadPath + "/" + zipFileName;
zipAndUploadDirectory(excelOutputPath, zipFileFullName, remoteZipPath);
zipAndUploadDirectory(newsExcelOutputPath, zipFileFullName, remoteZipPath);
}
public void exportTwitterDataAndUpload(String startTime) {
public void exportTwitterDataAndUpload() {
logger.info("开始导出twitter excel数据...");
// String twitterLastLoadTime = configService.getConfigValueByName("twitter_last_loadtime");
String twitterLastLoadTime = configService.getConfigValueByName("twitter_last_loadtime");
String currentLoadTime = StringUtils.DateToString(new Date());
String timestamp = LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMdd"));
databaseConnector.twitterToXlsx(startTime);
copyPagesFiles(startTime, currentLoadTime);
databaseConnector.twitterToXlsx(twitterLastLoadTime);
unzipAndMoveVideosImages(twitterLastLoadTime, currentLoadTime);
configService.setConfigValueByName("twitter_last_loadtime", currentLoadTime);
String zipFileName = "data_twitter-" + timestamp + "-001.zip";
String zipFileFullName = backupFilePath + File.separator + zipFileName;
String remoteZipPath = ftpUploadPath + "/" + zipFileName;
zipAndUploadDirectory(excelOutputPath, zipFileFullName, remoteZipPath);
zipAndUploadDirectory(twitterExcelOutputPath, zipFileFullName, remoteZipPath);
}
/**
@ -124,16 +129,16 @@ public class AutoExportAndUpload {
}
// 上传 ZIP 文件
// try (InputStream zipInputStream = Files.newInputStream(localZipFile)) {
// boolean uploaded = ftpConnector.uploadFile(zipInputStream, remoteZipPath);
// if (uploaded) {
// logger.info("ZIP 文件上传成功 - 本地: {}, FTP: {}", localZipPath, remoteZipPath);
// } else {
// logger.error("ZIP 文件上传失败 - FTP: {}", remoteZipPath);
// }
// } catch (IOException e) {
// logger.error("读取本地 ZIP 文件失败: {}", localZipPath, e);
// }
try (InputStream zipInputStream = Files.newInputStream(localZipFile)) {
boolean uploaded = sftpConnector.uploadFile(zipInputStream, remoteZipPath);
if (uploaded) {
logger.info("ZIP 文件上传成功 - 本地: {}, FTP: {}", localZipPath, remoteZipPath);
} else {
logger.error("ZIP 文件上传失败 - FTP: {}", remoteZipPath);
}
} catch (IOException e) {
logger.error("读取本地 ZIP 文件失败: {}", localZipPath, e);
}
// 注意此处不再删除 localZipFile由调用方决定是否保留或清理
}
@ -182,6 +187,155 @@ public class AutoExportAndUpload {
}
}
/**
 * Unpacks the archive bundles of the day before {@code endTime} and moves
 * their contents into the Twitter export directory.
 *
 * <p>The archive directory is resolved as
 * {@code D:/data/dbzq_backup/{yyyy}/{yyyy-MM}/{yyyy-MM-dd}}. Bundles whose
 * names start with {@code image_data_plane_} are placed under {@code videos},
 * and bundles starting with {@code image_data_ship_} under {@code images},
 * both below {@code twitterExcelOutputPath}.
 *
 * @param startTime business start time; only written to the log here
 * @param endTime   business end time, e.g. {@code yyyy-MM-dd HH:mm:ss}
 * @throws RuntimeException wrapping any failure raised while processing
 */
public void unzipAndMoveVideosImages(String startTime, String endTime) {
    logger.info("开始处理存档文件: startTime={}, endTime={}", startTime, endTime);
    try {
        // The archive to process is the one written on the day before endTime.
        LocalDate dayBefore = parseEndDate(endTime).minusDays(1);
        String isoDay = DateTimeFormatter.ISO_DATE.format(dayBefore);

        Path archiveBaseDir = Paths.get(
                "D:/data/dbzq_backup",
                String.valueOf(dayBefore.getYear()),
                DateTimeFormatter.ofPattern("yyyy-MM").format(dayBefore),
                isoDay);

        boolean archiveUsable = Files.exists(archiveBaseDir) && Files.isDirectory(archiveBaseDir);
        if (!archiveUsable) {
            logger.error("存档目录不存在: {}", archiveBaseDir);
            throw new FileNotFoundException("存档目录不存在: " + archiveBaseDir);
        }
        logger.info("使用存档目录: {}", archiveBaseDir);

        // Make sure the export root exists before anything is moved into it.
        Path outputDir = Paths.get(twitterExcelOutputPath);
        Files.createDirectories(outputDir);
        logger.info("输出目录: {}", outputDir);

        // {bundle file prefix, target sub-directory}; videos first, then images.
        String[][] bundles = {
                {"image_data_plane_", "videos"},
                {"image_data_ship_", "images"}
        };
        for (String[] bundle : bundles) {
            processArchiveFiles(archiveBaseDir, bundle[0], bundle[1], outputDir);
        }

        logger.info("存档文件处理完成: {}", isoDay);
    } catch (Exception e) {
        logger.error("存档处理失败 [endTime={}]", endTime, e);
        throw new RuntimeException("存档处理异常: " + e.getMessage(), e);
    }
}
/**
 * Extracts the calendar date from an end-time string.
 *
 * <p>Only the leading {@code yyyy-MM-dd} part is interpreted, so values such
 * as {@code yyyy-MM-dd}, {@code yyyy-MM-dd HH:mm[:ss]} and
 * {@code yyyy-MM-dd'T'HH:mm:ss} are all accepted. Surrounding whitespace is
 * ignored.
 *
 * @param endTime end-time text; must start with an ISO date
 * @return the date portion of {@code endTime}
 * @throws IllegalArgumentException if {@code endTime} is null, blank, or does
 *                                  not start with a valid {@code yyyy-MM-dd} date
 */
private LocalDate parseEndDate(String endTime) {
    String unsupported = "无法解析 endTime 格式: " + endTime +
            ",支持格式: yyyy-MM-dd[ HH:mm:ss]";
    if (endTime == null || endTime.trim().isEmpty()) {
        // Fail with the documented exception type instead of an NPE.
        throw new IllegalArgumentException(unsupported);
    }

    String trimmed = endTime.trim();
    // Every supported format starts with the 10-character ISO date.
    String datePart = trimmed.length() > 10 ? trimmed.substring(0, 10) : trimmed;
    try {
        return LocalDate.parse(datePart, DateTimeFormatter.ISO_LOCAL_DATE);
    } catch (DateTimeParseException e) {
        throw new IllegalArgumentException(unsupported, e);
    }
}
/**
 * Extracts every {@code <filePrefix>*.tar.gz} bundle found directly inside
 * {@code archiveDir} and moves the extracted files, flattened, into
 * {@code outputDir/targetDirName}.
 *
 * <p>All bundles are first extracted into one temporary directory, which is
 * always removed afterwards. Failures during extraction or moving are logged
 * and not propagated, so one bad bundle type does not abort the caller.
 *
 * @param archiveDir    directory that holds the bundles
 * @param filePrefix    file-name prefix of the bundles (e.g. "image_data_plane_")
 * @param targetDirName name of the sub-directory to create under {@code outputDir} (e.g. "videos")
 * @param outputDir     export root directory
 * @throws IOException if the archive directory cannot be listed or the
 *                     temporary directory cannot be created
 */
private void processArchiveFiles(Path archiveDir, String filePrefix,
                                 String targetDirName, Path outputDir) throws IOException {
    // Files.list keeps a directory handle open until the stream is closed,
    // so it has to be used inside try-with-resources.
    List<Path> tarFiles;
    try (java.util.stream.Stream<Path> entries = Files.list(archiveDir)) {
        tarFiles = entries
                .filter(path -> Files.isRegularFile(path)
                        && path.getFileName().toString().startsWith(filePrefix)
                        && path.getFileName().toString().endsWith(".tar.gz"))
                .sorted() // process bundles in file-name order
                .collect(Collectors.toList());
    }

    if (tarFiles.isEmpty()) {
        logger.warn("未找到 {} 开头的压缩包: {}", filePrefix, archiveDir);
        return;
    }
    logger.info("找到 {} 个 {} 压缩包: {}", tarFiles.size(), filePrefix,
            tarFiles.stream().map(Path::getFileName).collect(Collectors.toList()));

    // One shared scratch directory merges the contents of all bundles.
    Path tempMergeDir = Files.createTempDirectory("archive_merge_");
    logger.debug("创建临时合并目录: {}", tempMergeDir);
    try {
        // Step 1: extract every bundle into the scratch directory.
        int totalFiles = 0;
        for (Path tarFile : tarFiles) {
            logger.info("解压压缩包: {}", tarFile.getFileName());
            totalFiles += FileUtils.extractTarGz(tarFile.toFile(), tempMergeDir.toFile());
        }
        if (totalFiles == 0) {
            logger.warn("解压后未发现任何文件,跳过移动: {}", filePrefix);
            return;
        }
        logger.info("共解压 {} 个文件到临时目录", totalFiles);

        // Step 2: move all files, without their directory structure, to the target.
        Path targetPath = outputDir.resolve(targetDirName);
        Files.createDirectories(targetPath);
        int movedCount = FileUtils.flattenAndMoveFiles(tempMergeDir, targetPath);
        logger.info("成功平铺移动 {} 个文件到: {}", movedCount, targetPath);
    } catch (Exception e) {
        // Best effort by design, but report through the logger rather than stderr.
        logger.error("处理 {} 压缩包失败: {}", filePrefix, archiveDir, e);
    } finally {
        try {
            FileUtils.deleteDirectory(tempMergeDir);
            logger.debug("已清理临时目录: {}", tempMergeDir);
        } catch (Exception e) {
            logger.warn("清理临时目录失败: {}", tempMergeDir, e);
        }
    }
}
public void copyPagesFiles(String startTime, String endTime) {
try {
logger.info("开始复制PDF...");
@ -197,7 +351,7 @@ public class AutoExportAndUpload {
}
// 目标目录 excelOutputPath 下创建 pdf 子目录
Path targetBaseDir = Paths.get(excelOutputPath);
Path targetBaseDir = Paths.get(newsExcelOutputPath);
Path targetPdfDir = targetBaseDir.resolve("pdf");
// 确保目标目录存在

View File

@ -1,23 +1,25 @@
package com.jsc.dsp.utils;
import org.apache.commons.compress.archivers.ArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.util.Calendar;
import java.util.Date;
import java.util.HashSet;
import java.util.logging.Logger;
import java.nio.file.*;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.*;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class FileUtils {
private final Logger logger = Logger.getLogger(this.getClass().getName());
private final Logger logger = LoggerFactory.getLogger(this.getClass().getName());
public FileUtils() {
}
@ -79,7 +81,7 @@ public class FileUtils {
public int downloadFromUrl(String urlStr, String savePath) {
try {
if (downloadedFileSet.contains(urlStr)) {
logger.warning("File exist from " + urlStr);
logger.warn("File exist from " + urlStr);
return 2;
}
String[] urlCascade = urlStr.split("/");
@ -183,6 +185,180 @@ public class FileUtils {
}
}
/**
 * Extracts a gzip-compressed tar archive into {@code destDir}.
 *
 * <p>Entries with a blank name, symbolic links, and character/block device
 * entries are skipped, as are entries whose resolved path would fall outside
 * {@code destDir}. Existing files are replaced.
 *
 * @param tarFile the {@code .tar.gz} file to read
 * @param destDir directory to extract into; created when missing
 * @return number of non-directory entries written
 * @throws IOException if {@code destDir} cannot be created or reading/writing fails
 */
public static int extractTarGz(File tarFile, File destDir) throws IOException {
    if (!destDir.exists() && !destDir.mkdirs()) {
        throw new IOException("无法创建目标目录: " + destDir.getAbsolutePath());
    }

    int written = 0;
    try (FileInputStream rawIn = new FileInputStream(tarFile);
         BufferedInputStream bufferedIn = new BufferedInputStream(rawIn);
         GzipCompressorInputStream gzipIn = new GzipCompressorInputStream(bufferedIn);
         TarArchiveInputStream tarIn = new TarArchiveInputStream(gzipIn)) {

        for (TarArchiveEntry item = tarIn.getNextTarEntry();
             item != null;
             item = tarIn.getNextTarEntry()) {

            // Ignore nameless entries, symlinks and device nodes.
            boolean ignorable = item.getName().trim().isEmpty()
                    || item.isSymbolicLink()
                    || item.isCharacterDevice()
                    || item.isBlockDevice();
            if (ignorable) {
                continue;
            }

            // Reject entries that would escape the destination directory.
            Path resolved = destDir.toPath().resolve(item.getName()).normalize();
            boolean insideDest = resolved.startsWith(destDir.toPath().normalize());
            if (!insideDest) {
                continue;
            }

            if (item.isDirectory()) {
                Files.createDirectories(resolved);
                continue;
            }

            // Regular entry: make sure the parent exists, then stream the bytes out.
            Files.createDirectories(resolved.getParent());
            Files.copy(tarIn, resolved, StandardCopyOption.REPLACE_EXISTING);
            written++;
        }
        return written;
    }
}
/**
 * Recursively deletes {@code path} together with everything below it.
 * Does nothing when {@code path} does not exist.
 *
 * @param path file or directory tree to remove
 * @throws IOException if any entry cannot be visited or deleted
 */
public static void deleteDirectory(Path path) throws IOException {
    if (!Files.exists(path)) return;
    Files.walkFileTree(path, new SimpleFileVisitor<Path>() {
        @Override
        public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
            Files.delete(file);
            return FileVisitResult.CONTINUE;
        }

        @Override
        public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException {
            // A non-null exc means iterating this directory failed; surface that
            // root cause instead of a follow-up "directory not empty" error.
            if (exc != null) {
                throw exc;
            }
            Files.delete(dir);
            return FileVisitResult.CONTINUE;
        }
    });
}
/**
 * Moves every non-directory entry below {@code sourceDir} to the same
 * relative location below {@code targetDir}, replacing existing files.
 * Does nothing when {@code sourceDir} is missing or not a directory.
 *
 * @param sourceDir root of the tree to move files out of
 * @param targetDir root of the tree to move files into; parent directories
 *                  are created as needed
 * @throws IOException if walking the source tree or moving a file fails
 */
public static void moveAllFilesRecursively(Path sourceDir, Path targetDir) throws IOException {
    if (!Files.exists(sourceDir) || !Files.isDirectory(sourceDir)) {
        return;
    }

    // Snapshot the file list first so the tree is not modified while it is walked.
    List<Path> files;
    try (Stream<Path> walkStream = Files.walk(sourceDir)) {
        files = walkStream
                .filter(path -> !Files.isDirectory(path))
                .sorted()
                .collect(Collectors.toList());
    } catch (UncheckedIOException e) {
        // Files.walk reports traversal errors wrapped; unwrap to the declared type.
        throw e.getCause();
    }

    for (Path file : files) {
        Path targetFile = targetDir.resolve(sourceDir.relativize(file));
        Files.createDirectories(targetFile.getParent());
        // Files.move only supports REPLACE_EXISTING and ATOMIC_MOVE; passing
        // COPY_ATTRIBUTES makes it throw UnsupportedOperationException.
        Files.move(file, targetFile, StandardCopyOption.REPLACE_EXISTING);
    }
}
/**
 * Moves every regular file found anywhere below {@code sourceDir} directly
 * into {@code targetDir}, discarding the directory structure. A file whose
 * name already exists in {@code targetDir} replaces the existing one.
 *
 * <p>A file that cannot be moved is reported on stderr and skipped; the
 * remaining files are still processed.
 *
 * @param sourceDir directory tree to collect files from
 * @param targetDir existing directory that receives the files
 * @return number of files moved successfully; 0 when {@code sourceDir} is
 *         missing or not a directory
 * @throws Exception if walking {@code sourceDir} fails
 */
public static int flattenAndMoveFiles(Path sourceDir, Path targetDir) throws Exception {
    if (!Files.exists(sourceDir) || !Files.isDirectory(sourceDir)) {
        return 0;
    }

    // Snapshot the file list first so the tree is not modified while it is walked.
    List<Path> files;
    try (Stream<Path> walkStream = Files.walk(sourceDir)) {
        files = walkStream
                .filter(path -> Files.isRegularFile(path))
                .collect(Collectors.toList());
    } catch (UncheckedIOException e) {
        throw e.getCause();
    }

    int movedCount = 0;
    for (Path file : files) {
        Path targetFile = targetDir.resolve(file.getFileName().toString());
        try {
            // REPLACE_EXISTING is required for the documented overwrite behaviour;
            // without it a name collision raises FileAlreadyExistsException.
            Files.move(file, targetFile, StandardCopyOption.REPLACE_EXISTING);
            movedCount++;
        } catch (Exception e) {
            // Best effort per file. This static context has no logger instance,
            // so the failure goes to stderr together with the affected paths.
            System.err.println("Failed to move " + file + " to " + targetFile);
            e.printStackTrace();
        }
    }
    return movedCount;
}
/**
 * Removes everything inside {@code dir} while keeping {@code dir} itself.
 * Does nothing when {@code dir} does not exist.
 *
 * @param dir directory to empty
 * @throws IOException if an entry cannot be listed or deleted
 */
public static void cleanDirectory(Path dir) throws IOException {
    if (Files.exists(dir)) {
        try (DirectoryStream<Path> children = Files.newDirectoryStream(dir)) {
            for (Path child : children) {
                if (!Files.isDirectory(child)) {
                    Files.delete(child);
                    continue;
                }
                // Sub-directories are removed together with their contents.
                deleteDirectory(child);
            }
        }
    }
}
// Manual smoke entry point: writes a small JSON string to a fixed local path
// through saveStringToFile. NOTE(review): the path is machine-specific;
// presumably a developer leftover — confirm whether it should stay.
public static void main(String[] args) {
saveStringToFile("{\"aaa\":\"测试测试testtest\"}", "E:/yuxin/test.json");
}

View File

@ -0,0 +1,138 @@
package com.jsc.dsp.utils;
import com.jcraft.jsch.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;
/**
 * Spring component that uploads a stream to an SFTP server using JSch with
 * password authentication. Connection settings come from the {@code sftp.*}
 * properties.
 */
@Component
public class SFTPConnector {

    private static final Logger log = LoggerFactory.getLogger(SFTPConnector.class);

    @Value("${sftp.host}")
    private String host;

    @Value("${sftp.port:22}") // defaults to the standard SSH/SFTP port
    private Integer port;

    @Value("${sftp.username}")
    private String username;

    @Value("${sftp.password}") // password authentication; a private key is preferable in production
    private String password;

    @Value("${sftp.timeout:30000}")
    private Integer timeout; // milliseconds

    @Value("${sftp.strictHostKeyChecking:false}") // false is only meant for test environments
    private boolean strictHostKeyChecking;

    /**
     * Uploads a stream to the SFTP server, creating missing remote directories.
     *
     * @param inputStream source data; always closed by this method
     * @param remotePath  absolute remote file path, e.g. /upload/2024/file.pdf
     * @return {@code true} when the upload succeeded, {@code false} on any failure
     */
    public boolean uploadFile(InputStream inputStream, String remotePath) {
        Session session = null;
        ChannelSftp channelSftp = null;
        try {
            // 1. Set up the JSch session.
            JSch jsch = new JSch();
            session = jsch.getSession(username, host, port);
            session.setPassword(password);
            session.setTimeout(timeout);

            // 2. SSH options. JSch only understands "yes" / "no" / "ask" for
            //    StrictHostKeyChecking; "true" / "false" are not recognised and
            //    leave host-key verification off regardless of the setting.
            //    With "yes" the server key must be present in JSch's known hosts.
            Properties config = new Properties();
            config.put("StrictHostKeyChecking", strictHostKeyChecking ? "yes" : "no");
            session.setConfig(config);

            // 3. Connect and open the SFTP channel.
            session.connect();
            channelSftp = (ChannelSftp) session.openChannel("sftp");
            channelSftp.connect(timeout);

            // 4. Make sure the target directory exists.
            ensureDirectoryExists(channelSftp, remotePath);

            // 5. Upload; the stream itself is closed in the finally block below.
            channelSftp.put(inputStream, remotePath);
            log.info("SFTP 文件上传成功: {}", remotePath);
            return true;
        } catch (JSchException | SftpException e) {
            log.error("SFTP 上传失败 [host={}, path={}]: {}", host, remotePath, e.getMessage(), e);
            return false;
        } catch (Exception e) {
            log.error("SFTP 上传异常 [path={}]: {}", remotePath, e.getMessage(), e);
            return false;
        } finally {
            // 6. Cleanup: stream first, then channel, then session.
            closeQuietly(inputStream);
            if (channelSftp != null && channelSftp.isConnected()) {
                try {
                    channelSftp.disconnect();
                } catch (Exception e) {
                    log.warn("关闭 SFTP 通道异常", e);
                }
            }
            if (session != null && session.isConnected()) {
                session.disconnect();
            }
        }
    }

    /**
     * Creates, level by level, the remote directory that will contain
     * {@code remotePath}. Each level is entered with {@code cd}; when that
     * fails the level is created and then entered.
     *
     * @param sftp       connected SFTP channel
     * @param remotePath full remote file path, including the file name
     * @throws SftpException if a directory can neither be entered nor created
     */
    private void ensureDirectoryExists(ChannelSftp sftp, String remotePath) throws SftpException {
        String dirPath = extractDirectory(remotePath);
        if ("/".equals(dirPath)) return;

        String[] dirs = dirPath.split("/");
        StringBuilder current = new StringBuilder();
        for (String dir : dirs) {
            if (dir.isEmpty()) continue; // leading or doubled slash
            current.append("/").append(dir);
            try {
                sftp.cd(current.toString());
            } catch (SftpException e) {
                sftp.mkdir(current.toString());
                sftp.cd(current.toString());
            }
        }
    }

    /**
     * Returns the directory part of a path, e.g. /a/b/file.txt gives /a/b.
     * Paths without a slash, or with only a leading slash, yield "/".
     */
    private String extractDirectory(String path) {
        int lastSlash = path.lastIndexOf('/');
        return (lastSlash <= 0) ? "/" : path.substring(0, lastSlash);
    }

    /**
     * Closes the stream, logging (at debug level) instead of propagating failures.
     */
    private void closeQuietly(InputStream is) {
        if (is != null) {
            try {
                is.close();
            } catch (IOException e) {
                log.debug("关闭输入流时忽略异常", e);
            }
        }
    }
}

View File

@ -0,0 +1,94 @@
package com.jsc.dsp.utils;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.google.protobuf.Descriptors;
import com.google.protobuf.GeneratedMessageV3;
import com.google.protobuf.InvalidProtocolBufferException;
import com.jsc.dsp.proto.EsOuterClass;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Map;
/**
 * Converts {@code EsSets} protobuf messages to JSON by walking the message
 * fields manually and building FastJSON objects.
 */
public class TodistParseUtil {

    /**
     * Renders an {@code EsSets} message as pretty-printed JSON of the form
     * {@code {"es": [ ... ]}}, with one array element per contained {@code Es}.
     *
     * @param esSets parsed protobuf message
     * @return formatted JSON text
     */
    public static String protobufToJson(EsOuterClass.EsSets esSets) {
        JSONObject root = new JSONObject();

        // The repeated Es field becomes a JSON array.
        JSONArray esArray = new JSONArray();
        for (EsOuterClass.Es es : esSets.getEsList()) {
            esArray.add(messageToJson(es));
        }
        root.put("es", esArray);

        return JSON.toJSONString(root, true); // true = pretty format
    }

    /**
     * Converts the fields returned by {@code getAllFields()} into a JSON
     * object keyed by field name. Repeated fields become arrays and nested
     * messages are converted recursively.
     */
    private static JSONObject messageToJson(GeneratedMessageV3 message) {
        JSONObject json = new JSONObject();
        Map<Descriptors.FieldDescriptor, Object> fields = message.getAllFields();

        for (Map.Entry<Descriptors.FieldDescriptor, Object> entry : fields.entrySet()) {
            Descriptors.FieldDescriptor field = entry.getKey();
            Object value = entry.getValue();

            if (field.isRepeated()) {
                JSONArray array = new JSONArray();
                if (value instanceof Iterable) {
                    for (Object item : (Iterable<?>) value) {
                        array.add(convertFieldValue(item));
                    }
                }
                json.put(field.getName(), array);
            } else {
                json.put(field.getName(), convertFieldValue(value));
            }
        }
        return json;
    }

    /**
     * Converts nested messages recursively; every other value is handed to
     * FastJSON unchanged.
     */
    private static Object convertFieldValue(Object value) {
        if (value instanceof GeneratedMessageV3) {
            return messageToJson((GeneratedMessageV3) value);
        }
        return value;
    }

    /**
     * Manual entry point: parses a {@code .todist} file and prints it as JSON.
     *
     * @param args optional; {@code args[0]} overrides the default input file path
     */
    public static void main(String[] args) {
        String filePath = (args != null && args.length > 0)
                ? args[0]
                : "C:/Users/yuxin/Documents/xwechat_files/wxid_dtvj9sibla0d21_9cb3/msg/file/2026-02/public_info_data_1770264282958.todist";

        try {
            // 1. Read the whole file into memory (not streamed; very large
            //    files need correspondingly more heap).
            byte[] data = Files.readAllBytes(Paths.get(filePath));

            // 2. Deserialize the protobuf payload.
            EsOuterClass.EsSets esSets = EsOuterClass.EsSets.parseFrom(data);
            System.out.println("✅ 成功解析 EsSets" + esSets.getEsCount() + " 条记录");

            // 3. Convert to JSON with the FastJSON-based converter above.
            String json = protobufToJson(esSets);

            // 4. Print the formatted JSON; "\n" (not "/n") yields the blank line.
            System.out.println("\n📄 JSON Output:");
            System.out.println(json);

        } catch (InvalidProtocolBufferException e) {
            System.err.println("❌ Protobuf 解析失败: " + e.getMessage());
            e.printStackTrace();
        } catch (IOException e) {
            System.err.println("❌ 文件读取失败: " + e.getMessage());
            e.printStackTrace();
        } catch (Exception e) {
            System.err.println("❌ 未知错误: " + e.getMessage());
            e.printStackTrace();
        }
    }
}

View File

@ -59,11 +59,12 @@ topics:
stream-db: com.jsc.dsp.service.StorageService
stream-file-dl: com.jsc.dsp.service.FileDlService
# 本地调试时这几个开关设置为 false
switch:
enable-storage-service: false
enable-file-dl-service: false
enable-protobuf-service: false
auto-export-and-upload: true
auto-export-and-upload: false
ftp:
host: 144.34.185.108
@ -73,6 +74,12 @@ ftp:
timeout: 5000
passive-mode: true
# SFTP upload target read by SFTPConnector (sftp.host / port / username / password).
# NOTE(review): the credentials below are stored in plaintext in version control;
# consider supplying them from the environment or a secret store and rotating
# the password — confirm with the deployment owner.
sftp:
host: 74.121.148.204
port: 22
username: root
password: NSgRMhIXL6gp
custom:
dev-mode: false
filter-words-query-api: http://47.115.228.133:28081/api/open/wordBank/queryAll
@ -87,10 +94,12 @@ custom:
websiteUpdateAPI: http://47.115.228.133:28081/api/open/target/website/update
socialQueryAPI: http://47.115.228.133:28081/api/open/target/social/queryAll?sortBy=id&shuffleResult=false
socialUpdateAPI: http://47.115.228.133:28081/api/open/target/social/update
websiteWhiteList: 能源界(国内信息);能源界(国际信息);中国能源新闻网;新华能源网;中国能源网(能源战略);中国农网(三农要闻);中国经济网(三农经济);中华粮网(粮食安全);美国之音(中国版面);美国之音(中美关系);美国之音(台海两岸版面);美国之音(港澳版面);看中国(看大陆版面);看中国(重点新闻);德国之声(中国报道);纽约时报中文网(中国版面);大纪元(一周大陆新闻);EnergyNow;联合国粮农组织;路透社(中国版面)
websiteWhiteList: 能源界(国内信息);能源界(国际信息);中国能源新闻网;新华能源网;中国能源网(能源战略);中国农网(三农要闻);中国经济网(三农经济);中华粮网(粮食安全);美国之音(中国版面);美国之音(中美关系);美国之音(台海两岸版面);美国之音(港澳版面);看中国(看大陆版面);看中国(重点新闻);德国之声(中国报道);纽约时报中文网(中国版面);大纪元(一周大陆新闻);EnergyNow;联合国粮农组织;路透社(中国版面);朝中社;劳动新闻;美国农业部食品和营养服务局;布鲁金斯学会(亚太版面);俄罗斯新闻社;美国能源部;朝鲜新闻;联邦能源管理委员会;华盛顿邮报;ChinaAid;美国战略与国际研究中心;美国外交关系委员会;美国兰德;国际危机组织;美国国务院东亚与太平洋事务局;俄罗斯卫星通讯社;尤里·列瓦达分析中心;塔斯社;韩国外交部
twitterWhiteList: nytchinese;YesterdayBigcat;takaichi_sanae;yonhapcn;VOAChinese;ChineseWSJ;whyyoutouzhele;Jaemyung_Lee
excelOutputPath: D:/data/output/upload
newsExcelOutputPath: D:/data/output/upload
twitterExcelOutputPath: D:/data/output/twitter
backupFilePath: D:/data/output/backup
pagesOutputPath: D:/data/output/pdf
ftpUploadPath: /home/jsc-2b
exportTaskSchedule: "0 0 12 * * 1,3,5"
exportNewsTaskSchedule: "0 30 8 * * 1,2,3,4,5,6,7"
exportTwitterTaskSchedule: "0 30 6 * * 1,2,3,4,5,6,7"

View File

@ -21,14 +21,11 @@ DB_CONFIG = {
TRANSLATE_API_URL = "http://47.113.231.200:28081/translate"
# 指定时间格式YYYY-MM-DD HH:MM:SS
LOADTIME_AFTER = "2026-01-16 10:40:00"
LOADTIME_AFTER = "2026-02-10 11:59:00"
# 目标站点列表
TARGET_SRCNAMES = [
'http://www.rodong.rep.kp/ko/index.php?MUBAMUAxQA==',
'http://www.kcna.kp/kp/category/articles/q/5394b80bdae203fadef02522cfb578c0.kcmsf',
'https://energynow.com/category/press_releases/',
'https://www.fao.org/newsroom/en' # 添加你的站点
'https://www.38north.org/' # 添加你的站点
]
# 单次请求间隔(秒),避免 API 被限流
@ -104,7 +101,7 @@ def translate_content_with_paragraphs(content: str) -> str:
def update_record(cursor, es_sid: int, new_title: str, new_content: str):
update_query = """
UPDATE indeximos
SET es_title = % s, es_content = % s
SET es_abstract = % s, es_content = % s
WHERE es_sid = % s
"""
cursor.execute(update_query, (new_title, new_content, es_sid))
@ -122,8 +119,8 @@ def main():
SELECT es_sid, es_urltitle, es_urlcontent
FROM indeximos
WHERE es_loadtime > %s
AND (es_title IS NULL OR TRIM(es_title) = '')
AND es_srcname IN ({placeholders})
AND (es_content IS NULL OR TRIM(es_content) = '')
-- AND es_srcname IN ({placeholders})
AND LENGTH(es_video) > 5
"""
params = [LOADTIME_AFTER] + TARGET_SRCNAMES

View File

@ -76,7 +76,7 @@ class SeleniumMiddleware:
}
edge_options.add_experimental_option("prefs", prefs)
self.driver = Edge(executable_path="C:/Users/DELL/Downloads/edgedriver_win64/msedgedriver.exe", options=edge_options)
self.driver = Edge(executable_path=r"C:\Program Files\Python38\msedgedriver.exe", options=edge_options)
@classmethod
def from_crawler(cls, crawler):

View File

@ -110,8 +110,8 @@ CUSTOM_USER_AGENT = [
# 部署在外网采集fb时使用selenium_chrome
SELENIUM_DRIVER_NAME = 'chrome'
SELENIUM_DRIVER_EXECUTABLE_PATH = 'local'
# SELENIUM_DRIVER_EXECUTABLE_PATH = 'http://144.34.185.108:28098'
# SELENIUM_DRIVER_EXECUTABLE_PATH = 'local'
SELENIUM_DRIVER_EXECUTABLE_PATH = 'http://144.34.185.108:28098'
SELENIUM_DRIVER_ARGUMENTS = [
'--headless',
'--no-sandbox',

View File

@ -1,24 +1,17 @@
# -*- coding: utf-8 -*-
import json
import logging as logger
import random
import re
import time
from urllib import parse
import redis
import scrapy
from scrapy_selenium import SeleniumRequest
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from MediaSpiders.items import MediaspidersItem
from MediaSpiders.utils.http_utils import http_post
from MediaSpiders.utils.login_utils import login
from MediaSpiders.utils.time_utils import get_time_stamp, get_current_timestamp
from selenium.webdriver.common.action_chains import ActionChains
from MediaSpiders.utils.traslate_utils import translate_single, translate_content_with_paragraphs, needs_translation
@ -42,8 +35,8 @@ class TwitterSpider(scrapy.Spider):
'IMAGES_RESULT_FIELD': 'es_urlimage',
'FILES_STORE': r'/usr/local/videos',
'FILES_RESULT_FIELD': 'es_video',
'ZIP_FILE_NAME': 'image_data_publicinfo_',
'FILE_ZIP_FILE_NAME': 'image_data_plane_',
'ZIP_FILE_NAME': 'image_data_ship_', # 图片包名称
'FILE_ZIP_FILE_NAME': 'image_data_plane_', # 视频包名称
'ITEM_PIPELINES': {
'scrapy.pipelines.images.ImagesPipeline': 2,
'scrapy.pipelines.files.FilesPipeline': 1,
@ -79,32 +72,34 @@ class TwitterSpider(scrapy.Spider):
self.redis_client = redis.Redis(host=self.settings['REDIS_HOST'], port=self.settings['REDIS_PORT'],
password=self.settings['REDIS_PWD'])
self.simhash_filter_key = self.settings['TWITTER_SIMHASH_FILTER_KEY']
cookie_string = None
# 获取采集登录账号并登录
login_users = self.redis_client.smembers('MediaSpiders:Twitter_login_accounts')
# 尝试自动化登录网页获取 cookies若失败则从redis中 使用已有cookies
try:
driver = login().login_with_selenium(
'https://x.com/i/flow/login',
self.name,
login_users=login_users,
response=response
)
cookies = driver.get_cookies()
# 取cookie中的ct0为x-csrf-token取gt为x-guest-token
self.cookie_dict = {}
for cookie in cookies:
self.cookie_dict[cookie['name']] = cookie['value']
except Exception as e:
logger.info("自动化获取cookies失败")
cookie_string = self.redis_client.get("MediaSpiders:Twitter_Cookies").decode()
# 从redis中 使用已有cookies否则自动化登录网页获取cookies
cookie_string = self.redis_client.get("MediaSpiders:Twitter_Cookies").decode()
ct0 = None
if cookie_string:
self.cookie_dict = form_cookie_dict(cookie_string)
# 5. 构建 headers
ct0 = self.cookie_dict.get('ct0')
if not ct0:
logger.error("redis中cookie缺失ct0 (CSRF token)")
return
else:
try:
driver = login().login_with_selenium(
'https://x.com/i/flow/login',
self.name,
login_users=login_users,
response=response
)
cookies = driver.get_cookies()
# 取cookie中的ct0为x-csrf-token取gt为x-guest-token
self.cookie_dict = {}
for cookie in cookies:
self.cookie_dict[cookie['name']] = cookie['value']
except Exception as e:
logger.info("自动化获取cookies失败")
self.header = {
'Host': 'api.twitter.com',
@ -137,7 +132,7 @@ class TwitterSpider(scrapy.Spider):
yield scrapy.Request(url=graphql_url, callback=self.parse,
meta={
'uid': user_info['userUid'],
'proxy': 'http://127.0.0.1:10809',
# 'proxy': 'http://127.0.0.1:10808',
'currentCount': 0
},
cookies=self.cookie_dict, headers=self.header)

View File

@ -1,6 +1,6 @@
syntax = "proto3";
message EsSets //es<EFBFBD><EFBFBD>
message EsSets
{
repeated Es Es = 1;
}
@ -8,79 +8,79 @@ message EsSets //es<65><73>
message Es
{
string es_sid = 1; //<EFBFBD><EFBFBD><EFBFBD><EFBFBD>
string es_subjectId = 2; //<EFBFBD><EFBFBD><EFBFBD><EFBFBD>id
string es_hkey = 3; //URLΨһ<EFBFBD><EFBFBD><EFBFBD>
string es_pkey = 4; //<EFBFBD><EFBFBD>URL<EFBFBD><EFBFBD><EFBFBD>
string es_startid = 5; //<EFBFBD><EFBFBD>ʼ<EFBFBD><EFBFBD><EFBFBD><EFBFBD>
string es_urlname = 6; //URL<EFBFBD><EFBFBD><EFBFBD><EFBFBD>
string es_sitename = 7; //<EFBFBD><EFBFBD>վ<EFBFBD><EFBFBD>
string es_extname = 8; //<EFBFBD><EFBFBD>׺<EFBFBD><EFBFBD>
string es_channel = 9; //<EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ƶ<EFBFBD><EFBFBD>
string es_groupname = 10; //<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
string es_urltitle = 11; //<EFBFBD><EFBFBD>ҳ<EFBFBD><EFBFBD><EFBFBD>ñ<EFBFBD><EFBFBD><EFBFBD>
string es_urltopic = 12; //<EFBFBD><EFBFBD><EFBFBD><EFBFBD>ҳ<title><EFBFBD>ñ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ı<EFBFBD><EFBFBD><EFBFBD>
string es_lasttime = 13; //<EFBFBD>ɼ<EFBFBD><EFBFBD><EFBFBD><EFBFBD>ڣ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ʱ<EFBFBD><EFBFBD><EFBFBD><EFBFBD>
string es_loadtime = 14; //<EFBFBD><EFBFBD><EFBFBD>ʱ<EFBFBD>ʵ<EFBFBD><EFBFBD><EFBFBD><EFBFBD>ES<EFBFBD><EFBFBD><EFBFBD>ʱ<EFBFBD>
string es_urldate = 15; //<EFBFBD><EFBFBD><EFBFBD>µķ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ڣ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
string es_urltime = 16; //<EFBFBD><EFBFBD><EFBFBD>µķ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ڣ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ʱ<EFBFBD><EFBFBD><EFBFBD><EFBFBD>
string es_srcname = 17; //<EFBFBD><EFBFBD><EFBFBD>µ<EFBFBD><EFBFBD><EFBFBD>Դ<EFBFBD><EFBFBD><EFBFBD><EFBFBD>ȱʧ<EFBFBD><EFBFBD>
string es_authors = 18; //<EFBFBD><EFBFBD><EFBFBD>µ<EFBFBD><EFBFBD><EFBFBD><EFBFBD>ߣ<EFBFBD><EFBFBD><EFBFBD>ȱʧ<EFBFBD><EFBFBD>
string es_district = 19; //<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>µ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>µĵ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ȱʧ<EFBFBD><EFBFBD>
string es_catalog = 20; //<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
string es_catalog1 = 21; //<EFBFBD><EFBFBD><EFBFBD><EFBFBD>һ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
string es_catalog2 = 22; //<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
string es_keywords = 23; //<EFBFBD><EFBFBD><EFBFBD>¹ؼ<EFBFBD><EFBFBD>ʣ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>½<EFBFBD><EFBFBD><EFBFBD><EFBFBD>ģ<EFBFBD>
string es_abstract = 24; //<EFBFBD><EFBFBD><EFBFBD>µ<EFBFBD>ժҪ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>½<EFBFBD><EFBFBD><EFBFBD><EFBFBD>ģ<EFBFBD>
string es_simflag = 25; //<EFBFBD>ظ<EFBFBD><EFBFBD><EFBFBD>ǣ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>֮<EFBFBD>ظ<EFBFBD><EFBFBD><EFBFBD>ҳ<EFBFBD><EFBFBD>HKEY
string es_simrank = 26; //<EFBFBD><EFBFBD><EFBFBD>ƶ<EFBFBD><EFBFBD><EFBFBD>ֵ
string es_urlimage = 27; //ͼƬ<EFBFBD><EFBFBD>ַ
string es_imageflag = 28; //<EFBFBD><EFBFBD>ҳ<EFBFBD><EFBFBD><EFBFBD><EFBFBD>ͼƬ<EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ŀ
string es_tableflag = 29; //<EFBFBD><EFBFBD>ҳ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ŀ
string es_doclength = 30; //<EFBFBD><EFBFBD><EFBFBD>ij<EFBFBD><EFBFBD><EFBFBD>
string es_content = 31; //<EFBFBD><EFBFBD>ҳ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ݣ<EFBFBD><EFBFBD><EFBFBD>ͼƬ<EFBFBD><EFBFBD>
string es_urlcontent = 32; //<EFBFBD><EFBFBD>ҳ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ݣ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ͼƬ<EFBFBD><EFBFBD>
string es_bbsnum = 33; //<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
string es_pagelevel = 34; //<EFBFBD><EFBFBD><EFBFBD><EFBFBD>ʼҳ<EFBFBD>ʼ<EFBFBD>IJ<EFBFBD><EFBFBD><EFBFBD>
string es_urllevel = 35; //<EFBFBD><EFBFBD><EFBFBD>ӵ<EFBFBD>Ŀ¼<EFBFBD><EFBFBD><EFBFBD>
string es_simhash = 36; //<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>simhashֵ
string es_ip = 37; //<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ip
string es_heat = 38; //<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ȶ<EFBFBD>
string es_similaritycount = 39; //<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
string es_similarity = 40; //<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>id
string es_similaritytime = 41; //<EFBFBD><EFBFBD><EFBFBD>ƶȼ<EFBFBD><EFBFBD><EFBFBD>ʱ<EFBFBD><EFBFBD>
string es_emotion = 42; //<EFBFBD><EFBFBD><EFBFBD>
string es_warningtime = 43; //Ԥ<EFBFBD><EFBFBD>ʱ<EFBFBD><EFBFBD>
string es_carriertype = 44; //<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
string es_commentcount = 45; //<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
string es_forwardcount = 46; //ת<EFBFBD><EFBFBD><EFBFBD><EFBFBD>
string es_positiveWords = 47; //<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
string es_negativeWords = 48; //<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
string es_negativeProbability = 49; //<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
string es_reportinfo = 50; //<EFBFBD>Ƿ<EFBFBD><EFBFBD>ϱ<EFBFBD><EFBFBD><EFBFBD>Ϣ
string es_attention = 51; //<EFBFBD>Ƿ<EFBFBD><EFBFBD>ע
string es_warning = 52; //<EFBFBD>Ƿ<EFBFBD>Ԥ<EFBFBD><EFBFBD>
string es_readsign = 53; //<EFBFBD>Ƿ<EFBFBD><EFBFBD>Ѷ<EFBFBD>
string es_briefing = 54; //<EFBFBD>Ƿ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
string es_warning_word = 55; //Ԥ<EFBFBD><EFBFBD><EFBFBD><EFBFBD>
string es_attentiontime = 56; //<EFBFBD><EFBFBD>עʱ<EFBFBD><EFBFBD>
string es_collection = 57; //<EFBFBD>Ƿ<EFBFBD><EFBFBD>ղ<EFBFBD>
string es_attachment = 58; //<EFBFBD><EFBFBD><EFBFBD><EFBFBD>
string es_userid = 59;//number,<EFBFBD>û<EFBFBD>id<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ý<EFBFBD><EFBFBD><EFBFBD>˻<EFBFBD>)
string es_contenttype = 60;//string,<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ͣ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Post<EFBFBD><EFBFBD><EFBFBD>ͣ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>status<EFBFBD><EFBFBD>link<EFBFBD><EFBFBD>photo<EFBFBD><EFBFBD>video<EFBFBD><EFBFBD>event<EFBFBD><EFBFBD>music<EFBFBD><EFBFBD>note<EFBFBD><EFBFBD>offer<EFBFBD><EFBFBD>album<EFBFBD>ȣ<EFBFBD>
string es_likecount = 61;//number,<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
string es_links = 62;//string<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>е<EFBFBD><EFBFBD><EFBFBD><EFBFBD>ӵ<EFBFBD>ַ<EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ƶ<EFBFBD>ļ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ӵ<EFBFBD>ַ
string es_reactioncount = 63;//number,<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
string es_linkdesc = 64;//string<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>һ<EFBFBD><EFBFBD>post <EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ϊ<EFBFBD><EFBFBD><EFBFBD>ӣ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ӵ<EFBFBD>һЩ<EFBFBD><EFBFBD>Ϣ
string es_repostuid = 65;//number<EFBFBD><EFBFBD>ת<EFBFBD><EFBFBD>ԭ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ߵ<EFBFBD>ID
string es_repostuname =66;//string<EFBFBD><EFBFBD>ת<EFBFBD><EFBFBD>ԭ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ߵ<EFBFBD>name
string es_repostid = 67;//string<EFBFBD><EFBFBD>ת<EFBFBD><EFBFBD>ԭ<EFBFBD><EFBFBD>ID
string es_tags = 68;//string<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
string es_mentionsaccount = 69;//string<EFBFBD><EFBFBD><EFBFBD><EFBFBD>˺<EFBFBD>
string es_video = 70;//string<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>е<EFBFBD><EFBFBD><EFBFBD>Ƶ<EFBFBD><EFBFBD><EFBFBD><EFBFBD>
string es_isrepost = 71;//boolean<EFBFBD><EFBFBD><EFBFBD>Ƿ<EFBFBD>ת<EFBFBD><EFBFBD>
string es_lang = 72;//string<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
string es_client = 73;//string<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ͻ<EFBFBD><EFBFBD><EFBFBD>
// Field notes are reconstructed from mis-encoded legacy comments (GBK text that was read as UTF-8).
// Notes marked (?) could not be recovered and are guessed from the field name only;
// confirm them before relying on them. Every field is carried as a string, including those
// whose legacy note described them as number or boolean.
string es_sid = 1; // record id (?)
string es_subjectId = 2; // subject id (?)
string es_hkey = 3; // unique identifier of the URL
string es_pkey = 4; // identifier of the parent URL (?)
string es_startid = 5; // starting-point id (?)
string es_urlname = 6; // URL name
string es_sitename = 7; // site name
string es_extname = 8; // suffix (extension) name
string es_channel = 9; // channel the item belongs to
string es_groupname = 10; // group name (?)
string es_urltitle = 11; // title under which the page is referenced (?)
string es_urltopic = 12; // title text taken from the page's <title>
string es_lasttime = 13; // collection date, including time of day
string es_loadtime = 14; // load time: when the record was actually written into ES
string es_urldate = 15; // publication date of the article
string es_urltime = 16; // publication date of the article, including time of day
string es_srcname = 17; // source of the article (may be missing)
string es_authors = 18; // author(s) of the article (may be missing)
string es_district = 19; // region associated with the article (may be missing) (?)
string es_catalog = 20; // category (?)
string es_catalog1 = 21; // first-level category
string es_catalog2 = 22; // second-level category (?)
string es_keywords = 23; // article keywords
string es_abstract = 24; // article abstract
string es_simflag = 25; // duplicate marker: HKEY of the page this one duplicates
string es_simrank = 26; // similarity value
string es_urlimage = 27; // image address
string es_imageflag = 28; // number of images in the page
string es_tableflag = 29; // number of tables in the page (?)
string es_doclength = 30; // body text length
string es_content = 31; // page body content, without images (?)
string es_urlcontent = 32; // page body content, including images (?)
string es_bbsnum = 33; // (?) legacy note not recoverable
string es_pagelevel = 34; // number of levels from the start page
string es_urllevel = 35; // directory depth of the link
string es_simhash = 36; // simhash value
string es_ip = 37; // ip address (what it belongs to is not recoverable)
string es_heat = 38; // heat (popularity) score
string es_similaritycount = 39; // number of similar items (?)
string es_similarity = 40; // id of the similar item (?)
string es_similaritytime = 41; // time the similarity was computed
string es_emotion = 42; // sentiment (?)
string es_warningtime = 43; // warning time
string es_carriertype = 44; // carrier type (?)
string es_commentcount = 45; // comment count (?)
string es_forwardcount = 46; // forward (repost) count
string es_positiveWords = 47; // positive words (?)
string es_negativeWords = 48; // negative words (?)
string es_negativeProbability = 49; // negative probability (?)
string es_reportinfo = 50; // whether the item has been reported
string es_attention = 51; // whether the item is followed
string es_warning = 52; // whether a warning was raised
string es_readsign = 53; // whether the item has been read
string es_briefing = 54; // whether the item is in a briefing (?)
string es_warning_word = 55; // warning word(s)
string es_attentiontime = 56; // time the item was followed
string es_collection = 57; // whether the item is bookmarked
string es_attachment = 58; // attachment (?)
string es_userid = 59; // legacy type "number": user id (social media account)
string es_contenttype = 60; // legacy type "string": content type, i.e. the post type (status, link, photo, video, event, music, note, offer, album, ...)
string es_likecount = 61; // legacy type "number": like count (?)
string es_links = 62; // legacy type "string": link addresses in the content, including video file links
string es_reactioncount = 63; // legacy type "number": reaction count (?)
string es_linkdesc = 64; // legacy type "string": when the post is a link, some information about that link
string es_repostuid = 65; // legacy type "number": id of the original author of a reposted item
string es_repostuname =66; // legacy type "string": name of the original author of a reposted item
string es_repostid = 67; // legacy type "string": id of the reposted original item
string es_tags = 68; // legacy type "string": tags (?)
string es_mentionsaccount = 69; // legacy type "string": mentioned account(s) (?)
string es_video = 70; // legacy type "string": video link(s) in the content (?)
string es_isrepost = 71; // legacy type "boolean": whether the item is a repost
string es_lang = 72; // legacy type "string": language (?)
string es_client = 73; // legacy type "string": client, e.g. the posting client (?)
// Fields 74 and 75 have no legacy description.
string es_snapshot = 74;
string es_title = 75;
}

File diff suppressed because it is too large Load Diff

View File

@ -12,8 +12,9 @@ SCHEDULER_PERSIST = True
SELENIUM_DRIVER_NAME = 'firefox'
SELENIUM_DRIVER_EXECUTABLE_PATH = [
'http://10.55.13.121:28095',
'http://10.55.13.108:28095',
# 'http://10.55.13.108:28095',
'http://10.55.13.3:28095',
'http://74.121.148.204:28095'
]
SELENIUM_DRIVER_ARGUMENTS = ['-headless'] # '--headless' if using chrome instead of firefox
SELENIUM_DRIVER_PREFERENCES = {
@ -168,7 +169,7 @@ ITEM_PIPELINES = {
}
############################## 翻译
MAX_TEXT_LENGTH = 100
MAX_TEXT_LENGTH = 5999
# 翻译 API 地址(替换为你的服务器 IP 或域名)
TRANSLATE_API_URL = "http://47.113.231.200:28082/translate"
# 单次请求间隔(秒),避免 API 被限流

View File

@ -137,7 +137,7 @@ def get_format_time(pattern, time_str):
if __name__ == '__main__':
# a = [' 令和4年6月9日', 'www.kcna.kp (主体111.6.6.)', '民國111年06月09日 ', 'Jun. 9, 2022', '111年 06月 21日']
a = ['2026년 1월 6일 화요일 1면 [사진있음]']
a = ['Wed, 12/03/2025 - 12:00']
for _ in a:
# print(get_time_stamp(_))
print(get_time_stamp(_, {r"(\d{4}\d{1,2}월 \d{1,2}일)\D*(\d{2}:\d{2}:\d{2})*\D*": ['%Y-%m-%d %H:%M:%S']}))
print(get_time_stamp(_))
# print(get_time_stamp(_, {r"(\d{2}.\d{2}.\d{4})\D*(\d{2}\d{2}\d{2})*\D*": ['%d-%m-%Y %H:%M:%S']}))

View File

@ -73,3 +73,6 @@ def update_record(cursor, es_sid: int, new_title: str, new_content: str):
WHERE es_sid = % s
"""
cursor.execute(update_query, (new_title, new_content, es_sid))
if __name__ == "__main__":
    # Manual smoke test: run the paragraph-aware translation helper on a multi-sentence
    # Russian news sample and print whatever it returns.
    sample_text = "ВСУ провалили наступление на Сумском и Харьковском направлениях, сообщили РИА Новости в силовых структурах. В результате слаженных действий российских бойцов контратаки отражены, а противник обращен в бегство. Введенные ЕС ограничения на передвижения российских дипломатов противоречат Венской конвенции о дипломатических сношениях и мешают нормальной работе дипмиссий. Об этом заявил РИА Новости посол России в Бельгии Денис Гончар. Вице-президент США Джей Ди Вэнс посетит с визитом Армению и Азербайджан. Поездка в Ереван состоится 9-10 февраля, в Баку 10-11 февраля. В Вашингтон Вэнс вернется \"в среду вечером\", сообщает его пресс-пул. Либерально-демократическая партия под руководством премьер-министра Японии Санаэ Такаити победила на выборах в ключевую нижнюю палату парламента. Представители ЛДП получат 316 из 465 мандатов и смогут проводить законопроекты, даже если они не получат поддержки верхней палаты, где партия не имеет большинства. В России самая низкая безработица в странах \"Большой двадцатки\", выяснило РИА Новости, изучив данные национальных статслужб по итогам 2025 года. Уровень безработицы в России в декабре составил 2,2 процента, что на одну десятую процента ниже показателя 2024 года."
    translated = translate_content_with_paragraphs(sample_text)
    print(translated)