From 2c974902ab3630fc2a91afb47ffd58f7ff85db47 Mon Sep 17 00:00:00 2001 From: DELL Date: Thu, 26 Feb 2026 16:20:00 +0800 Subject: [PATCH] =?UTF-8?q?=E7=83=AD=E6=90=9C=E4=BF=A1=E6=81=AF=E5=85=A5?= =?UTF-8?q?=E5=BA=93?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- dsp/dsp.iml | 220 +----------------- .../jsc/dsp/controller/ExportController.java | 10 + .../jsc/dsp/dao/EsDataBaiduRepository.java | 12 + .../com/jsc/dsp/model/EsDataBaiduView.java | 37 +++ .../com/jsc/dsp/service/StorageService.java | 29 +++ .../java/com/jsc/dsp/task/AutoUpload.java | 5 + .../com/jsc/dsp/utils/DatabaseConnector.java | 107 +++++++++ .../jsc/dsp/utils/ExportAndUploadUtils.java | 16 ++ dsp/src/main/resources/application.yml | 4 +- 9 files changed, 220 insertions(+), 220 deletions(-) create mode 100644 dsp/src/main/java/com/jsc/dsp/dao/EsDataBaiduRepository.java create mode 100644 dsp/src/main/java/com/jsc/dsp/model/EsDataBaiduView.java diff --git a/dsp/dsp.iml b/dsp/dsp.iml index 2909fd3..0995b55 100644 --- a/dsp/dsp.iml +++ b/dsp/dsp.iml @@ -1,5 +1,5 @@ - + @@ -20,222 +20,4 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/dsp/src/main/java/com/jsc/dsp/controller/ExportController.java b/dsp/src/main/java/com/jsc/dsp/controller/ExportController.java index 90f0412..81ad2f1 100644 --- a/dsp/src/main/java/com/jsc/dsp/controller/ExportController.java +++ b/dsp/src/main/java/com/jsc/dsp/controller/ExportController.java @@ -63,6 +63,16 @@ public class ExportController { } } + @PostMapping("/triggerBaiduHotSeachTask") + public 
ReturnT<String> triggerBaiduHotSeachTask() {
+        // Start the export on its own thread so the HTTP call returns without waiting for it.
+        try {
+            new Thread(() -> exportAndUploadUtils.exportHotSearchAndUpload()).start();
+            return new ReturnT<>(200, "", "");
+        } catch (Exception e) {
+            return new ReturnT<>(500, e.getMessage(), "");
+        }
+    }
 }
diff --git a/dsp/src/main/java/com/jsc/dsp/dao/EsDataBaiduRepository.java b/dsp/src/main/java/com/jsc/dsp/dao/EsDataBaiduRepository.java
new file mode 100644
index 0000000..c2d9a70
--- /dev/null
+++ b/dsp/src/main/java/com/jsc/dsp/dao/EsDataBaiduRepository.java
@@ -0,0 +1,20 @@
+package com.jsc.dsp.dao;
+
+import com.jsc.dsp.model.EsDataBaiduView;
+import org.springframework.data.jpa.repository.JpaRepository;
+import org.springframework.stereotype.Repository;
+
+import java.util.List;
+
+/**
+ * Spring Data JPA repository for {@link EsDataBaiduView}, keyed by the String id {@code esSid}.
+ */
+@Repository
+public interface EsDataBaiduRepository extends JpaRepository<EsDataBaiduView, String> {
+
+    /**
+     * Derived query: rows whose {@code esLoadtime} is greater than {@code loadtime}.
+     * NOTE(review): esLoadtime is a String, so this is presumably a string comparison - confirm the stored format sorts chronologically.
+     */
+    List<EsDataBaiduView> findAllByEsLoadtimeAfter(String loadtime);
+}
diff --git a/dsp/src/main/java/com/jsc/dsp/model/EsDataBaiduView.java b/dsp/src/main/java/com/jsc/dsp/model/EsDataBaiduView.java
new file mode 100644
index 0000000..296df77
--- /dev/null
+++ b/dsp/src/main/java/com/jsc/dsp/model/EsDataBaiduView.java
@@ -0,0 +1,42 @@
+package com.jsc.dsp.model;
+
+import lombok.Data;
+
+import javax.persistence.Entity;
+import javax.persistence.Id;
+import javax.persistence.Table;
+
+/**
+ * JPA entity mapped to table {@code es_data_baidu}; every column is held as a String.
+ * DatabaseConnector#baiduToXlsx builds its Excel header row from these declared fields via reflection.
+ */
+@Entity
+@Data
+@Table(name = "es_data_baidu")
+public class EsDataBaiduView {
+
+    @Id
+    private String esSid;
+
+    private String esUrltime;
+
+    private String esLoadtime;
+
+    private String esCarriertype;
+
+    private String esSitename;
+
+    private String esUrlcontent;
+
+    private String esUrlname;
+
+    private String esUrltitle;
+
+    private String esHkey;
+
+    private String esLasttime;
+
+    private String esSimrank;
+
+    private String esHeat;
+}
diff --git a/dsp/src/main/java/com/jsc/dsp/service/StorageService.java b/dsp/src/main/java/com/jsc/dsp/service/StorageService.java
index 968d548..1c053c8 100644
--- a/dsp/src/main/java/com/jsc/dsp/service/StorageService.java
+++ 
b/dsp/src/main/java/com/jsc/dsp/service/StorageService.java
@@ -175,7 +175,38 @@ public class StorageService extends StreamService {
                     }
                     dbStorageItems.add(indeximos);
                 }
+                // Hot-search items: assign an id, stamp the load time and default null fields before queuing for insert.
+                if ("hot_search".equals(indeximos.getEs_carriertype())) {
+                    logger.info("开始处理热搜【" + indeximos.getEs_sitename() + "】的数据入库流程");
+                    String es_urlname = indeximos.getEs_urlname();
+                    String uuid;
+                    if (es_urlname != null && !es_urlname.isEmpty()) {
+                        // Name-based UUID derived from urlname, so the same article always gets the same id
+                        // and is not stored twice. UTF-8 keeps the id independent of the platform charset.
+                        uuid = UUID.nameUUIDFromBytes(es_urlname.getBytes(java.nio.charset.StandardCharsets.UTF_8))
+                                .toString().replaceAll("-", "");
+                    } else {
+                        uuid = UUID.randomUUID().toString().replaceAll("-", "");
+                    }
+                    indeximos.setEs_sid(uuid);
+                    indeximos.setEs_loadtime(StringUtils.TimestampToStringDate(System.currentTimeMillis()));
+                    builder.setEsSid(uuid);
+                    for (Field f : indeximos.getClass().getDeclaredFields()) {
+                        f.setAccessible(true);
+                        // Null fields get 0.0f (Float types) or "" (others); fields listed in dateFields stay null.
+                        if (f.get(indeximos) == null) {
+                            String fieldType = databaseConnector.getFieldType(Indeximos.class, f.getName());
+                            if (fieldType.contains("Float")) {
+                                f.set(indeximos, 0.0f);
+                            } else if (!dateFields.contains(f.getName())) {
+                                f.set(indeximos, "");
+                            }
+                        }
+                    }
+                    dbStorageItems.add(indeximos);
+                }
             }
+
             if (dbStorageItems.size() > 0) {
                 databaseConnector.insertIntoDB(dbStorageItems);
             }
diff --git a/dsp/src/main/java/com/jsc/dsp/task/AutoUpload.java b/dsp/src/main/java/com/jsc/dsp/task/AutoUpload.java
index 91f18e1..069f849 100644
--- a/dsp/src/main/java/com/jsc/dsp/task/AutoUpload.java
+++ b/dsp/src/main/java/com/jsc/dsp/task/AutoUpload.java
@@ -28,4 +28,10 @@ public class AutoUpload {
         exportAndUploadUtils.exportTwitterDataAndUpload();
     }
 
+    /** Scheduled hot-search export and upload; the cron expression is read from custom.exportHotSearchTaskSchedule. */
+    @Scheduled(cron = "${custom.exportHotSearchTaskSchedule}")
+    public void exportHotSearchAndUpload() {
+        exportAndUploadUtils.exportHotSearchAndUpload();
+    }
+
 }
diff --git a/dsp/src/main/java/com/jsc/dsp/utils/DatabaseConnector.java b/dsp/src/main/java/com/jsc/dsp/utils/DatabaseConnector.java
index 5126c96..56e3457 100644
--- 
a/dsp/src/main/java/com/jsc/dsp/utils/DatabaseConnector.java
+++ b/dsp/src/main/java/com/jsc/dsp/utils/DatabaseConnector.java
@@ -1,9 +1,11 @@
 package com.jsc.dsp.utils;
 
 import com.fasterxml.jackson.databind.ObjectMapper;
+import com.jsc.dsp.dao.EsDataBaiduRepository;
 import com.jsc.dsp.dao.EsDataNewsRepository;
 import com.jsc.dsp.dao.EsDataTwitterRepository;
 import com.jsc.dsp.dao.IndeximosRepository;
+import com.jsc.dsp.model.EsDataBaiduView;
 import com.jsc.dsp.model.EsDataNewsView;
 import com.jsc.dsp.model.EsDataTwitterView;
 import com.jsc.dsp.model.Indeximos;
@@ -39,15 +41,22 @@ public class DatabaseConnector {
 
     @Resource
     EsDataNewsRepository esDataNewsRepository;
+
     @Resource
     EsDataTwitterRepository esDataTwitterRepository;
 
+    @Resource
+    EsDataBaiduRepository esDataBaiduRepository;
+
     @Value("${custom.newsExcelOutputPath}")
     String newsExcelOutputPath;
 
     @Value("${custom.twitterExcelOutputPath}")
     String twitterExcelOutputPath;
 
+    @Value("${custom.baiduExcelOutputPath}")
+    String baiduExcelOutputPath;
+
     private static final ObjectMapper objectMapper = new ObjectMapper();
 
     private final Logger logger = LoggerFactory.getLogger(this.getClass().getName());
@@ -360,4 +369,92 @@ public class DatabaseConnector {
         }
     }
 
+    /**
+     * Exports Baidu hot-search rows loaded after {@code startTime} to
+     * {@code data_baidu-<yyyyMMdd>-001.xlsx} under {@code baiduExcelOutputPath}.
+     * No file is written when the query returns no rows; failures are logged, not rethrown.
+     *
+     * @param startTime lower bound handed to {@code findAllByEsLoadtimeAfter}
+     */
+    public void baiduToXlsx(String startTime) {
+        try {
+            Path dirPath = Paths.get(baiduExcelOutputPath);
+            if (!Files.exists(dirPath)) {
+                Files.createDirectories(dirPath);
+            }
+            String timestamp = LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMdd"));
+            String fileName = "data_baidu-" + timestamp + "-001.xlsx";
+            Path filePath = dirPath.resolve(fileName);
+
+            List<EsDataBaiduView> baiduViewList = esDataBaiduRepository.findAllByEsLoadtimeAfter(startTime);
+            if (!baiduViewList.isEmpty()) {
+                // The header row is generated by reflection from the entity's declared fields.
+                Field[] fields = EsDataBaiduView.class.getDeclaredFields();
+                try (Workbook workbook = new XSSFWorkbook()) {
+                    Sheet sheet = workbook.createSheet("data");
+
+                    // Header row
+                    Row headerRow = sheet.createRow(0);
+                    CellStyle headerStyle = workbook.createCellStyle();
+                    headerStyle.setFillForegroundColor(IndexedColors.LIGHT_BLUE.getIndex());
+                    headerStyle.setFillPattern(FillPatternType.SOLID_FOREGROUND);
+                    for (int i = 0; i < fields.length; i++) {
+                        Cell cell = headerRow.createCell(i);
+                        cell.setCellValue(formField(fields[i]));
+                        cell.setCellStyle(headerStyle);
+                    }
+
+                    // Data rows. Values are listed in the declaration order of EsDataBaiduView's fields,
+                    // so that column i holds the value of the field whose name is in header cell i.
+                    final int maxTextLength = 10000;
+                    int rowNum = 1;
+                    for (EsDataBaiduView item : baiduViewList) {
+                        Row row = sheet.createRow(rowNum++);
+                        logger.debug("导出excel第" + rowNum + "行");
+                        String esUrlcontent = item.getEsUrlcontent();
+                        if (esUrlcontent != null && esUrlcontent.length() > maxTextLength) {
+                            esUrlcontent = esUrlcontent.substring(0, maxTextLength);
+                        }
+                        String esUrltitle = item.getEsUrltitle();
+                        if (esUrltitle != null && esUrltitle.length() > maxTextLength) {
+                            esUrltitle = esUrltitle.substring(0, maxTextLength);
+                        }
+                        // NOTE(review): esLasttime is passed through extractFilenamesFromJsonArray as before;
+                        // confirm that helper is really meant for this field.
+                        String esLasttime = extractFilenamesFromJsonArray(item.getEsLasttime());
+                        String[] values = {
+                                item.getEsSid(),
+                                item.getEsUrltime(),
+                                item.getEsLoadtime(),
+                                item.getEsCarriertype(),
+                                item.getEsSitename(),
+                                esUrlcontent,
+                                item.getEsUrlname(),
+                                esUrltitle,
+                                item.getEsHkey(),
+                                esLasttime,
+                                item.getEsSimrank(),
+                                item.getEsHeat()
+                        };
+                        for (int col = 0; col < values.length; col++) {
+                            row.createCell(col).setCellValue(values[col] != null ? values[col] : "");
+                        }
+                    }
+                    logger.info("完成excel数据写入,共" + rowNum + "行");
+
+                    // Auto-size columns
+                    for (int i = 0; i < fields.length; i++) {
+                        sheet.autoSizeColumn(i);
+                    }
+
+                    try (FileOutputStream fos = new FileOutputStream(filePath.toFile())) {
+                        workbook.write(fos);
+                    }
+                }
+            }
+            logger.info("excel导出完成!");
+        } catch (Exception e) {
+            logger.error("百度热搜excel导出失败", e);
+        }
+    }
 }
diff --git a/dsp/src/main/java/com/jsc/dsp/utils/ExportAndUploadUtils.java b/dsp/src/main/java/com/jsc/dsp/utils/ExportAndUploadUtils.java
index fefc5dd..f33b69e 100644
--- a/dsp/src/main/java/com/jsc/dsp/utils/ExportAndUploadUtils.java
+++ b/dsp/src/main/java/com/jsc/dsp/utils/ExportAndUploadUtils.java
@@ -54,6 +54,9 @@ public class ExportAndUploadUtils {
     @Value("${custom.twitterExcelOutputPath}")
     String twitterExcelOutputPath;
 
+    @Value("${custom.baiduExcelOutputPath}")
+    String baiduExcelOutputPath;
+
     @Value("${custom.backupFilePath}")
     String backupFilePath;
 
@@ -94,6 +97,26 @@ public class ExportAndUploadUtils {
         zipAndUploadDirectory(twitterExcelOutputPath, zipFileFullName, remoteZipPath);
     }
 
+    /**
+     * Exports Baidu hot-search rows loaded since the stored last-load time to Excel,
+     * records the new last-load time, then zips the output directory and uploads the archive.
+     */
+    public void exportHotSearchAndUpload() {
+        logger.info("开始导出百度热搜 excel数据...");
+        // Read and write the same config key; with two different keys the stored start time never advances.
+        // NOTE(review): confirm "baidu_last_loadtime" is the key that exists in the config store.
+        final String lastLoadTimeKey = "baidu_last_loadtime";
+        String baiduLastLoadTime = configService.getConfigValueByName(lastLoadTimeKey);
+        String currentLoadTime = StringUtils.DateToString(new Date());
+        String timestamp = LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMdd"));
+        databaseConnector.baiduToXlsx(baiduLastLoadTime);
+        configService.setConfigValueByName(lastLoadTimeKey, currentLoadTime);
+        String zipFileName = "data_hot_search-" + timestamp + "-001.zip";
+        String zipFileFullName = backupFilePath + File.separator + zipFileName;
+        String remoteZipPath = ftpUploadPath + "/" + zipFileName;
+        zipAndUploadDirectory(baiduExcelOutputPath, zipFileFullName, remoteZipPath);
+    }
+
     /**
      * 
将指定目录打包成 ZIP 文件(保存到指定本地路径),并上传到 FTP 服务器 * diff --git a/dsp/src/main/resources/application.yml b/dsp/src/main/resources/application.yml index 7d3d648..e2dd22b 100644 --- a/dsp/src/main/resources/application.yml +++ b/dsp/src/main/resources/application.yml @@ -98,8 +98,10 @@ custom: twitterWhiteList: nytchinese;YesterdayBigcat;takaichi_sanae;yonhapcn;VOAChinese;ChineseWSJ;whyyoutouzhele;Jaemyung_Lee newsExcelOutputPath: D:/data/output/upload twitterExcelOutputPath: D:/data/output/twitter + baiduExcelOutputPath: D:/data/output/baidu backupFilePath: D:/data/output/backup pagesOutputPath: D:/data/output/pdf ftpUploadPath: /home/jsc-2b exportNewsTaskSchedule: "0 30 8 * * 1,2,3,4,5,6,7" - exportTwitterTaskSchedule: "0 30 6 * * 1,2,3,4,5,6,7" \ No newline at end of file + exportTwitterTaskSchedule: "0 30 6 * * 1,2,3,4,5,6,7" + exportHotSearchTaskSchedule: "0 0 20 * * 1,2,3,4,5,6,7" \ No newline at end of file