[热搜] 热搜功能优化,excel导出及解压缩

This commit is contained in:
DELL 2026-03-02 13:49:42 +08:00
parent e1893f0ae3
commit 85158d00db
8 changed files with 96 additions and 84 deletions

View File

@ -43,6 +43,17 @@ public class ExportController {
} }
} }
/**
 * Triggers an Excel export of hot-search records, using the "startTime" value
 * from the request body as the lower bound handed to the export routine.
 *
 * NOTE(review): the export routine visible in this change catches its own
 * exceptions and returns void, so a 200 here only means the call returned —
 * confirm whether callers need a stronger success signal.
 *
 * @param object request body; must carry a non-blank "startTime" string
 * @return code 200 once the export call returns; code 500 with a message when
 *         "startTime" is missing/blank or an exception reaches this method
 */
@PostMapping("/exportHotSearchExcel")
public ReturnT<String> exportHotSearchExcel(@RequestBody JSONObject object) {
    try {
        String startTime = object.getString("startTime");
        // Reject a missing/blank start time up front instead of handing null to the repository query.
        if (startTime == null || startTime.trim().isEmpty()) {
            return new ReturnT<>(500, "startTime is required", "");
        }
        databaseConnector.hotSearchToXlsx(startTime);
        return new ReturnT<>(200, "", "");
    } catch (Exception e) {
        // getMessage() may be null (e.g. for a NullPointerException); fall back to toString()
        // so the response still carries a reason.
        String message = e.getMessage() != null ? e.getMessage() : e.toString();
        return new ReturnT<>(500, message, "");
    }
}
@PostMapping("/triggerTask") @PostMapping("/triggerTask")
public ReturnT<String> triggerTask() { public ReturnT<String> triggerTask() {
try { try {
@ -53,18 +64,8 @@ public class ExportController {
} }
} }
@PostMapping("/triggerTwitterTask") @PostMapping("/triggerHotSearchTask")
public ReturnT<String> triggerTwitterTask() { public ReturnT<String> triggerHotSearchTask() {
try {
new Thread(() -> exportAndUploadUtils.exportTwitterDataAndUpload()).start();
return new ReturnT<>(200, "", "");
} catch (Exception e) {
return new ReturnT<>(500, e.getMessage(), "");
}
}
@PostMapping("/triggerBaiduHotSeachTask")
public ReturnT<String> triggerBaiduHotSeachTask() {
try { try {
new Thread(() -> exportAndUploadUtils.exportHotSearchAndUpload()).start(); new Thread(() -> exportAndUploadUtils.exportHotSearchAndUpload()).start();
return new ReturnT<>(200, "", ""); return new ReturnT<>(200, "", "");
@ -73,6 +74,4 @@ public class ExportController {
} }
} }
} }

View File

@ -1,12 +0,0 @@
package com.jsc.dsp.dao;
import com.jsc.dsp.model.EsDataBaiduView;
import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.stereotype.Repository;
import java.util.List;
/**
 * Spring Data JPA repository for {@link EsDataBaiduView} entities with a String id.
 * (This file is shown as removed in this change.)
 */
@Repository
public interface EsDataBaiduRepository extends JpaRepository<EsDataBaiduView, String> {
/**
 * Derived query (built by Spring Data from the method name): returns the rows
 * whose esLoadtime is after the given value.
 *
 * NOTE(review): esLoadtime is declared as a String on the entity, so the
 * comparison presumably relies on the stored text format sorting chronologically — confirm.
 *
 * @param loadtime lower bound (exclusive) for esLoadtime
 * @return matching entities
 */
List<EsDataBaiduView> findAllByEsLoadtimeAfter(String loadtime);
}

View File

@ -0,0 +1,12 @@
package com.jsc.dsp.dao;
import com.jsc.dsp.model.EsDataHotSearchView;
import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.stereotype.Repository;
import java.util.List;
/**
 * Spring Data JPA repository for {@link EsDataHotSearchView} entities with a String id.
 */
@Repository
public interface EsDataHotSearchRepository extends JpaRepository<EsDataHotSearchView, String> {
/**
 * Derived query (built by Spring Data from the method name): returns the rows
 * whose esLoadtime is after the given value.
 *
 * NOTE(review): esLoadtime is declared as a String on the entity, so the
 * comparison presumably relies on the stored text format sorting chronologically — confirm.
 *
 * @param loadtime lower bound (exclusive) for esLoadtime
 * @return matching entities
 */
List<EsDataHotSearchView> findAllByEsLoadtimeAfter(String loadtime);
}

View File

@ -8,30 +8,31 @@ import javax.persistence.Table;
// JPA entity for hot-search rows; this change renames the class from EsDataBaiduView to
// EsDataHotSearchView and the mapped table from "es_data_baidu" to "es_data_hot_search".
// All columns are held as String fields; esSid is the primary key.
// NOTE(review): the Excel export in this change reads getDeclaredFields() of this class to
// prepare the header row, so the field declaration order presumably needs to line up with
// the per-row cell order written by the export — confirm.
@Entity @Entity
@Data @Data
@Table(name = "es_data_baidu") @Table(name = "es_data_hot_search")
public class EsDataBaiduView { public class EsDataHotSearchView {
@Id @Id
private String esSid; private String esSid;
private String esUrltime; private String esUrltime;
private String esLoadtime;
private String esCarriertype; private String esCarriertype;
private String esSitename; private String esSitename;
private String esSimrank;
private String esUrltitle;
private String esUrlcontent; private String esUrlcontent;
private String esUrlname; private String esUrlname;
private String esUrltitle;
private String esHkey; private String esHkey;
private String esLasttime; private String esLasttime;
private String esSimrank;
private String esHeat; private String esHeat;
private String esLoadtime;
} }

View File

@ -33,4 +33,6 @@ public class EsDataNewsView {
String esAbstract; String esAbstract;
String esKeywords; String esKeywords;
String file; String file;
String esHkey;
String esUrltopic;
} }

View File

@ -1,11 +1,11 @@
package com.jsc.dsp.utils; package com.jsc.dsp.utils;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import com.jsc.dsp.dao.EsDataBaiduRepository; import com.jsc.dsp.dao.EsDataHotSearchRepository;
import com.jsc.dsp.dao.EsDataNewsRepository; import com.jsc.dsp.dao.EsDataNewsRepository;
import com.jsc.dsp.dao.EsDataTwitterRepository; import com.jsc.dsp.dao.EsDataTwitterRepository;
import com.jsc.dsp.dao.IndeximosRepository; import com.jsc.dsp.dao.IndeximosRepository;
import com.jsc.dsp.model.EsDataBaiduView; import com.jsc.dsp.model.EsDataHotSearchView;
import com.jsc.dsp.model.EsDataNewsView; import com.jsc.dsp.model.EsDataNewsView;
import com.jsc.dsp.model.EsDataTwitterView; import com.jsc.dsp.model.EsDataTwitterView;
import com.jsc.dsp.model.Indeximos; import com.jsc.dsp.model.Indeximos;
@ -46,7 +46,7 @@ public class DatabaseConnector {
EsDataTwitterRepository esDataTwitterRepository; EsDataTwitterRepository esDataTwitterRepository;
@Resource @Resource
EsDataBaiduRepository esDataBaiduRepository; EsDataHotSearchRepository esDataHotSearchRepository;
@Value("${custom.newsExcelOutputPath}") @Value("${custom.newsExcelOutputPath}")
String newsExcelOutputPath; String newsExcelOutputPath;
@ -54,8 +54,8 @@ public class DatabaseConnector {
@Value("${custom.twitterExcelOutputPath}") @Value("${custom.twitterExcelOutputPath}")
String twitterExcelOutputPath; String twitterExcelOutputPath;
@Value("${custom.baiduExcelOutputPath}") @Value("${custom.hotSearchExcelOutputPath}")
String baiduExcelOutputPath; String hotSearchExcelOutputPath;
private static final ObjectMapper objectMapper = new ObjectMapper(); private static final ObjectMapper objectMapper = new ObjectMapper();
@ -165,6 +165,8 @@ public class DatabaseConnector {
row.createCell(19).setCellValue(item.getEsAbstract()); row.createCell(19).setCellValue(item.getEsAbstract());
row.createCell(20).setCellValue(item.getEsKeywords()); row.createCell(20).setCellValue(item.getEsKeywords());
row.createCell(21).setCellValue(item.getFile()); row.createCell(21).setCellValue(item.getFile());
row.createCell(22).setCellValue(item.getEsHkey());
row.createCell(23).setCellValue(item.getEsUrltopic());
} }
logger.info("完成excel数据写入" + rowNum + ""); logger.info("完成excel数据写入" + rowNum + "");
@ -181,8 +183,8 @@ public class DatabaseConnector {
} catch (IOException e) { } catch (IOException e) {
e.printStackTrace(); e.printStackTrace();
} }
} logger.info("excel导出完成");
logger.info("excel导出完成!"); } else logger.info("获取数据为空,excel导出");
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace(); e.printStackTrace();
} }
@ -322,8 +324,8 @@ public class DatabaseConnector {
} catch (IOException e) { } catch (IOException e) {
e.printStackTrace(); e.printStackTrace();
} }
} logger.info("excel导出完成");
logger.info("excel导出完成!"); } else logger.info("获取数据为空,excel导出");
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace(); e.printStackTrace();
} }
@ -369,19 +371,19 @@ public class DatabaseConnector {
} }
} }
public void baiduToXlsx(String startTime) { public void hotSearchToXlsx(String startTime) {
try { try {
Path dirPath = Paths.get(baiduExcelOutputPath); Path dirPath = Paths.get(hotSearchExcelOutputPath);
if (!Files.exists(dirPath)) { if (!Files.exists(dirPath)) {
Files.createDirectories(dirPath); Files.createDirectories(dirPath);
} }
String timestamp = LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMdd")); String timestamp = LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMdd"));
String fileName = "data_baidu-" + timestamp + "-001.xlsx"; String fileName = "data_hot_search-" + timestamp + "-001.xlsx";
Path filePath = dirPath.resolve(fileName); Path filePath = dirPath.resolve(fileName);
List<EsDataBaiduView> esDataNewsViewList = esDataBaiduRepository.findAllByEsLoadtimeAfter(startTime); List<EsDataHotSearchView> esDataHotSearchViewList = esDataHotSearchRepository.findAllByEsLoadtimeAfter(startTime);
if (!esDataNewsViewList.isEmpty()) { if (!esDataHotSearchViewList.isEmpty()) {
Field[] fields = esDataNewsViewList.get(0).getClass().getDeclaredFields(); // 通过反射获取类的成员信息并使用这些类成员为后续生成的excel表头做准备 Field[] fields = esDataHotSearchViewList.get(0).getClass().getDeclaredFields(); // 通过反射获取类的成员信息并使用这些类成员为后续生成的excel表头做准备
try (Workbook workbook = new XSSFWorkbook(); try (Workbook workbook = new XSSFWorkbook();
ByteArrayOutputStream out = new ByteArrayOutputStream()) { ByteArrayOutputStream out = new ByteArrayOutputStream()) {
@ -401,50 +403,56 @@ public class DatabaseConnector {
} }
// 填充数据 // 填充数据
int rowNum = 1; int rowNum = 1;
for (EsDataBaiduView item : esDataNewsViewList) { for (EsDataHotSearchView item : esDataHotSearchViewList) {
Row row = sheet.createRow(rowNum++); Row row = sheet.createRow(rowNum++);
logger.debug("导出excel第" + rowNum + ""); logger.debug("导出excel第" + rowNum + "");
// 0: esUrltime // 0: esSid
row.createCell(0).setCellValue(item.getEsUrltime() != null ? item.getEsUrltime() : ""); row.createCell(0).setCellValue(item.getEsSid() != null ? item.getEsSid() : "");
// 1: esUrltime
row.createCell(1).setCellValue(item.getEsUrltime() != null ? item.getEsUrltime() : "");
// 1: esCarriertype // 2: esCarriertype
row.createCell(2).setCellValue(item.getEsCarriertype() != null ? item.getEsCarriertype() : ""); row.createCell(2).setCellValue(item.getEsCarriertype() != null ? item.getEsCarriertype() : "");
// 2: esSitename // 3: esSitename
row.createCell(3).setCellValue(item.getEsSitename() != null ? item.getEsSitename() : ""); row.createCell(3).setCellValue(item.getEsSitename() != null ? item.getEsSitename() : "");
// 3: esUrlcontent // 4: esSimrank
String esUrlcontent = item.getEsUrlcontent(); row.createCell(4).setCellValue(item.getEsSimrank() != null ? String.valueOf(Float.valueOf(item.getEsSimrank()).intValue()) : "");
if (esUrlcontent != null && esUrlcontent.length() > 10000) {
row.createCell(4).setCellValue(esUrlcontent.substring(0, 10000));
} else {
row.createCell(4).setCellValue(esUrlcontent != null ? esUrlcontent : "");
}
// 4: esUrlname
row.createCell(6).setCellValue(item.getEsUrlname() != null ? item.getEsUrlname() : "");
// 5: esUrltitle // 5: esUrltitle
String esUrltitle = item.getEsUrltitle(); String esUrltitle = item.getEsUrltitle();
if (esUrltitle != null && esUrltitle.length() > 10000) { if (esUrltitle != null && esUrltitle.length() > 10000) {
row.createCell(7).setCellValue(esUrltitle.substring(0, 10000)); row.createCell(5).setCellValue(esUrltitle.substring(0, 10000));
} else { } else {
row.createCell(7).setCellValue(esUrltitle != null ? esUrltitle : ""); row.createCell(5).setCellValue(esUrltitle != null ? esUrltitle : "");
} }
// 6: esHkey // 6: esUrlcontent
row.createCell(16).setCellValue(item.getEsHkey() != null ? item.getEsHkey() : ""); String esUrlcontent = item.getEsUrlcontent();
if (esUrlcontent != null && esUrlcontent.length() > 10000) {
row.createCell(6).setCellValue(esUrlcontent.substring(0, 10000));
} else {
row.createCell(6).setCellValue(esUrlcontent != null ? esUrlcontent : "");
}
// 7: esLasttime // 7: esUrlname
row.createCell(7).setCellValue(item.getEsUrlname() != null ? item.getEsUrlname() : "");
// 8: esHkey
row.createCell(8).setCellValue(item.getEsHkey() != null ? item.getEsHkey() : "");
// 9: esLasttime
String esLasttime = extractFilenamesFromJsonArray(item.getEsLasttime()); String esLasttime = extractFilenamesFromJsonArray(item.getEsLasttime());
row.createCell(17).setCellValue(esLasttime); row.createCell(9).setCellValue(esLasttime);
// 8: esSimrank
row.createCell(18).setCellValue(item.getEsSimrank() != null ? item.getEsSimrank() : "");
// 9: esHeat // 10: esHeat
row.createCell(19).setCellValue(item.getEsHeat() != null ? item.getEsHeat() : ""); row.createCell(10).setCellValue(item.getEsHeat() != null ? item.getEsHeat() : "");
// 11: esLoadtime
String esLoadtime = extractFilenamesFromJsonArray(item.getEsLoadtime());
row.createCell(11).setCellValue(esLoadtime);
} }
logger.info("完成excel数据写入" + rowNum + ""); logger.info("完成excel数据写入" + rowNum + "");
@ -461,10 +469,12 @@ public class DatabaseConnector {
} catch (IOException e) { } catch (IOException e) {
e.printStackTrace(); e.printStackTrace();
} }
}
logger.info("excel导出完成"); logger.info("excel导出完成");
} else logger.info("获取数据为空excel未导出");
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace(); e.printStackTrace();
} }
} }
} }

View File

@ -54,8 +54,8 @@ public class ExportAndUploadUtils {
@Value("${custom.twitterExcelOutputPath}") @Value("${custom.twitterExcelOutputPath}")
String twitterExcelOutputPath; String twitterExcelOutputPath;
@Value("${custom.baiduExcelOutputPath}") @Value("${custom.hotSearchExcelOutputPath}")
String baiduExcelOutputPath; String hotSearchExcelOutputPath;
@Value("${custom.backupFilePath}") @Value("${custom.backupFilePath}")
String backupFilePath; String backupFilePath;
@ -99,15 +99,15 @@ public class ExportAndUploadUtils {
// Exports hot-search data to xlsx, stores a new last-load-time value in the config service,
// then hands the export directory, a local zip path and a remote path to zipAndUploadDirectory.
// The start time for the export is the previously stored last-load-time config value.
// The post-change text reads and writes the same config key ("hot_search_last_loadtime");
// the pre-change text read "baidu_last_loadtime" but wrote "baidu_hot_last_loadtime".
// NOTE(review): the config value is overwritten right after the export call without checking
// its outcome (the export method shown in this change returns void and catches its own
// exceptions) — confirm that a failed export should still advance the stored time.
public void exportHotSearchAndUpload() { public void exportHotSearchAndUpload() {
logger.info("开始导出百度热搜 excel数据..."); logger.info("开始导出百度热搜 excel数据...");
String baiduLastLoadTime = configService.getConfigValueByName("baidu_last_loadtime"); String hotSearchLastLoadTime = configService.getConfigValueByName("hot_search_last_loadtime");
String currentLoadTime = StringUtils.DateToString(new Date()); String currentLoadTime = StringUtils.DateToString(new Date());
String timestamp = LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMdd")); String timestamp = LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMdd"));
databaseConnector.baiduToXlsx(baiduLastLoadTime); databaseConnector.hotSearchToXlsx(hotSearchLastLoadTime);
configService.setConfigValueByName("baidu_hot_last_loadtime", currentLoadTime); configService.setConfigValueByName("hot_search_last_loadtime", currentLoadTime);
String zipFileName = "data_hot_search-" + timestamp + "-001.zip"; String zipFileName = "data_hot_search-" + timestamp + "-001.zip";
String zipFileFullName = backupFilePath + File.separator + zipFileName; String zipFileFullName = backupFilePath + File.separator + zipFileName;
String remoteZipPath = ftpUploadPath + "/" + zipFileName; String remoteZipPath = ftpUploadPath + "/" + zipFileName;
zipAndUploadDirectory(hotSearchExcelOutputPath, zipFileFullName, remoteZipPath); zipAndUploadDirectory(hotSearchExcelOutputPath, zipFileFullName, remoteZipPath);
} }
/** /**

View File

@ -1,5 +1,5 @@
server: server:
port: 8084 port: 18084
servlet: servlet:
context-path: /dsp context-path: /dsp
spring: spring:
@ -98,7 +98,7 @@ custom:
twitterWhiteList: nytchinese;YesterdayBigcat;takaichi_sanae;yonhapcn;VOAChinese;ChineseWSJ;whyyoutouzhele;Jaemyung_Lee twitterWhiteList: nytchinese;YesterdayBigcat;takaichi_sanae;yonhapcn;VOAChinese;ChineseWSJ;whyyoutouzhele;Jaemyung_Lee
newsExcelOutputPath: D:/data/output/upload newsExcelOutputPath: D:/data/output/upload
twitterExcelOutputPath: D:/data/output/twitter twitterExcelOutputPath: D:/data/output/twitter
baiduExcelOutputPath: D:/data/output/baidu hotSearchExcelOutputPath: D:/data/output/hotSearch
backupFilePath: D:/data/output/backup backupFilePath: D:/data/output/backup
pagesOutputPath: D:/data/output/pdf pagesOutputPath: D:/data/output/pdf
ftpUploadPath: /home/jsc-2b ftpUploadPath: /home/jsc-2b