From feb48f3579a26673e5a6a777bc243a3535f0ea19 Mon Sep 17 00:00:00 2001 From: DELL Date: Fri, 27 Feb 2026 11:19:21 +0800 Subject: [PATCH] =?UTF-8?q?[=E7=99=BE=E5=BA=A6=E7=83=AD=E6=90=9C]=20?= =?UTF-8?q?=E9=87=87=E9=9B=86=E6=97=B6=E9=97=B4=E4=BC=98=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../MediaSpiders/spiders/BaiduHotSearchSprder.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/spiders/MediaSpiders/MediaSpiders/spiders/BaiduHotSearchSprder.py b/spiders/MediaSpiders/MediaSpiders/spiders/BaiduHotSearchSprder.py index ba794e3..d9c6d70 100644 --- a/spiders/MediaSpiders/MediaSpiders/spiders/BaiduHotSearchSprder.py +++ b/spiders/MediaSpiders/MediaSpiders/spiders/BaiduHotSearchSprder.py @@ -1,16 +1,12 @@ # -*- coding: utf-8 -*- import json -import logging import hashlib -import datetime import re import time -from urllib.parse import urlparse import random from selenium.webdriver.common.by import By from selenium.common.exceptions import ( - NoSuchElementException, TimeoutException, StaleElementReferenceException, WebDriverException @@ -75,6 +71,7 @@ class BaiduHotSearchSprder(scrapy.Spider): def start_requests(self): """发起初始请求""" self.logger.info(f"开始爬取百度热搜,任务ID: {self.job_id if self.job_id else 'N/A'}") + self.url_time = get_current_timestamp() yield SeleniumRequest( url=self.start_urls, callback=self.parse, @@ -284,10 +281,8 @@ class BaiduHotSearchSprder(scrapy.Spider): self.logger.warning("标题内容为空,跳过该项") return None - # 2. 初始化所有字段变量 - now_ms = int(time.time() * 1000) - # 基础字段 + now_ms = get_current_timestamp() site_name = '百度热搜' carrier_type = 'hot_search' hkey = get_str_md5(title) @@ -357,9 +352,9 @@ class BaiduHotSearchSprder(scrapy.Spider): hot_search_item['es_urltitle'] = title hot_search_item['es_urlcontent'] = desc hot_search_item['es_carriertype'] = carrier_type - hot_search_item['es_urltime'] = get_current_timestamp() - hot_search_item['es_lasttime'] = get_current_timestamp() - hot_search_item['es_loadtime'] = get_current_timestamp() + hot_search_item['es_urltime'] = self.url_time + hot_search_item['es_lasttime'] = now_ms + hot_search_item['es_loadtime'] = now_ms hot_search_item['es_hkey'] = hkey hot_search_item['es_simrank'] = rank hot_search_item['es_heat'] = heat