diff --git a/spiders/MediaSpiders/MediaSpiders/spiders/BaiduHotSearchSprder.py b/spiders/MediaSpiders/MediaSpiders/spiders/BaiduHotSearchSprder.py index ba794e3..d9c6d70 100644 --- a/spiders/MediaSpiders/MediaSpiders/spiders/BaiduHotSearchSprder.py +++ b/spiders/MediaSpiders/MediaSpiders/spiders/BaiduHotSearchSprder.py @@ -1,16 +1,12 @@ # -*- coding: utf-8 -*- import json -import logging import hashlib -import datetime import re import time -from urllib.parse import urlparse import random from selenium.webdriver.common.by import By from selenium.common.exceptions import ( - NoSuchElementException, TimeoutException, StaleElementReferenceException, WebDriverException @@ -75,6 +71,7 @@ class BaiduHotSearchSprder(scrapy.Spider): def start_requests(self): """发起初始请求""" self.logger.info(f"开始爬取百度热搜,任务ID: {self.job_id if self.job_id else 'N/A'}") + self.url_time = get_current_timestamp() yield SeleniumRequest( url=self.start_urls, callback=self.parse, @@ -284,10 +281,8 @@ class BaiduHotSearchSprder(scrapy.Spider): self.logger.warning("标题内容为空,跳过该项") return None - # 2. 初始化所有字段变量 - now_ms = int(time.time() * 1000) - # 基础字段 + now_ms = get_current_timestamp() site_name = '百度热搜' carrier_type = 'hot_search' hkey = get_str_md5(title) @@ -357,9 +352,9 @@ class BaiduHotSearchSprder(scrapy.Spider): hot_search_item['es_urltitle'] = title hot_search_item['es_urlcontent'] = desc hot_search_item['es_carriertype'] = carrier_type - hot_search_item['es_urltime'] = get_current_timestamp() - hot_search_item['es_lasttime'] = get_current_timestamp() - hot_search_item['es_loadtime'] = get_current_timestamp() + hot_search_item['es_urltime'] = self.url_time + hot_search_item['es_lasttime'] = now_ms + hot_search_item['es_loadtime'] = now_ms hot_search_item['es_hkey'] = hkey hot_search_item['es_simrank'] = rank hot_search_item['es_heat'] = heat