[baidu] 百度热搜信息采集优化
This commit is contained in:
parent
2e9654966b
commit
b5384f8bcd
@ -293,7 +293,7 @@ class BaiduHotSearchSprder(scrapy.Spider):
|
|||||||
hkey = get_str_md5(title)
|
hkey = get_str_md5(title)
|
||||||
|
|
||||||
# 排名(默认使用当前收集序号)
|
# 排名(默认使用当前收集序号)
|
||||||
rank = str(self.collected_items + 1)
|
rank = str(self.collected_items)
|
||||||
rank_elem = find_visible_element(RANK_SELECTORS)
|
rank_elem = find_visible_element(RANK_SELECTORS)
|
||||||
if rank_elem:
|
if rank_elem:
|
||||||
extracted_rank = clean_text(rank_elem.text)
|
extracted_rank = clean_text(rank_elem.text)
|
||||||
@ -359,6 +359,7 @@ class BaiduHotSearchSprder(scrapy.Spider):
|
|||||||
hot_search_item['es_carriertype'] = carrier_type
|
hot_search_item['es_carriertype'] = carrier_type
|
||||||
hot_search_item['es_urltime'] = get_current_timestamp()
|
hot_search_item['es_urltime'] = get_current_timestamp()
|
||||||
hot_search_item['es_lasttime'] = get_current_timestamp()
|
hot_search_item['es_lasttime'] = get_current_timestamp()
|
||||||
|
hot_search_item['es_loadtime'] = get_current_timestamp()
|
||||||
hot_search_item['es_hkey'] = hkey
|
hot_search_item['es_hkey'] = hkey
|
||||||
hot_search_item['es_simrank'] = rank
|
hot_search_item['es_simrank'] = rank
|
||||||
hot_search_item['es_heat'] = heat
|
hot_search_item['es_heat'] = heat
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user