[twitter] 推文采集字段增加
This commit is contained in:
parent
d023703622
commit
3fdd2f5473
@ -176,12 +176,25 @@ class TwitterSpider(scrapy.Spider):
|
||||
item['es_urlname'] = 'https://x.com/%s/status/%s' % (screen_name, result['rest_id'])
|
||||
item['es_authors'] = screen_name
|
||||
item['es_extname'] = user_name
|
||||
|
||||
device_html = result['source']
|
||||
device_type = re.search(r'>([^<]+)</a>', device_html).group(1)
|
||||
|
||||
legacy = result['legacy']
|
||||
author_full_text = legacy['full_text']
|
||||
created_at = legacy['created_at']
|
||||
# Reply, retweet, and like counts
|
||||
item['es_commentcount'] = legacy['reply_count']
|
||||
item['es_forwardcount'] = legacy['retweet_count']
|
||||
item['es_likecount'] = legacy['favorite_count']
|
||||
# Total interaction count: replies + retweets + likes (TODO)
|
||||
interaction_count = legacy['reply_count'] + legacy['retweet_count'] + legacy['favorite_count']
|
||||
# Tweet language
|
||||
lang = legacy['lang']
|
||||
# Tweet hashtags and user mentions
|
||||
topic = legacy['entities']['hashtags']
|
||||
mentions = legacy['entities']['user_mentions']
|
||||
|
||||
item['es_lasttime'] = get_current_timestamp()
|
||||
item['es_loadtime'] = get_current_timestamp()
|
||||
item['es_urltime'] = get_time_stamp(
|
||||
@ -196,13 +209,16 @@ class TwitterSpider(scrapy.Spider):
|
||||
else:
|
||||
item['es_catalog2'] = ''
|
||||
legacy = result['quoted_status_result']['result']['legacy']
|
||||
original_tweet = result['quoted_status_result']['result']['rest_id']
|
||||
self.logger.info('采集引用推文原文信息')
|
||||
elif 'retweeted_status_result' in legacy:
|
||||
item['es_isrepost'] = '1'
|
||||
legacy = legacy['retweeted_status_result']['result']['legacy']
|
||||
original_tweet = result['retweeted_status_result']['result']['rest_id']
|
||||
self.logger.info('采集转发推文原文信息')
|
||||
else:
|
||||
item['es_isrepost'] = '0'
|
||||
original_tweet = ''
|
||||
self.logger.info('采集原文信息')
|
||||
|
||||
item['es_urlcontent'] = legacy['full_text']
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user