From 3fdd2f5473a3beb1dae7566064a5baff4a719286 Mon Sep 17 00:00:00 2001 From: DELL Date: Wed, 28 Jan 2026 15:27:33 +0800 Subject: [PATCH] =?UTF-8?q?[twitter]=20=E6=8E=A8=E6=96=87=E9=87=87?= =?UTF-8?q?=E9=9B=86=E5=AD=97=E6=AE=B5=E5=A2=9E=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../MediaSpiders/spiders/TwitterUserSpider.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/spiders/MediaSpiders/MediaSpiders/spiders/TwitterUserSpider.py b/spiders/MediaSpiders/MediaSpiders/spiders/TwitterUserSpider.py index 4b1102f..d11ffcc 100644 --- a/spiders/MediaSpiders/MediaSpiders/spiders/TwitterUserSpider.py +++ b/spiders/MediaSpiders/MediaSpiders/spiders/TwitterUserSpider.py @@ -176,12 +176,25 @@ class TwitterSpider(scrapy.Spider): item['es_urlname'] = 'https://x.com/%s/status/%s' % (screen_name, result['rest_id']) item['es_authors'] = screen_name item['es_extname'] = user_name + + device_html = result['source'] + device_type = re.search(r'>([^<]+)', device_html).group(1) + legacy = result['legacy'] author_full_text = legacy['full_text'] created_at = legacy['created_at'] + # 评论、转发、点赞数量 item['es_commentcount'] = legacy['reply_count'] item['es_forwardcount'] = legacy['retweet_count'] item['es_likecount'] = legacy['favorite_count'] + # 评论+ 转发+ 点赞数量 TODO + interaction_count = legacy['reply_count'] + legacy['retweet_count'] + legacy['favorite_count'] + # 语种 + lang = legacy['lang'] + # 推文话题 、 提及 + topic = legacy['entities']['hashtags'] + mentions = legacy['entities']['user_mentions'] + item['es_lasttime'] = get_current_timestamp() item['es_loadtime'] = get_current_timestamp() item['es_urltime'] = get_time_stamp( @@ -196,13 +209,16 @@ class TwitterSpider(scrapy.Spider): else: item['es_catalog2'] = '' legacy = result['quoted_status_result']['result']['legacy'] + original_tweet = result['quoted_status_result']['result']['rest_id'] self.logger.info('采集引用推文原文信息') elif 'retweeted_status_result' in legacy: item['es_isrepost'] = '1' legacy = legacy['retweeted_status_result']['result']['legacy'] + original_tweet = result['retweeted_status_result']['result']['rest_id'] self.logger.info('采集转发推文原文信息') else: item['es_isrepost'] = '0' + original_tweet = '' self.logger.info('采集原文信息') item['es_urlcontent'] = legacy['full_text']