From 93a8ff5ef4345c76eb02115d7ed337889a19f06e Mon Sep 17 00:00:00 2001 From: DELL Date: Mon, 26 Jan 2026 16:53:51 +0800 Subject: [PATCH] =?UTF-8?q?[twitter]es=5Fisrepost=E7=8A=B6=E6=80=81?= =?UTF-8?q?=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../MediaSpiders/spiders/TwitterUserSpider.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/spiders/MediaSpiders/MediaSpiders/spiders/TwitterUserSpider.py b/spiders/MediaSpiders/MediaSpiders/spiders/TwitterUserSpider.py index 83797b3..4b1102f 100644 --- a/spiders/MediaSpiders/MediaSpiders/spiders/TwitterUserSpider.py +++ b/spiders/MediaSpiders/MediaSpiders/spiders/TwitterUserSpider.py @@ -187,7 +187,7 @@ class TwitterSpider(scrapy.Spider): item['es_urltime'] = get_time_stamp( str(created_at)) + 8 * 3600 * 1000 # TW默认使用的是零时区,转换为北京时间 if 'quoted_status_result' in result: - item['es_isrepost'] = 1 + item['es_isrepost'] = '1' item['es_urltitle'] = author_full_text item['es_catalog1'] = author_full_text # 判断是否需要翻译 @@ -198,15 +198,19 @@ class TwitterSpider(scrapy.Spider): legacy = result['quoted_status_result']['result']['legacy'] self.logger.info('采集引用推文原文信息') elif 'retweeted_status_result' in legacy: - item['es_isrepost'] = 1 + item['es_isrepost'] = '1' legacy = legacy['retweeted_status_result']['result']['legacy'] self.logger.info('采集转发推文原文信息') + else: + item['es_isrepost'] = '0' + self.logger.info('采集原文信息') + item['es_urlcontent'] = legacy['full_text'] # 获取文本 url_content = legacy['full_text'] # 判断是否需要翻译 if needs_translation(url_content): - item['es_content'] = translate_content_with_paragraphs(url_content) # TODO 翻译 + item['es_content'] = translate_content_with_paragraphs(url_content) else: item['es_content'] = '' # 下载图片