[twitter]es_isrepost状态修改

This commit is contained in:
DELL 2026-01-26 16:53:51 +08:00
parent 5cc725fae9
commit 93a8ff5ef4

View File

@@ -187,7 +187,7 @@ class TwitterSpider(scrapy.Spider):
item['es_urltime'] = get_time_stamp(
str(created_at)) + 8 * 3600 * 1000 # TW默认使用的是零时区转换为北京时间
if 'quoted_status_result' in result:
item['es_isrepost'] = 1
item['es_isrepost'] = '1'
item['es_urltitle'] = author_full_text
item['es_catalog1'] = author_full_text
# 判断是否需要翻译
@@ -198,15 +198,19 @@ class TwitterSpider(scrapy.Spider):
legacy = result['quoted_status_result']['result']['legacy']
self.logger.info('采集引用推文原文信息')
elif 'retweeted_status_result' in legacy:
item['es_isrepost'] = 1
item['es_isrepost'] = '1'
legacy = legacy['retweeted_status_result']['result']['legacy']
self.logger.info('采集转发推文原文信息')
else:
item['es_isrepost'] = '0'
self.logger.info('采集原文信息')
item['es_urlcontent'] = legacy['full_text']
# 获取文本
url_content = legacy['full_text']
# 判断是否需要翻译
if needs_translation(url_content):
item['es_content'] = translate_content_with_paragraphs(url_content) # TODO 翻译
item['es_content'] = translate_content_with_paragraphs(url_content)
else:
item['es_content'] = ''
# 下载图片