[twitter]es_isrepost状态修改
This commit is contained in:
parent
5cc725fae9
commit
93a8ff5ef4
@@ -187,7 +187,7 @@ class TwitterSpider(scrapy.Spider):
|
||||
item['es_urltime'] = get_time_stamp(
|
||||
str(created_at)) + 8 * 3600 * 1000 # TW默认使用的是零时区,转换为北京时间
|
||||
if 'quoted_status_result' in result:
|
||||
item['es_isrepost'] = 1
|
||||
item['es_isrepost'] = '1'
|
||||
item['es_urltitle'] = author_full_text
|
||||
item['es_catalog1'] = author_full_text
|
||||
# 判断是否需要翻译
|
||||
@@ -198,15 +198,19 @@ class TwitterSpider(scrapy.Spider):
|
||||
legacy = result['quoted_status_result']['result']['legacy']
|
||||
self.logger.info('采集引用推文原文信息')
|
||||
elif 'retweeted_status_result' in legacy:
|
||||
item['es_isrepost'] = 1
|
||||
item['es_isrepost'] = '1'
|
||||
legacy = legacy['retweeted_status_result']['result']['legacy']
|
||||
self.logger.info('采集转发推文原文信息')
|
||||
else:
|
||||
item['es_isrepost'] = '0'
|
||||
self.logger.info('采集原文信息')
|
||||
|
||||
item['es_urlcontent'] = legacy['full_text']
|
||||
# 获取文本
|
||||
url_content = legacy['full_text']
|
||||
# 判断是否需要翻译
|
||||
if needs_translation(url_content):
|
||||
item['es_content'] = translate_content_with_paragraphs(url_content) # TODO 翻译
|
||||
item['es_content'] = translate_content_with_paragraphs(url_content)
|
||||
else:
|
||||
item['es_content'] = ''
|
||||
# 下载图片
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user