Compare commits

..

No commits in common. "4d3cb2381ad633170ec92bbf17f7c2295d41ce79" and "8c84df0fdc2b6c98a98000b71ce3987f428975ac" have entirely different histories.

2 changed files with 7 additions and 18 deletions

View File

@@ -185,13 +185,13 @@ class TwitterSpider(scrapy.Spider):
item['es_urltime'] = get_time_stamp( item['es_urltime'] = get_time_stamp(
str(created_at)) + 8 * 3600 * 1000 # TW默认使用的是零时区转换为北京时间 str(created_at)) + 8 * 3600 * 1000 # TW默认使用的是零时区转换为北京时间
if 'quoted_status_result' in result: if 'quoted_status_result' in result:
item['es_isrepost'] = 1 item['es_isrepost'] = 'yes'
item['es_urltitle'] = author_full_text item['es_urltitle'] = author_full_text
item['es_catalog1'] = author_full_text item['es_catalog1'] = author_full_text
legacy = result['quoted_status_result']['result']['legacy'] legacy = result['quoted_status_result']['result']['legacy']
self.logger.info('采集引用推文原文信息') self.logger.info('采集引用推文原文信息')
elif 'retweeted_status_result' in legacy: elif 'retweeted_status_result' in legacy:
item['es_isrepost'] = 1 item['es_isrepost'] = 'yes'
legacy = legacy['retweeted_status_result']['result']['legacy'] legacy = legacy['retweeted_status_result']['result']['legacy']
self.logger.info('采集转发推文原文信息') self.logger.info('采集转发推文原文信息')
item['es_content'] = legacy['full_text'] item['es_content'] = legacy['full_text']

View File

@@ -167,26 +167,15 @@ def parse_item_from_response(response, parse_rule, redis_client):
if url_bytes and url_bytes.decode('utf-8').strip() if url_bytes and url_bytes.decode('utf-8').strip()
] ]
if webpage_item['es_srcname'] in translate_list: if webpage_item['es_srcname'] in translate_list:
# 标题内容 替换
original_title = webpage_item['es_urltitle']
original_content = webpage_item['es_urlcontent']
# 翻译标题 # 翻译标题
ranslated_title = translate_single(original_title) webpage_item['es_abstract'] = translate_single(webpage_item['es_urltitle'])
if ranslated_title is None: if webpage_item['es_abstract'] is None:
logger.warning(" → 标题翻译失败,跳过整条") logger.warning(" → 标题翻译失败,跳过整条")
else: else:
logger.info(f"翻译成功,标题译文长度:{len(ranslated_title)}") logger.info(f"翻译成功,标题译文长度:{len(webpage_item['es_abstract'])}")
# 翻译内容(按段落,容错) # 翻译内容(按段落,容错)
translated_content = translate_content_with_paragraphs(original_content) webpage_item['es_content'] = translate_content_with_paragraphs(webpage_item['es_urlcontent'])
logger.info(f"翻译成功,内容译文长度:{len(translated_content)}") logger.info(f"翻译成功,内容译文长度:{len(webpage_item['es_content'])}")
# 当所有内容执行完成,则执行内容替换
webpage_item['es_abstract'] = original_title # 原标题
webpage_item['es_content'] = original_content # 原文
webpage_item['es_urltitle'] = ranslated_title # 翻译标题
webpage_item['es_urlcontent'] = translated_content # 译文
except Exception as e: except Exception as e:
logger.error(repr(e)) logger.error(repr(e))