Compare commits

...

2 Commits

Author SHA1 Message Date
DELL
4d3cb2381a 翻译标题与内容字段替换 2026-01-21 11:01:27 +08:00
DELL
073f4325d0 es_isrepost 赋值修改为 1 2026-01-21 10:04:57 +08:00
2 changed files with 18 additions and 7 deletions

View File

@@ -185,13 +185,13 @@ class TwitterSpider(scrapy.Spider):
item['es_urltime'] = get_time_stamp(
str(created_at)) + 8 * 3600 * 1000 # TW默认使用的是零时区转换为北京时间
if 'quoted_status_result' in result:
item['es_isrepost'] = 'yes'
item['es_isrepost'] = 1
item['es_urltitle'] = author_full_text
item['es_catalog1'] = author_full_text
legacy = result['quoted_status_result']['result']['legacy']
self.logger.info('采集引用推文原文信息')
elif 'retweeted_status_result' in legacy:
item['es_isrepost'] = 'yes'
item['es_isrepost'] = 1
legacy = legacy['retweeted_status_result']['result']['legacy']
self.logger.info('采集转发推文原文信息')
item['es_content'] = legacy['full_text']

View File

@@ -167,15 +167,26 @@ def parse_item_from_response(response, parse_rule, redis_client):
if url_bytes and url_bytes.decode('utf-8').strip()
]
if webpage_item['es_srcname'] in translate_list:
# 标题内容 替换
original_title = webpage_item['es_urltitle']
original_content = webpage_item['es_urlcontent']
# 翻译标题
webpage_item['es_abstract'] = translate_single(webpage_item['es_urltitle'])
if webpage_item['es_abstract'] is None:
ranslated_title = translate_single(original_title)
if ranslated_title is None:
logger.warning(" → 标题翻译失败,跳过整条")
else:
logger.info(f"翻译成功,标题译文长度:{len(webpage_item['es_abstract'])}")
logger.info(f"翻译成功,标题译文长度:{len(ranslated_title)}")
# 翻译内容(按段落,容错)
webpage_item['es_content'] = translate_content_with_paragraphs(webpage_item['es_urlcontent'])
logger.info(f"翻译成功,内容译文长度:{len(webpage_item['es_content'])}")
translated_content = translate_content_with_paragraphs(original_content)
logger.info(f"翻译成功,内容译文长度:{len(translated_content)}")
# 当所有内容执行完成,则执行内容替换
webpage_item['es_abstract'] = original_title # 原标题
webpage_item['es_content'] = original_content # 原文
webpage_item['es_urltitle'] = ranslated_title # 翻译标题
webpage_item['es_urlcontent'] = translated_content # 译文
except Exception as e:
logger.error(repr(e))