From 4d3cb2381ad633170ec92bbf17f7c2295d41ce79 Mon Sep 17 00:00:00 2001 From: DELL Date: Wed, 21 Jan 2026 11:01:27 +0800 Subject: [PATCH] =?UTF-8?q?=E7=BF=BB=E8=AF=91=E6=A0=87=E9=A2=98=E4=B8=8E?= =?UTF-8?q?=E5=86=85=E5=AE=B9=E5=AD=97=E6=AE=B5=E6=9B=BF=E6=8D=A2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../WebsiteSpider/utils/parser_utils.py | 21 ++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/spiders/WebsiteSpider/WebsiteSpider/utils/parser_utils.py b/spiders/WebsiteSpider/WebsiteSpider/utils/parser_utils.py index 4cf52ed..1ff5160 100644 --- a/spiders/WebsiteSpider/WebsiteSpider/utils/parser_utils.py +++ b/spiders/WebsiteSpider/WebsiteSpider/utils/parser_utils.py @@ -167,15 +167,26 @@ def parse_item_from_response(response, parse_rule, redis_client): if url_bytes and url_bytes.decode('utf-8').strip() ] if webpage_item['es_srcname'] in translate_list: + # 标题内容 替换 + original_title = webpage_item['es_urltitle'] + original_content = webpage_item['es_urlcontent'] + # 翻译标题 - webpage_item['es_abstract'] = translate_single(webpage_item['es_urltitle']) - if webpage_item['es_abstract'] is None: + ranslated_title = translate_single(original_title) + if ranslated_title is None: logger.warning(" → 标题翻译失败,跳过整条") else: - logger.info(f"翻译成功,标题译文长度:{len(webpage_item['es_abstract'])}") + logger.info(f"翻译成功,标题译文长度:{len(ranslated_title)}") # 翻译内容(按段落,容错) - webpage_item['es_content'] = translate_content_with_paragraphs(webpage_item['es_urlcontent']) - logger.info(f"翻译成功,内容译文长度:{len(webpage_item['es_content'])}") + translated_content = translate_content_with_paragraphs(original_content) + logger.info(f"翻译成功,内容译文长度:{len(translated_content)}") + + # 当所有内容执行完成,则执行内容替换 + webpage_item['es_abstract'] = original_title # 原标题 + webpage_item['es_content'] = original_content # 原文 + webpage_item['es_urltitle'] = ranslated_title # 翻译标题 + webpage_item['es_urlcontent'] = translated_content # 译文 + except Exception as e: logger.error(repr(e))