From bf91c068018ff1cea9ae872c5527d0f3049b8760 Mon Sep 17 00:00:00 2001 From: DELL Date: Wed, 21 Jan 2026 15:39:07 +0800 Subject: [PATCH] =?UTF-8?q?=E7=BF=BB=E8=AF=91=E6=A0=87=E9=A2=98=E4=B8=8E?= =?UTF-8?q?=E5=86=85=E5=AE=B9=E5=AD=97=E6=AE=B5=E6=9B=BF=E6=8D=A2-?= =?UTF-8?q?=E5=9B=9E=E6=BB=9A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../WebsiteSpider/utils/parser_utils.py | 21 +++++-------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/spiders/WebsiteSpider/WebsiteSpider/utils/parser_utils.py b/spiders/WebsiteSpider/WebsiteSpider/utils/parser_utils.py index 1ff5160..4cf52ed 100644 --- a/spiders/WebsiteSpider/WebsiteSpider/utils/parser_utils.py +++ b/spiders/WebsiteSpider/WebsiteSpider/utils/parser_utils.py @@ -167,26 +167,15 @@ def parse_item_from_response(response, parse_rule, redis_client): if url_bytes and url_bytes.decode('utf-8').strip() ] if webpage_item['es_srcname'] in translate_list: - # 标题内容 替换 - original_title = webpage_item['es_urltitle'] - original_content = webpage_item['es_urlcontent'] - # 翻译标题 - ranslated_title = translate_single(original_title) - if ranslated_title is None: + webpage_item['es_abstract'] = translate_single(webpage_item['es_urltitle']) + if webpage_item['es_abstract'] is None: logger.warning(" → 标题翻译失败,跳过整条") else: - logger.info(f"翻译成功,标题译文长度:{len(ranslated_title)}") + logger.info(f"翻译成功,标题译文长度:{len(webpage_item['es_abstract'])}") # 翻译内容(按段落,容错) - translated_content = translate_content_with_paragraphs(original_content) - logger.info(f"翻译成功,内容译文长度:{len(translated_content)}") - - # 当所有内容执行完成,则执行内容替换 - webpage_item['es_abstract'] = original_title # 原标题 - webpage_item['es_content'] = original_content # 原文 - webpage_item['es_urltitle'] = ranslated_title # 翻译标题 - webpage_item['es_urlcontent'] = translated_content # 译文 - + webpage_item['es_content'] = translate_content_with_paragraphs(webpage_item['es_urlcontent']) + logger.info(f"翻译成功,内容译文长度:{len(webpage_item['es_content'])}") except Exception as e: logger.error(repr(e))