diff --git a/spiders/WebsiteSpider/WebsiteSpider/utils/parser_utils.py b/spiders/WebsiteSpider/WebsiteSpider/utils/parser_utils.py index 4cf52ed..1ff5160 100644 --- a/spiders/WebsiteSpider/WebsiteSpider/utils/parser_utils.py +++ b/spiders/WebsiteSpider/WebsiteSpider/utils/parser_utils.py @@ -167,15 +167,26 @@ def parse_item_from_response(response, parse_rule, redis_client): if url_bytes and url_bytes.decode('utf-8').strip() ] if webpage_item['es_srcname'] in translate_list: + # 标题内容 替换 + original_title = webpage_item['es_urltitle'] + original_content = webpage_item['es_urlcontent'] + # 翻译标题 - webpage_item['es_abstract'] = translate_single(webpage_item['es_urltitle']) - if webpage_item['es_abstract'] is None: + ranslated_title = translate_single(original_title) + if ranslated_title is None: logger.warning(" → 标题翻译失败,跳过整条") else: - logger.info(f"翻译成功,标题译文长度:{len(webpage_item['es_abstract'])}") + logger.info(f"翻译成功,标题译文长度:{len(ranslated_title)}") # 翻译内容(按段落,容错) - webpage_item['es_content'] = translate_content_with_paragraphs(webpage_item['es_urlcontent']) - logger.info(f"翻译成功,内容译文长度:{len(webpage_item['es_content'])}") + translated_content = translate_content_with_paragraphs(original_content) + logger.info(f"翻译成功,内容译文长度:{len(translated_content)}") + + # 当所有内容执行完成,则执行内容替换 + webpage_item['es_abstract'] = original_title # 原标题 + webpage_item['es_content'] = original_content # 原文 + webpage_item['es_urltitle'] = ranslated_title # 翻译标题 + webpage_item['es_urlcontent'] = translated_content # 译文 + except Exception as e: logger.error(repr(e))