diff --git a/spiders/WebsiteSpider/WebsiteSpider/utils/parser_utils.py b/spiders/WebsiteSpider/WebsiteSpider/utils/parser_utils.py index 1ff5160..4cf52ed 100644 --- a/spiders/WebsiteSpider/WebsiteSpider/utils/parser_utils.py +++ b/spiders/WebsiteSpider/WebsiteSpider/utils/parser_utils.py @@ -167,26 +167,15 @@ def parse_item_from_response(response, parse_rule, redis_client): if url_bytes and url_bytes.decode('utf-8').strip() ] if webpage_item['es_srcname'] in translate_list: - # 标题内容 替换 - original_title = webpage_item['es_urltitle'] - original_content = webpage_item['es_urlcontent'] - # 翻译标题 - ranslated_title = translate_single(original_title) - if ranslated_title is None: + webpage_item['es_abstract'] = translate_single(webpage_item['es_urltitle']) + if webpage_item['es_abstract'] is None: logger.warning(" → 标题翻译失败,跳过整条") else: - logger.info(f"翻译成功,标题译文长度:{len(ranslated_title)}") + logger.info(f"翻译成功,标题译文长度:{len(webpage_item['es_abstract'])}") # 翻译内容(按段落,容错) - translated_content = translate_content_with_paragraphs(original_content) - logger.info(f"翻译成功,内容译文长度:{len(translated_content)}") - - # 当所有内容执行完成,则执行内容替换 - webpage_item['es_abstract'] = original_title # 原标题 - webpage_item['es_content'] = original_content # 原文 - webpage_item['es_urltitle'] = ranslated_title # 翻译标题 - webpage_item['es_urlcontent'] = translated_content # 译文 - + webpage_item['es_content'] = translate_content_with_paragraphs(webpage_item['es_urlcontent']) + logger.info(f"翻译成功,内容译文长度:{len(webpage_item['es_content'])}") except Exception as e: logger.error(repr(e))