From 8c84df0fdc2b6c98a98000b71ce3987f428975ac Mon Sep 17 00:00:00 2001 From: DELL Date: Tue, 20 Jan 2026 17:23:31 +0800 Subject: [PATCH] =?UTF-8?q?[=E9=80=9A=E7=94=A8=E7=BF=BB=E8=AF=91]=20?= =?UTF-8?q?=E7=BF=BB=E8=AF=91=E5=90=8E=E6=A0=87=E9=A2=98=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- spiders/WebsiteSpider/WebsiteSpider/settings.py | 2 +- spiders/WebsiteSpider/WebsiteSpider/utils/parser_utils.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/spiders/WebsiteSpider/WebsiteSpider/settings.py b/spiders/WebsiteSpider/WebsiteSpider/settings.py index 975337e..58dc057 100644 --- a/spiders/WebsiteSpider/WebsiteSpider/settings.py +++ b/spiders/WebsiteSpider/WebsiteSpider/settings.py @@ -170,6 +170,6 @@ ITEM_PIPELINES = { ############################## 翻译 MAX_TEXT_LENGTH = 100 # 翻译 API 地址(替换为你的服务器 IP 或域名) -TRANSLATE_API_URL = "http://47.113.231.200:28081/translate" +TRANSLATE_API_URL = "http://47.113.231.200:28082/translate" # 单次请求间隔(秒),避免 API 被限流 REQUEST_DELAY = 1 diff --git a/spiders/WebsiteSpider/WebsiteSpider/utils/parser_utils.py b/spiders/WebsiteSpider/WebsiteSpider/utils/parser_utils.py index 041e510..4cf52ed 100644 --- a/spiders/WebsiteSpider/WebsiteSpider/utils/parser_utils.py +++ b/spiders/WebsiteSpider/WebsiteSpider/utils/parser_utils.py @@ -168,11 +168,11 @@ def parse_item_from_response(response, parse_rule, redis_client): ] if webpage_item['es_srcname'] in translate_list: # 翻译标题 - webpage_item['es_title'] = translate_single(webpage_item['es_urltitle']) - if webpage_item['es_title'] is None: + webpage_item['es_abstract'] = translate_single(webpage_item['es_urltitle']) + if webpage_item['es_abstract'] is None: logger.warning(" → 标题翻译失败,跳过整条") else: - logger.info(f"翻译成功,标题译文长度:{len(webpage_item['es_title'])}") + logger.info(f"翻译成功,标题译文长度:{len(webpage_item['es_abstract'])}") # 翻译内容(按段落,容错) webpage_item['es_content'] = translate_content_with_paragraphs(webpage_item['es_urlcontent']) logger.info(f"翻译成功,内容译文长度:{len(webpage_item['es_content'])}")