From 8f1999376f968a2f174b5363a70507281ea8e976 Mon Sep 17 00:00:00 2001
From: yuxin-pc
Date: Fri, 13 Jun 2025 09:40:52 +0800
Subject: [PATCH] Update settings.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Switch the config entries back to ZQ
---
 .../WebsiteSpider/WebsiteSpider/settings.py | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/spiders/WebsiteSpider/WebsiteSpider/settings.py b/spiders/WebsiteSpider/WebsiteSpider/settings.py
index 5d513fb..0f0b33e 100644
--- a/spiders/WebsiteSpider/WebsiteSpider/settings.py
+++ b/spiders/WebsiteSpider/WebsiteSpider/settings.py
@@ -13,10 +13,10 @@ SELENIUM_DRIVER_NAME = 'firefox'
 SELENIUM_DRIVER_EXECUTABLE_PATH = [
     'http://154.90.40.71:28095',
     'http://154.90.63.14:28095',
-    'http://156.244.20.57:28095',
-    # 'http://10.55.13.121:28095',
-    # 'http://10.55.13.108:28095',
-    # 'http://10.55.13.3:28095',
+    # 'http://156.244.20.57:28095',
+    'http://10.55.13.121:28095',
+    'http://10.55.13.108:28095',
+    'http://10.55.13.3:28095',
 ]
 SELENIUM_DRIVER_ARGUMENTS = ['-headless']  # '--headless' if using chrome instead of firefox
 SELENIUM_DRIVER_PREFERENCES = {
@@ -27,10 +27,10 @@ SELENIUM_DRIVER_PREFERENCES = {
 PROXY_SERVICE = 'http://107.182.191.3:6800'
 PER_BATCH_IP_USE_TIMES = 5  # The proxy middleware fetches a batch of ips from the ip pool at a time; this sets how many times that batch is used before a new batch is fetched from the pool
 
-REDIS_HOST = '38.54.94.107'
-REDIS_PORT = '28097'
-# REDIS_HOST = '10.55.13.3'
-# REDIS_PORT = '7379'
+# REDIS_HOST = '38.54.94.107'
+# REDIS_PORT = '28097'
+REDIS_HOST = '10.55.13.3'
+REDIS_PORT = '7379'
 REDIS_PWD = 'jlkj-841-2-redis'
 REDIS_PARAMS = {
     'password': 'jlkj-841-2-redis',
@@ -39,9 +39,9 @@ REDIS_PARAMS = {
 USCARRIERS_KEY = 'USCARRIERS_ID'
 ZIP_FILE_PATH = ''
 
-KAFKA_PROCESS_QUEUE = ['stream-protobuf']
-# KAFKA_PROCESS_QUEUE = ['stream-protobuf', 'stream-db']
-KAFKA_SERVER = '38.54.125.182:9092'
+# KAFKA_PROCESS_QUEUE = ['stream-protobuf']
+KAFKA_PROCESS_QUEUE = ['stream-protobuf', 'stream-db']
+KAFKA_SERVER = '47.113.231.200:9092'
 
 RANDOMIZE_DOWNLOAD_DELAY = True
 
@@ -61,7 +61,7 @@ CRAWL_DEEPTH = 3  # If the start url is a portal home page or a board home page, the depth should be at least
 
 # Configure maximum concurrent requests performed by Scrapy (default: 16)
 # Controls how many urls are crawled concurrently, i.e. the number of simultaneous requests; also caps how many urls scrapy-redis reads from the queue
-CONCURRENT_REQUESTS = 16
+CONCURRENT_REQUESTS = 12
 
 PROTO_MODULE_PATH = 'WebsiteSpider.proto.Es_pb2'
 PROTO_CLASS_NAME = 'EsSets'
@@ -81,9 +81,9 @@ IMG_ZIP_FILE_NAME = 'image_data_publicinfo_'
 MYEXT_ENABLED = True  # Enable the scrapy extension that closes the spider after an idle timeout
 IDLE_NUMBER = 36  # Allowed idle time: an idle counter ticks up once every 5 seconds, and the program closes when it reaches IDLE_NUMBER, so idle time = 5 * IDLE_NUMBER seconds
 
-CRAWL_JOB_UPDATE_API = 'http://38.54.94.107:28081/api/open/crawljob'
-WORD_BANK_QUERY_API = 'http://38.54.94.107:28081/api/open/wordBank/queryAll'
-CRAWL_RULE_QUERY_API = 'http://38.54.94.107:28081/api/open/target/website/queryAll'
+CRAWL_JOB_UPDATE_API = 'http://47.115.228.133:28081/api/open/crawljob'
+WORD_BANK_QUERY_API = 'http://47.115.228.133:28081/api/open/wordBank/queryAll'
+CRAWL_RULE_QUERY_API = 'http://47.115.228.133:28081/api/open/target/website/queryAll'
 
 BATCH_SAVE_SIZE = 5
 PROTO_SAVE_FILE_PATH = '/usr/local/spider_data'
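
A few hedged notes on the settings this patch touches.

The comment on PER_BATCH_IP_USE_TIMES describes a batching proxy rotation: the middleware pulls a batch of proxies from PROXY_SERVICE, hands them out for a fixed number of requests, then pulls a fresh batch. A sketch of that pattern; the class name, and in particular the '/get_ips' endpoint, are assumptions for illustration, since the real API of the ip pool at http://107.182.191.3:6800 is not shown in this patch:

    import requests

    class BatchProxyMiddleware:
        """Sketch of the rotation PER_BATCH_IP_USE_TIMES describes (assumed names)."""

        def __init__(self, proxy_service, use_times):
            self.proxy_service = proxy_service
            self.use_times = use_times  # PER_BATCH_IP_USE_TIMES
            self.batch = []
            self.used = 0

        @classmethod
        def from_crawler(cls, crawler):
            return cls(crawler.settings.get('PROXY_SERVICE'),
                       crawler.settings.getint('PER_BATCH_IP_USE_TIMES', 5))

        def _refresh(self):
            # '/get_ips' is a hypothetical endpoint returning a JSON list of
            # proxy urls; the actual ip-pool API is not part of this diff
            resp = requests.get(self.proxy_service + '/get_ips', timeout=10)
            self.batch = resp.json()
            self.used = 0

        def process_request(self, request, spider):
            # refetch a batch once it has been used PER_BATCH_IP_USE_TIMES times
            if not self.batch or self.used >= self.use_times:
                self._refresh()
            self.used += 1
            request.meta['proxy'] = self.batch[self.used % len(self.batch)]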
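The REDIS_HOST / REDIS_PORT / REDIS_PARAMS entries follow the usual scrapy-redis conventions, where the scheduler and dupefilter build their connection from these settings. A minimal sketch for sanity-checking the internal ZQ instance the patch switches to, using redis-py directly; host, port, and password are the values from the patch, everything else is an assumption:

    import redis

    # scrapy-redis would build its own connection from the same settings
    r = redis.Redis(host='10.55.13.3', port=7379,
                    password='jlkj-841-2-redis', socket_timeout=5)
    print(r.ping())  # True if the ZQ redis instance is reachable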
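With KAFKA_PROCESS_QUEUE switched back to both topics, crawled items presumably get produced to 'stream-protobuf' and 'stream-db' on the ZQ broker. A hedged sketch with kafka-python; only the broker address and topic names come from the patch, the fan-out and payload are assumptions:

    from kafka import KafkaProducer

    producer = KafkaProducer(bootstrap_servers='47.113.231.200:9092')
    for topic in ['stream-protobuf', 'stream-db']:
        # placeholder payload; the pipeline serializes items with the
        # protobuf class named by PROTO_MODULE_PATH / PROTO_CLASS_NAME
        producer.send(topic, b'serialized-item-bytes')
    producer.flush()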
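MYEXT_ENABLED and IDLE_NUMBER point at the common scrapy-redis idle-close pattern: an extension counts spider_idle signals, which Scrapy fires roughly every 5 seconds while the redis queue is empty, and closes the spider once the counter reaches IDLE_NUMBER. A minimal sketch under that assumption; the class name is made up, not the repo's actual extension:

    from scrapy import signals
    from scrapy.exceptions import NotConfigured

    class IdleCloseExtension:
        """Closes the spider after IDLE_NUMBER consecutive idle ticks."""

        def __init__(self, crawler, idle_number):
            self.crawler = crawler
            self.idle_number = idle_number
            self.idle_count = 0

        @classmethod
        def from_crawler(cls, crawler):
            if not crawler.settings.getbool('MYEXT_ENABLED'):
                raise NotConfigured
            ext = cls(crawler, crawler.settings.getint('IDLE_NUMBER', 36))
            crawler.signals.connect(ext.spider_idle, signal=signals.spider_idle)
            crawler.signals.connect(ext.item_scraped, signal=signals.item_scraped)
            return ext

        def spider_idle(self, spider):
            # fired about every 5s with no work queued, so the spider closes
            # after roughly 5 * IDLE_NUMBER seconds of continuous idleness
            self.idle_count += 1
            if self.idle_count >= self.idle_number:
                self.crawler.engine.close_spider(spider, 'closespider_idle')

        def item_scraped(self, item, spider):
            # any scraped item means the spider is not idle; reset the counter
            self.idle_count = 0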