Update settings.py
Switch config items back to ZQ
parent e2da209f39
commit 8f1999376f
@@ -13,10 +13,10 @@ SELENIUM_DRIVER_NAME = 'firefox'
 SELENIUM_DRIVER_EXECUTABLE_PATH = [
     'http://154.90.40.71:28095',
     'http://154.90.63.14:28095',
-    'http://156.244.20.57:28095',
-    # 'http://10.55.13.121:28095',
-    # 'http://10.55.13.108:28095',
-    # 'http://10.55.13.3:28095',
+    # 'http://156.244.20.57:28095',
+    'http://10.55.13.121:28095',
+    'http://10.55.13.108:28095',
+    'http://10.55.13.3:28095',
 ]
 SELENIUM_DRIVER_ARGUMENTS = ['-headless']  # '--headless' if using chrome instead of firefox
 SELENIUM_DRIVER_PREFERENCES = {
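The entries in SELENIUM_DRIVER_EXECUTABLE_PATH are http URLs rather than local binary paths, which suggests remote WebDriver endpoints. A minimal sketch of how a middleware might consume the list, assuming the entries really are remote Selenium nodes; the function name and the random selection strategy are illustrative only, not the project's actual middleware.

```python
import random

from selenium import webdriver
from selenium.webdriver.firefox.options import Options as FirefoxOptions

# Assumed: these URLs point at remote WebDriver endpoints (not local binaries).
SELENIUM_DRIVER_EXECUTABLE_PATH = [
    'http://10.55.13.121:28095',
    'http://10.55.13.108:28095',
    'http://10.55.13.3:28095',
]
SELENIUM_DRIVER_ARGUMENTS = ['-headless']


def open_remote_firefox():
    """Pick one endpoint at random and start a headless Firefox session on it."""
    endpoint = random.choice(SELENIUM_DRIVER_EXECUTABLE_PATH)
    options = FirefoxOptions()
    for arg in SELENIUM_DRIVER_ARGUMENTS:
        options.add_argument(arg)
    # Selenium 4 remote driver: command_executor is the remote node URL.
    return webdriver.Remote(command_executor=endpoint, options=options)
```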
@@ -27,10 +27,10 @@ SELENIUM_DRIVER_PREFERENCES = {
 PROXY_SERVICE = 'http://107.182.191.3:6800'
 PER_BATCH_IP_USE_TIMES = 5  # the proxy middleware fetches a batch of IPs from the pool each time; this sets how many times the batch is used before a fresh batch is fetched
 
-REDIS_HOST = '38.54.94.107'
-REDIS_PORT = '28097'
-# REDIS_HOST = '10.55.13.3'
-# REDIS_PORT = '7379'
+# REDIS_HOST = '38.54.94.107'
+# REDIS_PORT = '28097'
+REDIS_HOST = '10.55.13.3'
+REDIS_PORT = '7379'
 REDIS_PWD = 'jlkj-841-2-redis'
 REDIS_PARAMS = {
     'password': 'jlkj-841-2-redis',
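A minimal sketch of the connection these Redis settings describe, using redis-py directly; scrapy-redis builds its client from REDIS_HOST / REDIS_PORT / REDIS_PARAMS in much the same way. The ping check is only illustrative.

```python
import redis

REDIS_HOST = '10.55.13.3'
REDIS_PORT = '7379'
REDIS_PARAMS = {'password': 'jlkj-841-2-redis'}

# REDIS_PORT is stored as a string in settings.py, so cast it here.
client = redis.Redis(host=REDIS_HOST, port=int(REDIS_PORT), **REDIS_PARAMS)
print(client.ping())  # True once the crawler host can reach the new instance
```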
@@ -39,9 +39,9 @@ REDIS_PARAMS = {
 USCARRIERS_KEY = 'USCARRIERS_ID'
 
 ZIP_FILE_PATH = ''
-KAFKA_PROCESS_QUEUE = ['stream-protobuf']
-# KAFKA_PROCESS_QUEUE = ['stream-protobuf', 'stream-db']
-KAFKA_SERVER = '38.54.125.182:9092'
+# KAFKA_PROCESS_QUEUE = ['stream-protobuf']
+KAFKA_PROCESS_QUEUE = ['stream-protobuf', 'stream-db']
+KAFKA_SERVER = '47.113.231.200:9092'
 
 RANDOMIZE_DOWNLOAD_DELAY = True
 
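A minimal sketch of what the new Kafka settings imply: the pipeline would now publish to both topics on the new broker. kafka-python is assumed here; the project may use a different client, and the payload below is a placeholder.

```python
from kafka import KafkaProducer

KAFKA_SERVER = '47.113.231.200:9092'
KAFKA_PROCESS_QUEUE = ['stream-protobuf', 'stream-db']

producer = KafkaProducer(bootstrap_servers=KAFKA_SERVER)
for topic in KAFKA_PROCESS_QUEUE:
    # The real pipeline would send serialized protobuf / DB payloads here.
    producer.send(topic, value=b'placeholder-record')
producer.flush()
```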
@@ -61,7 +61,7 @@ CRAWL_DEEPTH = 3  # if the start url is a portal homepage or a section homepage, the depth should be at least
 
 # Configure maximum concurrent requests performed by Scrapy (default: 16)
 # controls the number of concurrent fetches, i.e. how many urls are requested at once; also caps how many urls scrapy-redis reads from the queue
-CONCURRENT_REQUESTS = 16
+CONCURRENT_REQUESTS = 12
 
 PROTO_MODULE_PATH = 'WebsiteSpider.proto.Es_pb2'
 PROTO_CLASS_NAME = 'EsSets'
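PROTO_MODULE_PATH and PROTO_CLASS_NAME point at a generated protobuf module. A sketch of the usual dynamic-import pattern these two settings enable, assuming the generated Es_pb2 module is importable on the crawler host; it illustrates the lookup, not the real schema.

```python
import importlib

PROTO_MODULE_PATH = 'WebsiteSpider.proto.Es_pb2'
PROTO_CLASS_NAME = 'EsSets'


def load_proto_class(module_path=PROTO_MODULE_PATH, class_name=PROTO_CLASS_NAME):
    """Import the generated protobuf module and return the message class."""
    module = importlib.import_module(module_path)
    return getattr(module, class_name)

# EsSets = load_proto_class()  # works only where the WebsiteSpider package is importable
```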
@@ -81,9 +81,9 @@ IMG_ZIP_FILE_NAME = 'image_data_publicinfo_'
 MYEXT_ENABLED = True  # enable the idle-timeout shutdown extension for Scrapy
 IDLE_NUMBER = 36  # allowed idle duration: the idle counter is bumped once every 5 seconds and the program only closes once it reaches the limit; idle time = 5 * IDLE_NUMBER seconds
 
-CRAWL_JOB_UPDATE_API = 'http://38.54.94.107:28081/api/open/crawljob'
-WORD_BANK_QUERY_API = 'http://38.54.94.107:28081/api/open/wordBank/queryAll'
-CRAWL_RULE_QUERY_API = 'http://38.54.94.107:28081/api/open/target/website/queryAll'
+CRAWL_JOB_UPDATE_API = 'http://47.115.228.133:28081/api/open/crawljob'
+WORD_BANK_QUERY_API = 'http://47.115.228.133:28081/api/open/wordBank/queryAll'
+CRAWL_RULE_QUERY_API = 'http://47.115.228.133:28081/api/open/target/website/queryAll'
 
 BATCH_SAVE_SIZE = 5
 PROTO_SAVE_FILE_PATH = '/usr/local/spider_data'
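MYEXT_ENABLED and IDLE_NUMBER typically drive a scrapy-redis style idle-close extension. A sketch of that pattern under the behavior described in the comment (count spider_idle signals, close after IDLE_NUMBER of them); the project's actual extension is not part of this diff, so class and reason names here are illustrative.

```python
from scrapy import signals
from scrapy.exceptions import NotConfigured


class IdleCloseExtension:
    """Close the spider after IDLE_NUMBER consecutive idle signals (~5 s apart)."""

    def __init__(self, idle_number, crawler):
        self.idle_number = idle_number
        self.idle_count = 0
        self.crawler = crawler

    @classmethod
    def from_crawler(cls, crawler):
        if not crawler.settings.getbool('MYEXT_ENABLED'):
            raise NotConfigured
        ext = cls(crawler.settings.getint('IDLE_NUMBER', 36), crawler)
        crawler.signals.connect(ext.spider_idle, signal=signals.spider_idle)
        return ext

    def spider_idle(self, spider):
        # Scrapy fires spider_idle roughly every 5 seconds while the queue is empty.
        self.idle_count += 1
        if self.idle_count >= self.idle_number:
            self.crawler.engine.close_spider(spider, 'idle_timeout')
```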