init
This commit is contained in:
parent
572c3c8e76
commit
cf4a6e2854
10
deploy/ProxyPool/.travis.yml
Normal file
10
deploy/ProxyPool/.travis.yml
Normal file
@ -0,0 +1,10 @@
|
||||
language: python
|
||||
python:
|
||||
- 2.7
|
||||
# - nightly
|
||||
os:
|
||||
- linux
|
||||
install:
|
||||
- pip install -r requirements.txt
|
||||
|
||||
script: python test.py
|
||||
150
deploy/ProxyPool/Api/ProxyApi.py
Normal file
150
deploy/ProxyPool/Api/ProxyApi.py
Normal file
@ -0,0 +1,150 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# !/usr/bin/env python
|
||||
"""
|
||||
-------------------------------------------------
|
||||
File Name: ProxyApi.py
|
||||
Description : WebApi
|
||||
Author : JHao
|
||||
date: 2016/12/4
|
||||
-------------------------------------------------
|
||||
Change Activity:
|
||||
2016/12/04: WebApi
|
||||
2019/08/14: 集成Gunicorn启动方式
|
||||
-------------------------------------------------
|
||||
"""
|
||||
__author__ = 'JHao'
|
||||
|
||||
import json
|
||||
import sys
|
||||
import platform
|
||||
import requests
|
||||
from werkzeug.wrappers import Response
|
||||
from flask import Flask, jsonify, request
|
||||
|
||||
sys.path.append('../')
|
||||
|
||||
from Config.ConfigGetter import config
|
||||
from Manager.ProxyManager import ProxyManager
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
|
||||
class JsonResponse(Response):
    """Response class that turns dict/list view results into JSON responses."""

    @classmethod
    def force_type(cls, response, environ=None):
        """
        Convert a view return value into a response object.
        :param response: value returned by a view; dicts and lists are passed through jsonify first
        :param environ: WSGI environment, forwarded unchanged to the parent implementation
        :return: whatever the parent force_type returns
        """
        needs_json = isinstance(response, dict) or isinstance(response, list)
        payload = jsonify(response) if needs_json else response
        return super(JsonResponse, cls).force_type(payload, environ)


# Make the Flask app use the JSON-aware response class.
app.response_class = JsonResponse
|
||||
|
||||
# Overview served by the index route: endpoint -> short description.
api_list = dict([
    ('get', u'get an useful proxy'),
    # ('refresh', u'refresh proxy pool'),
    ('get_all', u'get all proxy from proxy pool'),
    ('delete?proxy=127.0.0.1:8080', u'delete an unable proxy'),
    ('get_status', u'proxy number'),
])
|
||||
|
||||
|
||||
@app.route('/')
def index():
    """Return the endpoint overview held in api_list."""
    overview = api_list
    return overview
|
||||
|
||||
|
||||
@app.route('/get/')
def get():
    """Return the JSON description of one proxy, or a "no proxy" payload when none is available."""
    picked = ProxyManager().get()
    if not picked:
        return {"code": 0, "src": "no proxy"}
    return picked.info_json
|
||||
|
||||
|
||||
@app.route('/refresh/')
def refresh():
    """Placeholder endpoint: performs no work and always answers 'success'."""
    # TODO: refresh is executed periodically by a daemon; triggering it straight
    # from the API performs poorly, so it is disabled for now.
    # ProxyManager().refresh()
    return 'success'
|
||||
|
||||
|
||||
@app.route('/get_all/')
def getAll():
    """Return the info_dict of every proxy reported by ProxyManager.getAll() as a JSON array."""
    payload = []
    for entry in ProxyManager().getAll():
        payload.append(entry.info_dict)
    return jsonify(payload)
|
||||
|
||||
|
||||
@app.route('/delete/', methods=['GET'])
def delete():
    """
    Delete the proxy named by the ``proxy`` query parameter from the pool.
    :return: {"code": 0, "src": "success"} after the delete call; a 400 response with
             {"code": 1, ...} when the ``proxy`` parameter is missing or empty
    """
    proxy = request.args.get('proxy')
    if not proxy:
        # Without this guard None is handed to the DB layer, which has no key to delete.
        return {"code": 1, "src": "missing proxy parameter"}, 400
    ProxyManager().delete(proxy)
    return {"code": 0, "src": "success"}
|
||||
|
||||
|
||||
@app.route('/get_status/')
def getStatus():
    """Return the proxy counts reported by ProxyManager.getNumber()."""
    return ProxyManager().getNumber()
|
||||
|
||||
|
||||
@app.route('/get_balance')
def getBalance():
    """
    Query the proxy provider for the account balance.
    :return: the provider's decoded JSON, with ``balance_status`` set to 'ok' when
             data.balance is positive; on any failure, the repr() of the exception
    """
    # NOTE(review): the appkey is embedded in source; it should come from configuration.
    balance_url = ('http://big_customer.willmam.com/index/index/get_my_balance'
                   '?neek=112361&appkey=d2f6393b46afab108b038ab5b95f45d6')
    try:
        # A timeout keeps an unresponsive provider from blocking the worker indefinitely.
        rsp = requests.get(balance_url, timeout=10)
        rsp_json = json.loads(rsp.content.decode())
        if rsp_json['data']['balance'] > 0:
            rsp_json['balance_status'] = 'ok'
        return rsp_json
    except Exception as e:
        # API boundary: report the failure text to the caller instead of raising.
        return repr(e)
|
||||
|
||||
|
||||
if platform.system() != "Windows":
    import gunicorn.app.base
    from six import iteritems


    class StandaloneApplication(gunicorn.app.base.BaseApplication):
        """Gunicorn application wrapper that serves an in-process WSGI app."""

        def __init__(self, app, options=None):
            """
            :param app: WSGI application to serve
            :param options: dict of gunicorn settings; None means no overrides
            """
            self.options = options or {}
            self.application = app
            super(StandaloneApplication, self).__init__()

        def load_config(self):
            """Apply every option that gunicorn knows and whose value is not None."""
            for name, setting in iteritems(self.options):
                if name not in self.cfg.settings or setting is None:
                    continue
                self.cfg.set(name.lower(), setting)

        def load(self):
            """Return the wrapped WSGI application."""
            return self.application
|
||||
|
||||
|
||||
def runFlask():
    """Start the app with Flask's built-in server on the configured host and port."""
    listen_host = config.host_ip
    listen_port = config.host_port
    app.run(host=listen_host, port=listen_port)
|
||||
|
||||
|
||||
def runFlaskWithGunicorn():
    """Start the app under gunicorn with four workers, logging access lines to stdout."""
    bind_address = '%s:%s' % (config.host_ip, config.host_port)
    gunicorn_options = dict(
        bind=bind_address,
        workers=4,
        accesslog='-',  # log to stdout
        access_log_format='%(h)s %(l)s %(t)s "%(r)s" %(s)s "%(a)s"',
    )
    StandaloneApplication(app, gunicorn_options).run()
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # StandaloneApplication is only defined off Windows, so Windows uses the Flask server.
    on_windows = platform.system() == "Windows"
    launcher = runFlask if on_windows else runFlaskWithGunicorn
    launcher()
|
||||
14
deploy/ProxyPool/Api/__init__.py
Normal file
14
deploy/ProxyPool/Api/__init__.py
Normal file
@ -0,0 +1,14 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
-------------------------------------------------
|
||||
File Name: __init__.py
|
||||
Description :
|
||||
Author : JHao
|
||||
date: 2016/12/3
|
||||
-------------------------------------------------
|
||||
Change Activity:
|
||||
2016/12/3:
|
||||
-------------------------------------------------
|
||||
"""
|
||||
__author__ = 'JHao'
|
||||
|
||||
71
deploy/ProxyPool/Config/ConfigGetter.py
Normal file
71
deploy/ProxyPool/Config/ConfigGetter.py
Normal file
@ -0,0 +1,71 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
-------------------------------------------------
|
||||
File Name: ConfigGetter
|
||||
Description : 读取配置
|
||||
Author : JHao
|
||||
date: 2019/2/15
|
||||
-------------------------------------------------
|
||||
Change Activity:
|
||||
2019/2/15:
|
||||
-------------------------------------------------
|
||||
"""
|
||||
__author__ = 'JHao'
|
||||
|
||||
|
||||
from Util.utilClass import LazyProperty
|
||||
from Config.setting import *
|
||||
|
||||
|
||||
class ConfigGetter(object):
    """
    Read-only accessor for the values defined in Config.setting.
    """

    def __init__(self):
        pass

    @staticmethod
    def _default_db():
        """Return the "default" entry of DATABASES, or an empty dict when absent."""
        return DATABASES.get("default", {})

    @LazyProperty
    def db_type(self):
        """Database type; "SSDB" when not configured."""
        return self._default_db().get("TYPE", "SSDB")

    @LazyProperty
    def db_name(self):
        """Database name; "proxy" when not configured."""
        return self._default_db().get("NAME", "proxy")

    @LazyProperty
    def db_host(self):
        """Database host; "127.0.0.1" when not configured."""
        return self._default_db().get("HOST", "127.0.0.1")

    @LazyProperty
    def db_port(self):
        """Database port; 8888 when not configured."""
        return self._default_db().get("PORT", 8888)

    @LazyProperty
    def db_password(self):
        """Database password; empty string when not configured."""
        return self._default_db().get("PASSWORD", "")

    @LazyProperty
    def proxy_getter_functions(self):
        """The PROXY_GETTER list from the settings module."""
        return PROXY_GETTER

    @LazyProperty
    def host_ip(self):
        """Address the web API listens on; "127.0.0.1" when not configured."""
        return SERVER_API.get("HOST", "127.0.0.1")

    @LazyProperty
    def host_port(self):
        """Port the web API listens on; 5010 when not configured."""
        return SERVER_API.get("PORT", 5010)
|
||||
|
||||
|
||||
# Shared configuration accessor imported by the rest of the project.
config = ConfigGetter()

if __name__ == '__main__':
    # Print every exposed setting, one per line.
    for setting_name in ('db_type', 'db_name', 'db_host', 'db_port',
                         'proxy_getter_functions', 'host_ip', 'host_port',
                         'db_password'):
        print(getattr(config, setting_name))
|
||||
12
deploy/ProxyPool/Config/__init__.py
Normal file
12
deploy/ProxyPool/Config/__init__.py
Normal file
@ -0,0 +1,12 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
-------------------------------------------------
|
||||
File Name: __init__
|
||||
Description :
|
||||
Author : JHao
|
||||
date: 2019/2/15
|
||||
-------------------------------------------------
|
||||
Change Activity:
|
||||
2019/2/15:
|
||||
-------------------------------------------------
|
||||
"""
|
||||
98
deploy/ProxyPool/Config/setting.py
Normal file
98
deploy/ProxyPool/Config/setting.py
Normal file
@ -0,0 +1,98 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
-------------------------------------------------
|
||||
File Name: setting.py
|
||||
Description : 配置文件
|
||||
Author : JHao
|
||||
date: 2019/2/15
|
||||
-------------------------------------------------
|
||||
Change Activity:
|
||||
2019/2/15:
|
||||
-------------------------------------------------
|
||||
"""
|
||||
|
||||
import sys
|
||||
from os import getenv
|
||||
from logging import getLogger
|
||||
|
||||
log = getLogger(__name__)
|
||||
|
||||
# Start-up banner. A raw string keeps the backslashes of the ASCII art literal:
# sequences such as "\_" and "\/" are invalid escapes in a normal string and
# trigger a deprecation warning, while the resulting value is the same.
HEADER = r"""
****************************************************************
*** ______ ********************* ______ *********** _ ********
*** | ___ \_ ******************** | ___ \ ********* | | ********
*** | |_/ / \__ __ __ _ __ _ | |_/ /___ * ___ | | ********
*** | __/| _// _ \ \ \/ /| | | || __// _ \ / _ \ | | ********
*** | | | | | (_) | > < \ |_| || | | (_) | (_) || |___ ****
*** \_| |_| \___/ /_/\_\ \__ |\_| \___/ \___/ \_____/ ****
**** __ / / *****
************************* /___ / *******************************
************************* ********************************
****************************************************************
"""
|
||||
|
||||
# True when running under Python 3.
PY3 = sys.version_info >= (3,)

# Database connection settings; each one can be overridden through the environment.
DB_TYPE = getenv('db_type', 'REDIS').upper()
DB_HOST = getenv('db_host', '107.182.191.3')
# A value taken from the environment is a string, while the default is an int.
DB_PORT = getenv('db_port', 7379)
# NOTE(review): a password is committed here as the default; prefer supplying it
# through the environment only.
DB_PASSWORD = getenv('db_password', 'jlkj-841-2-redis')

USEFUL_PROXY_COUNT = 10
# The query string must read "&regions="; the "&reg" prefix had been mis-encoded as "®".
ZHIMA_PROXY_API = 'http://http.tiqu.alicdns.com/getip3?num=10&type=2&pro=0&city=0&yys=0&port=1&time=1&ts=0&ys=0&cs=0&lb=1&sb=0&pb=45&mr=2&regions=&gm=4'
# ZHIMA_PROXY_API = 'http://api.proxy.ipidea.io/getProxyIp?num=10&return_type=json&lb=1&sb=0&flow=1&regions=&protocol=http'

# Database configuration
DATABASES = {
    "default": {
        "TYPE": DB_TYPE,
        "HOST": DB_HOST,
        "PORT": DB_PORT,
        "NAME": "proxy",
        "PASSWORD": DB_PASSWORD
    }
}

# register the proxy getter function

PROXY_GETTER = [
    # # "freeProxy01",
    # "freeProxy02",
    # "freeProxy03",
    # "freeProxy04",
    # "freeProxy05",
    # # "freeProxy06",
    # "freeProxy07",
    # # "freeProxy08",
    # "freeProxy09",
    # "freeProxy13",
    # #"freeProxy14",
    # "freeProxy15",
    "zhimaProxy"
]

# API config http://127.0.0.1:5010
SERVER_API = {
    "HOST": "0.0.0.0",  # The ip specified which starting the web API
    "PORT": 5010  # port number to which the server listens to
}
|
||||
|
||||
|
||||
class ConfigError(Exception):
    """
    Raised by checkConfig when a setting has an unsupported value.

    Derives from Exception rather than BaseException, which is reserved for
    interpreter-level exits such as KeyboardInterrupt and SystemExit.
    """
    pass
|
||||
|
||||
|
||||
def checkConfig():
    """
    Validate the module-level settings.
    :raises ConfigError: when DB_TYPE is not SSDB/REDIS, when DB_PORT is a
        non-numeric string, or when PROXY_GETTER names an attribute that
        GetFreeProxy does not have
    """
    if DB_TYPE not in ("SSDB", "REDIS"):
        raise ConfigError('db_type Do not support: %s, must SSDB/REDIS .' % DB_TYPE)

    # DB_PORT is a string only when it was read from the environment.
    if isinstance(DB_PORT, str) and not DB_PORT.isdigit():
        raise ConfigError('if db_port is string, it must be digit, not %s' % DB_PORT)

    # Imported here rather than at module top, as in the original.
    from ProxyGetter import getFreeProxy
    illegal_getter = [key for key in PROXY_GETTER
                      if not hasattr(getFreeProxy.GetFreeProxy, key)]
    if illegal_getter:
        raise ConfigError("ProxyGetter: %s does not exists" % "/".join(illegal_getter))
|
||||
|
||||
|
||||
checkConfig()
|
||||
111
deploy/ProxyPool/DB/DbClient.py
Normal file
111
deploy/ProxyPool/DB/DbClient.py
Normal file
@ -0,0 +1,111 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# !/usr/bin/env python
|
||||
"""
|
||||
-------------------------------------------------
|
||||
File Name: DbClient.py
|
||||
Description : DB工厂类
|
||||
Author : JHao
|
||||
date: 2016/12/2
|
||||
-------------------------------------------------
|
||||
Change Activity:
|
||||
2016/12/2:
|
||||
-------------------------------------------------
|
||||
"""
|
||||
__author__ = 'JHao'
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
from Config.ConfigGetter import config
|
||||
from Util import Singleton
|
||||
|
||||
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
|
||||
class DbClient(object):
    """
    DB factory class exposing get/put/update/pop/delete/exists/getAll/clear/
    getNumber/changeTable.

    Proxies are kept in two tables; switch between them with changeTable:
        raw_proxy: proxies as fetched;
        useful_proxy: proxies that passed validation;

    Operations:
        get(proxy): return the stored information for the given proxy;
        put(proxy): store a proxy;
        pop(): return and remove one proxy;
        update(proxy): update the stored information for a proxy;
        delete(proxy): remove the given proxy;
        exists(proxy): tell whether the given proxy is stored;
        getAll(): return all proxies;
        clear(): remove all proxies;
        getNumber(): return the number of proxies;
        changeTable(name): switch the active table, raw_proxy/useful_proxy

    Every operation is delegated to the backend class:
        ssdb: SsdbClient.py
        redis: RedisClient.py
        mongodb: MongodbClient.py
    """

    # NOTE: the __metaclass__ attribute is only honoured by Python 2. Under
    # Python 3 it is an ordinary class attribute, so Singleton is not applied.
    __metaclass__ = Singleton

    # Configured DB type -> name shared by the backend module and its class.
    _CLIENT_BY_TYPE = {
        "SSDB": "SsdbClient",
        "REDIS": "RedisClient",
        "MONGODB": "MongodbClient",
    }

    def __init__(self):
        """
        init
        :return:
        """
        self.__initDbClient()

    def __initDbClient(self):
        """
        Import the backend module matching config.db_type and instantiate its client.
        :raises AssertionError: when config.db_type is not a supported type
        :return:
        """
        client_name = self._CLIENT_BY_TYPE.get(config.db_type)
        if not client_name:
            # Raised explicitly (same exception type as the former assert) so the
            # check still runs when Python is started with -O.
            raise AssertionError('type error, Not support DB type: {}'.format(config.db_type))
        backend_module = __import__(client_name)
        self.client = getattr(backend_module, client_name)(name=config.db_name,
                                                           host=config.db_host,
                                                           port=config.db_port,
                                                           password=config.db_password)

    def get(self, key, **kwargs):
        """Delegate to the backend's get."""
        return self.client.get(key, **kwargs)

    def put(self, key, **kwargs):
        """Delegate to the backend's put."""
        return self.client.put(key, **kwargs)

    def update(self, key, value=None, **kwargs):
        """
        Delegate to the backend's update.
        :param key: proxy (or proxy object) to update
        :param value: forwarded only when given; the Redis/SSDB backends take a
                      single proxy object, the MongoDB backend takes (key, value)
        :return: whatever the backend returns
        """
        if value is None:
            return self.client.update(key, **kwargs)
        return self.client.update(key, value, **kwargs)

    def delete(self, key, **kwargs):
        """Delegate to the backend's delete."""
        return self.client.delete(key, **kwargs)

    def exists(self, key, **kwargs):
        """Delegate to the backend's exists."""
        return self.client.exists(key, **kwargs)

    def pop(self, **kwargs):
        """Delegate to the backend's pop."""
        return self.client.pop(**kwargs)

    def getAll(self):
        """Delegate to the backend's getAll."""
        return self.client.getAll()

    def clear(self):
        """Delegate to the backend's clear."""
        return self.client.clear()

    def changeTable(self, name):
        """Switch the backend's active table to ``name``."""
        self.client.changeTable(name)

    def getNumber(self):
        """Delegate to the backend's getNumber."""
        return self.client.getNumber()
|
||||
74
deploy/ProxyPool/DB/MongodbClient.py
Normal file
74
deploy/ProxyPool/DB/MongodbClient.py
Normal file
@ -0,0 +1,74 @@
|
||||
# coding: utf-8
|
||||
"""
|
||||
-------------------------------------------------
|
||||
File Name: MongodbClient.py
|
||||
Description : 封装mongodb操作
|
||||
Author : JHao netAir
|
||||
date: 2017/3/3
|
||||
-------------------------------------------------
|
||||
Change Activity:
|
||||
2017/3/3:
|
||||
2017/9/26:完成对mongodb的支持
|
||||
-------------------------------------------------
|
||||
"""
|
||||
__author__ = 'Maps netAir'
|
||||
|
||||
from pymongo import MongoClient
|
||||
|
||||
|
||||
class MongodbClient(object):
    """
    MongoDB backend. Documents of the form {'proxy': ..., 'num': ...} are stored
    in the collection named by ``self.name`` inside the ``proxy`` database.
    """

    def __init__(self, name, host, port, **kwargs):
        """
        :param name: collection name
        :param host: server host
        :param port: server port; a numeric string (as read from the environment) is converted to int
        :param kwargs: forwarded to MongoClient
        """
        self.name = name
        self.client = MongoClient(host, port if port is None else int(port), **kwargs)
        self.db = self.client.proxy

    def changeTable(self, name):
        """Switch the active collection to ``name``."""
        self.name = name

    def get(self, proxy):
        """Return the ``num`` stored for ``proxy``, or None when it is not stored."""
        data = self.db[self.name].find_one({'proxy': proxy})
        return data['num'] if data is not None else None

    def put(self, proxy, num=1):
        """Insert ``proxy`` with ``num`` unless it is already stored; always returns None."""
        collection = self.db[self.name]
        if not collection.find_one({'proxy': proxy}):
            # insert_one replaces the legacy insert(), removed in PyMongo 4.
            collection.insert_one({'proxy': proxy, 'num': num})

    def pop(self):
        """
        Remove one randomly sampled document from the active collection.
        :return: {'proxy': ..., 'value': num} for the removed document, or None when the collection is empty
        """
        sampled = list(self.db[self.name].aggregate([{'$sample': {'size': 1}}]))
        if not sampled:
            return None
        document = sampled[0]
        value = document['proxy']
        self.delete(value)
        return {'proxy': value, 'value': document['num']}

    def delete(self, value):
        """Remove every document whose ``proxy`` equals ``value``."""
        # delete_many matches the legacy remove(), which deleted all matches by default.
        self.db[self.name].delete_many({'proxy': value})

    def getAll(self):
        """Return a dict mapping each stored proxy to its ``num``."""
        return {p['proxy']: p['num'] for p in self.db[self.name].find()}

    def clean(self):
        """Drop the whole ``proxy`` database."""
        self.client.drop_database('proxy')

    def delete_all(self):
        """Remove every document from the active collection."""
        self.db[self.name].delete_many({})

    def clear(self):
        """
        Remove every document from the active collection.

        Provided because DbClient.clear() calls ``clear`` on its backend, as the
        Redis and SSDB clients define it.
        """
        self.delete_all()

    def update(self, key, value):
        """Increment the ``num`` of the proxy ``key`` by ``value``."""
        self.db[self.name].update_one({'proxy': key}, {'$inc': {'num': value}})

    def exists(self, key):
        """Return True when ``key`` is stored in the active collection."""
        return self.db[self.name].find_one({'proxy': key}) is not None

    def getNumber(self):
        """Return the number of documents in the active collection."""
        collection = self.db[self.name]
        # Prefer count_documents; fall back to the legacy count() on drivers that lack it.
        count_documents = getattr(collection, 'count_documents', None)
        if count_documents is not None:
            return count_documents({})
        return collection.count()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test against a local MongoDB: pop one entry and print it.
    first_client = MongodbClient('first', 'localhost', 27017)
    # first_client.put('127.0.0.1:1')
    # second_client = MongodbClient('second', 'localhost', 27017)
    # second_client.put('127.0.0.1:2')
    popped = first_client.pop()
    print(popped)
|
||||
133
deploy/ProxyPool/DB/RedisClient.py
Normal file
133
deploy/ProxyPool/DB/RedisClient.py
Normal file
@ -0,0 +1,133 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
-------------------------------------------------
|
||||
File Name: RedisClient
|
||||
Description : 封装Redis相关操作
|
||||
Author : JHao
|
||||
date: 2019/8/9
|
||||
-------------------------------------------------
|
||||
Change Activity:
|
||||
2019/8/9: 封装Redis相关操作
|
||||
-------------------------------------------------
|
||||
"""
|
||||
__author__ = 'JHao'
|
||||
|
||||
from Config.setting import PY3
|
||||
|
||||
from redis.connection import BlockingConnectionPool
|
||||
from redis import Redis
|
||||
|
||||
|
||||
class RedisClient(object):
    """
    Redis client. Same protocol and data layout as the SSDB client, although
    some methods are not interchangeable.

    Proxies are stored in Redis hashes:
        raw proxies live in the hash named raw_proxy, keyed by ip:port, the value being the proxy's attributes;
        validated proxies live in the hash named useful_proxy, keyed by ip:port, the value being the proxy's attributes;
    """

    def __init__(self, name, **kwargs):
        """
        init
        :param name: hash name
        :param host: host
        :param port: port
        :param password: password
        :return:
        """
        self.name = name
        self.__conn = Redis(connection_pool=BlockingConnectionPool(**kwargs))

    def get(self, proxy_str):
        """
        Fetch one proxy from the active hash; select the hash with changeTable() first.
        :param proxy_str: proxy str
        :return: the stored value (decoded to text under Python 3), or None when absent
        """
        data = self.__conn.hget(name=self.name, key=proxy_str)
        if not data:
            return None
        return data.decode('utf-8') if PY3 else data

    def put(self, proxy_obj):
        """
        Store a proxy in the active hash; select the hash with changeTable().
        :param proxy_obj: Proxy obj
        :return: result of the HSET call
        """
        return self.__conn.hset(self.name, proxy_obj.proxy, proxy_obj.info_json)

    def delete(self, proxy_str):
        """
        Remove the given proxy from the active hash; select the hash with changeTable().
        :param proxy_str: proxy str
        :return:
        """
        self.__conn.hdel(self.name, proxy_str)

    def exists(self, proxy_str):
        """
        Tell whether the given proxy is in the active hash; select the hash with changeTable().
        :param proxy_str: proxy str
        :return: result of the HEXISTS call
        """
        return self.__conn.hexists(self.name, proxy_str)

    def update(self, proxy_obj):
        """
        Overwrite the stored attributes of a proxy.
        :param proxy_obj:
        :return:
        """
        # Same HSET as put(); the result is deliberately not returned.
        self.put(proxy_obj)

    def pop(self):
        """
        Pop one proxy. Currently disabled: nothing is removed and None is always returned.
        :return: None
        """
        return None

    def getAll(self):
        """
        Return every value of the active hash as a list; select the hash with changeTable().
        :return: list of stored values (decoded to text under Python 3)
        """
        stored_values = self.__conn.hgetall(self.name).values()
        if PY3:
            return [value.decode('utf8') for value in stored_values]
        return stored_values

    def clear(self):
        """
        Remove all proxies by deleting the active hash; select the hash with changeTable().
        :return: result of the DEL call
        """
        return self.__conn.delete(self.name)

    def getNumber(self):
        """
        Return the number of proxies in the active hash.
        :return:
        """
        return self.__conn.hlen(self.name)

    def changeTable(self, name):
        """
        Switch the active hash.
        :param name: raw_proxy/useful_proxy
        :return:
        """
        self.name = name
|
||||
135
deploy/ProxyPool/DB/SsdbClient.py
Normal file
135
deploy/ProxyPool/DB/SsdbClient.py
Normal file
@ -0,0 +1,135 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# !/usr/bin/env python
|
||||
"""
|
||||
-------------------------------------------------
|
||||
File Name: SsdbClient.py
|
||||
Description : 封装SSDB操作
|
||||
Author : JHao
|
||||
date: 2016/12/2
|
||||
-------------------------------------------------
|
||||
Change Activity:
|
||||
2016/12/2:
|
||||
2017/09/22: PY3中 redis-py返回的数据是bytes型
|
||||
2017/09/27: 修改pop()方法 返回{proxy:value}字典
|
||||
-------------------------------------------------
|
||||
"""
|
||||
__author__ = 'JHao'
|
||||
|
||||
from Config.setting import PY3
|
||||
|
||||
from redis.connection import BlockingConnectionPool
|
||||
from redis import Redis
|
||||
|
||||
|
||||
class SsdbClient(object):
    """
    SSDB client

    Proxies are stored in SSDB hashes:
        raw proxies live in the hash named raw_proxy, keyed by ip:port, the value being the proxy's attributes;
        validated proxies live in the hash named useful_proxy, keyed by ip:port, the value being the proxy's attributes;
    """

    def __init__(self, name, **kwargs):
        """
        init
        :param name: hash name
        :param host: host
        :param port: port
        :param password: password
        :return:
        """
        self.name = name
        self.__conn = Redis(connection_pool=BlockingConnectionPool(**kwargs))

    def get(self, proxy_str):
        """
        Fetch one proxy from the active hash; select the hash with changeTable() first.
        :param proxy_str: proxy str
        :return: the stored value (decoded to text under Python 3), or None when absent
        """
        data = self.__conn.hget(name=self.name, key=proxy_str)
        if not data:
            return None
        return data.decode('utf-8') if PY3 else data

    def put(self, proxy_obj):
        """
        Store a proxy in the active hash; select the hash with changeTable().
        :param proxy_obj: Proxy obj
        :return: result of the HSET call
        """
        return self.__conn.hset(self.name, proxy_obj.proxy, proxy_obj.info_json)

    def delete(self, proxy_str):
        """
        Remove the given proxy from the active hash; select the hash with changeTable().
        :param proxy_str: proxy str
        :return:
        """
        self.__conn.hdel(self.name, proxy_str)

    def exists(self, proxy_str):
        """
        Tell whether the given proxy is in the active hash; select the hash with changeTable().
        :param proxy_str: proxy str
        :return: result of the HEXISTS call
        """
        return self.__conn.hexists(self.name, proxy_str)

    def update(self, proxy_obj):
        """
        Overwrite the stored attributes of a proxy.
        :param proxy_obj:
        :return:
        """
        # Same HSET as put(); the result is deliberately not returned.
        self.put(proxy_obj)

    def pop(self):
        """
        Pop one proxy. Currently disabled: nothing is removed and None is always returned.
        :return: None
        """
        return None

    def getAll(self):
        """
        Return every value of the active hash as a list; select the hash with changeTable().
        :return: list of stored values (decoded to text under Python 3)
        """
        stored_values = self.__conn.hgetall(self.name).values()
        if PY3:
            return [value.decode('utf8') for value in stored_values]
        return stored_values

    def clear(self):
        """
        Remove all proxies from the active hash with the "hclear" command; select the hash with changeTable().
        :return: result of the command
        """
        return self.__conn.execute_command("hclear", self.name)

    def getNumber(self):
        """
        Return the number of proxies in the active hash.
        :return:
        """
        return self.__conn.hlen(self.name)

    def changeTable(self, name):
        """
        Switch the active hash.
        :param name: raw_proxy/useful_proxy
        :return:
        """
        self.name = name
|
||||
12
deploy/ProxyPool/DB/__init__.py
Normal file
12
deploy/ProxyPool/DB/__init__.py
Normal file
@ -0,0 +1,12 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
-------------------------------------------------
|
||||
File Name: __init__.py
|
||||
Description :
|
||||
Author : JHao
|
||||
date: 2016/12/2
|
||||
-------------------------------------------------
|
||||
Change Activity:
|
||||
2016/12/2:
|
||||
-------------------------------------------------
|
||||
"""
|
||||
17
deploy/ProxyPool/Dockerfile
Normal file
17
deploy/ProxyPool/Dockerfile
Normal file
@ -0,0 +1,17 @@
|
||||
FROM python:3.8.2-slim

# Container time zone.
ENV TZ=Asia/Shanghai

WORKDIR /usr/src/app

# Copy the requirements file on its own so the dependency layer is cached
# independently of source changes.
COPY ./requirements.txt .

RUN pip install --no-cache-dir -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple

COPY . .

# Port published for the web API.
EXPOSE 5010

WORKDIR /usr/src/app/cli

ENTRYPOINT [ "sh", "start.sh" ]
|
||||
21
deploy/ProxyPool/LICENSE
Normal file
21
deploy/ProxyPool/LICENSE
Normal file
@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2017 J_hao104
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
113
deploy/ProxyPool/Manager/ProxyManager.py
Normal file
113
deploy/ProxyPool/Manager/ProxyManager.py
Normal file
@ -0,0 +1,113 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# !/usr/bin/env python
|
||||
"""
|
||||
-------------------------------------------------
|
||||
File Name: ProxyManager.py
|
||||
Description :
|
||||
Author : JHao
|
||||
date: 2016/12/3
|
||||
-------------------------------------------------
|
||||
Change Activity:
|
||||
2016/12/3:
|
||||
-------------------------------------------------
|
||||
"""
|
||||
__author__ = 'JHao'
|
||||
|
||||
import random
|
||||
import json
|
||||
from ProxyHelper import Proxy
|
||||
from DB.DbClient import DbClient
|
||||
from Config.ConfigGetter import config
|
||||
from Util.LogHandler import LogHandler
|
||||
from Util.utilFunction import verifyProxyFormat
|
||||
from ProxyGetter.getFreeProxy import GetFreeProxy
|
||||
|
||||
|
||||
class ProxyManager(object):
    """
    ProxyManager: fetches proxies into the raw table and serves proxies from
    the useful table.
    """

    def __init__(self):
        self.db = DbClient()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        # self.useful_proxy_queue = 'ProxyPool:useful_proxy_3'
        self.useful_proxy_queue = 'ProxyPool:useful_proxy_63'

    def fetch(self):
        """
        fetch proxy into db by ProxyGetter

        Every getter named in config.proxy_getter_functions is run; each item it
        yields is parsed as JSON with the keys proxy / proxy_type / anonimity.
        Malformed and duplicate proxies are skipped; an exception inside one
        getter is logged and the next getter is tried.
        :return:
        """
        self.db.changeTable(self.raw_proxy_queue)
        proxy_set = set()
        self.log.info("ProxyFetch : start")
        for proxyGetter in config.proxy_getter_functions:
            self.log.info("ProxyFetch - {func}: start".format(func=proxyGetter))
            try:
                for proxy_str in getattr(GetFreeProxy, proxyGetter.strip())():
                    # No ``encoding`` argument: json.loads rejects it on Python 3.9+.
                    proxy_dict = json.loads(proxy_str)
                    proxy = proxy_dict['proxy']
                    proxy_type = proxy_dict['proxy_type']
                    anonimity = proxy_dict['anonimity']

                    if not proxy or not verifyProxyFormat(proxy):
                        # str() first: proxy may be None or another non-string here,
                        # and calling .ljust on it would abort the whole getter.
                        self.log.error('ProxyFetch - {func}: '
                                       '{proxy} illegal'.format(func=proxyGetter, proxy=str(proxy).ljust(20)))
                        continue
                    if proxy in proxy_set:
                        self.log.info('ProxyFetch - {func}: '
                                      '{proxy} exist'.format(func=proxyGetter, proxy=proxy.ljust(20)))
                        continue

                    self.db.put(Proxy(proxy, source=proxyGetter, proxy_type=proxy_type, anonimity=anonimity))
                    proxy_set.add(proxy)
                    self.log.info('ProxyFetch - {func}: '
                                  '{proxy} added'.format(func=proxyGetter, proxy=proxy.ljust(20)))
            except Exception as e:
                # Boundary around one getter: log and continue with the next one.
                self.log.error("ProxyFetch - {func}: error".format(func=proxyGetter))
                self.log.error(str(e))

    def get(self):
        """
        return a useful proxy
        :return: a Proxy built from a randomly chosen stored entry, or None when the useful table is empty
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_list = self.db.getAll()
        if not item_list:
            return None
        return Proxy.newProxyFromJson(random.choice(item_list))

    def delete(self, proxy_str):
        """
        delete proxy from pool
        :param proxy_str:
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy_str)

    def getAll(self):
        """
        get all proxy from pool as list
        :return: list of Proxy objects built from the stored entries
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_list = self.db.getAll()
        return [Proxy.newProxyFromJson(_) for _ in item_list]

    def getNumber(self):
        """
        Count the entries of both tables.
        :return: {'raw_proxy': <count>, 'useful_proxy': <count>}
        """
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_queue = self.db.getNumber()
        return {'raw_proxy': total_raw_proxy, 'useful_proxy': total_useful_queue}
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Run a single fetch pass when executed as a script.
    manager = ProxyManager()
    manager.fetch()
|
||||
15
deploy/ProxyPool/Manager/__init__.py
Normal file
15
deploy/ProxyPool/Manager/__init__.py
Normal file
@ -0,0 +1,15 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
-------------------------------------------------
|
||||
File Name: __init__.py
|
||||
Description :
|
||||
Author : JHao
|
||||
date: 2016/12/3
|
||||
-------------------------------------------------
|
||||
Change Activity:
|
||||
2016/12/3:
|
||||
-------------------------------------------------
|
||||
"""
|
||||
__author__ = 'JHao'
|
||||
|
||||
from Manager.ProxyManager import ProxyManager
|
||||
70
deploy/ProxyPool/ProxyGetter/CheckProxy.py
Normal file
70
deploy/ProxyPool/ProxyGetter/CheckProxy.py
Normal file
@ -0,0 +1,70 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
-------------------------------------------------
|
||||
File Name: CheckProxy
|
||||
Description : used for check getFreeProxy.py
|
||||
Author : JHao
|
||||
date: 2018/7/10
|
||||
-------------------------------------------------
|
||||
Change Activity:
|
||||
2018/7/10: CheckProxy
|
||||
-------------------------------------------------
|
||||
"""
|
||||
__author__ = 'JHao'
|
||||
|
||||
from getFreeProxy import GetFreeProxy
|
||||
from Util.utilFunction import verifyProxyFormat
|
||||
|
||||
|
||||
from Util.LogHandler import LogHandler
|
||||
|
||||
log = LogHandler('check_proxy', file=False)
|
||||
|
||||
|
||||
class CheckProxy(object):
    """Helpers that run the GetFreeProxy getter functions and log how many valid proxies each yields."""

    @staticmethod
    def checkAllGetProxyFunc():
        """
        Run every function member of GetFreeProxy and log the number of items
        that pass verifyProxyFormat. A getter that raises is logged and counted as 0.
        Returns:
            None
        """
        import inspect
        getters = inspect.getmembers(GetFreeProxy, predicate=inspect.isfunction)
        valid_counts = {}
        for getter_name, getter in getters:
            log.info(u"开始运行 {}".format(getter_name))
            try:
                valid_counts[getter_name] = sum(1 for item in getter() if verifyProxyFormat(item))
            except Exception as e:
                log.info(u"代理获取函数 {} 运行出错!".format(getter_name))
                log.error(str(e))
        log.info(u"所有函数运行完毕 " + "***" * 5)
        for getter_name, _getter in getters:
            fetched = valid_counts.get(getter_name, 0)
            log.info(u"函数 {n}, 获取到代理数: {c}".format(n=getter_name, c=fetched))

    @staticmethod
    def checkGetProxyFunc(func):
        """
        Run one getter and log every item that passes verifyProxyFormat, then the total.
        Args:
            func: a callable from getFreeProxy returning an iterable of proxies

        Returns:
            None
        """
        func_name = getattr(func, '__name__', "None")
        log.info("start running func: {}".format(func_name))
        count = 0
        for proxy in func():
            if not verifyProxyFormat(proxy):
                continue
            log.info("{} fetch proxy: {}".format(func_name, proxy))
            count += 1
        log.info("{n} completed, fetch proxy number: {c}".format(n=func_name, c=count))
|
||||
|
||||
|
||||
if __name__ == '__main__':
    CheckProxy.checkAllGetProxyFunc()
    # freeProxy01 is commented out in GetFreeProxy, so referencing it here
    # raised AttributeError; exercise a getter that is still defined instead.
    CheckProxy.checkGetProxyFunc(GetFreeProxy.freeProxy02)
|
||||
12
deploy/ProxyPool/ProxyGetter/__init__.py
Normal file
12
deploy/ProxyPool/ProxyGetter/__init__.py
Normal file
@ -0,0 +1,12 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
-------------------------------------------------
|
||||
File Name: __init__.py.py
|
||||
Description :
|
||||
Author : JHao
|
||||
date: 2016/11/25
|
||||
-------------------------------------------------
|
||||
Change Activity:
|
||||
2016/11/25:
|
||||
-------------------------------------------------
|
||||
"""
|
||||
452
deploy/ProxyPool/ProxyGetter/getFreeProxy.py
Normal file
452
deploy/ProxyPool/ProxyGetter/getFreeProxy.py
Normal file
@ -0,0 +1,452 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# !/usr/bin/env python
|
||||
"""
|
||||
-------------------------------------------------
|
||||
File Name: GetFreeProxy.py
|
||||
Description : 抓取免费代理
|
||||
Author : JHao
|
||||
date: 2016/11/25
|
||||
-------------------------------------------------
|
||||
Change Activity:
|
||||
2016/11/25:
|
||||
-------------------------------------------------
|
||||
"""
|
||||
import re
|
||||
import sys
|
||||
import json
|
||||
import requests
|
||||
from time import sleep
|
||||
|
||||
sys.path.append('..')
|
||||
|
||||
from Util.WebRequest import WebRequest
|
||||
from Util.utilFunction import getHtmlTree
|
||||
import requests
|
||||
from DB.DbClient import DbClient
|
||||
from Config.setting import USEFUL_PROXY_COUNT
|
||||
from Config.setting import ZHIMA_PROXY_API
|
||||
|
||||
# Debug aid: switch off urllib3's warnings (the original note names the
# insecure-request warning).
requests.packages.urllib3.disable_warnings()

# Module-wide record template that the getters below fill in and serialise
# with json.dumps. Every field starts out as an empty string.
proxy_dict = dict.fromkeys(
    ("proxy", "region", "anonimity", "proxy_type", "source"), "")

# Shared database client (used by zhimaProxy to inspect the pool size).
db = DbClient()
|
||||
|
||||
|
||||
def init_proxy_dict():
    """Reset every field of the module-level ``proxy_dict`` to an empty string."""
    proxy_dict.update(dict.fromkeys(proxy_dict, ""))
|
||||
|
||||
|
||||
class GetFreeProxy(object):
|
||||
"""
|
||||
proxy getter
|
||||
"""
|
||||
|
||||
# @staticmethod
|
||||
# def freeProxy01():
|
||||
# """
|
||||
# 无忧代理 http://www.data5u.com/
|
||||
# 几乎没有能用的
|
||||
# :return:
|
||||
# """
|
||||
# url_list = [
|
||||
# 'http://www.data5u.com/',
|
||||
# 'http://www.data5u.com/free/gngn/index.shtml',
|
||||
# 'http://www.data5u.com/free/gnpt/index.shtml'
|
||||
# ]
|
||||
# key = 'ABCDEFGHIZ'
|
||||
# for url in url_list:
|
||||
# html_tree = getHtmlTree(url)
|
||||
# ul_list = html_tree.xpath('//ul[@class="l2"]')
|
||||
# for ul in ul_list:
|
||||
# try:
|
||||
# ip = ul.xpath('./span[1]/li/text()')[0]
|
||||
# classnames = ul.xpath('./span[2]/li/attribute::class')[0]
|
||||
# classname = classnames.split(' ')[1]
|
||||
# port_sum = 0
|
||||
# for c in classname:
|
||||
# port_sum *= 10
|
||||
# port_sum += key.index(c)
|
||||
# port = port_sum >> 3
|
||||
# yield '{}:{}'.format(ip, port)
|
||||
# except Exception as e:
|
||||
# print(e)
|
||||
|
||||
@staticmethod
def freeProxy02(count=20):
    """
    66ip proxy  http://www.66ip.cn/

    Fetches the site's landing page, evaluates the JavaScript found there
    with execjs to obtain a cookie value, then requests the two export URLs
    with that value sent as the ``__jsl_clearance`` cookie and yields every
    ``ip:port`` match found in the responses.

    :param count: number of proxies to ask each export URL for
    :return: generator of JSON strings (serialised ``proxy_dict``); yields
             nothing if the cookie step fails
    """
    urls = [
        "http://www.66ip.cn/mo.php?sxb=&tqsl={}&port=&export=&ktip=&sxa=&submit=%CC%E1++%C8%A1&textarea=",
        "http://www.66ip.cn/nmtq.php?getnum={}&isp=0&anonymoustype=0&s"
        "tart=&ports=&export=&ipaddress=&area=0&proxytype=2&api=66ip"
    ]

    try:
        import execjs
        import requests

        headers = {
            'User-Agent':
            'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0',
            'Accept': '*/*',
            'Connection': 'keep-alive',
            'Accept-Language': 'zh-CN,zh;q=0.8'
        }
        session = requests.session()
        src = session.get("http://www.66ip.cn/", headers=headers).text
        # Keep only the first <script> body and wrap it as ``function test()``.
        src = src.split("</script>")[0] + '}'
        src = src.replace("<script>", "function test() {")
        # Replace the page's eval() with an assignment to ``tmp`` and make the
        # loop return ``tmp`` once it mentions 'cookie' (or the counter runs out).
        src = src.replace("while(z++)try{eval(",
                          ';var num=10;while(z++)try{var tmp=')
        src = src.replace(
            ");break}",
            ";num--;if(tmp.search('cookie') != -1 | num<0){return tmp}}")
        ctx = execjs.compile(src)
        src = ctx.call("test")
        # Cut out the expression assigned to document.cookie and evaluate it.
        src = src[src.find("document.cookie="):src.find("};if((")]
        src = src.replace("document.cookie=", "")
        src = "function test() {var window={}; return %s }" % src
        cookie = execjs.compile(src).call('test')
        # First ``name=value`` pair of the cookie string; keep the value.
        js_cookie = cookie.split(";")[0].split("=")[-1]
    except Exception as e:
        # Any failure while obtaining the cookie ends the generator early.
        print(e)
        return

    for url in urls:
        try:
            html = session.get(url.format(count),
                               cookies={
                                   "__jsl_clearance": js_cookie
                               },
                               headers=headers).text
            ips = re.findall(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d{1,5}",
                             html)
            for ip in ips:
                init_proxy_dict()
                proxy_dict["proxy"] = ip.strip()
                yield json.dumps(proxy_dict)
        except Exception as e:
            # Best effort: report the error and move on to the next URL.
            print(e)
            pass
|
||||
|
||||
@staticmethod
def freeProxy03(page_count=1):
    """
    Xici proxy  http://www.xicidaili.com

    :param page_count: number of listing pages to read per category URL
                       (pages 1..page_count)
    :return: generator of JSON strings (serialised ``proxy_dict``)
    """
    category_urls = [
        'http://www.xicidaili.com/nn/',  # high-anonymity list
        # 'http://www.xicidaili.com/nt/',  # transparent list
    ]
    row_xpath = './/table[@id="ip_list"]//tr[position()>1]'
    for base_url in category_urls:
        for page_no in range(1, page_count + 1):
            tree = getHtmlTree(base_url + str(page_no))
            for row in tree.xpath(row_xpath):
                try:
                    init_proxy_dict()
                    cells = row.xpath('./td/text()')
                    # first two text cells are joined as ip:port
                    proxy_dict["proxy"] = ':'.join(cells[0:2])
                    proxy_dict["proxy_type"] = cells[5]
                    yield json.dumps(proxy_dict)
                except Exception:
                    # best effort: a row that cannot be read is skipped
                    pass
|
||||
|
||||
@staticmethod
def freeProxy04():
    """
    goubanjia  http://www.goubanjia.com/

    Each ``td.ip`` cell is paired, in document order, with the matching
    ``tr.success`` / ``tr.warning`` row; the ip comes from the cell's visible
    text, the port is decoded from the port span's class name, and the
    anonymity / type columns come from the row.
    NOTE(review): assumes the two XPath results line up one-to-one -- confirm
    against the live page.

    :return: generator of JSON strings (serialised ``proxy_dict``)
    """
    url = "http://www.goubanjia.com/"
    tree = getHtmlTree(url)
    proxy_list = tree.xpath('//td[@class="ip"]')
    proxy_attr = tree.xpath('//tr[@class="success" or @class="warning"]')
    # The site mixes in hidden decoy digits and dots, so text under elements
    # styled display:none (e.g. <p style="display:none;">) must be ignored,
    # as must the port span, which is decoded separately below.
    xpath_str = """.//*[not(contains(@style, 'display: none'))
                        and not(contains(@style, 'display:none'))
                        and not(contains(@class, 'port'))
                        ]/text()
                """
    # zip() pairs cell i with row i. Iterating the bare tuple
    # ``proxy_list, proxy_attr`` tried to unpack each whole list into two names.
    for each_proxy, each_attr in zip(proxy_list, proxy_attr):
        try:
            # The ':' sits directly under the td while the rest lives in
            # div/span/p children, so this text is the ip part only.
            ip_addr = ''.join(each_proxy.xpath(xpath_str))

            # The port shown in the HTML is a random number; the real one is
            # encoded in the letters after "port " in the class attribute.
            # Example: <span class="port CFACE">9054</span> decodes to 3128
            # (letters A.. map to digits 0.., giving 25024, divided by 8).
            port_class = each_proxy.xpath(
                ".//span[contains(@class, 'port')]/attribute::class")[0]
            port = 0
            for letter in port_class.replace("port ", ""):
                port = port * 10 + (ord(letter) - ord('A'))
            # integer division keeps the result an int on Python 2 and 3
            port //= 8

            attr_cells = each_attr.xpath(".//td/text()")
            init_proxy_dict()
            proxy_dict["proxy"] = '{}:{}'.format(ip_addr, port)
            proxy_dict["anonimity"] = attr_cells[2]
            proxy_dict["proxy_type"] = attr_cells[3]
            yield json.dumps(proxy_dict)
        except Exception:
            # best effort: a row that cannot be decoded is skipped
            pass
|
||||
|
||||
@staticmethod
def freeProxy05():
    """
    Kuaidaili  https://www.kuaidaili.com

    :return: generator of JSON strings (serialised ``proxy_dict``), one per
             table row after the first
    """
    url_list = [
        'https://www.kuaidaili.com/free/inha/'
        # 'https://www.kuaidaili.com/free/intr/'
    ]
    for url in url_list:
        rows = getHtmlTree(url).xpath('.//table//tr')
        sleep(1)  # required pause, otherwise the second request gets no data
        for row in rows[1:]:
            init_proxy_dict()
            cells = row.xpath('./td/text()')
            # first two text cells are joined as ip:port
            proxy_dict['proxy'] = ':'.join(cells[0:2])
            proxy_dict['proxy_type'] = cells[3]
            proxy_dict['anonimity'] = cells[2]
            yield json.dumps(proxy_dict)
|
||||
|
||||
# @staticmethod
|
||||
# def freeProxy06():
|
||||
# """
|
||||
# 码农代理 https://proxy.coderbusy.com/
|
||||
# :return:
|
||||
# """
|
||||
# urls = ['https://proxy.coderbusy.com/']
|
||||
# for url in urls:
|
||||
# tree = getHtmlTree(url)
|
||||
# proxy_list = tree.xpath('.//table//tr')
|
||||
# for tr in proxy_list[1:]:
|
||||
# init_proxy_dict()
|
||||
# proxy_dict['proxy'] = ':'.join(tr.xpath('./td/text()')[0:2])
|
||||
# yield json.dumps(proxy_dict)
|
||||
|
||||
@staticmethod
def freeProxy07():
    """
    ip3366 ("cloud proxy")  http://www.ip3366.net/free/

    Reads both listing variants (stype=1 and stype=2) and yields one record
    per table row after the first.

    :return: generator of JSON strings (serialised ``proxy_dict``)
    """
    urls = [
        'http://www.ip3366.net/free/?stype=1',
        "http://www.ip3366.net/free/?stype=2"
    ]
    # The pages are fetched through getHtmlTree, so the WebRequest instance
    # that used to be created here was never used and has been dropped.
    for url in urls:
        tree = getHtmlTree(url)
        proxy_list = tree.xpath('.//table//tr')
        sleep(1)  # pause between page requests
        for tr in proxy_list[1:]:
            init_proxy_dict()
            cells = tr.xpath('./td/text()')
            # first two text cells are joined as ip:port
            proxy_dict['proxy'] = ':'.join(cells[0:2])
            proxy_dict['proxy_type'] = cells[3]
            proxy_dict['anonimity'] = cells[2]
            yield json.dumps(proxy_dict)
|
||||
|
||||
# r = request.get(url, timeout=10)
|
||||
# proxies = re.findall(
|
||||
# r'<td>(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})</td>[\s\S]*?<td>(\d+)</td>',
|
||||
# r.text)
|
||||
# for proxy in proxies:
|
||||
# init_proxy_dict()
|
||||
# proxy_dict['proxy'] = ":".join(proxy)
|
||||
# yield json.dumps(proxy_dict)
|
||||
|
||||
# @staticmethod
|
||||
# def freeProxy08():
|
||||
# """
|
||||
# IP海 http://www.iphai.com/free/ng
|
||||
# :return:
|
||||
# """
|
||||
# urls = [
|
||||
# 'http://www.iphai.com/free/ng', 'http://www.iphai.com/free/np',
|
||||
# 'http://www.iphai.com/free/wg', 'http://www.iphai.com/free/wp'
|
||||
# ]
|
||||
# request = WebRequest()
|
||||
# for url in urls:
|
||||
# r = request.get(url, timeout=10)
|
||||
# proxies = re.findall(
|
||||
# r'<td>\s*?(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\s*?</td>[\s\S]*?<td>\s*?(\d+)\s*?</td>',
|
||||
# r.text)
|
||||
# for proxy in proxies:
|
||||
# init_proxy_dict()
|
||||
# proxy_dict['proxy'] = ":".join(proxy)
|
||||
# yield json.dumps(proxy_dict)
|
||||
|
||||
@staticmethod
def freeProxy09(page_count=1):
    """
    Free proxy library  http://ip.jiangxianli.com/?page=

    :param page_count: number of listing pages to read (pages 1..page_count)
    :return: generator of JSON strings (serialised ``proxy_dict``), one per
             table row after the first
    """
    for i in range(1, page_count + 1):
        # Query parameters are joined with a plain '&'. The previous '&?page='
        # named the parameter '?page', so the page number could not be read
        # as 'page'.
        url = 'http://ip.jiangxianli.com/?country=中国&page={}'.format(i)
        html_tree = getHtmlTree(url)
        for tr in html_tree.xpath("//table//tr")[1:]:
            init_proxy_dict()
            cells = tr.xpath("./td/text()")
            # first two text cells are joined as ip:port
            proxy_dict['proxy'] = ":".join(cells[0:2]).strip()
            proxy_dict['proxy_type'] = cells[3]
            proxy_dict['anonimity'] = cells[2]
            yield json.dumps(proxy_dict)
|
||||
|
||||
# @staticmethod
|
||||
# def freeProxy10():
|
||||
# """
|
||||
# 墙外网站 cn-proxy
|
||||
# :return:
|
||||
# """
|
||||
# urls = ['http://cn-proxy.com/', 'http://cn-proxy.com/archives/218']
|
||||
# request = WebRequest()
|
||||
# for url in urls:
|
||||
# r = request.get(url, timeout=10)
|
||||
# proxies = re.findall(r'<td>(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})</td>[\w\W]<td>(\d+)</td>', r.text)
|
||||
# for proxy in proxies:
|
||||
# yield ':'.join(proxy)
|
||||
|
||||
# @staticmethod
|
||||
# def freeProxy11():
|
||||
# """
|
||||
# https://proxy-list.org/english/index.php
|
||||
# :return:
|
||||
# """
|
||||
# urls = ['https://proxy-list.org/english/index.php?p=%s' % n for n in range(1, 10)]
|
||||
# request = WebRequest()
|
||||
# import base64
|
||||
# for url in urls:
|
||||
# r = request.get(url, timeout=10)
|
||||
# proxies = re.findall(r"Proxy\('(.*?)'\)", r.text)
|
||||
# for proxy in proxies:
|
||||
# yield base64.b64decode(proxy).decode()
|
||||
|
||||
# @staticmethod
|
||||
# def freeProxy12():
|
||||
# urls = ['https://list.proxylistplus.com/Fresh-HTTP-Proxy-List-1']
|
||||
# request = WebRequest()
|
||||
# for url in urls:
|
||||
# r = request.get(url, timeout=10)
|
||||
# proxies = re.findall(r'<td>(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})</td>[\s\S]*?<td>(\d+)</td>', r.text)
|
||||
# for proxy in proxies:
|
||||
# yield ':'.join(proxy)
|
||||
|
||||
@staticmethod
def freeProxy13(max_page=2):
    """
    Qiyun proxy  http://www.qydaili.com/free/?action=china&page=1

    :param max_page: last listing page to read (pages 1..max_page)
    :return: generator of JSON strings (serialised ``proxy_dict``), one per
             table row after the first
    """
    page_urls = (
        'http://www.qydaili.com/free/?action=china&page=' + str(page_no)
        for page_no in range(1, max_page + 1)
    )
    for page_url in page_urls:
        rows = getHtmlTree(page_url).xpath('.//table//tr')
        sleep(1)  # pause between page requests
        for row in rows[1:]:
            init_proxy_dict()
            cells = row.xpath('./td/text()')
            # first two text cells are joined as ip:port
            proxy_dict['proxy'] = ':'.join(cells[0:2])
            proxy_dict['proxy_type'] = cells[3]
            proxy_dict['anonimity'] = cells[2]
            yield json.dumps(proxy_dict)
|
||||
|
||||
# r = request.get(url, timeout=10)
|
||||
# proxies = re.findall(
|
||||
# r'<td.*?>(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})</td>[\s\S]*?<td.*?>(\d+)</td>',
|
||||
# r.text)
|
||||
# for proxy in proxies:
|
||||
# init_proxy_dict()
|
||||
# proxy_dict['proxy'] = ':'.join(proxy)
|
||||
# yield json.dumps(proxy_dict)
|
||||
|
||||
# @staticmethod
|
||||
# def freeProxy14(max_page=2):
|
||||
# """
|
||||
# http://www.89ip.cn/index.html
|
||||
# 89免费代理
|
||||
# :param max_page:
|
||||
# :return:
|
||||
# """
|
||||
# base_url = 'http://www.89ip.cn/index_{}.html'
|
||||
# request = WebRequest()
|
||||
# for page in range(1, max_page + 1):
|
||||
# url = base_url.format(page)
|
||||
# r = request.get(url, timeout=10)
|
||||
# proxies = re.findall(
|
||||
# r'<td.*?>[\s\S]*?(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})[\s\S]*?</td>[\s\S]*?<td.*?>[\s\S]*?(\d+)[\s\S]*?</td>',
|
||||
# r.text)
|
||||
# for proxy in proxies:
|
||||
# init_proxy_dict()
|
||||
# proxy_dict['proxy'] = ':'.join(proxy)
|
||||
# yield json.dumps(proxy_dict)
|
||||
|
||||
@staticmethod
def freeProxy15():
    """
    xiladaili  http://www.xiladaili.com/

    Only the "gaoni" listing is enabled. The first text cell of each row is
    used as the proxy as-is, and the second cell is used as the proxy type
    after removing whatever the ``[\\u4e00-\\u9fa5]`` pattern matches
    (presumably meant to strip Chinese characters -- confirm on the target
    Python version).

    :return: generator of JSON strings (serialised ``proxy_dict``), one per
             table row after the first
    """
    urls = [
        # 'http://www.xiladaili.com/putong/',
        "http://www.xiladaili.com/gaoni/"
        # "http://www.xiladaili.com/http/",
        # "http://www.xiladaili.com/https/"
    ]
    strip_pattern = re.compile(r'[\u4e00-\u9fa5]')
    for url in urls:
        rows = getHtmlTree(url).xpath('.//table//tr')
        sleep(1)  # pause between page requests
        for row in rows[1:]:
            init_proxy_dict()
            cells = row.xpath('./td/text()')
            proxy_dict['proxy'] = cells[0]
            proxy_dict['proxy_type'] = re.sub(strip_pattern, '', cells[1])
            proxy_dict['anonimity'] = cells[2]
            yield json.dumps(proxy_dict)
|
||||
|
||||
@staticmethod
def zhimaProxy():
    """
    Top up from the ZHIMA_PROXY_API endpoint when the pool is running low.

    Switches the shared db client to the 'ProxyPool:useful_proxy_63' table
    and, only if it holds fewer than half of USEFUL_PROXY_COUNT entries,
    requests the API and yields one record per item of the response's
    ``data`` list (when ``success`` is truthy).

    :return: generator of JSON strings (serialised ``proxy_dict``)
    """
    # db.changeTable('ProxyPool:useful_proxy_3')
    db.changeTable('ProxyPool:useful_proxy_63')
    if db.getNumber() < USEFUL_PROXY_COUNT / 2:
        rsp = json.loads(requests.get(ZHIMA_PROXY_API).text)
        if rsp['success']:
            for proxy in rsp['data']:
                # Clear the shared template first, as every other getter does;
                # otherwise region/anonimity/proxy_type left behind by the
                # previously run getter would be attached to these proxies.
                init_proxy_dict()
                proxy_dict['proxy'] = proxy['ip'] + ':' + str(proxy['port'])
                yield json.dumps(proxy_dict)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Imported here so CheckProxy is only loaded when this file is run as a script.
    from CheckProxy import CheckProxy

    # Uncomment a line below to exercise a single getter on its own.
    # CheckProxy.checkGetProxyFunc(GetFreeProxy.freeProxy01)
    # CheckProxy.checkGetProxyFunc(GetFreeProxy.freeProxy02)
    # CheckProxy.checkGetProxyFunc(GetFreeProxy.freeProxy03)
    # CheckProxy.checkGetProxyFunc(GetFreeProxy.freeProxy04)
    # CheckProxy.checkGetProxyFunc(GetFreeProxy.freeProxy05)
    # CheckProxy.checkGetProxyFunc(GetFreeProxy.freeProxy06)
    # CheckProxy.checkGetProxyFunc(GetFreeProxy.freeProxy07)
    # CheckProxy.checkGetProxyFunc(GetFreeProxy.freeProxy08)
    # CheckProxy.checkGetProxyFunc(GetFreeProxy.freeProxy09)
    # CheckProxy.checkGetProxyFunc(GetFreeProxy.freeProxy13)
    # CheckProxy.checkGetProxyFunc(GetFreeProxy.freeProxy14)
    # CheckProxy.checkGetProxyFunc(GetFreeProxy.freeProxy15)

    # Default: run every getter and log a per-getter count.
    CheckProxy.checkAllGetProxyFunc()
|
||||
145
deploy/ProxyPool/ProxyHelper/Proxy.py
Normal file
145
deploy/ProxyPool/ProxyHelper/Proxy.py
Normal file
@ -0,0 +1,145 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
-------------------------------------------------
|
||||
File Name: Proxy
|
||||
Description : 代理对象类型封装
|
||||
Author : JHao
|
||||
date: 2019/7/11
|
||||
-------------------------------------------------
|
||||
Change Activity:
|
||||
2019/7/11: 代理对象类型封装
|
||||
-------------------------------------------------
|
||||
"""
|
||||
__author__ = 'JHao'
|
||||
|
||||
import json
|
||||
|
||||
|
||||
class Proxy(object):
    """
    One proxy together with its metadata and check history.

    ``proxy`` is read-only after construction; every other attribute is
    exposed as a read/write property.
    """

    def __init__(self, proxy, fail_count=0, region="", anonimity="", proxy_type="",
                 source="", check_count=0, last_status="", last_time=""):
        self._proxy = proxy
        self._fail_count = fail_count
        self._region = region
        self._anonimity = anonimity
        self._type = proxy_type
        self._source = source
        self._check_count = check_count
        self._last_status = last_status
        self._last_time = last_time

    @classmethod
    def newProxyFromJson(cls, proxy_json):
        """
        Build a Proxy from a JSON object string; missing keys fall back to
        the constructor defaults.

        The proxy type is read from ``"type"`` (the key written by
        ``info_json``) and, if that key is absent, from ``"proxy_type"``
        (the key used in the getters' records).

        :param proxy_json: JSON text of an object with the proxy's fields
        :return: new Proxy instance
        """
        # No ``encoding`` argument: it had no effect on Python 3 and passing
        # it raises TypeError from Python 3.9 onwards.
        proxy_dict = json.loads(proxy_json)
        return cls(proxy=proxy_dict.get("proxy", ""),
                   fail_count=proxy_dict.get("fail_count", 0),
                   region=proxy_dict.get("region", ""),
                   anonimity=proxy_dict.get("anonimity", ""),
                   proxy_type=proxy_dict.get("type", proxy_dict.get("proxy_type", "")),
                   source=proxy_dict.get("source", ""),
                   check_count=proxy_dict.get("check_count", 0),
                   last_status=proxy_dict.get("last_status", ""),
                   last_time=proxy_dict.get("last_time", "")
                   )

    @property
    def proxy(self):
        """ Proxy address, ip:port """
        return self._proxy

    @property
    def fail_count(self):
        """ Number of failed checks """
        return self._fail_count

    @fail_count.setter
    def fail_count(self, value):
        self._fail_count = value

    @property
    def region(self):
        """ Geographic location (country/city) """
        return self._region

    @region.setter
    def region(self, value):
        self._region = value

    @property
    def anonimity(self):
        """ Anonymity level (transparent / high-anonymity) """
        return self._anonimity

    @anonimity.setter
    def anonimity(self, value):
        self._anonimity = value

    @property
    def type(self):
        """ Protocol, e.g. HTTP/HTTPS """
        return self._type

    @type.setter
    def type(self, value):
        self._type = value

    @property
    def source(self):
        """ Where the proxy was obtained from """
        return self._source

    @source.setter
    def source(self, value):
        self._source = value

    @property
    def check_count(self):
        """ Number of checks performed """
        return self._check_count

    @check_count.setter
    def check_count(self, value):
        self._check_count = value

    @property
    def last_status(self):
        """ Result of the latest check: 1 -> usable; 0 -> unusable """
        return self._last_status

    @last_status.setter
    def last_status(self, value):
        self._last_status = value

    @property
    def last_time(self):
        """ Time of the latest check """
        return self._last_time

    @last_time.setter
    def last_time(self, value):
        self._last_time = value

    @property
    def info_dict(self):
        """ All attributes as a dict (the type is stored under "type") """
        return {"proxy": self._proxy,
                "fail_count": self._fail_count,
                "region": self._region,
                "anonimity": self._anonimity,
                "type": self._type,
                "source": self._source,
                "check_count": self.check_count,
                "last_status": self.last_status,
                "last_time": self.last_time}

    @property
    def info_json(self):
        """ All attributes as a JSON string (non-ASCII text kept as-is) """
        return json.dumps(self.info_dict, ensure_ascii=False)
|
||||
40
deploy/ProxyPool/ProxyHelper/ProxyUtil.py
Normal file
40
deploy/ProxyPool/ProxyHelper/ProxyUtil.py
Normal file
@ -0,0 +1,40 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
-------------------------------------------------
|
||||
File Name: ProxyHelper
|
||||
Description :
|
||||
Author : JHao
|
||||
date: 2019/8/8
|
||||
-------------------------------------------------
|
||||
Change Activity:
|
||||
2019/8/8:
|
||||
-------------------------------------------------
|
||||
"""
|
||||
__author__ = 'JHao'
|
||||
|
||||
from Util import validUsefulProxy
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
def checkProxyUseful(proxy_obj):
    """
    Check one proxy with validUsefulProxy and record the outcome on it.

    Every check increments ``check_count`` and stamps ``last_time`` with the
    current local time. A pass sets ``last_status`` to 1 and lowers
    ``fail_count`` by one (never below zero); a failure sets ``last_status``
    to 0 and raises ``fail_count`` by one.

    :param proxy_obj: Proxy object
    :return: (the same Proxy object, True if the check passed else False)
    """
    passed = bool(validUsefulProxy(proxy_obj.proxy))

    # bookkeeping common to both outcomes
    proxy_obj.check_count += 1
    proxy_obj.last_status = 1 if passed else 0
    proxy_obj.last_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    if not passed:
        proxy_obj.fail_count += 1
    elif proxy_obj.fail_count > 0:
        proxy_obj.fail_count -= 1
    return proxy_obj, passed
|
||||
16
deploy/ProxyPool/ProxyHelper/__init__.py
Normal file
16
deploy/ProxyPool/ProxyHelper/__init__.py
Normal file
@ -0,0 +1,16 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
-------------------------------------------------
|
||||
File Name: __init__.py
|
||||
Description :
|
||||
Author : JHao
|
||||
date: 2019/7/11
|
||||
-------------------------------------------------
|
||||
Change Activity:
|
||||
2019/7/11:
|
||||
-------------------------------------------------
|
||||
"""
|
||||
__author__ = 'JHao'
|
||||
|
||||
from ProxyHelper.Proxy import Proxy
|
||||
from ProxyHelper.ProxyUtil import checkProxyUseful
|
||||
239
deploy/ProxyPool/README.md
Normal file
239
deploy/ProxyPool/README.md
Normal file
@ -0,0 +1,239 @@
|
||||
|
||||
爬虫IP代理池
|
||||
=======
|
||||
[](https://travis-ci.org/jhao104/proxy_pool)
|
||||
[](http://www.spiderpy.cn/blog/)
|
||||
[](https://requires.io/github/jhao104/proxy_pool/requirements/?branch=master)
|
||||
[](https://github.com/jhao104/proxy_pool/blob/master/LICENSE)
|
||||
[](https://github.com/jhao104/proxy_pool/graphs/contributors)
|
||||
[](https://github.com/jhao104/proxy_pool)
|
||||
|
||||
______ ______ _
|
||||
| ___ \_ | ___ \ | |
|
||||
| |_/ / \__ __ __ _ __ _ | |_/ /___ ___ | |
|
||||
| __/| _// _ \ \ \/ /| | | || __// _ \ / _ \ | |
|
||||
| | | | | (_) | > < \ |_| || | | (_) | (_) || |___
|
||||
\_| |_| \___/ /_/\_\ \__ |\_| \___/ \___/ \_____\
|
||||
__ / /
|
||||
/___ /
|
||||
|
||||
##### [介绍文档](https://github.com/jhao104/proxy_pool/blob/master/doc/introduce.md)
|
||||
|
||||
* 支持版本:  
|
||||
|
||||
* 测试地址: http://118.24.52.95 (单机勿压, 感谢。 恶意访问关[小黑屋](https://github.com/jhao104/proxy_pool/blob/bff423dffe6e2881ee45d5b66d8a6ad682c8e4ab/doc/block_ips.md)哦)
|
||||
|
||||
### 下载安装
|
||||
|
||||
* 下载源码:
|
||||
|
||||
```shell
|
||||
git clone git@github.com:jhao104/proxy_pool.git
|
||||
|
||||
或者直接到https://github.com/jhao104/proxy_pool/releases 下载zip文件
|
||||
```
|
||||
|
||||
* 安装依赖:
|
||||
|
||||
```shell
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
* 配置Config/setting.py:
|
||||
|
||||
```shell
|
||||
# Config/setting.py 为项目配置文件
|
||||
|
||||
# 配置DB
|
||||
DATABASES = {
|
||||
"default": {
|
||||
"TYPE": "SSDB", # 目前支持SSDB或REDIS数据库
|
||||
"HOST": "127.0.0.1", # db host
|
||||
"PORT": 8888, # db port,例如SSDB通常使用8888,REDIS通常默认使用6379
|
||||
"NAME": "proxy", # 默认配置
|
||||
"PASSWORD": "" # db password
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
# 配置 ProxyGetter
|
||||
|
||||
PROXY_GETTER = [
|
||||
"freeProxy01", # 这里是启用的代理抓取函数名,可在ProxyGetter/getFreeProxy.py 扩展
|
||||
"freeProxy02",
|
||||
....
|
||||
]
|
||||
|
||||
|
||||
# 配置 API服务
|
||||
|
||||
SERVER_API = {
|
||||
"HOST": "0.0.0.0", # 监听ip, 0.0.0.0 监听所有IP
|
||||
"PORT": 5010 # 监听端口
|
||||
}
|
||||
|
||||
# 上面配置启动后,代理池访问地址为 http://127.0.0.1:5010
|
||||
|
||||
```
|
||||
|
||||
* 启动:
|
||||
|
||||
```shell
|
||||
# 如果你的依赖已经安装完成并且具备运行条件,可以在cli目录下通过proxyPool.py启动。
|
||||
# 程序分为: schedule 调度程序 和 webserver Api服务
|
||||
|
||||
# 首先启动调度程序
|
||||
>>>python proxyPool.py schedule
|
||||
|
||||
# 然后启动webApi服务
|
||||
>>>python proxyPool.py webserver
|
||||
|
||||
|
||||
```
|
||||
|
||||
### Docker
|
||||
|
||||
```bash
|
||||
docker pull jhao104/proxy_pool
|
||||
|
||||
# 远程数据库
|
||||
docker run --env db_type=REDIS --env db_host=x.x.x.x --env db_port=6379 --env db_password=pwd_str -p 5010:5010 jhao104/proxy_pool
|
||||
|
||||
# 宿主机上的数据库
|
||||
docker run --env db_type=REDIS --env db_host=host.docker.internal --env db_port=6379 --env db_password=pwd_str -p 5010:5010 jhao104/proxy_pool
|
||||
|
||||
```
|
||||
|
||||
|
||||
### 使用
|
||||
|
||||
启动过几分钟后就能看到抓取到的代理IP,你可以直接到数据库中查看,推荐一个[SSDB可视化工具](https://github.com/jhao104/SSDBAdmin)。
|
||||
|
||||
也可以通过api访问http://127.0.0.1:5010 查看。
|
||||
|
||||
* Api
|
||||
|
||||
| api | method | Description | arg|
|
||||
| ----| ---- | ---- | ----|
|
||||
| / | GET | api介绍 | None |
|
||||
| /get | GET | 随机获取一个代理 | None|
|
||||
| /get_all | GET | 获取所有代理 |None|
|
||||
| /get_status | GET | 查看代理数量 |None|
|
||||
| /delete | GET | 删除代理 |proxy=host:port|
|
||||
|
||||
* 爬虫使用
|
||||
|
||||
如果要在爬虫代码中使用的话, 可以将此api封装成函数直接使用,例如:
|
||||
|
||||
```python
|
||||
import requests
|
||||
|
||||
def get_proxy():
|
||||
return requests.get("http://127.0.0.1:5010/get/").json()
|
||||
|
||||
def delete_proxy(proxy):
|
||||
requests.get("http://127.0.0.1:5010/delete/?proxy={}".format(proxy))
|
||||
|
||||
# your spider code
|
||||
|
||||
def getHtml():
|
||||
# ....
|
||||
retry_count = 5
|
||||
proxy = get_proxy().get("proxy")
|
||||
while retry_count > 0:
|
||||
try:
|
||||
html = requests.get('http://www.example.com', proxies={"http": "http://{}".format(proxy)})
|
||||
# 使用代理访问
|
||||
return html
|
||||
except Exception:
|
||||
retry_count -= 1
|
||||
# 出错5次, 删除代理池中代理
|
||||
delete_proxy(proxy)
|
||||
return None
|
||||
```
|
||||
|
||||
### 扩展代理
|
||||
|
||||
项目默认包含几个免费的代理获取方法,但是免费的毕竟质量不好,所以如果直接运行可能拿到的代理质量不理想。所以,提供了代理获取的扩展方法。
|
||||
|
||||
添加一个新的代理获取方法如下:
|
||||
|
||||
* 1、首先在[GetFreeProxy](https://github.com/jhao104/proxy_pool/blob/b9ccdfaada51b57cfb1bbd0c01d4258971bc8352/ProxyGetter/getFreeProxy.py#L32)类中添加你的获取代理的静态方法,
|
||||
该方法需要以生成器(yield)形式返回`host:port`格式的代理,例如:
|
||||
|
||||
```python
|
||||
|
||||
class GetFreeProxy(object):
|
||||
# ....
|
||||
|
||||
# 你自己的方法
|
||||
@staticmethod
|
||||
def freeProxyCustom(): # 命名不和已有重复即可
|
||||
|
||||
# 通过某网站或者某接口或某数据库获取代理 任意你喜欢的姿势都行
|
||||
# 假设你拿到了一个代理列表
|
||||
proxies = ["139.129.166.68:3128", "139.129.166.61:3128", ...]
|
||||
for proxy in proxies:
|
||||
yield proxy
|
||||
# 确保每个proxy都是 host:port 正确的格式就行
|
||||
```
|
||||
|
||||
* 2、添加好方法后,修改Config/setting.py文件中的`PROXY_GETTER`项:
|
||||
|
||||
在`PROXY_GETTER`下添加自定义的方法的名字:
|
||||
|
||||
```shell
|
||||
PROXY_GETTER = [
|
||||
"freeProxy01",
|
||||
"freeProxy02",
|
||||
....
|
||||
"freeProxyCustom" # # 确保名字和你添加方法名字一致
|
||||
]
|
||||
```
|
||||
|
||||
|
||||
`ProxySchedule`会每隔一段时间抓取一次代理,下次抓取时会自动识别调用你定义的方法。
|
||||
|
||||
### 代理采集
|
||||
|
||||
目前实现的采集免费代理网站有(排名不分先后, 下面仅是对其发布的免费代理情况, 付费代理测评可以参考[这里](https://zhuanlan.zhihu.com/p/33576641)):
|
||||
|
||||
| 厂商名称 | 状态 | 更新速度 | 可用率 | 是否被墙 | 地址 |
|
||||
| ----- | ---- | -------- | ------ | --------- | ----- |
|
||||
| 无忧代理 | 可用 | 几分钟一次 | * | 否 | [地址](http://www.data5u.com/free/index.html) |
|
||||
| 66代理 | 可用 | 更新很慢 | * | 否 | [地址](http://www.66ip.cn/) |
|
||||
| 西刺代理 | 可用 | 几分钟一次 | * | 否 | [地址](http://www.xicidaili.com)|
|
||||
| 全网代理 | 可用 | 几分钟一次 | * | 否 | [地址](http://www.goubanjia.com/)|
|
||||
| 训代理 | 已关闭免费代理 | * | * | 否 | [地址](http://www.xdaili.cn/)|
|
||||
| 快代理 | 可用 |几分钟一次| * | 否 | [地址](https://www.kuaidaili.com/)|
|
||||
| 云代理 | 可用 |几分钟一次| * | 否 | [地址](http://www.ip3366.net/)|
|
||||
| IP海 | 可用 |几小时一次| * | 否 | [地址](http://www.iphai.com/)|
|
||||
| 免费IP代理库 | 可用 |快| * | 否 | [地址](http://ip.jiangxianli.com/)|
|
||||
| 中国IP地址 | 可用 |几分钟一次| * | 是 | [地址](http://cn-proxy.com/)|
|
||||
| Proxy List | 可用 |几分钟一次| * | 是 | [地址](https://proxy-list.org/chinese/index.php)|
|
||||
| ProxyList+ | 可用 |几分钟一次| * | 是 | [地址](https://list.proxylistplus.com/Fresh-HTTP-Proxy-List-1)|
|
||||
|
||||
如果还有其他好的免费代理网站, 可以提交到[issues](https://github.com/jhao104/proxy_pool/issues/71), 下次更新时会考虑在项目中支持。
|
||||
|
||||
### 问题反馈
|
||||
|
||||
任何问题欢迎在[Issues](https://github.com/jhao104/proxy_pool/issues) 中反馈,如果没有账号可以去 我的[博客](http://www.spiderpy.cn/blog/message)中留言。
|
||||
|
||||
你的反馈会让此项目变得更加完美。
|
||||
|
||||
### 贡献代码
|
||||
|
||||
本项目仅作为基本的通用的代理池架构,不接收特有功能(当然,不限于特别好的idea)。
|
||||
|
||||
本项目依然不够完善,如果发现bug或有新的功能添加,请在[Issues](https://github.com/jhao104/proxy_pool/issues)中提交bug(或新功能)描述,在确认后提交你的代码。
|
||||
|
||||
这里感谢以下contributor的无私奉献:
|
||||
|
||||
[@kangnwh](https://github.com/kangnwh)| [@bobobo80](https://github.com/bobobo80)| [@halleywj](https://github.com/halleywj)| [@newlyedward](https://github.com/newlyedward)| [@wang-ye](https://github.com/wang-ye)| [@gladmo](https://github.com/gladmo)| [@bernieyangmh](https://github.com/bernieyangmh)| [@PythonYXY](https://github.com/PythonYXY)| [@zuijiawoniu](https://github.com/zuijiawoniu)| [@netAir](https://github.com/netAir)| [@scil](https://github.com/scil)| [@tangrela](https://github.com/tangrela)| [@highroom](https://github.com/highroom)| [@luocaodan](https://github.com/luocaodan)| [@vc5](https://github.com/vc5)| [@1again](https://github.com/1again)| [@obaiyan](https://github.com/obaiyan)
|
||||
|
||||
|
||||
### Release Notes
|
||||
|
||||
[release notes](https://github.com/jhao104/proxy_pool/blob/master/doc/release_notes.md)
|
||||
|
||||
61
deploy/ProxyPool/Schedule/ProxyScheduler.py
Normal file
61
deploy/ProxyPool/Schedule/ProxyScheduler.py
Normal file
@ -0,0 +1,61 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
-------------------------------------------------
|
||||
File Name: ProxyScheduler
|
||||
Description :
|
||||
Author : JHao
|
||||
date: 2019/8/5
|
||||
-------------------------------------------------
|
||||
Change Activity:
|
||||
2019/8/5: ProxyScheduler
|
||||
-------------------------------------------------
|
||||
"""
|
||||
__author__ = 'JHao'
|
||||
|
||||
import sys
|
||||
from apscheduler.schedulers.blocking import BlockingScheduler
|
||||
|
||||
sys.path.append('../')
|
||||
|
||||
from Schedule import doRawProxyCheck, doUsefulProxyCheck
|
||||
from Manager import ProxyManager
|
||||
from Util import LogHandler
|
||||
|
||||
|
||||
class DoFetchProxy(ProxyManager):
    """ProxyManager subclass that runs one fetch pass between two log lines."""

    def __init__(self):
        ProxyManager.__init__(self)
        # dedicated logger for fetch runs (set after the base initialiser)
        self.log = LogHandler('fetch_proxy')

    def main(self):
        """Log the start, call the inherited ``fetch()``, then log the finish."""
        self.log.info("start fetch proxy")
        self.fetch()
        self.log.info("finish fetch proxy")
|
||||
|
||||
|
||||
def rawProxyScheduler():
    """Scheduled job: fetch new proxies, then run the raw-proxy check."""
    fetcher = DoFetchProxy()
    fetcher.main()
    doRawProxyCheck()
|
||||
|
||||
|
||||
def usefulProxyScheduler():
    """Scheduled job: run the useful-proxy check once."""
    doUsefulProxyCheck()
|
||||
|
||||
|
||||
def runScheduler():
    """Run both jobs once, then register them as 5-minute interval jobs and start.

    Uses a ``BlockingScheduler``, so ``scheduler.start()`` is the last call made here.
    """
    # Run each task once before handing control to the scheduler.
    for task in (rawProxyScheduler, usefulProxyScheduler):
        task()

    scheduler = BlockingScheduler(logger=LogHandler("scheduler_log"))

    # (callable, job id, display name) for every periodic job.
    periodic_jobs = (
        (rawProxyScheduler, "raw_proxy_check", "raw_proxy定时采集"),
        (usefulProxyScheduler, "useful_proxy_check", "useful_proxy定时检查"),
    )
    for job_func, job_id, job_name in periodic_jobs:
        scheduler.add_job(job_func, 'interval', minutes=5, id=job_id, name=job_name)

    scheduler.start()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
runScheduler()
|
||||
81
deploy/ProxyPool/Schedule/RawProxyCheck.py
Normal file
81
deploy/ProxyPool/Schedule/RawProxyCheck.py
Normal file
@ -0,0 +1,81 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
-------------------------------------------------
|
||||
File Name: RawProxyCheck
|
||||
Description : check raw_proxy to useful
|
||||
Author : JHao
|
||||
date: 2019/8/6
|
||||
-------------------------------------------------
|
||||
Change Activity:
|
||||
2019/8/6: check raw_proxy to useful
|
||||
-------------------------------------------------
|
||||
"""
|
||||
__author__ = 'JHao'
|
||||
|
||||
from threading import Thread
|
||||
|
||||
try:
|
||||
from Queue import Empty, Queue # py2
|
||||
except:
|
||||
from queue import Empty, Queue # py3
|
||||
|
||||
from Util import LogHandler
|
||||
from Manager import ProxyManager
|
||||
from ProxyHelper import Proxy, checkProxyUseful
|
||||
|
||||
|
||||
class RawProxyCheck(ProxyManager, Thread):
    """Worker thread that validates queued raw proxies and stores the good ones.

    Each queue item is a serialized proxy; it is rebuilt with
    ``Proxy.newProxyFromJson`` and checked with ``checkProxyUseful``.
    """

    def __init__(self, queue, thread_name):
        """
        :param queue: queue of serialized proxies shared by all workers
        :param thread_name: name given to the underlying Thread
        """
        ProxyManager.__init__(self)
        Thread.__init__(self, name=thread_name)
        self.log = LogHandler('raw_proxy_check')
        self.queue = queue

    def run(self):
        """Drain the queue without blocking; exit as soon as it is empty."""
        self.log.info("RawProxyCheck - {} : start".format(self.name))
        # Results are written to the useful-proxy table.
        self.db.changeTable(self.useful_proxy_queue)
        while True:
            try:
                proxy_json = self.queue.get(block=False)
            except Empty:
                self.log.info("RawProxyCheck - {} : exit".format(self.name))
                break

            proxy_obj, status = checkProxyUseful(Proxy.newProxyFromJson(proxy_json))

            # Pick the log line; only a passing, not-yet-stored proxy is written.
            if not status:
                template = 'RawProxyCheck - {} : {} validation fail'
            elif self.db.exists(proxy_obj.proxy):
                template = 'RawProxyCheck - {} : {} validation exists'
            else:
                self.db.put(proxy_obj)
                template = 'RawProxyCheck - {} : {} validation pass'
            self.log.info(template.format(self.name, proxy_obj.proxy.ljust(20)))
            self.queue.task_done()
|
||||
|
||||
|
||||
def doRawProxyCheck():
    """Move every raw proxy into a queue, clear the raw table, and check with 20 threads."""
    pending = Queue()

    manager = ProxyManager()
    manager.db.changeTable(manager.raw_proxy_queue)
    for raw_proxy in manager.db.getAll():
        pending.put(raw_proxy)
    # The raw table is emptied before validation starts; the queue now holds the only copy.
    manager.db.clear()

    workers = [RawProxyCheck(pending, "thread_%s" % index) for index in range(20)]

    for worker in workers:
        worker.start()

    # Block until every worker has seen the queue empty.
    for worker in workers:
        worker.join()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
doRawProxyCheck()
|
||||
83
deploy/ProxyPool/Schedule/UsefulProxyCheck.py
Normal file
83
deploy/ProxyPool/Schedule/UsefulProxyCheck.py
Normal file
@ -0,0 +1,83 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
-------------------------------------------------
|
||||
File Name: UsefulProxyCheck
|
||||
Description : check useful proxy
|
||||
Author : JHao
|
||||
date: 2019/8/7
|
||||
-------------------------------------------------
|
||||
Change Activity:
|
||||
2019/8/7: check useful proxy
|
||||
-------------------------------------------------
|
||||
"""
|
||||
__author__ = 'JHao'
|
||||
|
||||
from threading import Thread
|
||||
|
||||
try:
|
||||
from Queue import Queue, Empty # py2
|
||||
except:
|
||||
from queue import Queue, Empty # py3
|
||||
|
||||
from Util import LogHandler
|
||||
from Manager import ProxyManager
|
||||
from ProxyHelper import checkProxyUseful, Proxy
|
||||
|
||||
FAIL_COUNT = 0
|
||||
|
||||
|
||||
class UsefulProxyCheck(ProxyManager, Thread):
    """Worker thread that re-validates proxies already in the useful-proxy table."""

    def __init__(self, queue, thread_name):
        """
        :param queue: queue of serialized proxies shared by all workers
        :param thread_name: name given to the underlying Thread
        """
        ProxyManager.__init__(self)
        Thread.__init__(self, name=thread_name)

        self.queue = queue
        self.log = LogHandler('useful_proxy_check')

    def run(self):
        """Drain the queue without blocking; re-store survivors and delete failures."""
        self.log.info("UsefulProxyCheck - {} : start".format(self.name))
        self.db.changeTable(self.useful_proxy_queue)
        while True:
            try:
                proxy_str = self.queue.get(block=False)
            except Empty:
                self.log.info("UsefulProxyCheck - {} : exit".format(self.name))
                break

            proxy_obj, status = checkProxyUseful(Proxy.newProxyFromJson(proxy_str))

            # A proxy is kept if it passed, or if its fail_count is below FAIL_COUNT.
            keep = status or proxy_obj.fail_count < FAIL_COUNT
            padded_proxy = proxy_obj.proxy.ljust(20)
            if not keep:
                self.log.info(
                    'UsefulProxyCheck - {} : {} validation fail'.format(self.name, padded_proxy))
                self.db.delete(proxy_obj.proxy)
            else:
                self.db.put(proxy_obj)
                self.log.info(
                    'UsefulProxyCheck - {} : {} validation pass'.format(self.name, padded_proxy))
            self.queue.task_done()
|
||||
|
||||
|
||||
def doUsefulProxyCheck():
    """Queue every proxy in the useful table and re-check them with 10 threads."""
    pending = Queue()

    manager = ProxyManager()
    manager.db.changeTable(manager.useful_proxy_queue)
    for stored_proxy in manager.db.getAll():
        pending.put(stored_proxy)

    workers = [UsefulProxyCheck(pending, "thread_%s" % index) for index in range(10)]

    for worker in workers:
        worker.start()

    # Block until every worker has seen the queue empty.
    for worker in workers:
        worker.join()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
doUsefulProxyCheck()
|
||||
16
deploy/ProxyPool/Schedule/__init__.py
Normal file
16
deploy/ProxyPool/Schedule/__init__.py
Normal file
@ -0,0 +1,16 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
-------------------------------------------------
|
||||
File Name: __init__.py.py
|
||||
Description :
|
||||
Author : JHao
|
||||
date: 2016/12/3
|
||||
-------------------------------------------------
|
||||
Change Activity:
|
||||
2016/12/3:
|
||||
-------------------------------------------------
|
||||
"""
|
||||
__author__ = 'JHao'
|
||||
|
||||
from Schedule.RawProxyCheck import doRawProxyCheck
|
||||
from Schedule.UsefulProxyCheck import doUsefulProxyCheck
|
||||
13
deploy/ProxyPool/Test/__init__.py
Normal file
13
deploy/ProxyPool/Test/__init__.py
Normal file
@ -0,0 +1,13 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
-------------------------------------------------
|
||||
File Name: __init__
|
||||
Description :
|
||||
Author : JHao
|
||||
date: 2019/2/15
|
||||
-------------------------------------------------
|
||||
Change Activity:
|
||||
2019/2/15:
|
||||
-------------------------------------------------
|
||||
"""
|
||||
__author__ = 'JHao'
|
||||
33
deploy/ProxyPool/Test/testConfig.py
Normal file
33
deploy/ProxyPool/Test/testConfig.py
Normal file
@ -0,0 +1,33 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
-------------------------------------------------
|
||||
File Name: testGetConfig
|
||||
Description : testGetConfig
|
||||
Author : J_hao
|
||||
date: 2017/7/31
|
||||
-------------------------------------------------
|
||||
Change Activity:
|
||||
2017/7/31:
|
||||
-------------------------------------------------
|
||||
"""
|
||||
__author__ = 'J_hao'
|
||||
|
||||
from Config.ConfigGetter import config
|
||||
|
||||
|
||||
# noinspection PyPep8Naming
|
||||
def testConfig():
    """Print the database settings and check the getter list type.

    :return: None
    """
    for field in ('db_type', 'db_name', 'db_host', 'db_port', 'db_password'):
        print(getattr(config, field))
    assert isinstance(config.proxy_getter_functions, list)
    print(config.proxy_getter_functions)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
testConfig()
|
||||
37
deploy/ProxyPool/Test/testGetFreeProxy.py
Normal file
37
deploy/ProxyPool/Test/testGetFreeProxy.py
Normal file
@ -0,0 +1,37 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
-------------------------------------------------
|
||||
File Name: testGetFreeProxy
|
||||
Description : test model ProxyGetter/getFreeProxy
|
||||
Author : J_hao
|
||||
date: 2017/7/31
|
||||
-------------------------------------------------
|
||||
Change Activity:
|
||||
2017/7/31:function testGetFreeProxy
|
||||
-------------------------------------------------
|
||||
"""
|
||||
__author__ = 'J_hao'
|
||||
|
||||
|
||||
from ProxyGetter.getFreeProxy import GetFreeProxy
|
||||
from Config.ConfigGetter import config
|
||||
|
||||
|
||||
def testGetFreeProxy():
    """Run every configured GetFreeProxy getter and print each proxy it yields.

    :return: None
    """
    for getter_name in config.proxy_getter_functions:
        proxy_count = 0
        fetch = getattr(GetFreeProxy, getter_name.strip())
        for proxy in fetch():
            if not proxy:
                continue
            # The count is printed before it is incremented, so it is zero-based.
            print('{func}: fetch proxy {proxy},proxy_count:{proxy_count}'.format(
                func=getter_name, proxy=proxy, proxy_count=proxy_count))
            proxy_count += 1
        # assert proxy_count >= 20, '{} fetch proxy fail'.format(proxyGetter)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
testGetFreeProxy()
|
||||
35
deploy/ProxyPool/Test/testLogHandler.py
Normal file
35
deploy/ProxyPool/Test/testLogHandler.py
Normal file
@ -0,0 +1,35 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
-------------------------------------------------
|
||||
File Name: testLogHandler
|
||||
Description :
|
||||
Author : J_hao
|
||||
date: 2017/8/2
|
||||
-------------------------------------------------
|
||||
Change Activity:
|
||||
2017/8/2:
|
||||
-------------------------------------------------
|
||||
"""
|
||||
__author__ = 'J_hao'
|
||||
|
||||
from Util.LogHandler import LogHandler
|
||||
|
||||
|
||||
# noinspection PyPep8Naming
|
||||
def testLogHandler():
    """Log once under the initial name, then once after each of two renames.

    :return: None
    """
    log = LogHandler('test')
    log.info('this is a log from test')

    renames = (
        ('test1', 'this is a log from test1'),
        ('test2', 'this is a log from test2'),
    )
    for new_name, message in renames:
        log.resetName(name=new_name)
        log.info(message)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
testLogHandler()
|
||||
33
deploy/ProxyPool/Test/testProxyClass.py
Normal file
33
deploy/ProxyPool/Test/testProxyClass.py
Normal file
@ -0,0 +1,33 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
-------------------------------------------------
|
||||
File Name: testProxyClass
|
||||
Description :
|
||||
Author : JHao
|
||||
date: 2019/8/8
|
||||
-------------------------------------------------
|
||||
Change Activity:
|
||||
2019/8/8:
|
||||
-------------------------------------------------
|
||||
"""
|
||||
__author__ = 'JHao'
|
||||
|
||||
import json
|
||||
from ProxyHelper import Proxy
|
||||
|
||||
|
||||
def testProxyClass():
    """Round-trip a Proxy through JSON and print each stage.

    :return: None
    """
    proxy = Proxy("127.0.0.1:8080")

    print(proxy.info_dict)

    proxy.source = "test"

    proxy_str = json.dumps(proxy.info_dict, ensure_ascii=False)

    print(proxy_str)

    print(Proxy.newProxyFromJson(proxy_str).info_dict)


# Guarded like the sibling test modules so importing this file has no side effects.
if __name__ == '__main__':
    testProxyClass()
|
||||
30
deploy/ProxyPool/Test/testWebRequest.py
Normal file
30
deploy/ProxyPool/Test/testWebRequest.py
Normal file
@ -0,0 +1,30 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
-------------------------------------------------
|
||||
File Name: testWebRequest
|
||||
Description : test class WebRequest
|
||||
Author : J_hao
|
||||
date: 2017/7/31
|
||||
-------------------------------------------------
|
||||
Change Activity:
|
||||
2017/7/31: function testWebRequest
|
||||
-------------------------------------------------
|
||||
"""
|
||||
__author__ = 'J_hao'
|
||||
|
||||
from Util.WebRequest import WebRequest
|
||||
|
||||
|
||||
# noinspection PyPep8Naming
|
||||
def testWebRequest():
    """Fetch a page through WebRequest and require status code 200.

    :return: None
    """
    response = WebRequest().get('https://www.baidu.com/')
    assert response.status_code == 200
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
testWebRequest()
|
||||
102
deploy/ProxyPool/Util/LogHandler.py
Normal file
102
deploy/ProxyPool/Util/LogHandler.py
Normal file
@ -0,0 +1,102 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
-------------------------------------------------
|
||||
File Name: LogHandler.py
|
||||
Description : 日志操作模块
|
||||
Author : JHao
|
||||
date: 2017/3/6
|
||||
-------------------------------------------------
|
||||
Change Activity:
|
||||
2017/3/6: log handler
|
||||
2017/9/21: 屏幕输出/文件输出 可选(默认屏幕和文件均输出)
|
||||
-------------------------------------------------
|
||||
"""
|
||||
__author__ = 'JHao'
|
||||
|
||||
import os
|
||||
|
||||
import logging
|
||||
|
||||
from logging.handlers import TimedRotatingFileHandler
|
||||
|
||||
# 日志级别
|
||||
CRITICAL = 50
|
||||
FATAL = CRITICAL
|
||||
ERROR = 40
|
||||
WARNING = 30
|
||||
WARN = WARNING
|
||||
INFO = 20
|
||||
DEBUG = 10
|
||||
NOTSET = 0
|
||||
|
||||
CURRENT_PATH = os.path.dirname(os.path.abspath(__file__))
ROOT_PATH = os.path.join(CURRENT_PATH, os.pardir)
LOG_PATH = os.path.join(ROOT_PATH, 'log')

# Create the log directory on import. Two processes importing this module at
# the same time can both see it missing, so tolerate "already exists" instead
# of checking first.
try:
    os.mkdir(LOG_PATH)
except OSError:
    if not os.path.isdir(LOG_PATH):
        raise
|
||||
|
||||
|
||||
class LogHandler(logging.Logger):
    """Logger that can write to the console and to a daily-rotated file.

    The file is ``<LOG_PATH>/<name>.log``. ``resetName`` renames the logger and
    moves file output to the matching file.
    """

    # One format shared by the stream and file handlers.
    _LOG_FORMAT = '%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s'

    def __init__(self, name, level=logging.DEBUG, stream=True, file=True):
        """
        :param name: logger name, also used as the log file base name
        :param level: minimum level for the logger and its handlers
        :param stream: attach a console handler when true
        :param file: attach a rotating file handler when true
        """
        logging.Logger.__init__(self, name, level=level)
        # Always defined so resetName works when no file handler was requested.
        self.file_handler = None
        if stream:
            self.__setStreamHandler__()
        if file:
            self.__setFileHandler__()

    def __setFileHandler__(self, level=None):
        """
        Attach a file handler for the current logger name.

        :param level: handler level; a falsy value means "use the logger's level"
        :return: None
        """
        file_name = os.path.join(LOG_PATH, '{name}.log'.format(name=self.name))
        # Rotate once per day and keep 15 backups.
        file_handler = TimedRotatingFileHandler(filename=file_name, when='D', interval=1, backupCount=15)
        file_handler.suffix = '%Y%m%d.log'
        file_handler.setLevel(level if level else self.level)
        file_handler.setFormatter(logging.Formatter(self._LOG_FORMAT))

        self.file_handler = file_handler
        self.addHandler(file_handler)

    def __setStreamHandler__(self, level=None):
        """
        Attach a console handler.

        :param level: handler level; a falsy value means "use the logger's level"
        :return: None
        """
        stream_handler = logging.StreamHandler()
        stream_handler.setFormatter(logging.Formatter(self._LOG_FORMAT))
        stream_handler.setLevel(level if level else self.level)
        self.addHandler(stream_handler)

    def resetName(self, name):
        """
        Rename the logger and, if it logs to a file, switch to the new file.

        :param name: new logger name
        :return: None
        """
        self.name = name
        previous = self.file_handler
        if previous is None:
            # Built with file=False: there is nothing to replace.
            return
        self.removeHandler(previous)
        # Close the old handler so its file descriptor is released.
        previous.close()
        self.file_handler = None
        self.__setFileHandler__()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
log = LogHandler('test')
|
||||
log.info('this is a test msg')
|
||||
85
deploy/ProxyPool/Util/WebRequest.py
Normal file
85
deploy/ProxyPool/Util/WebRequest.py
Normal file
@ -0,0 +1,85 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
-------------------------------------------------
|
||||
File Name: WebRequest
|
||||
Description : Network Requests Class
|
||||
Author : J_hao
|
||||
date: 2017/7/31
|
||||
-------------------------------------------------
|
||||
Change Activity:
|
||||
2017/7/31:
|
||||
-------------------------------------------------
|
||||
"""
|
||||
__author__ = 'J_hao'
|
||||
|
||||
from requests.models import Response
|
||||
import requests
|
||||
import random
|
||||
import time
|
||||
|
||||
|
||||
class WebRequest(object):
    """Thin wrapper around ``requests.get`` with random User-Agent and retries."""

    # Pool of User-Agent strings; one is picked at random per header build.
    _USER_AGENTS = (
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101',
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122',
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71',
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95',
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71',
        'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)',
        'Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50',
        'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0',
    )

    def __init__(self, *args, **kwargs):
        pass

    @property
    def user_agent(self):
        """
        Return a User-Agent string chosen at random.

        :return: str
        """
        return random.choice(self._USER_AGENTS)

    @property
    def header(self):
        """
        Build the basic request headers (a new dict on every access).

        :return: dict
        """
        return {'User-Agent': self.user_agent,
                'Accept': '*/*',
                'Connection': 'keep-alive',
                'Accept-Language': 'zh-CN,zh;q=0.8'}

    @staticmethod
    def _as_bytes(flag):
        """Encode a text flag as UTF-8 so it can be searched in a bytes body."""
        return flag if isinstance(flag, bytes) else flag.encode('utf-8')

    def get(self, url, header=None, retry_time=5, timeout=30,
            retry_flag=None, retry_interval=5, *args, **kwargs):
        """
        Send a GET request, retrying on errors or when a retry flag is found.

        :param url: target url
        :param header: extra headers merged over the basic ones
        :param retry_time: maximum number of attempts
        :param timeout: network timeout passed to requests
        :param retry_flag: iterable of markers; if any occurs in the body, retry
        :param retry_interval: seconds to sleep between attempts
        :param args: accepted for compatibility, unused
        :param kwargs: forwarded to ``requests.get``
        :return: the response; after the final failed attempt, an empty
                 ``Response`` whose status_code is set to 200
        """
        headers = self.header
        if header and isinstance(header, dict):
            headers.update(header)
        # ``response.content`` is bytes, so compare against bytes markers.
        flags = [self._as_bytes(flag) for flag in (retry_flag or ())]
        while True:
            try:
                html = requests.get(url, headers=headers, timeout=timeout, **kwargs)
                if any(flag in html.content for flag in flags):
                    raise Exception('retry flag found in response body')
                return html
            except Exception as e:
                print(e)
                retry_time -= 1
                if retry_time <= 0:
                    # All attempts failed: callers receive a blank response.
                    resp = Response()
                    resp.status_code = 200
                    return resp
                time.sleep(retry_interval)
|
||||
16
deploy/ProxyPool/Util/__init__.py
Normal file
16
deploy/ProxyPool/Util/__init__.py
Normal file
@ -0,0 +1,16 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
-------------------------------------------------
|
||||
File Name: __init__.py.py
|
||||
Description :
|
||||
Author : JHao
|
||||
date: 2016/11/25
|
||||
-------------------------------------------------
|
||||
Change Activity:
|
||||
2016/11/25:
|
||||
-------------------------------------------------
|
||||
"""
|
||||
|
||||
from Util.utilFunction import validUsefulProxy
|
||||
from Util.LogHandler import LogHandler
|
||||
from Util.utilClass import Singleton
|
||||
45
deploy/ProxyPool/Util/utilClass.py
Normal file
45
deploy/ProxyPool/Util/utilClass.py
Normal file
@ -0,0 +1,45 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# !/usr/bin/env python
|
||||
"""
|
||||
-------------------------------------------------
|
||||
File Name: utilClass.py
|
||||
Description : tool class
|
||||
Author : JHao
|
||||
date: 2016/12/3
|
||||
-------------------------------------------------
|
||||
Change Activity:
|
||||
2016/12/3: Class LazyProperty
|
||||
-------------------------------------------------
|
||||
"""
|
||||
__author__ = 'JHao'
|
||||
|
||||
|
||||
class LazyProperty(object):
    """
    Descriptor that computes a value on first access and caches it.

    The result is stored on the instance under the wrapped function's name,
    so later lookups find the instance attribute and skip this descriptor.
    explain: http://www.spiderpy.cn/blog/5/
    """

    def __init__(self, func):
        self.func = func

    def __get__(self, instance, owner):
        # Accessed on the class itself: hand back the descriptor.
        if instance is None:
            return self
        computed = self.func(instance)
        setattr(instance, self.func.__name__, computed)
        return computed
|
||||
|
||||
|
||||
class Singleton(type):
    """
    Singleton metaclass.

    The first call to a class creates its instance; every later call returns
    that same instance and ignores the arguments passed.
    """

    # Instances keyed by class, shared by every class using this metaclass.
    _inst = {}

    def __call__(cls, *args, **kwargs):
        if cls not in cls._inst:
            # Forward keyword arguments too, so keyword-only construction works.
            cls._inst[cls] = super(Singleton, cls).__call__(*args, **kwargs)
        return cls._inst[cls]
|
||||
95
deploy/ProxyPool/Util/utilFunction.py
Normal file
95
deploy/ProxyPool/Util/utilFunction.py
Normal file
@ -0,0 +1,95 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# !/usr/bin/env python
|
||||
"""
|
||||
-------------------------------------------------
|
||||
File Name: utilFunction.py
|
||||
Description : tool function
|
||||
Author : JHao
|
||||
date: 2016/11/25
|
||||
-------------------------------------------------
|
||||
Change Activity:
|
||||
2016/11/25: 添加robustCrawl、verifyProxy、getHtmlTree
|
||||
-------------------------------------------------
|
||||
"""
|
||||
import requests
|
||||
from lxml import etree
|
||||
|
||||
from Util.WebRequest import WebRequest
|
||||
|
||||
|
||||
def robustCrawl(func):
    """
    Decorator that makes ``func`` best-effort: any Exception is swallowed.

    :param func: callable to wrap
    :return: wrapper returning ``func``'s result, or None if it raised
    """
    from functools import wraps

    @wraps(func)  # keep the wrapped function's __name__ and __doc__
    def decorate(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except Exception:
            # Deliberately ignored; the implicit return value is None.
            pass

    return decorate
|
||||
|
||||
|
||||
def verifyProxyFormat(proxy):
    """
    Check that ``proxy`` is exactly one ``a.b.c.d:port`` token.

    Only the digit counts are checked (1-3 per octet, 1-5 for the port),
    not the numeric ranges.

    :param proxy: candidate proxy string
    :return: True if the whole string matches, else False
    """
    import re
    pattern = r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d{1,5}"
    # Valid only when the single match found is the entire input.
    return re.findall(pattern, proxy) == [proxy]
|
||||
|
||||
|
||||
def getHtmlTree(url, **kwargs):
    """
    Download ``url`` and parse the body into an lxml HTML tree.

    :param url: page to fetch
    :param kwargs: accepted but not used by this function
    :return: result of ``etree.HTML`` on the response content
    """
    browser_headers = {
        'Connection': 'keep-alive',
        'Cache-Control': 'max-age=0',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko)',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'zh-CN,zh;q=0.8',
    }
    # TODO: fetch a proxy and send this request through it
    response = WebRequest().get(url=url, header=browser_headers)
    return etree.HTML(response.content)
|
||||
|
||||
|
||||
def tcpConnect(proxy, timeout=None):
    """
    Try a TCP connection (three-way handshake) to ``proxy``.

    :param proxy: ``"ip:port"`` string
    :param timeout: optional socket timeout in seconds; None keeps the
                    blocking behaviour
    :return: True if ``connect_ex`` returned 0, else False
    :raises ValueError: if ``proxy`` is not exactly ``ip:port`` or the port is not an integer
    """
    from socket import socket, AF_INET, SOCK_STREAM
    ip, port = proxy.split(':')
    s = socket(AF_INET, SOCK_STREAM)
    try:
        s.settimeout(timeout)
        return s.connect_ex((ip, int(port))) == 0
    finally:
        # Always release the descriptor; this runs once per candidate proxy.
        s.close()
|
||||
|
||||
|
||||
def validUsefulProxy(proxy):
    """
    Check whether a proxy can fetch a test page.

    :param proxy: ``ip:port`` as str or UTF-8 bytes
    :return: True if the GET through the proxy returns status 200; False on
             any other status or on any exception
    """
    proxy_text = proxy.decode("utf8") if isinstance(proxy, bytes) else proxy
    proxies = {"http": "http://{proxy}".format(proxy=proxy_text)}
    try:
        response = requests.get('http://www.baidu.com', proxies=proxies, timeout=10, verify=False)
        return response.status_code == 200
    except Exception:
        # Any failure counts as "proxy not usable".
        return False
|
||||
|
||||
13
deploy/ProxyPool/__init__.py
Normal file
13
deploy/ProxyPool/__init__.py
Normal file
@ -0,0 +1,13 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
-------------------------------------------------
|
||||
File Name: __init__.py
|
||||
Description :
|
||||
Author : JHao
|
||||
date: 2016/12/3
|
||||
-------------------------------------------------
|
||||
Change Activity:
|
||||
2016/12/3:
|
||||
-------------------------------------------------
|
||||
"""
|
||||
__author__ = 'JHao'
|
||||
1
deploy/ProxyPool/_config.yml
Normal file
1
deploy/ProxyPool/_config.yml
Normal file
@ -0,0 +1 @@
|
||||
theme: jekyll-theme-time-machine
|
||||
52
deploy/ProxyPool/cli/proxyPool.py
Normal file
52
deploy/ProxyPool/cli/proxyPool.py
Normal file
@ -0,0 +1,52 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
-------------------------------------------------
|
||||
File Name: proxy_pool
|
||||
Description :
|
||||
Author : JHao
|
||||
date: 2019/8/2
|
||||
-------------------------------------------------
|
||||
Change Activity:
|
||||
2019/8/2:
|
||||
-------------------------------------------------
|
||||
"""
|
||||
__author__ = 'JHao'
|
||||
|
||||
import sys
|
||||
import click
|
||||
import platform
|
||||
|
||||
sys.path.append('../')
|
||||
|
||||
from Config.setting import HEADER
|
||||
from Schedule.ProxyScheduler import runScheduler
|
||||
from Api.ProxyApi import runFlask,runFlaskWithGunicorn
|
||||
|
||||
CONTEXT_SETTINGS = dict(help_option_names=['-h', '--help'])
|
||||
|
||||
|
||||
# Root command group; -h/--help and --version are enabled by the decorators.
# The docstring is shown by click as help text, so it is kept verbatim.
@click.group(context_settings=CONTEXT_SETTINGS)
@click.version_option(version='2.0.0')
def cli():
    """ProxyPool cli工具"""
|
||||
|
||||
|
||||
# "schedule" sub-command: print the banner, then run the scheduler.
# The docstring is shown by click as help text, so it is kept verbatim.
@cli.command(name="schedule")
def schedule():
    """ 启动调度程序 """
    click.echo(HEADER)
    runScheduler()
|
||||
|
||||
|
||||
# "webserver" sub-command: print the banner, then start the web API.
# The docstring is shown by click as help text, so it is kept verbatim.
@cli.command(name="webserver")
def webserver():
    """ 启动web服务 """
    click.echo(HEADER)
    # On Windows use the plain Flask runner; elsewhere start under Gunicorn.
    on_windows = platform.system() == "Windows"
    start_server = runFlask if on_windows else runFlaskWithGunicorn
    start_server()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
cli()
|
||||
3
deploy/ProxyPool/cli/start.sh
Normal file
3
deploy/ProxyPool/cli/start.sh
Normal file
@ -0,0 +1,3 @@
|
||||
#!/usr/bin/env bash
# Start the web API in the background and the scheduler in the foreground.
# proxyPool.py is addressed by relative path and appends '../' to sys.path,
# both resolved against the working directory, so enter this script's
# directory first.
cd "$(dirname "$0")" || exit 1
python proxyPool.py webserver &
python proxyPool.py schedule
|
||||
8
deploy/ProxyPool/doc/block_ips.md
Normal file
8
deploy/ProxyPool/doc/block_ips.md
Normal file
@ -0,0 +1,8 @@
|
||||
| block IP | block 日期 | msg |
|
||||
| ----- | ---- | -------- |
|
||||
| 144.52.45.149 | 20190815 | 恶意访问 |
|
||||
| 39.100.153.226 | 20190816 | 恶意访问 |
|
||||
| 47.102.47.42 | 20190819 | 恶意访问 |
|
||||
| 125.71.211.125 | 20190820 | 恶意访问 |
|
||||
|
||||
如需正常访问请提issues说明
|
||||
173
deploy/ProxyPool/doc/introduce.md
Normal file
173
deploy/ProxyPool/doc/introduce.md
Normal file
@ -0,0 +1,173 @@
|
||||
|
||||
## 代理池介绍
|
||||
|
||||
本项目通过爬虫方式持续抓取代理网站公布的免费代理IP,实时校验,维护部分可以使用的代理,并通过api的形式提供外部使用。
|
||||
|
||||
### 1、问题
|
||||
|
||||
构建一个代理IP池,可能有下面这些问题:
|
||||
|
||||
* 代理IP从何而来?
|
||||
|
||||
许多刚接触爬虫的,都试过去西刺、快代理之类有免费代理的网站去抓些免费代理,还是有一些代理能用。
|
||||
当然,如果你有更好的代理接口也可以自己接入。
|
||||
|
||||
免费代理的采集也很简单,无非就是:`访问页面` —> `正则/xpath提取` —> `保存`
|
||||
|
||||
* 如何保证代理质量?
|
||||
|
||||
可以肯定免费的代理IP大部分都是不能用的,不然别人还提供付费接口干嘛(不过事实上很多代理商的付费IP也不稳定,也有很多是不能用)。
|
||||
所以采集回来的代理IP不能直接使用,检测的办法也很简单:可以写个程序不断的用代理访问一个稳定的网站,看是否可以正常访问即可。
|
||||
这个过程可以使用多线/进程或异步的方式,因为检测代理是个很慢的过程。
|
||||
|
||||
* 采集回来的代理如何存储?
|
||||
|
||||
这里不得不推荐一个国人开发的高性能支持多种数据结构的NoSQL数据库[SSDB](http://ssdb.io/docs/zh_cn/),用于替代Redis。支持队列、hash、set、k-v对,支持T级别数据。是做分布式爬虫很好的中间存储工具。
|
||||
|
||||
* 如何让爬虫更方便的用到这些代理?
|
||||
|
||||
答案肯定是做成服务咯,Python有这么多的web框架,随便拿一个来写个api供爬虫调用。这样代理和爬虫架构分离有很多好处,
|
||||
比如:当爬虫完全不用考虑如何校验代理,如何保证拿到的代理可用,这些都由代理池来完成。这样只需要安静的码爬虫代码就行啦。
|
||||
|
||||
### 2、代理池设计
|
||||
|
||||
代理池由四部分组成:
|
||||
|
||||
* ProxyGetter:
|
||||
|
||||
代理获取接口,目前有5个免费代理源,每调用一次就会抓取这5个网站的最新代理放入DB,支持自定义扩展额外的代理获取接口;
|
||||
|
||||
* DB:
|
||||
|
||||
用于存放代理IP,目前支持SSDB和Redis(推荐SSDB)。至于为什么选择SSDB,大家可以参考这篇[文章](https://www.sdk.cn/news/2684),个人觉得SSDB是个不错的Redis替代方案,如果你没有用过SSDB,安装起来也很简单,可以参考[这里](https://github.com/jhao104/memory-notes/blob/master/SSDB/SSDB%E5%AE%89%E8%A3%85%E9%85%8D%E7%BD%AE%E8%AE%B0%E5%BD%95.md);
|
||||
|
||||
* Schedule:
|
||||
|
||||
计划任务,定时去检测DB中的代理可用性,删除不可用的代理。同时也会主动通过ProxyGetter去获取最新代理放入DB;
|
||||
|
||||
* ProxyApi:
|
||||
|
||||
代理池的外部接口,由[Flask](http://flask.pocoo.org/)实现,功能是给爬虫提供与代理池交互的接口。
|
||||
|
||||
<!--#### 功能图纸-->
|
||||

|
||||
|
||||
### 3、代码模块
|
||||
|
||||
Python中高层次的数据结构,动态类型和动态绑定,使得它非常适合于快速应用开发,也适合于作为胶水语言连接已有的软件部件。用Python来搞这个代理IP池也很简单,代码分为6个模块:
|
||||
|
||||
* Api:
|
||||
|
||||
api接口相关代码,目前api是由Flask实现,代码也非常简单。客户端请求传给Flask,Flask调用`ProxyManager`中的实现,包括`get/delete/refresh/get_all`;
|
||||
|
||||
* DB:
|
||||
|
||||
数据库相关代码,目前数据库是支持SSDB/Redis。代码用工厂模式实现,方便日后扩展其他类型数据库;
|
||||
|
||||
* Manager:
|
||||
|
||||
`get/delete/refresh/get_all`等接口的具体实现类,目前代理池只负责管理proxy,日后可能会有更多功能,比如代理和爬虫的绑定,代理和账号的绑定等等;
|
||||
|
||||
* ProxyGetter:
|
||||
|
||||
代理获取的相关代码,目前抓取了[快代理](http://www.kuaidaili.com)、[代理66](http://www.66ip.cn/)、[有代理](http://www.youdaili.net/Daili/http/)、[西刺代理](http://api.xicidaili.com/free2016.txt)、[goubanjia](http://www.goubanjia.com/free/gngn/index.shtml)这五个网站的免费代理,经测试这5个网站每天更新的可用代理只有六七十个,当然也支持自己扩展代理接口;
|
||||
|
||||
* Schedule:
|
||||
|
||||
定时任务相关代码,现在只是实现定时去刷新代理,并验证可用代理,采用多进程方式;
|
||||
|
||||
* Util:
|
||||
|
||||
存放一些公共的模块方法或函数,包含`GetConfig`:读取配置文件config.ini的类,`ConfigParse`: 扩展ConfigParser的类,使其对大小写敏感, `Singleton`:实现单例,`LazyProperty`:实现类属性惰性计算。等等;
|
||||
|
||||
* 其他文件:
|
||||
|
||||
配置文件:`Config.ini`,数据库配置和代理获取接口配置,可以在GetFreeProxy中添加新的代理获取方法,并在Config.ini中注册即可使用;
|
||||
|
||||
### 4、安装
|
||||
|
||||
下载代码:
|
||||
```
|
||||
git clone git@github.com:jhao104/proxy_pool.git
|
||||
|
||||
或者直接到https://github.com/jhao104/proxy_pool 下载zip文件
|
||||
```
|
||||
|
||||
安装依赖:
|
||||
```
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
启动:
|
||||
|
||||
```
|
||||
如果你的依赖已经安装完成并且具备运行条件,可以直接在Run下运行main.py
|
||||
到Run目录下:
|
||||
>>>python main.py
|
||||
|
||||
如果运行成功你应该可以看到有4个main.py进程在
|
||||
|
||||
|
||||
你也可以分别运行他们,依次到Api下启动ProxyApi.py,Schedule下启动ProxyRefreshSchedule.py和ProxyValidSchedule.py即可
|
||||
```
|
||||
|
||||
docker:
|
||||
```
|
||||
git clone git@github.com:jhao104/proxy_pool.git
|
||||
|
||||
cd proxy_pool
|
||||
|
||||
docker build -t proxy:latest -f Dockerfile .
|
||||
|
||||
docker run -p 5010:5010 -d proxy:latest
|
||||
|
||||
# Wait a few minutes
|
||||
curl localhost:5010/get/
|
||||
# result: xxx.xxx.xxx.xxx:xxxx
|
||||
|
||||
curl localhost:5010/get_all/
|
||||
```
|
||||
|
||||
### 5、使用
|
||||
定时任务启动后,会通过GetFreeProxy中的方法抓取代理存入数据库并验证。此后默认每10分钟会重复执行一次。定时任务启动大概一两分钟后,便可在[SSDB](https://github.com/jhao104/SSDBAdmin)中看到刷新出来的可用的代理:
|
||||
|
||||

|
||||
|
||||
启动ProxyApi.py后即可在浏览器中使用接口获取代理,以下是浏览器中的截图:
|
||||
|
||||
index页面:
|
||||
|
||||

|
||||
|
||||
get:
|
||||
|
||||

|
||||
|
||||
get_all:
|
||||
|
||||

|
||||
|
||||
|
||||
爬虫中使用,如果要在爬虫代码中使用的话, 可以将此api封装成函数直接使用,例如:
|
||||
```
|
||||
import requests
|
||||
|
||||
def get_proxy():
|
||||
return requests.get("http://127.0.0.1:5010/get/").content
|
||||
|
||||
def delete_proxy(proxy):
|
||||
requests.get("http://127.0.0.1:5010/delete/?proxy={}".format(proxy))
|
||||
|
||||
# your spider code
|
||||
|
||||
def spider():
|
||||
# ....
|
||||
requests.get('https://www.example.com', proxies={"http": "http://{}".format(get_proxy())})
|
||||
# ....
|
||||
|
||||
```
|
||||
|
||||
测试地址:http://123.207.35.36:5010 单机勿压测。谢谢
|
||||
|
||||
### 6、最后
|
||||
时间仓促,功能和代码都比较简陋,以后有时间再改进。喜欢的在github上给个star。感谢!
|
||||
50
deploy/ProxyPool/doc/release_notes.md
Normal file
50
deploy/ProxyPool/doc/release_notes.md
Normal file
@ -0,0 +1,50 @@
|
||||
## Release Notes
|
||||
|
||||
* master
|
||||
|
||||
1. 新增免费代理源 `西拉代理` (2020-03-30)
|
||||
|
||||
* 2.0.1
|
||||
|
||||
1. 新增免费代理源 `89免费代理`;
|
||||
2. 新增免费代理源 `齐云代理`
|
||||
|
||||
* 2.0.0 (201908)
|
||||
|
||||
1. WebApi集成Gunicorn方式启动, Windows平台暂不支持;
|
||||
2. 优化Proxy调度程序;
|
||||
3. 扩展Proxy属性;
|
||||
4. 提供cli工具, 更加方便启动proxyPool
|
||||
|
||||
* 1.14 (2019.07)
|
||||
|
||||
1. 修复`ProxyValidSchedule`假死bug,原因是Queue阻塞;
|
||||
2. 修改代理源 `云代理` 抓取;
|
||||
3. 修改代理源 `码农代理` 抓取;
|
||||
4. 修改代理源 `代理66` 抓取, 引入 `PyExecJS` 模块破解加速乐动态Cookies加密;
|
||||
|
||||
* 1.13 (2019.02)
|
||||
|
||||
1.使用.py文件替换.ini作为配置文件;
|
||||
|
||||
2.更新代理采集部分;
|
||||
|
||||
* 1.12 (2018.4)
|
||||
|
||||
1.优化代理格式检查;
|
||||
|
||||
2.增加代理源;
|
||||
|
||||
3.fix bug [#122](https://github.com/jhao104/proxy_pool/issues/122) [#126](https://github.com/jhao104/proxy_pool/issues/126)
|
||||
|
||||
* 1.11 (2017.8)
|
||||
|
||||
1.使用多线程验证useful_pool;
|
||||
|
||||
* 1.10 (2016.11)
|
||||
|
||||
1. 第一版;
|
||||
|
||||
2. 支持PY2/PY3;
|
||||
|
||||
3. 代理池基本功能;
|
||||
11
deploy/ProxyPool/docker-run.md
Normal file
11
deploy/ProxyPool/docker-run.md
Normal file
@ -0,0 +1,11 @@
|
||||
```
|
||||
docker run -itd --name proxy-pool \
|
||||
-p 6800:5010 \
|
||||
-e TZ=Asia/Shanghai \
|
||||
-e db_type=REDIS \
|
||||
-e db_host=107.182.191.3 \
|
||||
-e db_port=7379 \
|
||||
-e db_password=jlkj-841-2-redis \
|
||||
-m 256m --memory-swap -1 \
|
||||
proxypool:latest
|
||||
```
|
||||
20
deploy/ProxyPool/requirements.txt
Normal file
20
deploy/ProxyPool/requirements.txt
Normal file
@ -0,0 +1,20 @@
|
||||
APScheduler==3.2.0
|
||||
certifi==2020.6.20
|
||||
chardet==3.0.4
|
||||
Click==7.0
|
||||
Flask==1.0
|
||||
gunicorn==19.9.0
|
||||
idna==2.7
|
||||
itsdangerous==1.1.0
|
||||
Jinja2==2.11.2
|
||||
lxml==4.4.2
|
||||
MarkupSafe==1.1.1
|
||||
PyExecJS==1.5.1
|
||||
pymongo==3.11.0
|
||||
pytz==2020.1
|
||||
redis==3.5.3
|
||||
requests==2.20.0
|
||||
six==1.15.0
|
||||
tzlocal==2.1
|
||||
urllib3==1.24.3
|
||||
Werkzeug==0.15.5
|
||||
20
deploy/ProxyPool/test.py
Normal file
20
deploy/ProxyPool/test.py
Normal file
@ -0,0 +1,20 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
-------------------------------------------------
|
||||
File Name: test.py
|
||||
Description :
|
||||
Author : JHao
|
||||
date: 2017/3/7
|
||||
-------------------------------------------------
|
||||
Change Activity:
|
||||
2017/3/7:
|
||||
-------------------------------------------------
|
||||
"""
|
||||
# __author__ = 'JHao'
|
||||
|
||||
# from Test import testConfig
|
||||
|
||||
# if __name__ == '__main__':
|
||||
# testConfig.testConfig()
|
||||
|
||||
print(0<0)
|
||||
1
deploy/README.md
Normal file
1
deploy/README.md
Normal file
@ -0,0 +1 @@
|
||||
# 安装部署
|
||||
32
deploy/kafka/docker-compose.yml
Normal file
32
deploy/kafka/docker-compose.yml
Normal file
@ -0,0 +1,32 @@
|
||||
version: '2'
|
||||
services:
|
||||
zookeeper:
|
||||
container_name: kafka-zk
|
||||
image: zookeeper:3.7.0
|
||||
ports:
|
||||
- "2181:2181"
|
||||
- "2888:2888"
|
||||
- "3888:3888"
|
||||
restart: always
|
||||
environment:
|
||||
TZ: CST-8
|
||||
kafka:
|
||||
container_name: kafka-server
|
||||
image: wurstmeister/kafka:2.13-2.7.0
|
||||
ports:
|
||||
- "9092:9092"
|
||||
- "1099:1099"
|
||||
- "9999:9999"
|
||||
environment:
|
||||
TZ: CST-8
|
||||
HOSTNAME: B144
|
||||
KAFKA_ADVERTISED_HOST_NAME: 104.225.146.144
|
||||
KAFKA_CREATE_TOPICS: "test:1:1"
|
||||
KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
|
||||
KAFKA_LOG_RETENTION_HOURS: 72
|
||||
KAFKA_JMX_OPTS: "-Djava.rmi.server.hostname=104.225.146.144 -Dcom.sun.management.jmxremote=true -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.port=1099 -Dcom.sun.management.jmxremote.rmi.port=9999"
|
||||
JMX_PORT: 1099
|
||||
restart: always
|
||||
volumes:
|
||||
- /usr/local/dockerfs/kafka/logs:/kafka
|
||||
- /usr/local/dockerfs/kafka/docker.sock:/var/run/docker.sock
|
||||
3
deploy/proto/README.md
Normal file
3
deploy/proto/README.md
Normal file
@ -0,0 +1,3 @@
|
||||
## 操作方法
|
||||
`generate-py.bat` 中使用的 protoc 可执行程序位于 [protobuf-3.11.4.zip](http://39.98.151.140:28080/software/protobuf-3.11.4.zip) 解压后的目录中
|
||||
将脚本中的定义文件目录(`-I` 参数及末尾的 `.proto` 文件路径)改成 proto definition 文件所在的目录,输出目录(`--python_out` / `--java_out`)可以自行指定
|
||||
30
deploy/proto/definition/Airport.proto
Normal file
30
deploy/proto/definition/Airport.proto
Normal file
@ -0,0 +1,30 @@
|
||||
syntax = "proto3";
|
||||
|
||||
message AirportInfoSets //机场基本信息
|
||||
{
|
||||
repeated AirportInfo AirportInfo = 1;
|
||||
}
|
||||
|
||||
|
||||
|
||||
message AirportInfo
|
||||
{
|
||||
string ID = 1; //ID
|
||||
string Name = 2; //名称
|
||||
string ICAO = 3; //ICAO码
|
||||
string IATA = 4; //IATA码
|
||||
string GPS = 5; //GPS代码
|
||||
string Type = 6; //类型
|
||||
string UsageType = 7; //军民属性
|
||||
string Continent = 8; //所属大洲
|
||||
string Nation = 9; //所属国家/地区
|
||||
string City = 10; //所属城市
|
||||
string Height = 11; //海拔
|
||||
string Longitude = 12; //经度
|
||||
string Latitude = 13; //纬度
|
||||
string Image = 14; //图片
|
||||
string UpdateTime = 15; //更新时间
|
||||
string LastTime = 16; //最后更新时间
|
||||
string Sensitive = 17; //是否高敏(0/否;1/是)
|
||||
string DataSource = 18; //数据源
|
||||
}
|
||||
159
deploy/proto/definition/Argo.proto
Normal file
159
deploy/proto/definition/Argo.proto
Normal file
@ -0,0 +1,159 @@
|
||||
syntax = "proto3";
|
||||
|
||||
message ArgoInfoSets //浮标基本信息
|
||||
{
|
||||
repeated ArgoInfo ArgoInfo = 1;
|
||||
}
|
||||
|
||||
message ArgoFirstDeployInfoSets //浮标首次部署下水情况
|
||||
{
|
||||
repeated ArgoFirstDeployInfo ArgoFirstDeployInfo = 1;
|
||||
}
|
||||
|
||||
message ArgoCommunicationInfoSets //浮标通讯传输信息
|
||||
{
|
||||
repeated ArgoCommunicationInfo ArgoCommunicationInfo = 1;
|
||||
}
|
||||
|
||||
message ArgoSensorInfoSets // Sensors carried by the float (collection of ArgoSensorInfo records)
|
||||
{
|
||||
repeated ArgoSensorInfo ArgoSensorInfo = 1;
|
||||
}
|
||||
|
||||
message ArgoTechnicalInfoSets //浮标技术参数信息
|
||||
{
|
||||
repeated ArgoTechnicalInfo ArgoTechnicalInfo = 1;
|
||||
}
|
||||
|
||||
message ArgoDataManipulationInfoSets //浮标数据操作情况信息
|
||||
{
|
||||
repeated ArgoDataManipulationInfo ArgoDataManipulationInfo = 1;
|
||||
}
|
||||
|
||||
message ArgoMeasureInfoSets //浮标测量信息
|
||||
{
|
||||
repeated ArgoMeasureInfo ArgoMeasureInfo = 1;
|
||||
}
|
||||
|
||||
|
||||
|
||||
message ArgoInfo
|
||||
{
|
||||
string ID = 1; //ID
|
||||
string PlatformNum = 2; //浮标唯一标识
|
||||
string FloatSerialNum = 3; //浮标序号
|
||||
string PlatformMaker = 4; //制造商
|
||||
string PlatformType = 5; //浮标类型
|
||||
string PlatformModel = 6; //浮标型号
|
||||
string ProjectName = 7; //项目名称
|
||||
string PiName = 8; //首席研究员
|
||||
string DataCenter = 9; //数据中心
|
||||
string HistoryInstitution = 10; //操作机构
|
||||
string PositioningSys = 11; //定位系统
|
||||
string InstReference = 12; //仪器类型
|
||||
string WMOInstType = 13; //WMO编码仪器类型
|
||||
string Image = 14; //图片
|
||||
string Nation = 15; //所属国家
|
||||
string DataSource = 16; //数据来源
|
||||
string UpdateTime = 17; //更新时间
|
||||
string LastTime = 18; //最后更新时间
|
||||
string Sensitive = 19; //是否高敏(0/否;1/是)
|
||||
}
|
||||
|
||||
message ArgoFirstDeployInfo
|
||||
{
|
||||
string ID = 1; //ID
|
||||
string PlatformNum = 2; //浮标唯一标识
|
||||
string LaunchDate = 3; //下水日期
|
||||
string LaunchLatitude = 4; //下水纬度
|
||||
string LaunchLongitude = 5; //下水经度
|
||||
string StartDate = 6; //部署后首次激活时间
|
||||
string LaunchQC = 7; //下水时相关数据质量标记
|
||||
string DeployedPlatform = 8; //部署平台标识符
|
||||
string DeployedMission = 9; //部署任务标识符
|
||||
string UpdateTime = 10; //更新时间
|
||||
string LastTime = 11; //最后更新时间
|
||||
}
|
||||
|
||||
message ArgoCommunicationInfo
|
||||
{
|
||||
string ID = 1; //ID
|
||||
string PlatformNum = 2; //浮标唯一标识
|
||||
string PTT = 3; //无线电通讯系统标识号
|
||||
string TransSys = 4; //无线电通讯系统名称
|
||||
string TransFreq = 5; //无线电通信传输频率
|
||||
string TransRepetition = 6; //无线电传输采样频次
|
||||
string UpdateTime = 7; //更新时间
|
||||
string LastTime = 8; //最后更新时间
|
||||
}
|
||||
|
||||
message ArgoSensorInfo
|
||||
{
|
||||
string ID = 1; //ID
|
||||
string PlatformNum = 2; //浮标唯一标识
|
||||
string SensorSerial = 3; //传感器序列号
|
||||
string SensorName = 4; //传感器名称
|
||||
string SensorMaker = 5; //传感器生产商
|
||||
string SensorModel = 6; //传感器型号
|
||||
string SensorUnits = 7; //传感器精度分辨率单位
|
||||
string SensorResolution = 8; //传感器分辨率
|
||||
string SensorAccuracy = 9; //传感器精度
|
||||
string FirmwareVersion = 10; //仪器版本
|
||||
string UpdateTime = 11; //更新时间
|
||||
string LastTime = 12; //最后更新时间
|
||||
}
|
||||
|
||||
message ArgoTechnicalInfo
|
||||
{
|
||||
string ID = 1; //ID
|
||||
string PlatformNum = 2; //浮标唯一标识
|
||||
string DataType = 3; //数据类型
|
||||
string FormatVersion = 4; //文件格式版本
|
||||
string HandbookVersion = 5; //数据手册版本号
|
||||
string DataCenter = 6; //数据中心
|
||||
string CreationDate = 7; //nc文件创建日期
|
||||
string UpdateDate = 8; //nc文件更新日期
|
||||
string UpdateTime = 9; //更新时间
|
||||
string LastTime = 10; //最后更新时间
|
||||
}
|
||||
|
||||
message ArgoDataManipulationInfo
|
||||
{
|
||||
string ID = 1; //ID
|
||||
string PlatformNum = 2; //浮标唯一标识
|
||||
string HistoryStep = 3; //数据处理步骤
|
||||
string HistorySoftware = 4; //数据操作软件名称
|
||||
string HistorySoftwareRelease = 5; //数据操作软件版本号
|
||||
string HistoryReference = 6; //数据库引用
|
||||
string HistoryDate = 7; //历史数据记录创建时间
|
||||
string HistoryAction = 8; //数据操作记录
|
||||
string HistoryParameter = 9; //站点参数操作记录
|
||||
string HistoryStartPres = 10; //启动作用压强
|
||||
string HistoryStopPres = 11; //终止作用压强
|
||||
string HistoryPreviousValue = 12; //数据操作的上一条记录(参数/校准值)
|
||||
string HistoryQctest = 13; //测试记录文档(以十六进制数格式)
|
||||
string EndMissionDate = 14; //任务结束日期
|
||||
string EndMissionStatus = 15; //任务结束浮标状态
|
||||
string UpdateTime = 16; //更新时间
|
||||
string LastTime = 17; //最后更新时间
|
||||
}
|
||||
|
||||
message ArgoMeasureInfo
|
||||
{
|
||||
string ID = 1; //ID
|
||||
string PlatformNum = 2; //浮标ID
|
||||
string Sensor = 3; //使用传感器名称
|
||||
string Longitude = 4; //经度
|
||||
string Latitude = 5; //纬度
|
||||
string Pres = 6; //压力值
|
||||
string PresQC = 7; //压力数据质量标记
|
||||
string Temp = 8; //温度值
|
||||
string TempQC = 9; //温度数据质量标记
|
||||
string Psal = 10; //盐度值
|
||||
string PsalQC = 11; //盐度数据质量标记
|
||||
string ScientificCalibEquation = 12; //参数校准公式
|
||||
string ScientificCalibCoefficient = 13; //参数校准系数
|
||||
string UpdateTime = 14; //更新时间
|
||||
string LastTime = 15; //最后更新时间
|
||||
string Status = 16; //当前状态:1:在线,2:下线
|
||||
}
|
||||
85
deploy/proto/definition/Es.proto
Normal file
85
deploy/proto/definition/Es.proto
Normal file
@ -0,0 +1,85 @@
|
||||
syntax = "proto3";
|
||||
|
||||
message EsSets //es<EFBFBD><EFBFBD>
|
||||
{
|
||||
repeated Es Es = 1;
|
||||
}
|
||||
|
||||
|
||||
message Es
|
||||
{
|
||||
string es_sid = 1; //<EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
string es_subjectId = 2; //<EFBFBD><EFBFBD><EFBFBD><EFBFBD>id
|
||||
string es_hkey = 3; //URLΨһ<EFBFBD><EFBFBD><EFBFBD>
|
||||
string es_pkey = 4; //<EFBFBD><EFBFBD>URL<EFBFBD><EFBFBD><EFBFBD>
|
||||
string es_startid = 5; //<EFBFBD><EFBFBD>ʼ<EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
string es_urlname = 6; //URL<EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
string es_sitename = 7; //<EFBFBD><EFBFBD>վ<EFBFBD><EFBFBD>
|
||||
string es_extname = 8; //<EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
string es_channel = 9; //<EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ƶ<EFBFBD><EFBFBD>
|
||||
string es_groupname = 10; //<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
string es_urltitle = 11; //<EFBFBD><EFBFBD>ҳ<EFBFBD><EFBFBD><EFBFBD>ñ<EFBFBD><EFBFBD><EFBFBD>
|
||||
string es_urltopic = 12; //<EFBFBD><EFBFBD><EFBFBD><EFBFBD>ҳ<title><EFBFBD>ñ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ı<EFBFBD><EFBFBD><EFBFBD>
|
||||
string es_lasttime = 13; //<EFBFBD>ɼ<EFBFBD><EFBFBD><EFBFBD><EFBFBD>ڣ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ʱ<EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
string es_loadtime = 14; //<EFBFBD><EFBFBD><EFBFBD>ʱ<EFBFBD>䣨ʵ<EFBFBD><EFBFBD><EFBFBD><EFBFBD>ES<EFBFBD><EFBFBD><EFBFBD>ʱ<EFBFBD>䣩
|
||||
string es_urldate = 15; //<EFBFBD><EFBFBD><EFBFBD>µķ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ڣ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
string es_urltime = 16; //<EFBFBD><EFBFBD><EFBFBD>µķ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ڣ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ʱ<EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
string es_srcname = 17; //<EFBFBD><EFBFBD><EFBFBD>µ<EFBFBD><EFBFBD><EFBFBD>Դ<EFBFBD><EFBFBD><EFBFBD><EFBFBD>ȱʧ<EFBFBD><EFBFBD>
|
||||
string es_authors = 18; //<EFBFBD><EFBFBD><EFBFBD>µ<EFBFBD><EFBFBD><EFBFBD><EFBFBD>ߣ<EFBFBD><EFBFBD><EFBFBD>ȱʧ<EFBFBD><EFBFBD>
|
||||
string es_district = 19; //<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>µ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>µĵ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ȱʧ<EFBFBD><EFBFBD>
|
||||
string es_catalog = 20; //<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
string es_catalog1 = 21; //<EFBFBD><EFBFBD><EFBFBD><EFBFBD>һ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
string es_catalog2 = 22; //<EFBFBD><EFBFBD><EFBFBD>¶<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
string es_keywords = 23; //<EFBFBD><EFBFBD><EFBFBD>¹ؼ<EFBFBD><EFBFBD>ʣ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>½<EFBFBD><EFBFBD><EFBFBD><EFBFBD>ģ<EFBFBD>
|
||||
string es_abstract = 24; //<EFBFBD><EFBFBD><EFBFBD>µ<EFBFBD>ժҪ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>½<EFBFBD><EFBFBD><EFBFBD><EFBFBD>ģ<EFBFBD>
|
||||
string es_simflag = 25; //<EFBFBD>ظ<EFBFBD><EFBFBD><EFBFBD>ǣ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>֮<EFBFBD>ظ<EFBFBD><EFBFBD><EFBFBD>ҳ<EFBFBD><EFBFBD>HKEY
|
||||
string es_simrank = 26; //<EFBFBD><EFBFBD><EFBFBD>ƶ<EFBFBD><EFBFBD><EFBFBD>ֵ
|
||||
string es_urlimage = 27; //ͼƬ<EFBFBD><EFBFBD>ַ
|
||||
string es_imageflag = 28; //<EFBFBD><EFBFBD>ҳ<EFBFBD><EFBFBD><EFBFBD><EFBFBD>ͼƬ<EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ŀ
|
||||
string es_tableflag = 29; //<EFBFBD><EFBFBD>ҳ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ŀ
|
||||
string es_doclength = 30; //<EFBFBD><EFBFBD><EFBFBD>ij<EFBFBD><EFBFBD><EFBFBD>
|
||||
string es_content = 31; //<EFBFBD><EFBFBD>ҳ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ݣ<EFBFBD><EFBFBD><EFBFBD>ͼƬ<EFBFBD><EFBFBD>
|
||||
string es_urlcontent = 32; //<EFBFBD><EFBFBD>ҳ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ݣ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ͼƬ<EFBFBD><EFBFBD>
|
||||
string es_bbsnum = 33; //<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
string es_pagelevel = 34; //<EFBFBD><EFBFBD><EFBFBD><EFBFBD>ʼҳ<EFBFBD>濪ʼ<EFBFBD>IJ<EFBFBD><EFBFBD><EFBFBD>
|
||||
string es_urllevel = 35; //<EFBFBD><EFBFBD><EFBFBD>ӵ<EFBFBD>Ŀ¼<EFBFBD><EFBFBD><EFBFBD>
|
||||
string es_simhash = 36; //<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>simhashֵ
|
||||
string es_ip = 37; //<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ip
|
||||
string es_heat = 38; //<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ȶ<EFBFBD>
|
||||
string es_similaritycount = 39; //<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
string es_similarity = 40; //<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>id
|
||||
string es_similaritytime = 41; //<EFBFBD><EFBFBD><EFBFBD>ƶȼ<EFBFBD><EFBFBD><EFBFBD>ʱ<EFBFBD><EFBFBD>
|
||||
string es_emotion = 42; //<EFBFBD><EFBFBD><EFBFBD>
|
||||
string es_warningtime = 43; //Ԥ<EFBFBD><EFBFBD>ʱ<EFBFBD><EFBFBD>
|
||||
string es_carriertype = 44; //<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
string es_commentcount = 45; //<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
string es_forwardcount = 46; //ת<EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
string es_positiveWords = 47; //<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
string es_negativeWords = 48; //<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
string es_negativeProbability = 49; //<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
string es_reportinfo = 50; //<EFBFBD>Ƿ<EFBFBD><EFBFBD>ϱ<EFBFBD><EFBFBD><EFBFBD>Ϣ
|
||||
string es_attention = 51; //<EFBFBD>Ƿ<EFBFBD><EFBFBD>ע
|
||||
string es_warning = 52; //<EFBFBD>Ƿ<EFBFBD>Ԥ<EFBFBD><EFBFBD>
|
||||
string es_readsign = 53; //<EFBFBD>Ƿ<EFBFBD><EFBFBD>Ѷ<EFBFBD>
|
||||
string es_briefing = 54; //<EFBFBD>Ƿ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
string es_warning_word = 55; //Ԥ<EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
string es_attentiontime = 56; //<EFBFBD><EFBFBD>עʱ<EFBFBD><EFBFBD>
|
||||
string es_collection = 57; //<EFBFBD>Ƿ<EFBFBD><EFBFBD>ղ<EFBFBD>
|
||||
string es_attachment = 58; //<EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
string es_userid = 59;//number,<EFBFBD>û<EFBFBD>id<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>罻ý<EFBFBD><EFBFBD><EFBFBD>˻<EFBFBD>)
|
||||
string es_contenttype = 60;//string,<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ͣ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Post<EFBFBD><EFBFBD><EFBFBD>ͣ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>status<EFBFBD><EFBFBD>link<EFBFBD><EFBFBD>photo<EFBFBD><EFBFBD>video<EFBFBD><EFBFBD>event<EFBFBD><EFBFBD>music<EFBFBD><EFBFBD>note<EFBFBD><EFBFBD>offer<EFBFBD><EFBFBD>album<EFBFBD>ȣ<EFBFBD>
|
||||
string es_likecount = 61;//number,<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
string es_links = 62;//string<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>е<EFBFBD><EFBFBD><EFBFBD><EFBFBD>ӵ<EFBFBD>ַ<EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ƶ<EFBFBD>ļ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ӵ<EFBFBD>ַ
|
||||
string es_reactioncount = 63;//number,<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
string es_linkdesc = 64;//string<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>һ<EFBFBD><EFBFBD>post <EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ϊ<EFBFBD><EFBFBD><EFBFBD>ӣ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ӵ<EFBFBD>һЩ<EFBFBD><EFBFBD>Ϣ
|
||||
string es_repostuid = 65;//number<EFBFBD><EFBFBD>ת<EFBFBD><EFBFBD>ԭ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ߵ<EFBFBD>ID
|
||||
string es_repostuname =66;//string<EFBFBD><EFBFBD>ת<EFBFBD><EFBFBD>ԭ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ߵ<EFBFBD>name
|
||||
string es_repostid = 67;//string<EFBFBD><EFBFBD>ת<EFBFBD><EFBFBD>ԭ<EFBFBD><EFBFBD>ID
|
||||
string es_tags = 68;//string<EFBFBD><EFBFBD><EFBFBD>ἰ<EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
string es_mentionsaccount = 69;//string<EFBFBD><EFBFBD><EFBFBD>ἰ<EFBFBD>˺<EFBFBD>
|
||||
string es_video = 70;//string<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>е<EFBFBD><EFBFBD><EFBFBD>Ƶ<EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
string es_isrepost = 71;//boolean<EFBFBD><EFBFBD><EFBFBD>Ƿ<EFBFBD>ת<EFBFBD><EFBFBD>
|
||||
string es_lang = 72;//string<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
string es_client = 73;//string<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ͻ<EFBFBD><EFBFBD><EFBFBD>
|
||||
string es_snapshot = 74;
|
||||
}
|
||||
21
deploy/proto/definition/FaceBookUserInfo.proto
Normal file
21
deploy/proto/definition/FaceBookUserInfo.proto
Normal file
@ -0,0 +1,21 @@
|
||||
syntax = "proto3";
|
||||
|
||||
message FaceBookUserInfo
|
||||
{
|
||||
string ID = 1; // Primary-key id
|
||||
string NAME = 2; // Name
|
||||
string GENDER = 3; // Gender
|
||||
string BRITHDAY = 4; // Date of birth (NOTE: the field name is misspelled "BRITHDAY"; kept as-is because renaming would change the generated API)
|
||||
repeated string EDUCATION = 5; // Education history, ["degree:school"]
|
||||
repeated string WORK = 6; // Work history, ["employer"]
|
||||
repeated string LIVING = 7; // Places lived, ["hometown:address","current residence:address"]
|
||||
repeated string CONTACT = 8; // Contact details, ["category:address"]
|
||||
repeated string YEAR_OVERVIEW = 9; // Life events, ["date:event"]
|
||||
string RELATIONSHIP = 10; // Relationship status
|
||||
repeated string FRIEND_IDS = 11; // presumably the IDs of this user's friends — TODO confirm
|
||||
}
|
||||
|
||||
message FaceBookUserInfoSets
|
||||
{
|
||||
repeated FaceBookUserInfo SETS = 1;
|
||||
}
|
||||
37
deploy/proto/definition/LaunchSiteInfo.proto
Normal file
37
deploy/proto/definition/LaunchSiteInfo.proto
Normal file
@ -0,0 +1,37 @@
|
||||
syntax = "proto3";
|
||||
|
||||
message LaunchSiteInfoSets //发射场基本信息
|
||||
{
|
||||
repeated LaunchSiteInfo LaunchSiteInfo = 1;
|
||||
}
|
||||
|
||||
message LaunchPlaneInfoSets // Launch plan information (collection of LaunchPlaneInfo records)
|
||||
{
|
||||
repeated LaunchPlaneInfo LaunchPlaneInfo = 1;
|
||||
}
|
||||
|
||||
message LaunchSiteInfo
|
||||
{
|
||||
string ID = 1; //ID
|
||||
string Name_Zh = 2; //中文名称
|
||||
string Name_En = 3; //外文名称
|
||||
string Position = 4; //地理位置
|
||||
string Nation = 5; //所属国家
|
||||
string BuilderTime = 6; //建造时间
|
||||
string Affiliates = 7; //隶属机构
|
||||
string Longitude = 8; //经度
|
||||
string Latitude = 9; //纬度
|
||||
string Image = 10; //图片
|
||||
string UpdateTime = 11; //更新时间
|
||||
string LastTime = 12; //最后更新时间
|
||||
string Sensitive = 13; //是否高敏(0/否;1/是)
|
||||
}
|
||||
|
||||
message LaunchPlaneInfo
|
||||
{
|
||||
string ID = 1; //ID
|
||||
string postid =2; //计划发布id
|
||||
string date =3; //日期
|
||||
string title =4; //标题
|
||||
string excerpt = 5; //摘要
|
||||
}
|
||||
29
deploy/proto/definition/Notam.proto
Normal file
29
deploy/proto/definition/Notam.proto
Normal file
@ -0,0 +1,29 @@
|
||||
syntax = "proto3";
|
||||
|
||||
message NotamInfoSets
|
||||
{
|
||||
repeated NotamInfo NotamInfo =1;
|
||||
}
|
||||
|
||||
message NotamInfo
|
||||
{
|
||||
string ID = 1;
|
||||
string NotamNumber = 2;
|
||||
string IssueDate = 3;
|
||||
string Location = 4;
|
||||
string BeginningDatetime =5;
|
||||
string EndingDateTime =6;
|
||||
string Reason = 7;
|
||||
string Type =8;
|
||||
string AffectedAreaType = 9;
|
||||
string AffectedAreas = 10;
|
||||
string EffectiveDates = 11;
|
||||
string Image = 12;
|
||||
string DelFlag = 13;
|
||||
string UpdateTime = 14;
|
||||
string Lasttime = 15;
|
||||
string EditStatus = 16;
|
||||
|
||||
}
|
||||
|
||||
|
||||
163
deploy/proto/definition/Plane.proto
Normal file
163
deploy/proto/definition/Plane.proto
Normal file
@ -0,0 +1,163 @@
|
||||
syntax = "proto3";
|
||||
|
||||
message PlaneInfoSets //飞机基本信息
|
||||
{
|
||||
repeated PlaneInfo PlaneInfo = 1;
|
||||
}
|
||||
|
||||
message FligthInfoSets //飞机航班信息
|
||||
{
|
||||
repeated FligthInfo FligthInfo = 1;
|
||||
}
|
||||
|
||||
message FligthTrackpointInfoSets //飞机轨迹信息
|
||||
{
|
||||
repeated FligthTrackpointInfo FligthTrackpointInfo = 1;
|
||||
}
|
||||
|
||||
message FligthTrackpointAllInfoSets //飞机航班和轨迹信息
|
||||
{
|
||||
repeated FligthTrackpointAllInfo FligthTrackpointAllInfo = 1;
|
||||
}
|
||||
|
||||
message PlaneBaseInfoSets //飞机基本扩展信息
|
||||
{
|
||||
repeated PlaneBaseInfo PlaneBaseInfo = 1;
|
||||
}
|
||||
|
||||
message PlaneInfo
|
||||
{
|
||||
string ID = 1; //ID
|
||||
string MSN = 2; //制造商序列号
|
||||
string Reg = 3; //飞机注册号
|
||||
string ICAO = 4; //所属航司ICAO
|
||||
string Callsign = 5; //呼号
|
||||
string Owner = 6; //所属者
|
||||
string Nation = 7; //所属国家
|
||||
string UsageType = 8; //军民类型
|
||||
string Model = 9; //机型名称
|
||||
string Image = 10; //图片
|
||||
string Age = 11; //机龄
|
||||
string UpdateTime = 12; //数据更新时间
|
||||
string DataSource = 13; //数据来源
|
||||
string LastTime = 14; //最后更新时间
|
||||
string Type = 15; //飞机类型
|
||||
string DisplayModel = 16; //显示机型名称
|
||||
string Sensitive = 17; //是否高敏(0/否;1/是)
|
||||
}
|
||||
|
||||
message FligthInfo
|
||||
{
|
||||
string ID = 1; // ID
|
||||
string FlightID = 2; // Flight ID
|
||||
string ICAO = 3; // Flight ICAO code
|
||||
string IATA = 4; // Flight IATA code
|
||||
string PlaneReg = 5; // Registration number of the operating aircraft
|
||||
string TakeoffBase = 6; // Name of the departure base
|
||||
string TakeoffBaseICAO = 7; // ICAO code of the departure base
|
||||
string TakeoffBaseIATA = 8; // IATA code of the departure base
|
||||
string LandBase = 9; // Name of the landing base
|
||||
string LandBaseICAO = 10; // ICAO code of the landing base
|
||||
string LandBaseIATA = 11; // IATA code of the landing base
|
||||
string ScheduleBegTime = 12; // Scheduled departure time
|
||||
string ActualBegTime = 13; // Actual departure time
|
||||
string ScheduleArrTime = 14; // Estimated arrival time
|
||||
string ActualArrTime = 15; // Actual arrival time
|
||||
string UpdateTime = 16; // Data update time
|
||||
string LastTime = 17; // Last update time
|
||||
string Status = 18; // Status: 1 = in flight, 2 = finished
|
||||
}
|
||||
|
||||
message FligthTrackpointInfo
|
||||
{
|
||||
string ID = 1; //ID
|
||||
string FlightID = 2; //所属航班ID
|
||||
string FlightIATA = 3; //所属航班IATA
|
||||
string FlightICAO = 4; //所属航班ICAO
|
||||
string PlaneREG = 5; //飞机注册号
|
||||
string Longitude = 6; //经度
|
||||
string Latitude = 7; //纬度
|
||||
string Height = 8; //高度
|
||||
string Speed = 9; //航速
|
||||
string Angle = 10; //方位角
|
||||
string UpdateTime = 11; //更新时间
|
||||
string LastTime = 12; //最后更新时间
|
||||
string Status = 13; //当前状态:1:在线,2:下线
|
||||
}
|
||||
|
||||
message FligthTrackpointAllInfo
|
||||
{
|
||||
string ID = 1; // ID
|
||||
string FlightID = 2; // ID of the flight this point belongs to
|
||||
string FlightIATA = 3; // IATA code of the flight this point belongs to
|
||||
string FlightICAO = 4; // ICAO code of the flight this point belongs to
|
||||
string PlaneREG = 5; // Aircraft registration number
|
||||
string Longitude = 6; // Longitude
|
||||
string Latitude = 7; // Latitude
|
||||
string Height = 8; // Altitude
|
||||
string Speed = 9; // Speed
|
||||
string Angle = 10; // Azimuth angle
|
||||
string UpdateTime = 11; // Update time
|
||||
string LastTime = 12; // Last update time
|
||||
string Status = 13; // Current status: 1 = online, 2 = offline
|
||||
string TakeoffBase = 14; // Name of the departure base
|
||||
string TakeoffBaseICAO = 15; // ICAO code of the departure base
|
||||
string TakeoffBaseIATA = 16; // IATA code of the departure base
|
||||
string LandBase = 17; // Name of the landing base
|
||||
string LandBaseICAO = 18; // ICAO code of the landing base
|
||||
string LandBaseIATA = 19; // IATA code of the landing base
|
||||
string ScheduleBegTime = 20; // Scheduled departure time
|
||||
string ActualBegTime = 21; // Actual departure time
|
||||
string ScheduleArrTime = 22; // Estimated arrival time
|
||||
string ActualArrTime = 23; // Actual arrival time
|
||||
}
|
||||
|
||||
message PlaneBaseInfo
|
||||
{
|
||||
string ID = 1; // id
|
||||
string Type = 2; // 机型
|
||||
string BaseInfo = 3; // 基本情况
|
||||
string BaseInfoSource = 4; // 基本情况信息来源
|
||||
string Crew = 5; // 乘员
|
||||
string CrewSource = 6; // 乘员信息来源
|
||||
string PracticalCeiling = 7; // 实用升限
|
||||
string PracticalCeilingSource = 8; // 实用升限信息来源
|
||||
string Length = 9; // 机长
|
||||
string LengthSource = 10; // 机长信息来源
|
||||
string MaxRange = 11; // 最大航程
|
||||
string MaxRangeSource = 12; // 最大航程信息来源
|
||||
string WingSpan = 13; // 翼展
|
||||
string WingSpanSource = 14; // 翼展信息来源
|
||||
string EmptyWeight = 15; // 空重
|
||||
string EmptyWeightSource = 16; // 空重信息来源
|
||||
string ZeroFuelWeight = 17; // 负载重量
|
||||
string ZeroFuelWeightSource = 18; // 负载重量信息来源
|
||||
string MaxTakeoffWeight = 19; // 最大起飞重量
|
||||
string MaxTakeoffWeightSource = 20; // 最大起飞重量信息来源
|
||||
string Height = 21; // 机高
|
||||
string HeightSource = 22; // 机高信息来源
|
||||
string MaxSpeed = 23; // 最大速度
|
||||
string MaxSpeedSource = 24; // 最大速度信息来源
|
||||
string CruiseSpeed = 25; // 巡航速度
|
||||
string CruiseSpeedSource = 26; // 巡航速度信息来源
|
||||
string CombatRange = 27; // 作战半径
|
||||
string CombatRangeSource = 28; // 作战半径信息来源
|
||||
string Endurance = 29; // 续航时间
|
||||
string EnduranceSource = 30; // 续航时间信息来源
|
||||
string Radar = 31; // 雷达
|
||||
string RadarSource = 32; // 雷达信息来源
|
||||
string ElectronicWarfare = 33; // 电子战设备
|
||||
string ElectronicWarfareSource = 34; // 电子战设备信息来源
|
||||
string AntiSubmarine = 35; // 反潜设备
|
||||
string AntiSubmarineSource = 36; // 反潜设备信息来源
|
||||
string Missile = 37; // 导弹
|
||||
string MissileSource = 38; // 导弹信息来源
|
||||
string TorpedoMine = 39; // 鱼雷/水雷
|
||||
string TorpedoMineSource = 40; // 鱼雷/水雷信息来源
|
||||
string CommandAndControl = 41; // 指控装备
|
||||
string CommandAndControlSource = 42; // 指控装备信息来源
|
||||
string Communication = 43; // 通信装备
|
||||
string CommunicationSource = 44; // 通信装备信息来源
|
||||
string UpdateTime = 45; // 更新时间
|
||||
string LastTime = 46; // 最后更新时间
|
||||
}
|
||||
27
deploy/proto/definition/Port.proto
Normal file
27
deploy/proto/definition/Port.proto
Normal file
@ -0,0 +1,27 @@
|
||||
syntax = "proto3";
|
||||
|
||||
message PortInfoSets //港口基本信息
|
||||
{
|
||||
repeated PortInfo PortInfo = 1;
|
||||
}
|
||||
|
||||
|
||||
|
||||
message PortInfo
|
||||
{
|
||||
string ID = 1; //ID
|
||||
string GlobalCode = 2; //港口id(全球)
|
||||
string Name_En = 3; //港口名称(英文)
|
||||
string Code = 4; //港口代码
|
||||
string UsageType = 5; //军民属性
|
||||
string Nation = 6; //所属国家
|
||||
string Image = 7; //图片
|
||||
string Route = 8; //航线
|
||||
string DataSource = 9; //数据源
|
||||
string Longitude = 10; //经度
|
||||
string Latitude = 11; //纬度
|
||||
string UpdateTime = 12; //更新时间
|
||||
string LastTime = 13; //最后更新时间
|
||||
string Name_Zh = 14; //中文名称
|
||||
string Sensitive = 15; //是否高敏(0/否;1/是)
|
||||
}
|
||||
55
deploy/proto/definition/Satellite.proto
Normal file
55
deploy/proto/definition/Satellite.proto
Normal file
@ -0,0 +1,55 @@
|
||||
syntax = "proto3";
|
||||
|
||||
message SatelliteInfoSets //卫星基本信息
|
||||
{
|
||||
repeated SatelliteInfo SatelliteInfo = 1;
|
||||
}
|
||||
|
||||
message SatelliteTrackpointInfoSets //卫星轨迹信息
|
||||
{
|
||||
repeated SatelliteTrackpointInfo SatelliteTrackpointInfo = 1;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
message SatelliteInfo
|
||||
{
|
||||
string ID = 1; //ID
|
||||
string Image = 2; //图片
|
||||
string NORAD = 3; //NORAD编号
|
||||
string NSSDC = 4; //国际编号
|
||||
string Nation = 5; //所属国家
|
||||
string Name = 6; //卫星名称
|
||||
string Type = 7; //类型
|
||||
string LaunchTime = 8; //发射时间
|
||||
string RCS = 9; //RCS
|
||||
string Perigee = 10; //近地点(km)
|
||||
string Apogee = 11; //远地点(km)
|
||||
string Inclination = 12; //倾斜度(倾角)
|
||||
string MonitorDiam = 13; //覆盖直径(km)
|
||||
string Eccentricity = 14; //离心率
|
||||
string Period = 15; //周期
|
||||
string LaunchSite = 16; //发射地点(发射场)
|
||||
string LaunchTimes = 17; //发射次数
|
||||
string CarrierCode = 18; //发射物体编号
|
||||
string CarrierName = 19; //发射载体名称
|
||||
string DataSource = 20; //数据源
|
||||
string IsOnRail = 21; //是否在轨
|
||||
string UpdateTime = 22; //更新时间
|
||||
string LastTime = 23; //最后更新时间
|
||||
string Sensitive = 24; //是否高敏(0/否;1/是)
|
||||
}
|
||||
|
||||
message SatelliteTrackpointInfo
|
||||
{
|
||||
string ID = 1; //ID
|
||||
string SatelliteNORAD = 2; //卫星NORAD
|
||||
string Longitude = 3; //经度
|
||||
string Latitude = 4; //纬度
|
||||
string Height = 5; //高度
|
||||
string Speed = 6; //卫星速度(km/s)
|
||||
string UpdateTime = 7; //数据更新时间
|
||||
string LastTime = 8; //最后更新时间
|
||||
string Status = 9; //当前状态:1:在线,2:下线
|
||||
}
|
||||
145
deploy/proto/definition/Ship.proto
Normal file
145
deploy/proto/definition/Ship.proto
Normal file
@ -0,0 +1,145 @@
|
||||
syntax = "proto3";
|
||||
|
||||
message ShipInfoSets //船舶基本信息
|
||||
{
|
||||
repeated ShipInfo ShipInfo = 1;
|
||||
}
|
||||
|
||||
message VoyageTrackpointInfoSets //船舶轨迹信息
|
||||
{
|
||||
repeated VoyageTrackpointInfo VoyageTrackpointInfo = 1;
|
||||
}
|
||||
|
||||
message VoyageInfoSets //船舶航班信息
|
||||
{
|
||||
repeated VoyageInfo VoyageInfo = 1;
|
||||
}
|
||||
|
||||
message ShipBaseInfoSets //船舶基本扩展信息
|
||||
{
|
||||
repeated ShipBaseInfo ShipBaseInfo = 1;
|
||||
}
|
||||
|
||||
message ShipInfo
|
||||
{
|
||||
string ID = 1; //ID
|
||||
string Name = 2; //船舶名称
|
||||
string IMO = 3; //IMO编号
|
||||
string MMSI = 4; //MMSI编号
|
||||
string Callsign = 5; //呼号
|
||||
string Image = 6; //图片
|
||||
string RegCountry = 7; //注册国家
|
||||
string Owner = 8; //所属者
|
||||
string Builder = 9; //建造单位
|
||||
string BuilderTime = 10; //建造时间
|
||||
string Type = 11; //船舶类型(货船等)
|
||||
string SourceType = 12; //源网站船舶类型(驱逐舰等)
|
||||
string UsageType = 13; //军民属性
|
||||
string Nettonnage = 14; //净吨
|
||||
string Width = 15; //船宽(型宽)
|
||||
string Length = 16; //船长(chang)
|
||||
string LeftPost = 17; //左舷距
|
||||
string Trail = 18; //船尾舷距
|
||||
string Draught = 19; //满载吃水
|
||||
string CrewNum = 20; //船员数量
|
||||
string DataSource = 21; //数据来源
|
||||
string UpdateTime = 22; //更新时间
|
||||
string LastTime = 23; //最后更新时间
|
||||
string Sensitive = 24; //是否高敏(0/否;1/是)
|
||||
}
|
||||
|
||||
message VoyageTrackpointInfo
|
||||
{
|
||||
string ID = 1; // ID
|
||||
string VoyageID = 2; // ID of the voyage this point belongs to
|
||||
string ShipMMSI = 3; // Ship MMSI
|
||||
string Head = 4; // Heading (direction of the bow)
|
||||
string Trace = 5; // Course (direction of the track)
|
||||
string Speed = 6; // Speed
|
||||
string Status = 7; // Navigation status (aground / under way, etc.)
|
||||
string Longitude = 8; // Longitude (NOTE(review): the original comment said "latitude"; confirm producers fill this field with longitude)
|
||||
string Latitude = 9; // Latitude (NOTE(review): the original comment said "longitude"; confirm producers fill this field with latitude)
|
||||
string FromPort = 10; // Departure port
|
||||
string FromGlobalCode = 11; // Global id of the departure port
|
||||
string DestPort = 12; // Destination port
|
||||
string DestGlobalCode = 13; // Global id of the destination port
|
||||
string UpdateTime = 14; // Update time
|
||||
string LastTime = 15; // Last update time
|
||||
string DepTime = 16; // Departure time
|
||||
string DestTime = 17; // Estimated arrival time
|
||||
string Position = 18; // Sea area the ship is currently in
|
||||
}
|
||||
|
||||
message VoyageInfo
|
||||
{
|
||||
string ID = 1; //ID
|
||||
string VoyageID = 2; //航次ID
|
||||
string NaviDistance = 3; //航行距离
|
||||
string DepAtbTime = 4; //
|
||||
string Sog1 = 5; //速度1
|
||||
string DepTime = 6; //出发时间
|
||||
string DestPortName_En = 7; // 目的港口英文
|
||||
string DestPortZone = 8; // 目的地时区
|
||||
string Sog2 = 9; //速度2
|
||||
string DestTime = 10; // 预计抵达时间
|
||||
string MMSI = 11; // MMSI
|
||||
string DepCountryCode = 12; // 出发国家代码
|
||||
string DepPortName_En = 13; // 出发港口英文
|
||||
string DestCountryCode = 14; // 目的国家代码
|
||||
string NaviTime = 15; // 航行时间
|
||||
string DestPortName_Cn = 16; // 目的港口中文
|
||||
string DepPortName_Cn = 17; // 出发港口中文
|
||||
string Position = 18; // 所在海域
|
||||
string Status = 19; // 航行状态
|
||||
string DepPortZone = 20; // 出发地时区
|
||||
string UpdateTime = 21; //更新时间
|
||||
string LastTime = 22; //最后更新时间
|
||||
}
|
||||
|
||||
message ShipBaseInfo
|
||||
{
|
||||
string ID = 1; // ID
|
||||
string DepthType = 2; // 舰船级别
|
||||
string BaseInfo = 3; // 基本情况
|
||||
string BaseInfoSource = 4; // 基本情况信息来源
|
||||
string Crew = 5; // 乘员
|
||||
string CrewSource = 6; // 乘员信息来源
|
||||
string ShipNumber = 7; // 舷号
|
||||
string ShipNumberSource = 8; // 舷号信息来源
|
||||
string Length = 9; // 舰长
|
||||
string LengthSource = 10; // 舰长信息来源
|
||||
string MaxSpeed = 11; // 最大航速
|
||||
string MaxSpeedSource = 12; // 最大航速信息来源
|
||||
string Width = 13; // 舰宽
|
||||
string WidthSource = 14; // 舰宽信息来源
|
||||
string CruisingSpeed = 15; // 巡航速度
|
||||
string CruisingSpeedSource = 16; // 巡航速度信息来源
|
||||
string Displacement = 17; // 排水量
|
||||
string DisplacementSource = 18; // 排水量信息来源
|
||||
string Draft = 19; // 吃水
|
||||
string DraftSource = 20; // 吃水信息来源
|
||||
string Endurance = 21; // 续航力
|
||||
string EnduranceSource = 22; // 续航力信息来源
|
||||
string PowerPlant = 23; // 动力装置
|
||||
string PowerPlantSource = 24; // 动力装置信息来源
|
||||
string NavalGun = 25; // 舰炮
|
||||
string NavalGunSource = 26; // 舰炮信息来源
|
||||
string Missile = 27; // 导弹
|
||||
string MissileSource = 28; // 导弹信息来源
|
||||
string Torpedo = 29; // 鱼雷
|
||||
string TorpedoSource = 30; // 鱼雷信息来源
|
||||
string CarrierAircraft = 31; // 舰载机
|
||||
string CarrierAircraftSource = 32; // 舰载机信息来源
|
||||
string Radar = 33; // 雷达
|
||||
string RadarSource = 34; // 雷达信息来源
|
||||
string CommandAndControl = 35; // 指控系统
|
||||
string CommandAndControlSource = 36; // 指控系统信息来源
|
||||
string Sonar = 37; // 声呐
|
||||
string SonarSource = 38; // 声呐信息来源
|
||||
string ElectricWarfare = 39; // 电战系统
|
||||
string ElectricWarfareSource = 40; // 电战系统信息来源
|
||||
string UpdateTime = 41; //更新时间
|
||||
string LastTime = 42; //最后更新时间
|
||||
}
|
||||
|
||||
|
||||
59
deploy/proto/definition/Temp.proto
Normal file
59
deploy/proto/definition/Temp.proto
Normal file
@ -0,0 +1,59 @@
|
||||
syntax = "proto3";
|
||||
|
||||
message PersonInfoSets // Basic information about persons: a list of PersonInfo records
|
||||
{
|
||||
repeated PersonInfo PersonInfo = 1;
|
||||
}
|
||||
|
||||
message AirportInfoSets // Airport information: a list of AirportInfoS records
|
||||
{
|
||||
repeated AirportInfoS AirportInfoS = 1;
|
||||
}
|
||||
|
||||
message PortInfoSets // Port information: a list of PortInfo records
|
||||
{
|
||||
repeated PortInfo PortInfo = 1;
|
||||
}
|
||||
|
||||
message OrganizationInfoSets // Organization information: a list of OrganizationInfo records (the previous comment said "port information", copied from PortInfoSets)
|
||||
{
|
||||
repeated OrganizationInfo OrganizationInfo = 1;
|
||||
}
|
||||
|
||||
|
||||
// One person record; every field is stored as a string.
message PersonInfo
|
||||
{
|
||||
string ID = 1; // ID
|
||||
string NAME = 2; // Name
|
||||
string Age = 3; // Age
|
||||
string Nation = 4; // Nationality
|
||||
string Introduction = 5; // Short introduction
|
||||
}
|
||||
|
||||
// One airport record; every field is stored as a string.
message AirportInfoS
|
||||
{
|
||||
string ID = 1; // ID
|
||||
string NAME = 2; // Name
|
||||
string Location = 3; // Location
|
||||
string Longitude = 4; // Longitude
|
||||
string Latitude = 5; // Latitude
|
||||
}
|
||||
|
||||
// One port record; every field is stored as a string.
message PortInfo
|
||||
{
|
||||
string ID = 1; // ID
|
||||
string NAME = 2; // Name
|
||||
string Location = 3; // Location
|
||||
string Longitude = 4; // Longitude
|
||||
string Latitude = 5; // Latitude
|
||||
}
|
||||
|
||||
// One organization record; every field is stored as a string.
message OrganizationInfo
|
||||
{
|
||||
string ID = 1; // ID
|
||||
string NAME = 2; // Name
|
||||
string Nation = 3;
|
||||
string Commander = 4;
|
||||
string Type = 5;
|
||||
string Introduction = 6;
|
||||
}
|
||||
2
deploy/proto/generate-py.bat
Normal file
2
deploy/proto/generate-py.bat
Normal file
@ -0,0 +1,2 @@
|
||||
REM Compile Es.proto with protoc 3.11.4, emitting both Python and Java sources.
REM NOTE(review): every path below is an absolute path on one developer machine; adjust before running elsewhere.
C:\Users\DELL-1\Downloads\protobuf-3.11.4\protoc.exe -I=D:\git\osc\devops\deploy\proto\definition --python_out=C:\Users\DELL-1\Downloads\proto --java_out=C:\Users\DELL-1\Downloads\proto D:\git\osc\devops\deploy\proto\definition\Es.proto
|
||||
REM Keep the console window open so protoc's output can be read.
pause
|
||||
3
deploy/redis-stack/redis.conf
Normal file
3
deploy/redis-stack/redis.conf
Normal file
@ -0,0 +1,3 @@
|
||||
appendonly yes
|
||||
appendfsync everysec
|
||||
requirepass jlkj-841-2-redis
|
||||
13
deploy/redis-stack/redis_clear.py
Normal file
13
deploy/redis-stack/redis_clear.py
Normal file
@ -0,0 +1,13 @@
|
||||
# redis_clear.py removes the stale Redis queues that are left behind when a
# single-site crawl task finishes. It is meant to be started periodically by
# the crontab service on the server.
import re

from redisbloom.client import Client

# NOTE(review): host, port and password are hard-coded and the password is
# committed to the repository; consider loading them from the environment.
redis_client = Client(host='107.182.191.3', port=7379, password='jlkj-841-2-redis')

# Keys of interest look like "WebSite_<domain>_<uuid>:<suffix>". The raw string
# keeps the regex escapes (\w, \., \-) from being read as invalid string escapes.
pattern = r"WebSite_\w+(\.\w+)+_\w{8}\-\w{4}\-\w{4}\-\w{4}\-\w{12}:\w+"
key_regex = re.compile(pattern)

# SCAN walks the keyspace incrementally instead of blocking the server the way
# KEYS does on a large database. The glob is only a cheap server-side
# pre-filter; the regex still makes the final decision.
for key in redis_client.scan_iter(match='WebSite_*'):
    key_str = key.decode()
    if key_regex.match(key_str):
        redis_client.delete(key)
        print(key_str)
|
||||
11
deploy/scrapyd/Dockerfile
Normal file
11
deploy/scrapyd/Dockerfile
Normal file
@ -0,0 +1,11 @@
|
||||
FROM python:3.8.2
|
||||
|
||||
ENV TZ Asia/Shanghai
|
||||
ENV PATH /usr/local/bin:$PATH
|
||||
ADD ./requirements.txt /usr/local
|
||||
ADD ./default_scrapyd.conf /etc/scrapyd/scrapyd.conf
|
||||
|
||||
RUN pip install -r /usr/local/requirements.txt
|
||||
|
||||
EXPOSE 6800
|
||||
WORKDIR /root
|
||||
2
deploy/scrapyd/MANIFEST
Normal file
2
deploy/scrapyd/MANIFEST
Normal file
@ -0,0 +1,2 @@
|
||||
# file GENERATED by distutils, do NOT edit
|
||||
setup.py
|
||||
29
deploy/scrapyd/default_scrapyd.conf
Normal file
29
deploy/scrapyd/default_scrapyd.conf
Normal file
@ -0,0 +1,29 @@
|
||||
[scrapyd]
|
||||
eggs_dir = eggs
|
||||
logs_dir = logs
|
||||
items_dir =
|
||||
jobs_to_keep = 5
|
||||
dbs_dir = dbs
|
||||
max_proc = 0
|
||||
max_proc_per_cpu = 4
|
||||
finished_to_keep = 100
|
||||
poll_interval = 5.0
|
||||
bind_address = 0.0.0.0
|
||||
http_port = 6800
|
||||
debug = off
|
||||
runner = scrapyd.runner
|
||||
application = scrapyd.app.application
|
||||
launcher = scrapyd.launcher.Launcher
|
||||
webroot = scrapyd.website.Root
|
||||
|
||||
[services]
|
||||
schedule.json = scrapyd.webservice.Schedule
|
||||
cancel.json = scrapyd.webservice.Cancel
|
||||
addversion.json = scrapyd.webservice.AddVersion
|
||||
listprojects.json = scrapyd.webservice.ListProjects
|
||||
listversions.json = scrapyd.webservice.ListVersions
|
||||
listspiders.json = scrapyd.webservice.ListSpiders
|
||||
delproject.json = scrapyd.webservice.DeleteProject
|
||||
delversion.json = scrapyd.webservice.DeleteVersion
|
||||
listjobs.json = scrapyd.webservice.ListJobs
|
||||
daemonstatus.json = scrapyd.webservice.DaemonStatus
|
||||
84
deploy/scrapyd/requirements.txt
Normal file
84
deploy/scrapyd/requirements.txt
Normal file
@ -0,0 +1,84 @@
|
||||
async-timeout==4.0.2
|
||||
attrs==23.1.0
|
||||
Automat==20.2.0
|
||||
autopep8==1.5
|
||||
cachetools==5.3.1
|
||||
certifi==2020.4.5.1
|
||||
cffi==1.14.0
|
||||
chardet==5.1.0
|
||||
colorama==0.4.6
|
||||
constantly==15.1.0
|
||||
coverage==7.2.7
|
||||
cryptography==41.0.1
|
||||
cssselect==1.1.0
|
||||
decorator==4.4.2
|
||||
distlib==0.3.6
|
||||
exceptiongroup==1.1.1
|
||||
exif==1.6.0
|
||||
filelock==3.12.1
|
||||
flake8==6.0.0
|
||||
global-land-mask==1.0.0
|
||||
hiredis==1.0.1
|
||||
hyperlink==19.0.0
|
||||
idna==2.9
|
||||
incremental==22.10.0
|
||||
iniconfig==2.0.0
|
||||
itemadapter==0.8.0
|
||||
itemloaders==1.1.0
|
||||
jmespath==1.0.1
|
||||
kafka-python==2.0.1
|
||||
lxml==4.5.0
|
||||
mccabe==0.7.0
|
||||
mock==5.0.2
|
||||
msedge-selenium-tools==3.141.4
|
||||
numpy==1.18.3
|
||||
packaging==23.1
|
||||
parsel==1.5.2
|
||||
Pillow==7.1.2
|
||||
platformdirs==3.5.3
|
||||
pluggy==1.0.0
|
||||
plum-py==0.8.6
|
||||
Protego==0.1.16
|
||||
protobuf==3.12.2
|
||||
pyaes==1.6.1
|
||||
pyasn1==0.4.8
|
||||
pyasn1-modules==0.2.8
|
||||
pycodestyle==2.10.0
|
||||
pycparser==2.20
|
||||
PyDispatcher==2.0.5
|
||||
pyflakes==3.0.1
|
||||
pyhamcrest==2.0.4
|
||||
PyMySQL==1.0.3
|
||||
pyOpenSSL==23.2.0
|
||||
pyproject-api==1.5.1
|
||||
pytest==7.3.2
|
||||
pytest-cov==4.1.0
|
||||
queuelib==1.5.0
|
||||
redis==3.5.3
|
||||
redisbloom==0.3.2
|
||||
requests==2.23.0
|
||||
requests-file==1.5.1
|
||||
rmtest==0.7.0
|
||||
rsa==4.9
|
||||
Scrapy==2.9.0
|
||||
scrapy-redis==0.7.3
|
||||
scrapy-selenium==0.0.7
|
||||
scrapy-splash==0.9.0
|
||||
scrapyd==1.4.2
|
||||
selenium==3.141.0
|
||||
service-identity==18.1.0
|
||||
simhash==2.0.0
|
||||
six==1.16.0
|
||||
Telethon==1.28.5
|
||||
tldextract==3.4.4
|
||||
tomli==2.0.1
|
||||
tox==4.6.0
|
||||
Twisted==22.10.0
|
||||
typing-extensions==4.6.3
|
||||
urllib3==1.25.9
|
||||
validators==0.15.0
|
||||
virtualenv==20.23.0
|
||||
w3lib==1.21.0
|
||||
wget==3.2
|
||||
xlrd==1.2.0
|
||||
zope.interface==5.1.0
|
||||
11
deploy/shipxy-decoder/Dockerfile
Normal file
11
deploy/shipxy-decoder/Dockerfile
Normal file
@ -0,0 +1,11 @@
|
||||
FROM python:3.8.2-slim
|
||||
ENV TZ Asia/Shanghai
|
||||
ENV PATH /usr/local/bin:$PATH
|
||||
WORKDIR /usr/local
|
||||
RUN mkdir shipxy
|
||||
COPY ./requirements.txt .
|
||||
COPY shipxy ./shipxy
|
||||
RUN pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple && cd shipxy && tar -zxvf static.tar.gz
|
||||
EXPOSE 5000
|
||||
WORKDIR /usr/local/shipxy
|
||||
ENTRYPOINT [ "sh", "start.sh" ]
|
||||
3
deploy/shipxy-decoder/requirements.txt
Normal file
3
deploy/shipxy-decoder/requirements.txt
Normal file
@ -0,0 +1,3 @@
|
||||
selenium~=3.141.0
|
||||
Flask~=2.2.3
|
||||
urllib3~=1.25.8
|
||||
86
deploy/shipxy-decoder/shipxy/server.py
Normal file
86
deploy/shipxy-decoder/shipxy/server.py
Normal file
@ -0,0 +1,86 @@
|
||||
import selenium
|
||||
from flask import Flask, render_template, request
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
|
||||
from selenium.webdriver.firefox.options import Options
|
||||
|
||||
# Module-level setup: one remote WebDriver session and the Flask application
# are created at import time and shared by every route below.
# NOTE(review): Options is imported from selenium.webdriver.firefox.options,
# while the session is requested with DesiredCapabilities.EDGE - confirm that
# the --headless / --no-sandbox arguments actually reach the browser.
driver_options = Options()
|
||||
driver_options.add_argument('--headless')
|
||||
driver_options.add_argument('--no-sandbox')
|
||||
# The Selenium endpoint is addressed by a fixed IP and port.
browser = selenium.webdriver.remote.webdriver.WebDriver(command_executor="http://172.18.0.2:4444",
|
||||
desired_capabilities=DesiredCapabilities.EDGE,
|
||||
options=driver_options)
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
|
||||
@app.route('/')
def hello_world():
    """Landing route: answer with a fixed banner identifying this service."""
    banner = 'shipxy decoder'
    return banner
|
||||
|
||||
|
||||
@app.route('/decode')
def decode():
    """Serve the helper page whose script performs the actual decoding.

    Template names are resolved relative to the application's ``templates``
    folder, so the name must not repeat that folder: ``./templates/decode.html``
    would be looked up as ``templates/templates/decode.html``.
    """
    return render_template('decode.html')
|
||||
|
||||
|
||||
@app.route('/api/decode', methods=['post'])
def decodeApi():
    """Decode the POSTed payload by driving the /decode page in the remote browser.

    The request body is written into the page's ``encode`` input, the
    ``decode`` button is clicked, and the input's resulting value is returned.

    Returns:
        The decoded text, or the literal string ``'500'`` when the page could
        not be loaded even after starting a fresh browser session.
    """
    # NOTE: the URL must use the IP this service has inside the docker network.
    # The simulated browser runs in a different container, so both containers
    # have to share one docker network and the page is only reachable there.
    page_url = 'http://172.18.0.2:5000/decode'
    try:
        browser.get(page_url)
    except Exception:
        # The remote session may no longer be usable: open a new one, retry once.
        try:
            browser.start_session(capabilities=DesiredCapabilities.EDGE)
            browser.get(page_url)
        except Exception:
            return '500'
    payload = request.get_data().decode()
    # Hand the payload over as a script argument instead of splicing it into
    # the JavaScript source, so quotes, backslashes or newlines in the request
    # body can neither break the script nor inject code into it.
    browser.execute_script(
        'document.getElementById("encode").value = arguments[0];', payload)
    browser.find_element_by_id('decode').click()
    return browser.find_element_by_id('encode').get_attribute('value')
|
||||
|
||||
|
||||
@app.route('/decode_track')
def decode_track():
    """Serve the helper page whose script decodes AIS track data.

    Template names are resolved relative to the application's ``templates``
    folder, so the name must not repeat that folder.
    """
    return render_template('decode_track.html')
|
||||
|
||||
|
||||
@app.route('/api/decode_track', methods=['post'])
def decodeTrackApi():
    """Decode POSTed track data by driving the /decode_track page in the remote browser.

    The request body is written into the page's ``encode`` input, the
    ``decode`` button is clicked, and the input's resulting value is returned.

    Returns:
        The decoded text, or the literal string ``'500'`` when the page could
        not be loaded even after starting a fresh browser session.
    """
    page_url = 'http://172.18.0.2:5000/decode_track'
    try:
        browser.get(page_url)
    except Exception:
        # The remote session may no longer be usable: open a new one and retry
        # once. The retry must load the track page too; it used to load
        # /decode, which then ran the wrong decoder.
        try:
            browser.start_session(capabilities=DesiredCapabilities.EDGE)
            browser.get(page_url)
        except Exception:
            return '500'
    payload = request.get_data().decode()
    # Hand the payload over as a script argument instead of splicing it into
    # the JavaScript source, so quotes, backslashes or newlines in the request
    # body can neither break the script nor inject code into it.
    browser.execute_script(
        'document.getElementById("encode").value = arguments[0];', payload)
    browser.find_element_by_id('decode').click()
    return browser.find_element_by_id('encode').get_attribute('value')
|
||||
|
||||
|
||||
@app.route('/api/getvalue')
def decodeApi2():
    """Return the current content of the ``encode`` input on the loaded page.

    The element is an ``<input>``, whose content is exposed through its
    ``value`` attribute (the other handlers in this module read it the same
    way); ``WebElement.text`` only reports rendered text and is empty for it.
    """
    input_element = browser.find_element_by_id('encode')
    # A view must not return None, so fall back to an empty string.
    return input_element.get_attribute('value') or ''
|
||||
|
||||
|
||||
# When executed as a script, start Flask's built-in server on every interface, port 5000.
if __name__ == '__main__':
|
||||
app.run(host='0.0.0.0', port=5000)
|
||||
2
deploy/shipxy-decoder/shipxy/start.sh
Normal file
2
deploy/shipxy-decoder/shipxy/start.sh
Normal file
@ -0,0 +1,2 @@
|
||||
#!/usr/bin/env bash
# Replace the shell with the server process so that signals sent to the
# container (e.g. SIGTERM on `docker stop`) reach Python directly.
exec python server.py
|
||||
3
deploy/shipxy-decoder/shipxy/static.tar.gz
Normal file
3
deploy/shipxy-decoder/shipxy/static.tar.gz
Normal file
@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:a47344e460415d23794fcf429f544d8409c3bfdb2ac660541757783953b89427
|
||||
size 1124476
|
||||
19
deploy/shipxy-decoder/shipxy/templates/decode.html
Normal file
19
deploy/shipxy-decoder/shipxy/templates/decode.html
Normal file
@ -0,0 +1,19 @@
|
||||
<!DOCTYPE HTML>
|
||||
<html>
|
||||
<head>
|
||||
<script src="{{url_for('static',filename='jquery-1.8.2.min.js')}}"></script>
|
||||
<script src="{{url_for('static',filename='ElaneMap.min.f.js')}}"></script>
|
||||
</head>
|
||||
<body>
|
||||
<input type="text" id="encode">
|
||||
<input type="button" value="decode" id="decode" onclick="decode()">
|
||||
<script>
|
||||
function decode(){
    // Read the encoded text from the shared input, run it through dedecode()
    // (not defined in this page; expected from the scripts loaded in <head>),
    // and write the JSON-serialised `data` of the result back into the input.
    var source = document.getElementById('encode').value;
    var decoded = dedecode(source, "1");
    var serialised = JSON.stringify(decoded.data);
    document.getElementById('encode').value = serialised;
}
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
20
deploy/shipxy-decoder/shipxy/templates/decode_track.html
Normal file
20
deploy/shipxy-decoder/shipxy/templates/decode_track.html
Normal file
@ -0,0 +1,20 @@
|
||||
<!DOCTYPE HTML>
|
||||
<html>
|
||||
<head>
|
||||
<script src="{{url_for('static',filename='jquery-1.8.2.min.js')}}"></script>
|
||||
<script src="{{url_for('static',filename='ElaneMap.min.f.js')}}"></script>
|
||||
<script src="{{url_for('static',filename='DeAnalyseManager.js')}}"></script>
|
||||
</head>
|
||||
<body>
|
||||
<input type="text" id="encode">
|
||||
<input type="button" value="decode" id="decode" onclick="decode_track()">
|
||||
<script>
|
||||
function decode_track(){
    // Read the encoded track from the shared input, run it through
    // analyseAisTrack() (not defined in this page; expected from the scripts
    // loaded in <head>), and write the JSON-serialised `data` of the result
    // back into the input.
    var source = document.getElementById('encode').value;
    var decoded = analyseAisTrack(source);
    var serialised = JSON.stringify(decoded.data);
    document.getElementById('encode').value = serialised;
}
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
78
deploy/wgcloud-server/README.md
Normal file
78
deploy/wgcloud-server/README.md
Normal file
@ -0,0 +1,78 @@
|
||||
# WGCLOUD介绍
|
||||
WGCLOUD是一款轻量高效的运维监控系统,专注Linux、Windows等服务器主机性能监测,故障发送告警通知。WGCLOUD部署使用简单、轻量、分布式、开源、免注册、也可内网使用。
|
||||
|
||||
支持主机各种指标监测(cpu使用率,cpu温度,内存使用率,磁盘容量,磁盘IO,硬盘SMART健康状态,系统负载,连接数量,网卡流量,硬件系统信息等)。支持监测服务器上的进程应用、文件、端口、日志、DOCKER容器、数据库、数据表等资源。支持监测服务接口API、数通设备(如交换机、路由器、打印机)等。自动生成网络拓扑图,大屏可视化,web SSH,统计分析图表,巡检报告,指令下发批量执行,FTP/SFTP监测,告警信息推送(如邮件、钉钉、微信、短信等)
|
||||
|
||||
WGCLOUD的唯一官网:https://www.wgstart.com
|
||||
|
||||
# 如何安装
|
||||
## 在服务器上创建docker-compose.yml文件
|
||||
```
|
||||
version: '3'
|
||||
services:
|
||||
wgcloud:
|
||||
image: tianshiyeben/wgcloud-server:v3.4.6
|
||||
container_name: wgcloud-server
|
||||
restart: unless-stopped
|
||||
privileged: true
|
||||
environment:
|
||||
- TZ=Asia/Shanghai
|
||||
ports:
|
||||
- 9997:9997
|
||||
- 9998:9998
|
||||
- 9999:9999
|
||||
volumes:
|
||||
- ./config:/wgcloud-server/config
|
||||
- ./logo:/wgcloud-server/logo
|
||||
```
|
||||
## 运行:docker-compose up -d
|
||||
运行如上指令后,会在docker-compose.yml文件同级目录下,生成一个config文件夹,需要在里面存入server的配置文件:application.yml和daemon.properties(这两个配置文件在安装包wgcloud-server/config/下,也可以下载 https://www.wgstart.com/download/3.4.6/wgcloud-v3.4.6.tar.gz ,解压后在wgcloud-v3.4.6/server/config下找到这两个配置文件),这两个文件一定要放入config下,我们配置好config/application.yml(比如数据库连接信息),然后重新运行指令:`docker-compose up -d`
|
||||
|
||||
注意把config/application.yml中的守护进程url中的localhost改为宿主机ip,有时候默认用localhost也可以,看具体环境,如下
|
||||
```
|
||||
#守护进程访问url,server服务使用,agent不使用(一般保持默认即可)
|
||||
daemonUrl: http://localhost:9997
|
||||
```
|
||||
如果没有生效,那先停止容器:`docker stop wgcloud-server`,再启动:`docker start wgcloud-server`
|
||||
|
||||
至此wgcloud服务端就算启动完成了,这里的server是v3.4.6版本
|
||||
|
||||
## 解析授权文件license.txt(付费功能,普通版可跳过)
|
||||
我们获取到授权文件license.txt,先把授权文件license.txt放到config/下,然后进入容器,把license.txt复制到wgcloud-server/目录下即可,如下
|
||||
```
|
||||
root@vultr:/docker# docker container exec -it wgcloud-server /bin/bash
|
||||
root@981a404efc50:/wgcloud-server# cd config/
|
||||
root@981a404efc50:/wgcloud-server/config# ls
|
||||
application.yml license.txt
|
||||
root@981a404efc50:/wgcloud-server/config# cp license.txt ../
|
||||
root@981a404efc50:/wgcloud-server/config# rm -rf license.txt
|
||||
```
|
||||
然后重启容器就好了
|
||||
|
||||
## 当server部署在arm,macOS、龙芯mips等平台时,需要替换守护进程下wgcloud-daemon-release
|
||||
> 参考 https://www.wgstart.com/help/docs60.html
|
||||
|
||||
下载对应版本的守护进程后 先把wgcloud-daemon-release放到config/下,然后进入容器,先备份或删除wgcloud-server/wgcloud-daemon-release,把config/wgcloud-daemon-release复制到wgcloud-server/目录下即可,记得给wgcloud-daemon-release加可执行权限,如下
|
||||
```
|
||||
root@vultr:/docker# docker container exec -it wgcloud-server /bin/bash
|
||||
root@981a404efc50:/wgcloud-server# mv wgcloud-daemon-release wgcloud-daemon-release_0312
|
||||
root@981a404efc50:/wgcloud-server# cd config/
|
||||
root@981a404efc50:/wgcloud-server/config# ls
|
||||
application.yml wgcloud-daemon-release
|
||||
root@981a404efc50:/wgcloud-server/config# cp wgcloud-daemon-release ../
|
||||
root@981a404efc50:/wgcloud-server/config# rm -rf wgcloud-daemon-release
|
||||
root@981a404efc50:/wgcloud-server/config# chmod +x ../wgcloud-daemon-release
|
||||
```
|
||||
然后重启容器就好了
|
||||
|
||||
## 什么是agent?
|
||||
agent是探针端,需要部署在被监控的服务器上,并且agent和server需要保持相同版本(https://www.wgstart.com/docs.html )。将agent目录拷贝到需要被监控的服务器上,运行start.sh脚本即可
|
||||
|
||||
# web页面访问地址
|
||||
`http://[ip]:9999/wgcloud`
|
||||
|
||||
docker 部署时,上面的 9999 替换成映射的宿主机端口
|
||||
|
||||
**默认登录账号密码:admin/111111**
|
||||
|
||||
> 本 README 整理自 https://hub.docker.com/r/tianshiyeben/wgcloud-server
|
||||
191
deploy/wgcloud-server/config/application.yml
Normal file
191
deploy/wgcloud-server/config/application.yml
Normal file
@ -0,0 +1,191 @@
|
||||
server:
|
||||
port: 9999
|
||||
servlet:
|
||||
session:
|
||||
timeout: 120m
|
||||
context-path: /wgcloud
|
||||
#日志文件输出路径
|
||||
logging:
|
||||
file:
|
||||
path: ./log
|
||||
# 数据库 相关设置
|
||||
spring:
|
||||
application:
|
||||
name: wgcloud-server
|
||||
datasource:
|
||||
driver-class-name: org.postgresql.Driver
|
||||
url: jdbc:postgresql://39.98.151.140:23306/wgcloud
|
||||
username: postgres
|
||||
password: passok123A
|
||||
hikari:
|
||||
validationTimeout: 3000
|
||||
connectionTimeout: 60000
|
||||
idleTimeout: 60000
|
||||
minimumIdle: 10
|
||||
maximumPoolSize: 10
|
||||
maxLifeTime: 60000
|
||||
connectionTestQuery: select 1
|
||||
mvc:
|
||||
static-path-pattern: /static/**
|
||||
thymeleaf:
|
||||
cache: false
|
||||
mybatis:
|
||||
config-location: classpath:mybatis/mybatis-config.xml
|
||||
mapper-locations: classpath:mybatis/mapper/*.xml
|
||||
|
||||
|
||||
#自定义配置参数
|
||||
base:
|
||||
#管理员登录账号
|
||||
account: admin
|
||||
#管理员登录账号的密码
|
||||
accountPwd: 111111
|
||||
#只读账号(只有浏览权限,没有修改、删除、添加权限,此功能需升级到专业版才生效)
|
||||
guestAccount: guest
|
||||
#只读账号的密码
|
||||
guestAccountPwd: 111111
|
||||
#通信token,agent端的wgToken和此保持一致
|
||||
wgToken: wgcloud
|
||||
#每页显示多少条数据,建议不小于10
|
||||
pageSize: 20
|
||||
#是否开启web ssh客户端,yes开启,no关闭
|
||||
webSsh: yes
|
||||
#web ssh客户端的服务端口
|
||||
webSshPort: 9998
|
||||
#守护进程访问url,server服务使用,agent不使用,若守护端口修改,那此处的端口也要同步修改下(一般保持默认即可)
|
||||
daemonUrl: http://localhost:9997
|
||||
#是否开启公众看板,yes开启,no关闭,开启后看板页面无需登陆
|
||||
dashView: yes
|
||||
#公众看板页面主机IP是否开启脱敏显示,yes开启,no关闭
|
||||
dashViewIpHide: yes
|
||||
#是否开启大屏展示看板,yes开启,no关闭,开启后看板页面无需登陆
|
||||
dapingView: yes
|
||||
#是否开启数据开放接口,yes开启,no关闭
|
||||
openDataAPI: no
|
||||
#是否自动闭合左侧菜单,yes是,no否
|
||||
sidebarCollapse: yes
|
||||
#是否在列表页面显示告警次数,yes是,no否,这个会对性能有些影响,不建议开启
|
||||
showWarnCount: no
|
||||
#是否开启指令下发(若关闭,将不能再新增和下发指令),yes是,no否
|
||||
shellToRun: yes
|
||||
#linux指令下发不能包含的敏感字符,小写即可,多个用逗号隔开
|
||||
shellToRunLinuxBlock: 'rm ,mkfs, /dev/,:(){:|:&};:,mv ,wget , install '
|
||||
#windows指令下发不能包含的敏感字符,小写即可,多个用逗号隔开
|
||||
shellToRunWinBlock: 'del ,delete ,format ,ren ,rd ,rd/s/q ,rmdir '
|
||||
#数据监控的sql语句,不能编写出现的敏感字符(即sql可能注入的关键字),小写即可,多个用逗号隔开
|
||||
sqlInKeys: 'execute ,update ,delete ,insert ,create ,drop ,alter ,rename ,modify '
|
||||
#数据表监控间隔,单位秒,默认60分钟
|
||||
dbTableTimes: 3600
|
||||
#服务接口监控间隔,单位秒,默认10分钟
|
||||
heathTimes: 120
|
||||
#ftp/sftp服务监控间隔,单位秒,默认10分钟
|
||||
ftpTimes: 600
|
||||
#数通设备PING监控间隔,单位秒,默认15分钟
|
||||
dceTimes: 600
|
||||
#数通设备snmp监测间隔,单位秒,默认20分钟
|
||||
snmpTimes: 1200
|
||||
#告警缓存时间间隔(即告警静默时间,此时间段内同一告警通知不再重复发),单位秒,默认120分钟
|
||||
warnCacheTimes: 7200
|
||||
#监控数据保留天数,默认30天
|
||||
historyDataOut: 10
|
||||
#节点类型(集群才会用到,一般保持默认即可),master或slave,一个集群只能有一个master和N(最多31)个slave,如:slave1,slave2
|
||||
nodeType: master
|
||||
#是否开启使用标签,yes开启,no关闭
|
||||
hostGroup: no
|
||||
#是否开启成员账号管理(即每个成员可管理自己的资源),yes开启,no关闭,关闭后新增的成员不能再登录(管理员和只读账号可以登录)此功能需升级到专业版
|
||||
userInfoManage: no
|
||||
#server/logo/下的ico图标名称,建议32*32,如favicon.png,此功能需升级到专业版
|
||||
icoUrl:
|
||||
#server/logo/下的logo图标名称,建议120*120,如logo.png,此功能需升级到专业版
|
||||
logoUrl:
|
||||
#系统全称,如wgcloud运维监控系统,此功能需升级到专业版
|
||||
wgName:
|
||||
#系统简称,如wgcloud,此功能需升级到专业版
|
||||
wgShortName:
|
||||
#告警邮件标题前缀,此功能需升级到专业版
|
||||
mailTitlePrefix: '[WGCLOUD]'
|
||||
#告警邮件内容后缀,此功能需升级到专业版
|
||||
mailContentSuffix: '<p><p><p>WGCLOUD敬上'
|
||||
#是否显示页面底部版权、网址信息,yes显示,no不显示,此功能需升级到专业版
|
||||
copyRight: yes
|
||||
|
||||
#告警配置,策略优先级按照前后顺序执行,比如告警总开关关闭时,那下面所有告警开关都会失效
|
||||
mail:
|
||||
#告警总开关,yes开启,no关闭。总开关开启后,以下子开关设置才会生效。以下开关均遵循此规则。
|
||||
allWarnMail: yes
|
||||
#告警时间段cron表达式设置(在该时间段内发送告警,其他时间不发),默认为空会持续发送,如'* * 8-20 ? * MON-FRI'表示周1到周5的8点-20点发送告警,'* * 8-20 * * ?'表示每天的8点-20点发送告警,带单引号
|
||||
warnCronTime:
|
||||
#不需要告警ip集合,多个用逗号,隔开,此主机所有监控资源都不会再告警
|
||||
blockIps:
|
||||
#主机上行传输速率bytes sent告警开关,yes开启,no关闭
|
||||
upSpeedMail: no
|
||||
#主机上行传输速率告警值,单位KB/s,默认10MB,超过此值即发送告警
|
||||
upSpeedVal: 10240
|
||||
#主机上行传输速率低于此值时发送告警,此配置项一般不用,默认即可,单位KB/s,默认0MB
|
||||
upSpeedMinVal: 0
|
||||
#主机下行传输速率bytes received告警开关,yes开启,no关闭
|
||||
downSpeedMail: no
|
||||
#主机下行传输速率告警值,单位KB/s,默认10MB,超过此值即发送告警
|
||||
downSpeedVal: 10240
|
||||
#主机下行传输速率低于此值时发送告警,此配置项一般不用,默认即可,单位KB/s,默认0MB
|
||||
downSpeedMinVal: 0
|
||||
#内存告警开关,yes开启,no关闭
|
||||
memWarnMail: yes
|
||||
#主机内存使用率%告警值,超过此值即发送告警
|
||||
memWarnVal: 90
|
||||
#主机系统负载告警开关,yes开启,no关闭,
|
||||
sysLoadWarnMail: no
|
||||
#主机系统负载告警值(以5分钟系统负载值为准进行告警),可以为小数,如1.2,超过此值即发送告警
|
||||
sysLoadWarnVal: 20
|
||||
#主机CPU使用率告警开关,yes开启,no关闭
|
||||
cpuWarnMail: no
|
||||
#主机cpu使用率%告警值,可大于100,超过此值即发送告警
|
||||
cpuWarnVal: 99
|
||||
#主机CPU温度告警开关,yes开启,no关闭
|
||||
cpuTemperatureWarnMail: no
|
||||
#主机CPU温度告警值℃,超过此值即发送告警
|
||||
cpuTemperatureWarnVal: 92
|
||||
#主机磁盘使用率告警开关,yes开启,no关闭
|
||||
diskWarnMail: yes
|
||||
#主机磁盘使用率%告警值,超过此值即发送告警
|
||||
diskWarnVal: 96
|
||||
#主机不需要告警磁盘在此屏蔽,多个盘符用,隔开,如/boot,/dev。支持Ant路径匹配规则,如/dev/**。特殊符号用单引号,如'C:'
|
||||
diskBlock: /dev,/snap/**,'C:','E:'
|
||||
#主机磁盘SMART健康检测告警开关,yes开启,no关闭
|
||||
smartWarnMail: yes
|
||||
#主机下线告警开关,yes开启,no关闭
|
||||
hostDownWarnMail: yes
|
||||
#进程下线告警开关,yes开启,no关闭
|
||||
appDownWarnMail: yes
|
||||
#DOCKER下线告警开关,yes开启,no关闭
|
||||
dockerDownWarnMail: yes
|
||||
#服务接口告警开关,yes开启,no关闭
|
||||
heathWarnMail: yes
|
||||
#服务接口监测失败连续几次后发送告警通知,默认2次
|
||||
heathWarnCount: 3
|
||||
#ftp/sftp告警开关,yes开启,no关闭
|
||||
ftpWarnMail: yes
|
||||
#数通设备PING告警,yes开启,no关闭
|
||||
dceWarnMail: yes
|
||||
#数通设备PING监测失败连续几次后发送告警通知,默认2次
|
||||
dceWarnCount: 2
|
||||
#数通设备SNMP监测告警,yes开启,no关闭
|
||||
snmpWarnMail: yes
|
||||
#数据源、数据表告警开关,yes开启,no关闭
|
||||
dbDownWarnMail: yes
|
||||
#日志文件监控告警开关,yes开启,no关闭
|
||||
fileLogWarnMail: yes
|
||||
#端口telnet不通告警开关,yes开启,no关闭
|
||||
portWarnMail: yes
|
||||
#文件防篡改告警开关,yes开启,no关闭
|
||||
fileSafeWarnMail: yes
|
||||
#指令下发通知开关,yes开启,no关闭
|
||||
shellWarnMail: yes
|
||||
#自定义监控项告警开关,yes开启,no关闭
|
||||
customInfoWarnMail: yes
|
||||
#服务器登录提醒通知开关,yes开启,no关闭,此功能需升级到专业版
|
||||
hostLoginWarnMail: yes
|
||||
#告警脚本完整路径(若配置脚本,无论是否配置过邮件,都会执行该脚本),可以为空
|
||||
warnScript:
|
||||
#是否将告警内容转为unicode(针对告警脚本生效),yes是,no否,钉钉微信等告警建议设置为yes,windows部署server需设置为yes,否则接受到会是乱码
|
||||
warnToUnicode: no
|
||||
2
deploy/wgcloud-server/config/daemon.properties
Normal file
2
deploy/wgcloud-server/config/daemon.properties
Normal file
@ -0,0 +1,2 @@
|
||||
#守护进程端口,agent配置的守护进程端口需和此处保持一致
|
||||
port=9997
|
||||
16
deploy/wgcloud-server/docker-compose.yml
Normal file
16
deploy/wgcloud-server/docker-compose.yml
Normal file
@ -0,0 +1,16 @@
|
||||
version: '3'
|
||||
services:
|
||||
wgcloud:
|
||||
image: tianshiyeben/wgcloud-server:v3.4.6
|
||||
container_name: wgcloud-server
|
||||
restart: unless-stopped
|
||||
privileged: true
|
||||
environment:
|
||||
- TZ=Asia/Shanghai
|
||||
ports:
|
||||
- 28084:9997
|
||||
- 28085:9998
|
||||
- 28086:9999
|
||||
volumes:
|
||||
- ./config:/wgcloud-server/config
|
||||
- ./logo:/wgcloud-server/logo
|
||||
15
deploy/youtube-dl/Dockerfile
Normal file
15
deploy/youtube-dl/Dockerfile
Normal file
@ -0,0 +1,15 @@
|
||||
FROM python:3.8.2-slim
|
||||
|
||||
ENV TZ Asia/Shanghai
|
||||
|
||||
ENV PATH /usr/local/bin:$PATH
|
||||
|
||||
WORKDIR /usr/local
|
||||
|
||||
COPY . .
|
||||
|
||||
RUN python -m pip install --force-reinstall https://github.com/yt-dlp/yt-dlp/archive/master.tar.gz && pip install Flask
|
||||
|
||||
EXPOSE 5000
|
||||
|
||||
ENTRYPOINT [ "sh", "start.sh" ]
|
||||
96
deploy/youtube-dl/nginx.conf
Normal file
96
deploy/youtube-dl/nginx.conf
Normal file
@ -0,0 +1,96 @@
|
||||
worker_processes auto;
|
||||
#error_log logs/error.log;
|
||||
#error_log logs/error.log notice;
|
||||
#error_log logs/error.log info;
|
||||
|
||||
#pid logs/nginx.pid;
|
||||
|
||||
|
||||
events {
|
||||
worker_connections 1024;
|
||||
}
|
||||
|
||||
http {
|
||||
include mime.types;
|
||||
default_type application/octet-stream;
|
||||
|
||||
#log_format main '$remote_addr - $remote_user [$time_local] "$request" '
|
||||
# '$status $body_bytes_sent "$http_referer" '
|
||||
# '"$http_user_agent" "$http_x_forwarded_for"';
|
||||
|
||||
#access_log logs/access.log main;
|
||||
|
||||
sendfile on;
|
||||
#tcp_nopush on;
|
||||
|
||||
#keepalive_timeout 0;
|
||||
keepalive_timeout 65;
|
||||
|
||||
#gzip on;
|
||||
|
||||
client_max_body_size 20m;
|
||||
server {
|
||||
listen 8080;
|
||||
server_name localhost;
|
||||
|
||||
#charset koi8-r;
|
||||
charset utf-8;
|
||||
|
||||
#access_log logs/host.access.log main;
|
||||
location / {
|
||||
root /usr/share/nginx/html;
|
||||
index index.html index.htm;
|
||||
autoindex on;
|
||||
autoindex_exact_size off;
|
||||
autoindex_localtime on;
|
||||
}
|
||||
|
||||
#error_page 404 /404.html;
|
||||
|
||||
# redirect server error pages to the static page /50x.html
|
||||
#
|
||||
error_page 500 502 503 504 /50x.html;
|
||||
location = /50x.html {
|
||||
root html;
|
||||
}
|
||||
|
||||
# proxy the PHP scripts to Apache listening on 127.0.0.1:80
|
||||
#
|
||||
#location ~ \.php$ {
|
||||
# proxy_pass http://127.0.0.1;
|
||||
#}
|
||||
|
||||
# pass the PHP scripts to FastCGI server listening on 127.0.0.1:9000
|
||||
#
|
||||
#location ~ \.php$ {
|
||||
# root html;
|
||||
# fastcgi_pass 127.0.0.1:9000;
|
||||
# fastcgi_index index.php;
|
||||
# fastcgi_param SCRIPT_FILENAME /scripts$fastcgi_script_name;
|
||||
# include fastcgi_params;
|
||||
#}
|
||||
|
||||
# deny access to .htaccess files, if Apache's document root
|
||||
# concurs with nginx's one
|
||||
#
|
||||
#location ~ /\.ht {
|
||||
# deny all;
|
||||
#}
|
||||
}
|
||||
|
||||
|
||||
# another virtual host using mix of IP-, name-, and port-based configuration
|
||||
#
|
||||
#server {
|
||||
# listen 8000;
|
||||
# listen somename:8080;
|
||||
# server_name somename alias another.alias;
|
||||
|
||||
# location / {
|
||||
# root html;
|
||||
# index index.html index.htm;
|
||||
# }
|
||||
#}
|
||||
|
||||
|
||||
}
|
||||
63
deploy/youtube-dl/server.py
Normal file
63
deploy/youtube-dl/server.py
Normal file
@ -0,0 +1,63 @@
|
||||
# -*- coding: UTF-8 -*-
|
||||
import json
|
||||
import os
|
||||
import platform
|
||||
|
||||
from flask import Flask, request
|
||||
from threading import Thread
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
|
||||
def single_dl(url, options):
    """Download one video by invoking the ``yt-dlp`` command-line tool.

    Args:
        url: Video URL; surrounding whitespace is ignored.
        options: yt-dlp command-line options as one shell-style string, e.g.
            ``'-f best --output "/tmp/%(id)s.%(ext)s"'``.

    Returns:
        None. Failures to launch the tool are reported on stdout and never
        raised, matching the previous ``os.system`` behaviour.

    The command is run without a shell: both arguments originate from HTTP
    requests, and interpolating them into a shell command line would let a
    caller execute arbitrary commands.
    """
    import shlex
    import subprocess

    target = str(url).strip()
    try:
        # "--" ends option parsing so a URL starting with "-" is not read as a flag.
        command = ['yt-dlp', *shlex.split(str(options)), '--', target]
        subprocess.run(command, check=False)
    except (OSError, ValueError) as exc:
        # OSError: yt-dlp is missing or not executable.
        # ValueError: the options string has unbalanced quotes.
        print('[Download failed] Video url: %s (%r)' % (target, exc))
|
||||
|
||||
|
||||
def batch_dl(urls, options):
    """Download every URL in *urls* one after another with the same options.

    Args:
        urls: Iterable of video URL strings.
        options: yt-dlp command-line options, passed through to ``single_dl``.

    Each URL is handed on in the same trimmed form that is logged, and blank
    entries are skipped instead of launching the downloader without a URL.
    """
    for raw_url in urls:
        video_url = raw_url.strip()
        if not video_url:
            continue
        print('[Start download] Video url: %s' % video_url)
        single_dl(video_url, options)
|
||||
|
||||
|
||||
@app.route('/ping')
def ping():
    """Liveness probe: answer with the fixed service tag."""
    service_tag = 'ytb-dl'
    return service_tag
|
||||
|
||||
|
||||
@app.route('/download', methods=['post'])
def download():
    """Start a background download for the URLs listed in the JSON request body.

    Expected body: ``{"urls": [...], "options": "<yt-dlp options>"}``.
    ``options`` is optional, and ``urls`` may also be a single URL string.

    Returns:
        A JSON string whose ``code`` is 200 when a download thread was
        started, 300 when there was nothing to download, and 500 on any error
        (``message`` then carries the ``repr`` of the exception).
    """
    try:
        data = request.get_json()
        if not isinstance(data, dict):
            raise TypeError('request body must be a JSON object')

        options = '-f best --output "/usr/local/download/%(id)s.%(ext)s"'
        # NOTE(review): caller-supplied options are handed to yt-dlp verbatim,
        # which lets any client of this endpoint pick arbitrary yt-dlp flags;
        # consider restricting them to an allow-list.
        if 'options' in data:
            options = data['options']

        video_urls = data.get('urls', [])
        if isinstance(video_urls, str):
            # A bare string would otherwise be iterated character by character.
            video_urls = [video_urls]
        if not isinstance(video_urls, list) or not all(
                isinstance(item, str) for item in video_urls):
            raise TypeError("'urls' must be a list of URL strings")

        if video_urls:
            # Download in the background so the request returns immediately.
            dl_thread = Thread(target=batch_dl, args=(video_urls, options))
            dl_thread.start()
            return json.dumps({
                'code': 200,
                'message': f'开始下载 {len(video_urls)} 个视频'
            }, ensure_ascii=False)
        return json.dumps({
            'code': 300,
            'message': '未下载视频'
        }, ensure_ascii=False)
    except Exception as e:
        return json.dumps({
            'code': 500,
            'message': repr(e)
        }, ensure_ascii=False)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Make sure the download directory exists before serving requests.
    # NOTE(review): download() builds its default --output path from
    # /usr/local/download regardless of platform - confirm the Windows
    # directory below is really the one yt-dlp should write to.
    output_path = '/usr/local/download/'
    if platform.system().upper() == 'WINDOWS':
        output_path = 'E:/youtube-dl/'
    # exist_ok avoids the race between checking for the directory and creating it.
    os.makedirs(output_path, exist_ok=True)

    app.run(host='0.0.0.0', port=5000)
|
||||
2
deploy/youtube-dl/start.sh
Normal file
2
deploy/youtube-dl/start.sh
Normal file
@ -0,0 +1,2 @@
|
||||
#!/usr/bin/env bash
# Replace the shell with the server process so that signals sent to the
# container (e.g. SIGTERM on `docker stop`) reach Python directly.
exec python server.py
|
||||
183
deploy/youtube-dl/youtube-dl-manual-cn.txt
Normal file
183
deploy/youtube-dl/youtube-dl-manual-cn.txt
Normal file
@ -0,0 +1,183 @@
|
||||
Usage: youtube-dl [OPTIONS] URL [URL...]
|
||||
|
||||
Options:
|
||||
通用选项:
|
||||
-h, --help 打印帮助文档
|
||||
--version 打印版本信息
|
||||
-U, --update 更新到最新版(需要权限)
|
||||
-i, --ignore-errors 遇到下载错误时跳过
|
||||
--abort-on-error 遇到下载错误时终止
|
||||
--dump-user-agent 显示当前使用的浏览器(User-agent)
|
||||
--list-extractors 列出所有的提取器(支持的网站)
|
||||
--extractor-descriptions 同上
|
||||
--force-generic-extractor 强制使用通用提取器下载
|
||||
--default-search PREFIX 使用此前缀补充不完整的URLs,例如:"ytsearch2 yt-dl" 从youtube搜索并下载两个关于yt-dl视频. 使用"auto"youtube-dl就会猜一个,一般效果等价于"ytsearch"("auto_warning"猜测时加入警告).我已知支持的PREFIX:ytsearch (youtube), ytsearchdate (youtube), yvsearch (yahoo videos), gvsearch (google videos)
|
||||
--ignore-config 不读取配置文件。当此选项写在全局配置文件/etc/youtube-dl.conf中时:不再读取用户配置 ~/.config/youtube-dl/config (Windows上为 %APPDATA%/youtube-dl/config.txt)
|
||||
--config-location PATH 使用指定路径下的配置文件
|
||||
--flat-playlist 列出列表视频但不下载
|
||||
--mark-watched 标记看过此视频 (YouTube only)
|
||||
--no-mark-watched 不标记看过此视频 (YouTube only)
|
||||
--no-color 打印到屏幕上的代码不带色
|
||||
|
||||
网络选项:
|
||||
--proxy URL 使用HTTP/HTTPS/SOCKS协议的代理.如:socks5://127.0.0.1:1080/.
|
||||
--socket-timeout SECONDS 放弃连接前等待时间
|
||||
--source-address IP 绑定的客户端IP地址
|
||||
-4, --force-ipv4 所有连接通过IPv4
|
||||
-6, --force-ipv6 所有连接通过IPv6
|
||||
|
||||
地理限制:
|
||||
--geo-verification-proxy URL 使用此代理地址测试一些有地理限制的地址
|
||||
--geo-bypass 绕过地理限制通过伪装X-Forwarded-For HTTP头部的客户端ip (实验)
|
||||
--no-geo-bypass 不 绕过地理限制通过伪装X-Forwarded-For HTTP头部的客户端ip (实验)
|
||||
--geo-bypass-country CODE 强制绕过地理限制通过提供准确的ISO 3166-2标准的国别代码(实验)
|
||||
|
||||
视频选择:
|
||||
--playlist-start NUMBER 指定列表中开始下载的视频(默认为1)
|
||||
--playlist-end NUMBER 指定列表中结束的视频(默认为last)
|
||||
--playlist-items ITEM_SPEC 指定列表中要下载的视频项目编号.如:"--playlist-items 1,2,5,8"或"--playlist-items 1-3,7,10-13"
|
||||
--match-title REGEX 下载标题匹配的视频(正则表达式或区分大小写的字符串)
|
||||
--reject-title REGEX 跳过下载标题匹配的视频(正则表达式或区分大小写的字符串)
|
||||
--max-downloads NUMBER 下载NUMBER个视频后停止
|
||||
--min-filesize SIZE 不下载小于SIZE的视频(e.g. 50k or 44.6m)
|
||||
--max-filesize SIZE 不下载大于SIZE的视频(e.g. 50k or 44.6m)
|
||||
--date DATE 仅下载上传日期在指定日期的视频
|
||||
--datebefore DATE 仅下载上传日期在指定日期或之前的视频 (i.e. inclusive)
|
||||
--dateafter DATE 仅下载上传日期在指定日期或之后的视频 (i.e. inclusive)
|
||||
--min-views COUNT 不下载观影数小于指定值的视频
|
||||
--max-views COUNT 不下载观影数大于指定值的视频
|
||||
--match-filter FILTER 通用视频过滤器. Specify any key (see help for -o for a list of available keys) to match if the key is present, !key to check if the key is not present, key > NUMBER (like "comment_count > 12", also works with >=, <, <=, !=, =) to compare against a number, key = 'LITERAL' (like "uploader = 'Mike Smith'", also works with !=) to match against a string literal and & to require multiple matches. Values which are not known are excluded unless you put a question mark (?) after the operator. For example, to only match videos that have been liked more than 100 times and disliked less than 50 times (or the dislike functionality is not available at the given service), but who also have a description, use --match-filter "like_count > 100 & dislike_count <? 50 & description" .
|
||||
--no-playlist 当视频链接到一个视频和一个播放列表时,仅下载视频
|
||||
--yes-playlist 当视频链接到一个视频和一个播放列表时,下载视频和播放列表
|
||||
--age-limit YEARS 仅下载适合指定年龄观看的视频
|
||||
--download-archive FILE 仅下载档案文件中未列出的影片,已下载的记录ID
|
||||
--include-ads 同时下载广告(实验)
|
||||
|
||||
下载选项:
|
||||
-r, --limit-rate RATE 最大下载速率,单位为字节/秒 (e.g. 50K or 4.2M)
|
||||
-R, --retries RETRIES 重试次数 (默认10), or "infinite".
|
||||
--fragment-retries RETRIES 一个分段的最大重试次数(default is 10), or "infinite" (DASH, hlsnative and ISM)
|
||||
--skip-unavailable-fragments 跳过不可用分段(DASH, hlsnative and ISM)
|
||||
--abort-on-unavailable-fragment 当某个分段不可用时终止下载
|
||||
--keep-fragments 下载完成后,将下载的片段保存在磁盘上; 片段默认被删除
|
||||
--buffer-size SIZE 设置缓冲区大小buffer (e.g. 1024 or 16K) (default is 1024)
|
||||
--no-resize-buffer 不自动调整缓冲区大小.默认情况下自动调整
|
||||
--playlist-reverse 以相反的顺序下载播放列表视频
|
||||
--playlist-random 以随机的顺序下载播放列表视频
|
||||
--xattr-set-filesize Set file xattribute ytdl.filesize with expected file size (experimental)
|
||||
--hls-prefer-native 使用本机默认HLS下载器而不是ffmpeg
|
||||
--hls-prefer-ffmpeg 使用ffmpeg而不是本机HLS下载器
|
||||
--hls-use-mpegts 使用TS流容器来存放HLS视频,一些高级播放器允许在下载的同时播放视频
|
||||
--external-downloader COMMAND 使用指定的第三方下载工具,当前支持:aria2c,avconv,axel,curl,ffmpeg,httpie,wget
|
||||
--external-downloader-args ARGS 给第三方下载工具指定参数,如:--external-downloader aria2c --external-downloader-args -j8
|
||||
|
||||
文件系统选项:
|
||||
-a, --batch-file FILE 文件中包含需要下载的URL
|
||||
--id 仅使用文件名中的视频ID
|
||||
-o, --output TEMPLATE Output filename template, see the "OUTPUT TEMPLATE" for all the info
|
||||
--autonumber-start NUMBER 指定%(autonumber)s的起始值(默认为1)
|
||||
--restrict-filenames 将文件名限制为ASCII字符,并避免文件名中的“&”和空格
|
||||
-w, --no-overwrites 不要覆盖文件
|
||||
-c, --continue 强制恢复部分下载的文件。 默认情况下,youtube-dl仅在可能时将恢复下载。
|
||||
--no-continue 不要恢复部分下载的文件(从头开始重新启动)
|
||||
--no-part 不使用.part文件 - 直接写入输出文件
|
||||
--no-mtime 不使用Last-modified header来设置文件最后修改时间
|
||||
--write-description 将视频描述写入.description文件
|
||||
--write-info-json 将视频元数据写入.info.json文件
|
||||
--write-annotations 将视频注释写入.annotations.xml文件
|
||||
--load-info-json FILE 包含视频信息的JSON文件(使用“--write-info-json”选项创建)
|
||||
--cookies FILE 文件从中读取Cookie(经测试,export cookies插件可以使用,但firebug导出的cookies导致错误)
|
||||
--cache-dir DIR 文件存储位置。youtube-dl需要永久保存一些下载的信息。默认为$XDG_CACHE_HOME/youtube-dl或~/.cache/youtube-dl。目前,只有YouTube播放器文件(对于具有混淆签名的视频)进行缓存,但可能会发生变化。
|
||||
--no-cache-dir 不用缓存
|
||||
--rm-cache-dir 删除所有缓存文件
|
||||
|
||||
缩略图:
|
||||
--write-thumbnail 把缩略图写入硬盘
|
||||
--write-all-thumbnails 将所有缩略图写入磁盘
|
||||
--list-thumbnails 列出所有可用的缩略图格式
|
||||
|
||||
详细/模拟选项:
|
||||
-q, --quiet 激活安静模式
|
||||
--no-warnings 忽略警告
|
||||
-s, --simulate 不下载不存储任何文件到硬盘,模拟下载模式
|
||||
--skip-download 不下载视频
|
||||
-g, --get-url 模拟下载获取视频直连
|
||||
-e, --get-title 模拟下载获取标题
|
||||
--get-id 模拟下载获取id
|
||||
--get-thumbnail 模拟下载获取缩略图URL
|
||||
--get-description 模拟下载获取视频描述
|
||||
--get-duration 模拟下载获取视频长度
|
||||
--get-filename 模拟下载获取输出视频文件名
|
||||
--get-format 模拟下载获取输出视频格式
|
||||
-j, --dump-json 模拟下载获取JSON information.
|
||||
-J, --dump-single-json 模拟下载获取每条命令行参数的JSON information.如果是个播放列表,就获取整个播放列表的JSON
|
||||
--print-json 下载的同时获取视频信息的JSON
|
||||
--newline 进度条在新行输出
|
||||
--no-progress 不打印进度条
|
||||
--console-title 在控制台标题栏显示进度
|
||||
-v, --verbose 打印各种调试信息
|
||||
--dump-pages 打印下载下来的使用base64编码的页面来调试问题(非常冗长)
|
||||
--write-pages 将下载的中间页以文件的形式写入当前目录中以调试问题
|
||||
--print-traffic 显示发送和读取HTTP流量
|
||||
-C, --call-home 联系youtube-dl服务器进行调试
|
||||
--no-call-home 不联系youtube-dl服务器进行调试
|
||||
|
||||
解决方法:
|
||||
--encoding ENCODING 强制指定编码(实验)
|
||||
--no-check-certificate 禁止HTTPS证书验证
|
||||
--prefer-insecure 使用未加密的连接来检索有关视频的信息(目前仅支持YouTube)
|
||||
--user-agent UA 指定user agent
|
||||
--referer URL 指定自定义的referer,仅限视频来源于同一网站
|
||||
--add-header FIELD:VALUE 指定一个自定义HTTP头及其值,字段名与值之间用冒号“:”分隔,可以多次使用此选项
|
||||
--bidi-workaround 围绕缺少双向文本支持的终端工作。需要在PATH中有bidiv或fribidi可执行文件
|
||||
--sleep-interval SECONDS 在每次下载之前休眠的秒数;与--max-sleep-interval一起使用时,表示每次下载前随机休眠范围的下限(最小可能的休眠秒数)。
|
||||
--max-sleep-interval SECONDS 每次下载前随机睡眠范围的上限(最大可能睡眠秒数)。只能与--min-sleep-interval一起使用。
|
||||
|
||||
视频格式选项:
|
||||
-f, --format FORMAT 视频格式代码,查看"FORMAT SELECTION"获取所有信息
|
||||
--all-formats 获取所有视频格式
|
||||
--prefer-free-formats 开源的视频格式优先,除非有特定的请求
|
||||
-F, --list-formats 列出请求视频的所有可用格式
|
||||
--youtube-skip-dash-manifest 不要下载关于YouTube视频的DASH清单和相关数据
|
||||
--merge-output-format FORMAT 如果需要合并(例如bestvideo + bestaudio),则输出到给定的容器格式。mkv,mp4,ogg,webm,flv之一。如果不需要合并,则忽略
|
||||
|
||||
字幕选项:
|
||||
--write-sub 下载字幕文件
|
||||
--write-auto-sub 下载自动生成的字幕文件 (YouTube only)
|
||||
--all-subs 下载所有可用的字幕
|
||||
--list-subs 列出所有字幕
|
||||
--sub-format FORMAT 字幕格式,接受格式偏好,如:"srt" or "ass/srt/best"
|
||||
--sub-lang LANGS 要下载的字幕的语言(可选)用逗号分隔,请使用--list-subs表示可用的语言标签
|
||||
|
||||
验证选项:
|
||||
-u, --username USERNAME 使用ID登录
|
||||
-p, --password PASSWORD 账户密码,如果此选项未使用,youtube-dl将交互式地询问。
|
||||
-2, --twofactor TWOFACTOR 双因素认证码
|
||||
-n, --netrc 使用.netrc认证数据
|
||||
--video-password PASSWORD 视频密码(vimeo, smotri, youku)
|
||||
|
||||
Adobe Pass Options:
|
||||
--ap-mso MSO Adobe Pass多系统运营商(电视提供商)标识符,使用--ap-list-mso列出可用的MSO
|
||||
--ap-username USERNAME MSO账号登录
|
||||
--ap-password PASSWORD 账户密码,如果此选项未使用,youtube-dl将交互式地询问。
|
||||
--ap-list-mso 列出所有支持的MSO
|
||||
|
||||
后处理选项:
|
||||
-x, --extract-audio 将视频文件转换为纯音频文件(需要ffmpeg或avconv和ffprobe或avprobe)
|
||||
--audio-format FORMAT 指定音频格式: "best", "aac", "flac", "mp3", "m4a", "opus", "vorbis", or "wav"; "best" by default;未指定-x时无效
|
||||
--audio-quality QUALITY 指定ffmpeg/avconv音频质量,为VBR插入一个0(best)-9(worse)的值(默认5),或者指定比特率
|
||||
--recode-video FORMAT 必要时将视频转码为其他格式(当前支持: mp4|flv|ogg|webm|mkv|avi)
|
||||
--postprocessor-args ARGS 给后处理器提供这些参数
|
||||
-k, --keep-video 视频文件在后处理后保存在磁盘上; 该视频默认被删除
|
||||
--no-post-overwrites 不要覆盖后处理文件; 默认情况下,后处理文件将被覆盖
|
||||
--embed-subs 在视频中嵌入字幕(仅适用于mp4,webm和mkv视频)
|
||||
--embed-thumbnail 将缩略图嵌入音频作为封面艺术
|
||||
--add-metadata 将元数据写入视频文件
|
||||
--metadata-from-title FORMAT 从视频标题中解析附加元数据,如歌曲标题/艺术家。格式语法和--output相似.也可以使用带有命名捕获组的正则表达式。解析的参数替换现有值。Example: --metadata-from-title "%(artist)s - %(title)s" matches a title like "Coldplay - Paradise". Example (regex): --metadata-from-title "(?P<artist>.+?) - (?P<title>.+)"
|
||||
--xattrs 将元数据写入视频文件的xattrs(使用dublin core 和 xdg标准)
|
||||
--fixup POLICY 自动更正文件的已知故障。never(不做任何处理), warn(只发出警告), detect_or_warn (默认;如果可以的话修复文件,否则警告)
|
||||
--prefer-avconv 后处理时相较ffmpeg偏向于avconv
|
||||
--prefer-ffmpeg 后处理优先使用ffmpeg
|
||||
--ffmpeg-location PATH ffmpeg/avconv程序位置;PATH为二进制文件的路径或其所在目录.
|
||||
--exec CMD 在下载后对文件执行命令,类似于find -exec语法.示例:--exec 'adb push {} /sdcard/Music/ && rm {}'
|
||||
--convert-subs FORMAT 转换字幕格式(当前支持: srt|ass|vtt)
|
||||
1
dsp/README.md
Normal file
1
dsp/README.md
Normal file
@ -0,0 +1 @@
|
||||
# 后处理服务
|
||||
199
dsp/dsp.iml
Normal file
199
dsp/dsp.iml
Normal file
@ -0,0 +1,199 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module org.jetbrains.idea.maven.project.MavenProjectsManager.isMavenModule="true" type="JAVA_MODULE" version="4">
|
||||
<component name="FacetManager">
|
||||
<facet type="web" name="Web">
|
||||
<configuration>
|
||||
<webroots />
|
||||
</configuration>
|
||||
</facet>
|
||||
<facet type="Spring" name="Spring">
|
||||
<configuration />
|
||||
</facet>
|
||||
</component>
|
||||
<component name="NewModuleRootManager" LANGUAGE_LEVEL="JDK_1_8">
|
||||
<output url="file://$MODULE_DIR$/target/classes" />
|
||||
<output-test url="file://$MODULE_DIR$/target/test-classes" />
|
||||
<content url="file://$MODULE_DIR$">
|
||||
<sourceFolder url="file://$MODULE_DIR$/src/main/java" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/src/main/resources" type="java-resource" />
|
||||
<excludeFolder url="file://$MODULE_DIR$/target" />
|
||||
</content>
|
||||
<orderEntry type="inheritedJdk" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
<orderEntry type="library" name="Maven: org.jetbrains:annotations:24.1.0" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.apache.commons:commons-compress:1.18" level="project" />
|
||||
<orderEntry type="library" name="Maven: com.alibaba:fastjson:1.2.75" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.projectlombok:lombok:1.18.16" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.springframework.boot:spring-boot-starter-web:2.4.1" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.springframework.boot:spring-boot-starter:2.4.1" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.springframework.boot:spring-boot-starter-logging:2.4.1" level="project" />
|
||||
<orderEntry type="library" name="Maven: ch.qos.logback:logback-classic:1.2.3" level="project" />
|
||||
<orderEntry type="library" name="Maven: ch.qos.logback:logback-core:1.2.3" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.apache.logging.log4j:log4j-to-slf4j:2.13.3" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.apache.logging.log4j:log4j-api:2.13.3" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.slf4j:jul-to-slf4j:1.7.30" level="project" />
|
||||
<orderEntry type="library" name="Maven: jakarta.annotation:jakarta.annotation-api:1.3.5" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.yaml:snakeyaml:1.27" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.springframework.boot:spring-boot-starter-json:2.4.1" level="project" />
|
||||
<orderEntry type="library" name="Maven: com.fasterxml.jackson.datatype:jackson-datatype-jdk8:2.11.3" level="project" />
|
||||
<orderEntry type="library" name="Maven: com.fasterxml.jackson.datatype:jackson-datatype-jsr310:2.11.3" level="project" />
|
||||
<orderEntry type="library" name="Maven: com.fasterxml.jackson.module:jackson-module-parameter-names:2.11.3" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.springframework.boot:spring-boot-starter-tomcat:2.4.1" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.apache.tomcat.embed:tomcat-embed-core:9.0.41" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.glassfish:jakarta.el:3.0.3" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.apache.tomcat.embed:tomcat-embed-websocket:9.0.41" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.springframework:spring-web:5.3.2" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.springframework:spring-beans:5.3.2" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.springframework:spring-webmvc:5.3.2" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.springframework:spring-aop:5.3.2" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.springframework:spring-expression:5.3.2" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.springframework.boot:spring-boot-starter-data-elasticsearch:2.4.1" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.springframework.data:spring-data-elasticsearch:4.1.2" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.springframework:spring-tx:5.3.2" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.springframework.data:spring-data-commons:2.4.2" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.elasticsearch.plugin:transport-netty4-client:7.7.0" level="project" />
|
||||
<orderEntry type="library" name="Maven: io.netty:netty-buffer:4.1.55.Final" level="project" />
|
||||
<orderEntry type="library" name="Maven: io.netty:netty-codec:4.1.55.Final" level="project" />
|
||||
<orderEntry type="library" name="Maven: io.netty:netty-codec-http:4.1.55.Final" level="project" />
|
||||
<orderEntry type="library" name="Maven: io.netty:netty-common:4.1.55.Final" level="project" />
|
||||
<orderEntry type="library" name="Maven: io.netty:netty-handler:4.1.55.Final" level="project" />
|
||||
<orderEntry type="library" name="Maven: io.netty:netty-resolver:4.1.55.Final" level="project" />
|
||||
<orderEntry type="library" name="Maven: io.netty:netty-transport:4.1.55.Final" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.elasticsearch.client:elasticsearch-rest-high-level-client:7.7.0" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.elasticsearch:elasticsearch:7.7.0" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.elasticsearch:elasticsearch-core:7.7.0" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.elasticsearch:elasticsearch-secure-sm:7.7.0" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.elasticsearch:elasticsearch-x-content:7.7.0" level="project" />
|
||||
<orderEntry type="library" name="Maven: com.fasterxml.jackson.dataformat:jackson-dataformat-smile:2.11.3" level="project" />
|
||||
<orderEntry type="library" name="Maven: com.fasterxml.jackson.dataformat:jackson-dataformat-yaml:2.11.3" level="project" />
|
||||
<orderEntry type="library" name="Maven: com.fasterxml.jackson.dataformat:jackson-dataformat-cbor:2.11.3" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.elasticsearch:elasticsearch-geo:7.7.0" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.apache.lucene:lucene-core:8.5.1" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.apache.lucene:lucene-analyzers-common:8.5.1" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.apache.lucene:lucene-backward-codecs:8.5.1" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.apache.lucene:lucene-grouping:8.5.1" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.apache.lucene:lucene-highlighter:8.5.1" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.apache.lucene:lucene-join:8.5.1" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.apache.lucene:lucene-memory:8.5.1" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.apache.lucene:lucene-misc:8.5.1" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.apache.lucene:lucene-queries:8.5.1" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.apache.lucene:lucene-queryparser:8.5.1" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.apache.lucene:lucene-sandbox:8.5.1" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.apache.lucene:lucene-spatial-extras:8.5.1" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.apache.lucene:lucene-spatial3d:8.5.1" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.apache.lucene:lucene-suggest:8.5.1" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.elasticsearch:elasticsearch-cli:7.7.0" level="project" />
|
||||
<orderEntry type="library" name="Maven: com.carrotsearch:hppc:0.8.1" level="project" />
|
||||
<orderEntry type="library" name="Maven: joda-time:joda-time:2.10.4" level="project" />
|
||||
<orderEntry type="library" name="Maven: com.tdunning:t-digest:3.2" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.hdrhistogram:HdrHistogram:2.1.9" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.elasticsearch:jna:4.5.1" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.elasticsearch.client:elasticsearch-rest-client:7.7.0" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.apache.httpcomponents:httpclient:4.5.13" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.apache.httpcomponents:httpcore:4.4.14" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.apache.httpcomponents:httpasyncclient:4.1.4" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.apache.httpcomponents:httpcore-nio:4.4.14" level="project" />
|
||||
<orderEntry type="library" name="Maven: commons-codec:commons-codec:1.15" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.elasticsearch.plugin:mapper-extras-client:7.7.0" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.elasticsearch.plugin:parent-join-client:7.7.0" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.elasticsearch.plugin:aggs-matrix-stats-client:7.7.0" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.elasticsearch.plugin:rank-eval-client:7.7.0" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.elasticsearch.plugin:lang-mustache-client:7.7.0" level="project" />
|
||||
<orderEntry type="library" name="Maven: com.github.spullara.mustache.java:compiler:0.9.6" level="project" />
|
||||
<orderEntry type="library" name="Maven: com.fasterxml.jackson.core:jackson-core:2.11.3" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.slf4j:slf4j-api:1.7.30" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.springframework.cloud:spring-cloud-stream:3.0.7.RELEASE" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.springframework.boot:spring-boot-starter-validation:2.4.1" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.hibernate.validator:hibernate-validator:6.1.6.Final" level="project" />
|
||||
<orderEntry type="library" name="Maven: jakarta.validation:jakarta.validation-api:2.0.2" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.jboss.logging:jboss-logging:3.4.1.Final" level="project" />
|
||||
<orderEntry type="library" name="Maven: com.fasterxml:classmate:1.5.1" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.springframework:spring-messaging:5.3.2" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.springframework.integration:spring-integration-core:5.4.2" level="project" />
|
||||
<orderEntry type="library" name="Maven: io.projectreactor:reactor-core:3.4.1" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.reactivestreams:reactive-streams:1.0.3" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.springframework.integration:spring-integration-jmx:5.4.2" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.springframework.retry:spring-retry:1.3.0" level="project" />
|
||||
<orderEntry type="library" name="Maven: javax.annotation:javax.annotation-api:1.3.2" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.springframework.cloud:spring-cloud-function-context:3.0.9.RELEASE" level="project" />
|
||||
<orderEntry type="library" name="Maven: net.jodah:typetools:0.6.2" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.springframework.cloud:spring-cloud-function-core:3.0.9.RELEASE" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.springframework.cloud:spring-cloud-stream-binder-kafka:3.0.7.RELEASE" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.springframework.cloud:spring-cloud-stream-binder-kafka-core:3.0.7.RELEASE" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.springframework.integration:spring-integration-kafka:5.4.2" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.apache.kafka:kafka-clients:2.6.0" level="project" />
|
||||
<orderEntry type="library" name="Maven: com.github.luben:zstd-jni:1.4.4-7" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.lz4:lz4-java:1.7.1" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.xerial.snappy:snappy-java:1.1.7.3" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.springframework.kafka:spring-kafka:2.6.4" level="project" />
|
||||
<orderEntry type="library" name="Maven: com.google.protobuf:protobuf-java:3.11.4" level="project" />
|
||||
<orderEntry type="library" name="Maven: com.squareup.okhttp3:okhttp:4.8.1" level="project" />
|
||||
<orderEntry type="library" name="Maven: com.squareup.okio:okio:2.7.0" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.jetbrains.kotlin:kotlin-stdlib-common:1.4.21" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.jetbrains.kotlin:kotlin-stdlib:1.4.21" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: org.springframework.boot:spring-boot-starter-test:2.4.1" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: org.springframework.boot:spring-boot-test:2.4.1" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: org.springframework.boot:spring-boot-test-autoconfigure:2.4.1" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: com.jayway.jsonpath:json-path:2.4.0" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: net.minidev:json-smart:2.3" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: net.minidev:accessors-smart:1.2" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: org.ow2.asm:asm:5.0.4" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: jakarta.xml.bind:jakarta.xml.bind-api:2.3.3" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: jakarta.activation:jakarta.activation-api:1.2.2" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: org.assertj:assertj-core:3.18.1" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: org.hamcrest:hamcrest:2.2" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: org.junit.jupiter:junit-jupiter:5.7.0" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: org.junit.jupiter:junit-jupiter-params:5.7.0" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: org.junit.jupiter:junit-jupiter-engine:5.7.0" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: org.junit.platform:junit-platform-engine:1.7.0" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: org.mockito:mockito-core:3.6.28" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: net.bytebuddy:byte-buddy:1.10.18" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: net.bytebuddy:byte-buddy-agent:1.10.18" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: org.objenesis:objenesis:3.1" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: org.mockito:mockito-junit-jupiter:3.6.28" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: org.skyscreamer:jsonassert:1.5.0" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: com.vaadin.external.google:android-json:0.0.20131108.vaadin1" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.springframework:spring-core:5.3.2" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.springframework:spring-jcl:5.3.2" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: org.springframework:spring-test:5.3.2" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: org.xmlunit:xmlunit-core:2.7.0" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: org.springframework.cloud:spring-cloud-stream-test-support:3.0.7.RELEASE" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.springframework.boot:spring-boot-autoconfigure:2.4.1" level="project" />
|
||||
<orderEntry type="library" name="Maven: com.fasterxml.jackson.core:jackson-databind:2.11.3" level="project" />
|
||||
<orderEntry type="library" name="Maven: com.fasterxml.jackson.core:jackson-annotations:2.11.3" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: org.springframework.kafka:spring-kafka-test:2.6.4" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.springframework:spring-context:5.3.2" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: org.apache.kafka:kafka-clients:test:2.6.0" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: org.apache.kafka:kafka-streams:2.6.0" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: org.apache.kafka:connect-json:2.6.0" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: org.apache.kafka:connect-api:2.6.0" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: org.rocksdb:rocksdbjni:5.18.4" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: org.apache.kafka:kafka-streams-test-utils:2.6.0" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: org.apache.kafka:kafka_2.13:2.6.0" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: com.fasterxml.jackson.module:jackson-module-scala_2.13:2.11.3" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: com.fasterxml.jackson.module:jackson-module-paranamer:2.11.3" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: com.thoughtworks.paranamer:paranamer:2.8" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: com.fasterxml.jackson.dataformat:jackson-dataformat-csv:2.11.3" level="project" />
|
||||
<orderEntry type="library" name="Maven: net.sf.jopt-simple:jopt-simple:5.0.4" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: com.yammer.metrics:metrics-core:2.2.0" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: org.scala-lang.modules:scala-collection-compat_2.13:2.1.6" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: org.scala-lang.modules:scala-java8-compat_2.13:0.9.1" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: org.scala-lang:scala-library:2.13.2" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: org.scala-lang:scala-reflect:2.13.2" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: com.typesafe.scala-logging:scala-logging_2.13:3.9.2" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: org.apache.zookeeper:zookeeper:3.5.8" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: org.apache.zookeeper:zookeeper-jute:3.5.8" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: org.apache.yetus:audience-annotations:0.5.0" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: io.netty:netty-transport-native-epoll:4.1.55.Final" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: io.netty:netty-transport-native-unix-common:4.1.55.Final" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: commons-cli:commons-cli:1.4" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: org.apache.kafka:kafka_2.13:test:2.6.0" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: org.junit.jupiter:junit-jupiter-api:5.7.0" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: org.apiguardian:apiguardian-api:1.1.0" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: org.opentest4j:opentest4j:1.2.0" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="Maven: org.junit.platform:junit-platform-commons:1.7.0" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.springframework.boot:spring-boot-devtools:2.4.1" level="project" />
|
||||
<orderEntry type="library" name="Maven: org.springframework.boot:spring-boot:2.4.1" level="project" />
|
||||
<orderEntry type="library" scope="RUNTIME" name="Maven: mysql:mysql-connector-java:8.0.22" level="project" />
|
||||
</component>
|
||||
</module>
|
||||
310
dsp/mvnw
vendored
Normal file
310
dsp/mvnw
vendored
Normal file
@ -0,0 +1,310 @@
|
||||
#!/bin/sh
|
||||
# ----------------------------------------------------------------------------
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# https://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
# ----------------------------------------------------------------------------
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Maven Start Up Batch script
|
||||
#
|
||||
# Required ENV vars:
|
||||
# ------------------
|
||||
# JAVA_HOME - location of a JDK home dir
|
||||
#
|
||||
# Optional ENV vars
|
||||
# -----------------
|
||||
# M2_HOME - location of maven2's installed home dir
|
||||
# MAVEN_OPTS - parameters passed to the Java VM when running Maven
|
||||
# e.g. to debug Maven itself, use
|
||||
# set MAVEN_OPTS=-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8000
|
||||
# MAVEN_SKIP_RC - flag to disable loading of mavenrc files
|
||||
# ----------------------------------------------------------------------------
|
||||
|
||||
if [ -z "$MAVEN_SKIP_RC" ] ; then
|
||||
|
||||
if [ -f /etc/mavenrc ] ; then
|
||||
. /etc/mavenrc
|
||||
fi
|
||||
|
||||
if [ -f "$HOME/.mavenrc" ] ; then
|
||||
. "$HOME/.mavenrc"
|
||||
fi
|
||||
|
||||
fi
|
||||
|
||||
# OS specific support. $var _must_ be set to either true or false.
|
||||
cygwin=false;
|
||||
darwin=false;
|
||||
mingw=false
|
||||
case "`uname`" in
|
||||
CYGWIN*) cygwin=true ;;
|
||||
MINGW*) mingw=true;;
|
||||
Darwin*) darwin=true
|
||||
# Use /usr/libexec/java_home if available, otherwise fall back to /Library/Java/Home
|
||||
# See https://developer.apple.com/library/mac/qa/qa1170/_index.html
|
||||
if [ -z "$JAVA_HOME" ]; then
|
||||
if [ -x "/usr/libexec/java_home" ]; then
|
||||
export JAVA_HOME="`/usr/libexec/java_home`"
|
||||
else
|
||||
export JAVA_HOME="/Library/Java/Home"
|
||||
fi
|
||||
fi
|
||||
;;
|
||||
esac
|
||||
|
||||
if [ -z "$JAVA_HOME" ] ; then
|
||||
if [ -r /etc/gentoo-release ] ; then
|
||||
JAVA_HOME=`java-config --jre-home`
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ -z "$M2_HOME" ] ; then
|
||||
## resolve links - $0 may be a link to maven's home
|
||||
PRG="$0"
|
||||
|
||||
# need this for relative symlinks
|
||||
while [ -h "$PRG" ] ; do
|
||||
ls=`ls -ld "$PRG"`
|
||||
link=`expr "$ls" : '.*-> \(.*\)$'`
|
||||
if expr "$link" : '/.*' > /dev/null; then
|
||||
PRG="$link"
|
||||
else
|
||||
PRG="`dirname "$PRG"`/$link"
|
||||
fi
|
||||
done
|
||||
|
||||
saveddir=`pwd`
|
||||
|
||||
M2_HOME=`dirname "$PRG"`/..
|
||||
|
||||
# make it fully qualified
|
||||
M2_HOME=`cd "$M2_HOME" && pwd`
|
||||
|
||||
cd "$saveddir"
|
||||
# echo Using m2 at $M2_HOME
|
||||
fi
|
||||
|
||||
# For Cygwin, ensure paths are in UNIX format before anything is touched
|
||||
if $cygwin ; then
|
||||
[ -n "$M2_HOME" ] &&
|
||||
M2_HOME=`cygpath --unix "$M2_HOME"`
|
||||
[ -n "$JAVA_HOME" ] &&
|
||||
JAVA_HOME=`cygpath --unix "$JAVA_HOME"`
|
||||
[ -n "$CLASSPATH" ] &&
|
||||
CLASSPATH=`cygpath --path --unix "$CLASSPATH"`
|
||||
fi
|
||||
|
||||
# For Mingw, ensure paths are in UNIX format before anything is touched
|
||||
if $mingw ; then
|
||||
[ -n "$M2_HOME" ] &&
|
||||
M2_HOME="`(cd "$M2_HOME"; pwd)`"
|
||||
[ -n "$JAVA_HOME" ] &&
|
||||
JAVA_HOME="`(cd "$JAVA_HOME"; pwd)`"
|
||||
fi
|
||||
|
||||
if [ -z "$JAVA_HOME" ]; then
|
||||
javaExecutable="`which javac`"
|
||||
if [ -n "$javaExecutable" ] && ! [ "`expr \"$javaExecutable\" : '\([^ ]*\)'`" = "no" ]; then
|
||||
# readlink(1) is not available as standard on Solaris 10.
|
||||
readLink=`which readlink`
|
||||
if [ ! `expr "$readLink" : '\([^ ]*\)'` = "no" ]; then
|
||||
if $darwin ; then
|
||||
javaHome="`dirname \"$javaExecutable\"`"
|
||||
javaExecutable="`cd \"$javaHome\" && pwd -P`/javac"
|
||||
else
|
||||
javaExecutable="`readlink -f \"$javaExecutable\"`"
|
||||
fi
|
||||
javaHome="`dirname \"$javaExecutable\"`"
|
||||
javaHome=`expr "$javaHome" : '\(.*\)/bin'`
|
||||
JAVA_HOME="$javaHome"
|
||||
export JAVA_HOME
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ -z "$JAVACMD" ] ; then
|
||||
if [ -n "$JAVA_HOME" ] ; then
|
||||
if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
|
||||
# IBM's JDK on AIX uses strange locations for the executables
|
||||
JAVACMD="$JAVA_HOME/jre/sh/java"
|
||||
else
|
||||
JAVACMD="$JAVA_HOME/bin/java"
|
||||
fi
|
||||
else
|
||||
JAVACMD="`which java`"
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ ! -x "$JAVACMD" ] ; then
|
||||
echo "Error: JAVA_HOME is not defined correctly." >&2
|
||||
echo " We cannot execute $JAVACMD" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ -z "$JAVA_HOME" ] ; then
|
||||
echo "Warning: JAVA_HOME environment variable is not set."
|
||||
fi
|
||||
|
||||
CLASSWORLDS_LAUNCHER=org.codehaus.plexus.classworlds.launcher.Launcher
|
||||
|
||||
# traverses directory structure from process work directory to filesystem root
|
||||
# first directory with .mvn subdirectory is considered project base directory
|
||||
# Walk upward from the directory given as $1 towards the filesystem root and
# print the first directory that contains a .mvn subdirectory. When no such
# directory is found, the starting directory itself is printed.
#   $1 - directory to start the search from (required)
# Returns 1, printing nothing on stdout, when $1 is empty.
find_maven_basedir() {

  if [ -z "$1" ]
  then
    # Diagnostics go to stderr: the caller captures stdout as the result, so
    # a message printed there would be mistaken for a base directory.
    echo "Path not specified to find_maven_basedir" >&2
    return 1
  fi

  basedir="$1"
  wdir="$1"
  while [ "$wdir" != '/' ] ; do
    if [ -d "$wdir"/.mvn ] ; then
      basedir=$wdir
      break
    fi
    # workaround for JBEAP-8937 (on Solaris 10/Sparc)
    if [ -d "${wdir}" ]; then
      wdir=`cd "$wdir/.."; pwd`
    else
      # $wdir is not a directory, so it can never change: stop here instead
      # of spinning forever.
      break
    fi
    # end of workaround
  done
  echo "${basedir}"
}
|
||||
|
||||
# concatenates all lines of a file
|
||||
# Print the contents of file $1 on a single line, turning every run of line
# terminators into one space. Prints nothing when $1 is not a regular file.
#   $1 - path of the file to flatten
concat_lines() {
  if [ -f "$1" ]; then
    # Squeeze \r together with \n: a file checked out with CRLF line endings
    # would otherwise produce words that end in a stray carriage return.
    # printf is used instead of echo so backslashes in the file are emitted
    # verbatim regardless of which sh implementation runs the script.
    printf '%s\n' "$(tr -s '\r\n' ' ' < "$1")"
  fi
}
|
||||
|
||||
BASE_DIR=`find_maven_basedir "$(pwd)"`
|
||||
if [ -z "$BASE_DIR" ]; then
|
||||
exit 1;
|
||||
fi
|
||||
|
||||
##########################################################################################
|
||||
# Extension to allow automatically downloading the maven-wrapper.jar from Maven-central
|
||||
# This allows using the maven wrapper in projects that prohibit checking in binary data.
|
||||
##########################################################################################
|
||||
# Make sure $BASE_DIR/.mvn/wrapper/maven-wrapper.jar exists, downloading it
# with wget, curl, or (as a last resort) the MavenWrapperDownloader Java
# helper when it is missing.
if [ -r "$BASE_DIR/.mvn/wrapper/maven-wrapper.jar" ]; then
    if [ "$MVNW_VERBOSE" = true ]; then
      echo "Found .mvn/wrapper/maven-wrapper.jar"
    fi
else
    if [ "$MVNW_VERBOSE" = true ]; then
      echo "Couldn't find .mvn/wrapper/maven-wrapper.jar, downloading it ..."
    fi
    # Default download location; MVNW_REPOURL substitutes another repository root.
    if [ -n "$MVNW_REPOURL" ]; then
      jarUrl="$MVNW_REPOURL/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar"
    else
      jarUrl="https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar"
    fi
    # A wrapperUrl entry in maven-wrapper.properties overrides the URL above.
    # The file is optional, so only read it when it is actually there; this
    # avoids a spurious "cannot open" error from the redirect.
    wrapperProperties="$BASE_DIR/.mvn/wrapper/maven-wrapper.properties"
    if [ -r "$wrapperProperties" ]; then
      while IFS="=" read key value; do
        case "$key" in (wrapperUrl) jarUrl="$value"; break ;;
        esac
      done < "$wrapperProperties"
    fi
    if [ "$MVNW_VERBOSE" = true ]; then
      echo "Downloading from: $jarUrl"
    fi
    wrapperJarPath="$BASE_DIR/.mvn/wrapper/maven-wrapper.jar"
    if $cygwin; then
      wrapperJarPath=`cygpath --path --windows "$wrapperJarPath"`
    fi

    if command -v wget > /dev/null; then
        if [ "$MVNW_VERBOSE" = true ]; then
          echo "Found wget ... using wget"
        fi
        if [ -z "$MVNW_USERNAME" ] || [ -z "$MVNW_PASSWORD" ]; then
            wget "$jarUrl" -O "$wrapperJarPath"
        else
            # Credentials are quoted so that spaces or glob characters in
            # them are passed through as a single argument.
            wget --http-user="$MVNW_USERNAME" --http-password="$MVNW_PASSWORD" "$jarUrl" -O "$wrapperJarPath"
        fi
    elif command -v curl > /dev/null; then
        if [ "$MVNW_VERBOSE" = true ]; then
          echo "Found curl ... using curl"
        fi
        if [ -z "$MVNW_USERNAME" ] || [ -z "$MVNW_PASSWORD" ]; then
            curl -o "$wrapperJarPath" "$jarUrl" -f
        else
            curl --user "$MVNW_USERNAME:$MVNW_PASSWORD" -o "$wrapperJarPath" "$jarUrl" -f
        fi

    else
        if [ "$MVNW_VERBOSE" = true ]; then
          echo "Falling back to using Java to download"
        fi
        javaClass="$BASE_DIR/.mvn/wrapper/MavenWrapperDownloader.java"
        # For Cygwin, switch paths to Windows format before running javac
        if $cygwin; then
          javaClass=`cygpath --path --windows "$javaClass"`
        fi
        if [ -e "$javaClass" ]; then
            if [ ! -e "$BASE_DIR/.mvn/wrapper/MavenWrapperDownloader.class" ]; then
                if [ "$MVNW_VERBOSE" = true ]; then
                  echo " - Compiling MavenWrapperDownloader.java ..."
                fi
                # Compiling the Java class
                ("$JAVA_HOME/bin/javac" "$javaClass")
            fi
            if [ -e "$BASE_DIR/.mvn/wrapper/MavenWrapperDownloader.class" ]; then
                # Running the downloader
                if [ "$MVNW_VERBOSE" = true ]; then
                  echo " - Running MavenWrapperDownloader.java ..."
                fi
                # Run from $BASE_DIR so the relative classpath resolves no
                # matter where the script was started, and hand over the
                # base directory as "." because MAVEN_PROJECTBASEDIR is not
                # assigned until after this section. Relative paths also
                # keep this working under Cygwin without a cygpath round trip.
                # NOTE(review): assumes the downloader resolves its directory
                # argument relative to the working directory - confirm.
                (cd "$BASE_DIR" && "$JAVA_HOME/bin/java" -cp .mvn/wrapper MavenWrapperDownloader .)
            fi
        fi
    fi
fi
|
||||
##########################################################################################
|
||||
# End of extension
|
||||
##########################################################################################
|
||||
|
||||
export MAVEN_PROJECTBASEDIR=${MAVEN_BASEDIR:-"$BASE_DIR"}
|
||||
if [ "$MVNW_VERBOSE" = true ]; then
|
||||
echo $MAVEN_PROJECTBASEDIR
|
||||
fi
|
||||
MAVEN_OPTS="$(concat_lines "$MAVEN_PROJECTBASEDIR/.mvn/jvm.config") $MAVEN_OPTS"
|
||||
|
||||
# For Cygwin, switch paths to Windows format before running java
|
||||
if $cygwin; then
|
||||
[ -n "$M2_HOME" ] &&
|
||||
M2_HOME=`cygpath --path --windows "$M2_HOME"`
|
||||
[ -n "$JAVA_HOME" ] &&
|
||||
JAVA_HOME=`cygpath --path --windows "$JAVA_HOME"`
|
||||
[ -n "$CLASSPATH" ] &&
|
||||
CLASSPATH=`cygpath --path --windows "$CLASSPATH"`
|
||||
[ -n "$MAVEN_PROJECTBASEDIR" ] &&
|
||||
MAVEN_PROJECTBASEDIR=`cygpath --path --windows "$MAVEN_PROJECTBASEDIR"`
|
||||
fi
|
||||
|
||||
# Provide a "standardized" way to retrieve the CLI args that will
|
||||
# work with both Windows and non-Windows executions.
|
||||
MAVEN_CMD_LINE_ARGS="$MAVEN_CONFIG $@"
|
||||
export MAVEN_CMD_LINE_ARGS
|
||||
|
||||
WRAPPER_LAUNCHER=org.apache.maven.wrapper.MavenWrapperMain
|
||||
|
||||
exec "$JAVACMD" \
|
||||
$MAVEN_OPTS \
|
||||
-classpath "$MAVEN_PROJECTBASEDIR/.mvn/wrapper/maven-wrapper.jar" \
|
||||
"-Dmaven.home=${M2_HOME}" "-Dmaven.multiModuleProjectDirectory=${MAVEN_PROJECTBASEDIR}" \
|
||||
${WRAPPER_LAUNCHER} $MAVEN_CONFIG "$@"
|
||||
182
dsp/mvnw.cmd
vendored
Normal file
182
dsp/mvnw.cmd
vendored
Normal file
@ -0,0 +1,182 @@
|
||||
@REM ----------------------------------------------------------------------------
|
||||
@REM Licensed to the Apache Software Foundation (ASF) under one
|
||||
@REM or more contributor license agreements. See the NOTICE file
|
||||
@REM distributed with this work for additional information
|
||||
@REM regarding copyright ownership. The ASF licenses this file
|
||||
@REM to you under the Apache License, Version 2.0 (the
|
||||
@REM "License"); you may not use this file except in compliance
|
||||
@REM with the License. You may obtain a copy of the License at
|
||||
@REM
|
||||
@REM https://www.apache.org/licenses/LICENSE-2.0
|
||||
@REM
|
||||
@REM Unless required by applicable law or agreed to in writing,
|
||||
@REM software distributed under the License is distributed on an
|
||||
@REM "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
@REM KIND, either express or implied. See the License for the
|
||||
@REM specific language governing permissions and limitations
|
||||
@REM under the License.
|
||||
@REM ----------------------------------------------------------------------------
|
||||
|
||||
@REM ----------------------------------------------------------------------------
|
||||
@REM Maven Start Up Batch script
|
||||
@REM
|
||||
@REM Required ENV vars:
|
||||
@REM JAVA_HOME - location of a JDK home dir
|
||||
@REM
|
||||
@REM Optional ENV vars
|
||||
@REM M2_HOME - location of maven2's installed home dir
|
||||
@REM MAVEN_BATCH_ECHO - set to 'on' to enable the echoing of the batch commands
|
||||
@REM MAVEN_BATCH_PAUSE - set to 'on' to wait for a keystroke before ending
|
||||
@REM MAVEN_OPTS - parameters passed to the Java VM when running Maven
|
||||
@REM e.g. to debug Maven itself, use
|
||||
@REM set MAVEN_OPTS=-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8000
|
||||
@REM MAVEN_SKIP_RC - flag to disable loading of mavenrc files
|
||||
@REM ----------------------------------------------------------------------------
|
||||
|
||||
@REM Begin all REM lines with '@' in case MAVEN_BATCH_ECHO is 'on'
|
||||
@echo off
|
||||
@REM set title of command window
|
||||
title %0
|
||||
@REM enable echoing by setting MAVEN_BATCH_ECHO to 'on'
|
||||
@if "%MAVEN_BATCH_ECHO%" == "on" echo %MAVEN_BATCH_ECHO%
|
||||
|
||||
@REM set %HOME% to equivalent of $HOME
|
||||
if "%HOME%" == "" (set "HOME=%HOMEDRIVE%%HOMEPATH%")
|
||||
|
||||
@REM Execute a user defined script before this one
|
||||
if not "%MAVEN_SKIP_RC%" == "" goto skipRcPre
|
||||
@REM check for pre script, once with legacy .bat ending and once with .cmd ending
|
||||
if exist "%HOME%\mavenrc_pre.bat" call "%HOME%\mavenrc_pre.bat"
|
||||
if exist "%HOME%\mavenrc_pre.cmd" call "%HOME%\mavenrc_pre.cmd"
|
||||
:skipRcPre
|
||||
|
||||
@setlocal
|
||||
|
||||
set ERROR_CODE=0
|
||||
|
||||
@REM To isolate internal variables from possible post scripts, we use another setlocal
|
||||
@setlocal
|
||||
|
||||
@REM ==== START VALIDATION ====
|
||||
if not "%JAVA_HOME%" == "" goto OkJHome
|
||||
|
||||
echo.
|
||||
echo Error: JAVA_HOME not found in your environment. >&2
|
||||
echo Please set the JAVA_HOME variable in your environment to match the >&2
|
||||
echo location of your Java installation. >&2
|
||||
echo.
|
||||
goto error
|
||||
|
||||
:OkJHome
|
||||
if exist "%JAVA_HOME%\bin\java.exe" goto init
|
||||
|
||||
echo.
|
||||
echo Error: JAVA_HOME is set to an invalid directory. >&2
|
||||
echo JAVA_HOME = "%JAVA_HOME%" >&2
|
||||
echo Please set the JAVA_HOME variable in your environment to match the >&2
|
||||
echo location of your Java installation. >&2
|
||||
echo.
|
||||
goto error
|
||||
|
||||
@REM ==== END VALIDATION ====
|
||||
|
||||
:init
|
||||
|
||||
@REM Find the project base dir, i.e. the directory that contains the folder ".mvn".
|
||||
@REM Fallback to current working directory if not found.
|
||||
|
||||
set MAVEN_PROJECTBASEDIR=%MAVEN_BASEDIR%
|
||||
IF NOT "%MAVEN_PROJECTBASEDIR%"=="" goto endDetectBaseDir
|
||||
|
||||
set EXEC_DIR=%CD%
|
||||
set WDIR=%EXEC_DIR%
|
||||
:findBaseDir
|
||||
IF EXIST "%WDIR%"\.mvn goto baseDirFound
|
||||
cd ..
|
||||
IF "%WDIR%"=="%CD%" goto baseDirNotFound
|
||||
set WDIR=%CD%
|
||||
goto findBaseDir
|
||||
|
||||
:baseDirFound
|
||||
set MAVEN_PROJECTBASEDIR=%WDIR%
|
||||
cd "%EXEC_DIR%"
|
||||
goto endDetectBaseDir
|
||||
|
||||
:baseDirNotFound
|
||||
set MAVEN_PROJECTBASEDIR=%EXEC_DIR%
|
||||
cd "%EXEC_DIR%"
|
||||
|
||||
:endDetectBaseDir
|
||||
|
||||
IF NOT EXIST "%MAVEN_PROJECTBASEDIR%\.mvn\jvm.config" goto endReadAdditionalConfig
|
||||
|
||||
@setlocal EnableExtensions EnableDelayedExpansion
|
||||
for /F "usebackq delims=" %%a in ("%MAVEN_PROJECTBASEDIR%\.mvn\jvm.config") do set JVM_CONFIG_MAVEN_PROPS=!JVM_CONFIG_MAVEN_PROPS! %%a
|
||||
@endlocal & set JVM_CONFIG_MAVEN_PROPS=%JVM_CONFIG_MAVEN_PROPS%
|
||||
|
||||
:endReadAdditionalConfig
|
||||
|
||||
SET MAVEN_JAVA_EXE="%JAVA_HOME%\bin\java.exe"
|
||||
set WRAPPER_JAR="%MAVEN_PROJECTBASEDIR%\.mvn\wrapper\maven-wrapper.jar"
|
||||
set WRAPPER_LAUNCHER=org.apache.maven.wrapper.MavenWrapperMain
|
||||
|
||||
set DOWNLOAD_URL="https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar"
|
||||
|
||||
@REM Replace the default DOWNLOAD_URL with the wrapperUrl entry of
@REM maven-wrapper.properties when one is present. "usebackq" is required
@REM here: without it FOR /F treats the double-quoted path as a literal
@REM string to tokenize and the properties file is never opened.
FOR /F "usebackq tokens=1,2 delims==" %%A IN ("%MAVEN_PROJECTBASEDIR%\.mvn\wrapper\maven-wrapper.properties") DO (
    IF "%%A"=="wrapperUrl" SET DOWNLOAD_URL=%%B
)
|
||||
|
||||
@REM Extension to allow automatically downloading the maven-wrapper.jar from Maven-central
|
||||
@REM This allows using the maven wrapper in projects that prohibit checking in binary data.
|
||||
if exist %WRAPPER_JAR% (
|
||||
if "%MVNW_VERBOSE%" == "true" (
|
||||
echo Found %WRAPPER_JAR%
|
||||
)
|
||||
) else (
|
||||
if not "%MVNW_REPOURL%" == "" (
|
||||
SET DOWNLOAD_URL="%MVNW_REPOURL%/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar"
|
||||
)
|
||||
if "%MVNW_VERBOSE%" == "true" (
|
||||
echo Couldn't find %WRAPPER_JAR%, downloading it ...
|
||||
echo Downloading from: %DOWNLOAD_URL%
|
||||
)
|
||||
|
||||
powershell -Command "&{"^
|
||||
"$webclient = new-object System.Net.WebClient;"^
|
||||
"if (-not ([string]::IsNullOrEmpty('%MVNW_USERNAME%') -and [string]::IsNullOrEmpty('%MVNW_PASSWORD%'))) {"^
|
||||
"$webclient.Credentials = new-object System.Net.NetworkCredential('%MVNW_USERNAME%', '%MVNW_PASSWORD%');"^
|
||||
"}"^
|
||||
"[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12; $webclient.DownloadFile('%DOWNLOAD_URL%', '%WRAPPER_JAR%')"^
|
||||
"}"
|
||||
if "%MVNW_VERBOSE%" == "true" (
|
||||
echo Finished downloading %WRAPPER_JAR%
|
||||
)
|
||||
)
|
||||
@REM End of extension
|
||||
|
||||
@REM Provide a "standardized" way to retrieve the CLI args that will
|
||||
@REM work with both Windows and non-Windows executions.
|
||||
set MAVEN_CMD_LINE_ARGS=%*
|
||||
|
||||
%MAVEN_JAVA_EXE% %JVM_CONFIG_MAVEN_PROPS% %MAVEN_OPTS% %MAVEN_DEBUG_OPTS% -classpath %WRAPPER_JAR% "-Dmaven.multiModuleProjectDirectory=%MAVEN_PROJECTBASEDIR%" %WRAPPER_LAUNCHER% %MAVEN_CONFIG% %*
|
||||
if ERRORLEVEL 1 goto error
|
||||
goto end
|
||||
|
||||
:error
|
||||
set ERROR_CODE=1
|
||||
|
||||
:end
|
||||
@endlocal & set ERROR_CODE=%ERROR_CODE%
|
||||
|
||||
if not "%MAVEN_SKIP_RC%" == "" goto skipRcPost
|
||||
@REM check for post script, once with legacy .bat ending and once with .cmd ending
|
||||
if exist "%HOME%\mavenrc_post.bat" call "%HOME%\mavenrc_post.bat"
|
||||
if exist "%HOME%\mavenrc_post.cmd" call "%HOME%\mavenrc_post.cmd"
|
||||
:skipRcPost
|
||||
|
||||
@REM pause the script if MAVEN_BATCH_PAUSE is set to 'on'
|
||||
if "%MAVEN_BATCH_PAUSE%" == "on" pause
|
||||
|
||||
if "%MAVEN_TERMINATE_CMD%" == "on" exit %ERROR_CODE%
|
||||
|
||||
exit /B %ERROR_CODE%
|
||||
138
dsp/pom.xml
Normal file
138
dsp/pom.xml
Normal file
@ -0,0 +1,138 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<parent>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-parent</artifactId>
|
||||
<version>2.4.1</version>
|
||||
<relativePath/> <!-- lookup parent from repository -->
|
||||
</parent>
|
||||
<groupId>com.jsc</groupId>
|
||||
<artifactId>dsp</artifactId>
|
||||
<version>0.0.1-SNAPSHOT</version>
|
||||
<name>dsp</name>
|
||||
|
||||
<properties>
|
||||
<java.version>1.8</java.version>
|
||||
<spring-cloud.version>Hoxton.SR7</spring-cloud.version>
|
||||
<elasticsearch.version>7.7.0</elasticsearch.version>
|
||||
</properties>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.jetbrains</groupId>
|
||||
<artifactId>annotations</artifactId>
|
||||
<version>RELEASE</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.commons</groupId>
|
||||
<artifactId>commons-compress</artifactId>
|
||||
<version>1.18</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.alibaba</groupId>
|
||||
<artifactId>fastjson</artifactId>
|
||||
<version>1.2.75</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.projectlombok</groupId>
|
||||
<artifactId>lombok</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-web</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-data-elasticsearch</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.springframework.cloud</groupId>
|
||||
<artifactId>spring-cloud-stream</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.springframework.cloud</groupId>
|
||||
<artifactId>spring-cloud-stream-binder-kafka</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.google.protobuf</groupId>
|
||||
<artifactId>protobuf-java</artifactId>
|
||||
<version>3.11.4</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.squareup.okhttp3</groupId>
|
||||
<artifactId>okhttp</artifactId>
|
||||
<version>4.8.1</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-test</artifactId>
|
||||
<scope>test</scope>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>org.junit.vintage</groupId>
|
||||
<artifactId>junit-vintage-engine</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.springframework.cloud</groupId>
|
||||
<artifactId>spring-cloud-stream-test-support</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.springframework.kafka</groupId>
|
||||
<artifactId>spring-kafka-test</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<!-- devtools hot reload; development-time only -->
<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-devtools</artifactId>
    <optional>true</optional>
    <!-- was "true", which is not a Maven scope (compile, provided, runtime, test, system, import) -->
    <scope>runtime</scope>
</dependency>
<!-- the second org.projectlombok:lombok declaration that followed here was removed;
     lombok is already declared earlier in this dependencies list -->
|
||||
<dependency>
|
||||
<groupId>mysql</groupId>
|
||||
<artifactId>mysql-connector-java</artifactId>
|
||||
<scope>runtime</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<dependencyManagement>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.springframework.cloud</groupId>
|
||||
<artifactId>spring-cloud-dependencies</artifactId>
|
||||
<version>${spring-cloud.version}</version>
|
||||
<type>pom</type>
|
||||
<scope>import</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</dependencyManagement>
|
||||
<repositories>
|
||||
<repository>
    <id>nexus-aliyun</id>
    <name>Nexus aliyun</name>
    <!-- https instead of http: Maven 3.8.1+ blocks plain-HTTP repositories by default -->
    <url>https://maven.aliyun.com/nexus/content/groups/public</url>
</repository>
|
||||
</repositories>
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-maven-plugin</artifactId>
|
||||
<configuration>
|
||||
<includeSystemScope>true</includeSystemScope>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
|
||||
</project>
|
||||
17
dsp/src/main/java/com/jsc/dsp/DspApplication.java
Normal file
17
dsp/src/main/java/com/jsc/dsp/DspApplication.java
Normal file
@ -0,0 +1,17 @@
|
||||
package com.jsc.dsp;
|
||||
|
||||
import org.springframework.boot.SpringApplication;
|
||||
import org.springframework.boot.autoconfigure.SpringBootApplication;
|
||||
import org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration;
|
||||
import org.springframework.scheduling.annotation.EnableScheduling;
|
||||
|
||||
@SpringBootApplication(exclude = DataSourceAutoConfiguration.class)
|
||||
@EnableScheduling
|
||||
|
||||
public class DspApplication {
|
||||
|
||||
public static void main(String[] args) {
|
||||
SpringApplication.run(DspApplication.class, args);
|
||||
}
|
||||
|
||||
}
|
||||
17
dsp/src/main/java/com/jsc/dsp/binding/FileDlBinding.java
Normal file
17
dsp/src/main/java/com/jsc/dsp/binding/FileDlBinding.java
Normal file
@ -0,0 +1,17 @@
|
||||
package com.jsc.dsp.binding;
|
||||
|
||||
import org.springframework.cloud.stream.annotation.Input;
|
||||
import org.springframework.cloud.stream.annotation.Output;
|
||||
import org.springframework.messaging.MessageChannel;
|
||||
import org.springframework.messaging.SubscribableChannel;
|
||||
|
||||
public interface FileDlBinding {
|
||||
public static String FILE_DL_PIPELINE_IN = "file_dl_pipeline_in";
|
||||
public static String FILE_DL_PIPELINE_OUT = "file_dl_pipeline_out";
|
||||
|
||||
@Input(FileDlBinding.FILE_DL_PIPELINE_IN)
|
||||
SubscribableChannel fileDlInput();
|
||||
|
||||
@Output(FileDlBinding.FILE_DL_PIPELINE_OUT)
|
||||
MessageChannel fileDlOutPut();
|
||||
}
|
||||
18
dsp/src/main/java/com/jsc/dsp/binding/ProtobufBinding.java
Normal file
18
dsp/src/main/java/com/jsc/dsp/binding/ProtobufBinding.java
Normal file
@ -0,0 +1,18 @@
|
||||
package com.jsc.dsp.binding;
|
||||
|
||||
import org.springframework.cloud.stream.annotation.Input;
|
||||
import org.springframework.cloud.stream.annotation.Output;
|
||||
import org.springframework.messaging.MessageChannel;
|
||||
import org.springframework.messaging.SubscribableChannel;
|
||||
|
||||
public interface ProtobufBinding {
|
||||
public static String PROTOBUF_PIPELINE_IN = "protobuf_pipeline_in";
|
||||
public static String PROTOBUF_PIPELINE_OUT = "protobuf_pipeline_out";
|
||||
|
||||
@Input(ProtobufBinding.PROTOBUF_PIPELINE_IN)
|
||||
SubscribableChannel protobufInput();
|
||||
|
||||
@Output(ProtobufBinding.PROTOBUF_PIPELINE_OUT)
|
||||
MessageChannel protobufOutPut();
|
||||
|
||||
}
|
||||
20
dsp/src/main/java/com/jsc/dsp/binding/StorageBinding.java
Normal file
20
dsp/src/main/java/com/jsc/dsp/binding/StorageBinding.java
Normal file
@ -0,0 +1,20 @@
|
||||
package com.jsc.dsp.binding;
|
||||
|
||||
import org.springframework.cloud.stream.annotation.Input;
|
||||
import org.springframework.cloud.stream.annotation.Output;
|
||||
import org.springframework.messaging.MessageChannel;
|
||||
import org.springframework.messaging.SubscribableChannel;
|
||||
|
||||
public interface StorageBinding {
|
||||
|
||||
public static String STORAGE_PIPELINE_IN = "storage_pipeline_in";
|
||||
public static String STORAGE_PIPELINE_OUT = "storage_pipeline_out";
|
||||
|
||||
@Input(StorageBinding.STORAGE_PIPELINE_IN)
|
||||
SubscribableChannel StorageInput();
|
||||
|
||||
|
||||
@Output(StorageBinding.STORAGE_PIPELINE_OUT)
|
||||
MessageChannel StorageOutput();
|
||||
|
||||
}
|
||||
55
dsp/src/main/java/com/jsc/dsp/config/Configuration.java
Normal file
55
dsp/src/main/java/com/jsc/dsp/config/Configuration.java
Normal file
@ -0,0 +1,55 @@
|
||||
package com.jsc.dsp.config;
|
||||
|
||||
import com.jsc.dsp.utils.EsUtils;
|
||||
import org.elasticsearch.client.RestHighLevelClient;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.boot.json.JacksonJsonParser;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.util.Base64;
|
||||
import java.util.Base64.Decoder;
|
||||
import java.util.Base64.Encoder;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
|
||||
@Component
|
||||
public class Configuration {
|
||||
|
||||
@Value("${es.ip}")
|
||||
String esIp;
|
||||
|
||||
@Value("${es.port}")
|
||||
Integer esPort;
|
||||
|
||||
@Value("${es.username}")
|
||||
String esUsername;
|
||||
|
||||
@Value("${es.password}")
|
||||
String esPassword;
|
||||
|
||||
@Bean
|
||||
public JacksonJsonParser getJacksonParser() {
|
||||
return new JacksonJsonParser();
|
||||
}
|
||||
|
||||
@Bean
|
||||
public Decoder getBase64() {
|
||||
return Base64.getDecoder();
|
||||
}
|
||||
|
||||
@Bean
|
||||
public Encoder getEncoder() {
|
||||
return Base64.getEncoder();
|
||||
}
|
||||
|
||||
@Bean
|
||||
public ExecutorService getTheadPool() {
|
||||
return Executors.newFixedThreadPool(4);
|
||||
}
|
||||
|
||||
@Bean
|
||||
public RestHighLevelClient esClient() {
|
||||
return EsUtils.getElasticsearchClient(esIp, esPort, esUsername, esPassword);
|
||||
}
|
||||
}
|
||||
24
dsp/src/main/java/com/jsc/dsp/config/TomcatConfig.java
Normal file
24
dsp/src/main/java/com/jsc/dsp/config/TomcatConfig.java
Normal file
@ -0,0 +1,24 @@
|
||||
package com.jsc.dsp.config;
|
||||
|
||||
import org.springframework.boot.web.embedded.tomcat.TomcatConnectorCustomizer;
|
||||
import org.springframework.boot.web.embedded.tomcat.TomcatServletWebServerFactory;
|
||||
import org.springframework.boot.web.servlet.server.ServletWebServerFactory;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
|
||||
@Configuration
|
||||
public class TomcatConfig {
|
||||
//Tomcat的高版本(具体从哪个版本开始没有具体了解)中增加了一个新特性,就是严格按照 RFC 3986规范进行访问解析,
|
||||
//而 RFC 3986规范规定Url中只允许包含英文字母(a-zA-Z)、数字(0-9)、-_.~4个特殊字符以及所有保留字符
|
||||
//(RFC3986中指定了以下字符为保留字符:! * ’ ( ) ; : @ & = + $ , / ? # [ ])
|
||||
/**
|
||||
* 配置tomcat
|
||||
* @return
|
||||
*/
|
||||
@Bean
|
||||
public ServletWebServerFactory webServerFactory() {
|
||||
TomcatServletWebServerFactory fa = new TomcatServletWebServerFactory();
|
||||
fa.addConnectorCustomizers((TomcatConnectorCustomizer) connector -> connector.setProperty("relaxedQueryChars", "[]{}"));
|
||||
return fa;
|
||||
}
|
||||
}
|
||||
82
dsp/src/main/java/com/jsc/dsp/model/Indeximos.java
Normal file
82
dsp/src/main/java/com/jsc/dsp/model/Indeximos.java
Normal file
@ -0,0 +1,82 @@
|
||||
package com.jsc.dsp.model;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
@Data
|
||||
public class Indeximos implements Serializable {
|
||||
String es_abstract;
|
||||
String es_annex;
|
||||
String es_attachment;
|
||||
String es_attchment;
|
||||
String es_attention;
|
||||
String es_attentiontime;
|
||||
String es_author;
|
||||
String es_authors;
|
||||
String es_briefing;
|
||||
String es_carriertype;
|
||||
String es_catalog;
|
||||
String es_catalog1;
|
||||
String es_catalog2;
|
||||
String es_client;
|
||||
String es_collection;
|
||||
String es_commentcount;
|
||||
String es_content;
|
||||
String es_contenttype;
|
||||
String es_district;
|
||||
Float es_doclength;
|
||||
String es_emotion;
|
||||
String es_extname;
|
||||
String es_forwardcount;
|
||||
String es_groupname;
|
||||
String es_heat;
|
||||
String es_hkey;
|
||||
String es_hotkey;
|
||||
String es_imageflag;
|
||||
String es_images;
|
||||
String es_ip;
|
||||
String es_isrepost;
|
||||
String es_keywords;
|
||||
String es_lang;
|
||||
String es_lasttime;
|
||||
String es_likecount;
|
||||
String es_links;
|
||||
String es_loadtime;
|
||||
String es_mentionsaccount;
|
||||
Float es_negativeProbability;
|
||||
String es_negativeWords;
|
||||
String es_pkey;
|
||||
String es_positiveWords;
|
||||
String es_publisher;
|
||||
String es_reactioncount;
|
||||
String es_readsign;
|
||||
String es_reportinfo;
|
||||
String es_repostid;
|
||||
String es_repostuid;
|
||||
String es_repostuname;
|
||||
String es_rultopic;
|
||||
String es_sid;
|
||||
String es_simhash;
|
||||
String es_similarity;
|
||||
String es_similaritycount;
|
||||
String es_similaritytime;
|
||||
Float es_simrank;
|
||||
String es_sitename;
|
||||
String es_srcname;
|
||||
String es_subjectId;
|
||||
String es_tableflag;
|
||||
String es_tags;
|
||||
String es_title;
|
||||
String es_urlcontent;
|
||||
String es_urlimage;
|
||||
String es_urlname;
|
||||
String es_urltime;
|
||||
String es_urltitle;
|
||||
String es_urltopic;
|
||||
String es_userid;
|
||||
String es_video;
|
||||
String es_warning;
|
||||
String es_warning_word;
|
||||
String es_warningtime;
|
||||
}
|
||||
16
dsp/src/main/java/com/jsc/dsp/model/ReturnT.java
Normal file
16
dsp/src/main/java/com/jsc/dsp/model/ReturnT.java
Normal file
@ -0,0 +1,16 @@
|
||||
package com.jsc.dsp.model;
|
||||
|
||||
/**
 * Generic result holder that bundles a numeric code, a text message and a
 * typed payload.
 *
 * <p>All three members are public, mutable fields; there are no accessors.
 * NOTE(review): presumably used as a uniform response envelope — confirm
 * against the callers.
 *
 * @param <T> type of the {@link #content} payload
 */
public class ReturnT<T> {

    /** Numeric result code; {@code null} until assigned. */
    public Integer code;

    /** Text accompanying the code; {@code null} until assigned. */
    public String message;

    /** Payload carried by this result; {@code null} until assigned. */
    public T content;

    /**
     * Creates an empty result: code, message and content are all
     * {@code null}.
     */
    public ReturnT() {
        this(null, null, null);
    }

    /**
     * Creates a fully populated result.
     *
     * @param code    value stored in {@link #code}
     * @param message value stored in {@link #message}
     * @param content value stored in {@link #content}
     */
    public ReturnT(Integer code, String message, T content) {
        this.code = code;
        this.message = message;
        this.content = content;
    }
}
|
||||
16
dsp/src/main/java/com/jsc/dsp/model/SearchAggregation.java
Normal file
16
dsp/src/main/java/com/jsc/dsp/model/SearchAggregation.java
Normal file
@ -0,0 +1,16 @@
|
||||
package com.jsc.dsp.model;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
import java.util.Date;
|
||||
|
||||
@Data
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class SearchAggregation {
|
||||
String name;
|
||||
Integer count;
|
||||
Date lastTime;
|
||||
}
|
||||
25
dsp/src/main/java/com/jsc/dsp/model/TargetSocial.java
Normal file
25
dsp/src/main/java/com/jsc/dsp/model/TargetSocial.java
Normal file
@ -0,0 +1,25 @@
|
||||
package com.jsc.dsp.model;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
import java.util.Date;
|
||||
|
||||
@Data
|
||||
@AllArgsConstructor
|
||||
@NoArgsConstructor
|
||||
public class TargetSocial {
|
||||
private Integer id;
|
||||
private String userName;
|
||||
private String userUid;
|
||||
private String userType;
|
||||
private String userFlag;
|
||||
private String keywords;
|
||||
private Date updateTime;
|
||||
private String memo;
|
||||
private Integer checkTotalNum;
|
||||
private Date checkLastTime;
|
||||
private Date checkUpdateTime;
|
||||
|
||||
}
|
||||
30
dsp/src/main/java/com/jsc/dsp/model/TargetWebsite.java
Normal file
30
dsp/src/main/java/com/jsc/dsp/model/TargetWebsite.java
Normal file
@ -0,0 +1,30 @@
|
||||
package com.jsc.dsp.model;
|
||||
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
import java.util.Date;
|
||||
|
||||
@Data
|
||||
@NoArgsConstructor
|
||||
public class TargetWebsite {
|
||||
private Integer id;
|
||||
private String startUrl;
|
||||
private String siteName;
|
||||
private String region;
|
||||
private String lang;
|
||||
private String project;
|
||||
private Integer weight;
|
||||
private String carrierType;
|
||||
private String siteType;
|
||||
private String resourceType;
|
||||
private String storageOption;
|
||||
private Integer status;
|
||||
private Integer parserConfig;
|
||||
private Integer filterConfig;
|
||||
private String memo;
|
||||
private Integer checkTotalNum;
|
||||
private Date checkLastTime;
|
||||
private Date checkUpdateTime;
|
||||
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user