This commit is contained in:
yuxin-pc 2025-05-28 19:16:17 +08:00
parent 572c3c8e76
commit cf4a6e2854
1010 changed files with 261274 additions and 0 deletions

View File

@ -0,0 +1,10 @@
language: python
python:
- 2.7
# - nightly
os:
- linux
install:
- pip install -r requirements.txt
script: python test.py

View File

@ -0,0 +1,150 @@
# -*- coding: utf-8 -*-
# !/usr/bin/env python
"""
-------------------------------------------------
File Name ProxyApi.py
Description : WebApi
Author : JHao
date 2016/12/4
-------------------------------------------------
Change Activity:
2016/12/04: WebApi
2019/08/14: 集成Gunicorn启动方式
-------------------------------------------------
"""
__author__ = 'JHao'
import json
import sys
import platform
import requests
from werkzeug.wrappers import Response
from flask import Flask, jsonify, request
sys.path.append('../')
from Config.ConfigGetter import config
from Manager.ProxyManager import ProxyManager
app = Flask(__name__)
class JsonResponse(Response):
    """Response class that accepts plain ``dict``/``list`` return values."""

    @classmethod
    def force_type(cls, response, environ=None):
        """Convert ``dict``/``list`` values with ``jsonify`` and delegate to the parent.

        :param response: value to coerce into a response object
        :param environ: WSGI environment, passed through unchanged
        :return: whatever the parent ``force_type`` returns
        """
        needs_json = isinstance(response, (dict, list))
        payload = jsonify(response) if needs_json else response
        return super(JsonResponse, cls).force_type(payload, environ)
# Make the app build its responses with JsonResponse.
app.response_class = JsonResponse

# Endpoint -> short description; this mapping is what the index route returns.
api_list = {
    'get': u'get an useful proxy',
    # 'refresh': u'refresh proxy pool',
    'get_all': u'get all proxy from proxy pool',
    'delete?proxy=127.0.0.1:8080': u'delete an unable proxy',
    'get_status': u'proxy number'
}
@app.route('/')
def index():
    """Return the ``api_list`` mapping that describes the available endpoints."""
    endpoints = api_list
    return endpoints
@app.route('/get/')
def get():
    """Return the ``info_json`` of one proxy from ``ProxyManager.get``.

    Falls back to a ``"no proxy"`` payload when the manager returns nothing.
    """
    picked = ProxyManager().get()
    if not picked:
        return {"code": 0, "src": "no proxy"}
    return picked.info_json
@app.route('/refresh/')
def refresh():
    """Placeholder endpoint: does no work and always answers ``'success'``."""
    # TODO: refresh is run periodically by a daemon; calling it directly from
    # the API performs poorly, so the call below stays disabled for now.
    # ProxyManager().refresh()
    return 'success'
@app.route('/get_all/')
def getAll():
    """Return the ``info_dict`` of every proxy from ``ProxyManager.getAll`` as JSON."""
    payload = []
    for item in ProxyManager().getAll():
        payload.append(item.info_dict)
    return jsonify(payload)
@app.route('/delete/', methods=['GET'])
def delete():
    """Delete the proxy named by the ``proxy`` query parameter from the pool.

    :return: the success payload once the delete has been issued, or an
             error payload with HTTP status 400 when ``proxy`` is missing
             or empty
    """
    proxy = request.args.get('proxy')
    if not proxy:
        # Without this guard ``None`` would be handed to the DB client.
        return {"code": 1, "src": "missing parameter: proxy"}, 400
    ProxyManager().delete(proxy)
    return {"code": 0, "src": "success"}
@app.route('/get_status/')
def getStatus():
    """Return the proxy counts reported by ``ProxyManager.getNumber``."""
    return ProxyManager().getNumber()
@app.route('/get_balance')
def getBalance():
    """Query the remote balance endpoint and relay its JSON answer.

    When the reported ``data.balance`` is greater than zero, a
    ``balance_status`` key with the value ``'ok'`` is added to the payload.

    :return: the decoded JSON payload, or ``repr()`` of the exception when
             the request, the decoding or the key lookup fails
    """
    # NOTE(review): the appkey is embedded in the URL; consider moving it to
    # configuration instead of source control.
    url = 'http://big_customer.willmam.com/index/index/get_my_balance?neek=112361&appkey=d2f6393b46afab108b038ab5b95f45d6'
    try:
        # Bound the call so a stalled upstream cannot hang the worker forever.
        rsp = requests.get(url, timeout=10)
        rsp_json = json.loads(rsp.content.decode())
        if rsp_json['data']['balance'] > 0:
            rsp_json['balance_status'] = 'ok'
        return rsp_json
    except Exception as e:
        # Best effort: report the failure to the caller as text.
        return repr(e)
if platform.system() != "Windows":
    import gunicorn.app.base
    from six import iteritems


    class StandaloneApplication(gunicorn.app.base.BaseApplication):
        """Gunicorn application wrapper that serves an in-process WSGI app."""

        def __init__(self, app, options=None):
            """Store the app and its options, then run the base initializer.

            :param app: the WSGI application to serve
            :param options: mapping of gunicorn setting names to values
            """
            self.options = options or {}
            self.application = app
            super(StandaloneApplication, self).__init__()

        def load_config(self):
            """Apply every known, non-``None`` option to ``self.cfg``."""
            accepted = {
                name: setting
                for name, setting in iteritems(self.options)
                if name in self.cfg.settings and setting is not None
            }
            for name, setting in iteritems(accepted):
                self.cfg.set(name.lower(), setting)

        def load(self):
            """Return the wrapped application."""
            return self.application
def runFlask():
    """Serve ``app`` through ``app.run`` on the configured host and port."""
    listen = dict(host=config.host_ip, port=config.host_port)
    app.run(**listen)
def runFlaskWithGunicorn():
    """Serve ``app`` through ``StandaloneApplication`` with four workers."""
    bind_address = '%s:%s' % (config.host_ip, config.host_port)
    gunicorn_options = {}
    gunicorn_options['bind'] = bind_address
    gunicorn_options['workers'] = 4
    gunicorn_options['accesslog'] = '-'  # log to stdout
    gunicorn_options['access_log_format'] = '%(h)s %(l)s %(t)s "%(r)s" %(s)s "%(a)s"'
    server = StandaloneApplication(app, gunicorn_options)
    server.run()
if __name__ == '__main__':
    # The gunicorn wrapper is only defined on non-Windows platforms.
    launcher = runFlask if platform.system() == "Windows" else runFlaskWithGunicorn
    launcher()

View File

@ -0,0 +1,14 @@
# -*- coding: utf-8 -*-
"""
-------------------------------------------------
File Name __init__.py
Description :
Author : JHao
date 2016/12/3
-------------------------------------------------
Change Activity:
2016/12/3:
-------------------------------------------------
"""
__author__ = 'JHao'

View File

@ -0,0 +1,71 @@
# -*- coding: utf-8 -*-
"""
-------------------------------------------------
File Name ConfigGetter
Description : 读取配置
Author : JHao
date 2019/2/15
-------------------------------------------------
Change Activity:
2019/2/15:
-------------------------------------------------
"""
__author__ = 'JHao'
from Util.utilClass import LazyProperty
from Config.setting import *
class ConfigGetter(object):
    """
    Attribute-style access to the values defined in Config.setting.
    """

    def __init__(self):
        pass

    @staticmethod
    def _db_option(key, fallback):
        """Look up ``key`` in ``DATABASES["default"]``, else return ``fallback``."""
        default_db = DATABASES.get("default", {})
        return default_db.get(key, fallback)

    @LazyProperty
    def db_type(self):
        """Database backend type (fallback ``"SSDB"``)."""
        return self._db_option("TYPE", "SSDB")

    @LazyProperty
    def db_name(self):
        """Database name (fallback ``"proxy"``)."""
        return self._db_option("NAME", "proxy")

    @LazyProperty
    def db_host(self):
        """Database host (fallback ``"127.0.0.1"``)."""
        return self._db_option("HOST", "127.0.0.1")

    @LazyProperty
    def db_port(self):
        """Database port (fallback ``8888``)."""
        return self._db_option("PORT", 8888)

    @LazyProperty
    def db_password(self):
        """Database password (fallback: empty string)."""
        return self._db_option("PASSWORD", "")

    @LazyProperty
    def proxy_getter_functions(self):
        """The ``PROXY_GETTER`` list from the settings."""
        return PROXY_GETTER

    @LazyProperty
    def host_ip(self):
        """``SERVER_API["HOST"]`` (fallback ``"127.0.0.1"``)."""
        return SERVER_API.get("HOST", "127.0.0.1")

    @LazyProperty
    def host_port(self):
        """``SERVER_API["PORT"]`` (fallback ``5010``)."""
        return SERVER_API.get("PORT", 5010)
# Shared instance imported by the rest of the project.
config = ConfigGetter()

if __name__ == '__main__':
    # Print every exposed value, one per line.
    for option in ('db_type', 'db_name', 'db_host', 'db_port',
                   'proxy_getter_functions', 'host_ip', 'host_port',
                   'db_password'):
        print(getattr(config, option))

View File

@ -0,0 +1,12 @@
# -*- coding: utf-8 -*-
"""
-------------------------------------------------
File Name __init__
Description :
Author : JHao
date 2019/2/15
-------------------------------------------------
Change Activity:
2019/2/15:
-------------------------------------------------
"""

View File

@ -0,0 +1,98 @@
# -*- coding: utf-8 -*-
"""
-------------------------------------------------
File Name setting.py
Description : 配置文件
Author : JHao
date 2019/2/15
-------------------------------------------------
Change Activity:
2019/2/15:
-------------------------------------------------
"""
import sys
from os import getenv
from logging import getLogger
log = getLogger(__name__)
# ASCII-art banner. A raw string is used because the art is full of
# backslashes that are not valid escape sequences (``\_``, ``\ ``, ``\/``);
# the resulting text is unchanged.
HEADER = r"""
****************************************************************
*** ______ ********************* ______ *********** _ ********
*** | ___ \_ ******************** | ___ \ ********* | | ********
*** | |_/ / \__ __ __ _ __ _ | |_/ /___ * ___ | | ********
*** | __/| _// _ \ \ \/ /| | | || __// _ \ / _ \ | | ********
*** | | | | | (_) | > < \ |_| || | | (_) | (_) || |___ ****
*** \_| |_| \___/ /_/\_\ \__ |\_| \___/ \___/ \_____/ ****
**** __ / / *****
************************* /___ / *******************************
************************* ********************************
****************************************************************
"""
# True when running on Python 3 or newer.
PY3 = sys.version_info >= (3,)

# Database connection settings. Each value is read from the environment
# variable of the same lower-case name, with the literal as the fallback.
DB_TYPE = getenv('db_type', 'REDIS').upper()
DB_HOST = getenv('db_host', '107.182.191.3')
# NOTE(review): getenv returns a str when db_port is set in the environment,
# while the fallback here is an int; checkConfig only rejects non-digit strings.
DB_PORT = getenv('db_port', 7379)
# NOTE(review): a real-looking password is committed as the fallback value --
# consider requiring it from the environment instead.
DB_PASSWORD = getenv('db_password', 'jlkj-841-2-redis')

# Imported by ProxyGetter/getFreeProxy.py; NOTE(review): its use is not visible here.
USEFUL_PROXY_COUNT = 10
# Proxy extraction API URL, also imported by ProxyGetter/getFreeProxy.py.
ZHIMA_PROXY_API = 'http://http.tiqu.alicdns.com/getip3?num=10&type=2&pro=0&city=0&yys=0&port=1&time=1&ts=0&ys=0&cs=0&lb=1&sb=0&pb=45&mr=2&regions=&gm=4'
# ZHIMA_PROXY_API = 'http://api.proxy.ipidea.io/getProxyIp?num=10&return_type=json&lb=1&sb=0&flow=1&regions=&protocol=http'
# Database configuration.
DATABASES = {
    "default": dict(
        TYPE=DB_TYPE,
        HOST=DB_HOST,
        PORT=DB_PORT,
        NAME="proxy",
        PASSWORD=DB_PASSWORD,
    ),
}
# Register the proxy getter functions: each entry must name an attribute of
# ProxyGetter.getFreeProxy.GetFreeProxy (checkConfig verifies this).
#
# Currently disabled getters, kept for reference:
#   freeProxy01, freeProxy02, freeProxy03, freeProxy04, freeProxy05,
#   freeProxy06, freeProxy07, freeProxy08, freeProxy09, freeProxy13,
#   freeProxy14, freeProxy15
PROXY_GETTER = ["zhimaProxy"]
# Web API config, e.g. http://127.0.0.1:5010
SERVER_API = dict(
    HOST="0.0.0.0",  # the ip the web API is started on
    PORT=5010,  # port number the server listens to
)
class ConfigError(Exception):
    """Raised by checkConfig when a setting holds an unsupported value.

    Derives from ``Exception`` (not ``BaseException``) so that it behaves
    like an ordinary application error.
    """
def checkConfig():
    """Validate the module-level settings.

    :raises ConfigError: if DB_TYPE is neither SSDB nor REDIS, if DB_PORT is
        a string that is not all digits, or if PROXY_GETTER names something
        that GetFreeProxy does not define
    """
    if DB_TYPE not in ["SSDB", "REDIS"]:
        raise ConfigError('db_type Do not support: %s, must SSDB/REDIS .' % DB_TYPE)
    if isinstance(DB_PORT, str) and not DB_PORT.isdigit():
        raise ConfigError('if db_port is string, it must be digit, not %s' % DB_PORT)
    # Imported inside the function, as before. NOTE(review): presumably this
    # avoids a circular import, since getFreeProxy imports from this module.
    from ProxyGetter import getFreeProxy
    illegal_getter = [key for key in PROXY_GETTER
                      if not hasattr(getFreeProxy.GetFreeProxy, key)]
    if illegal_getter:
        raise ConfigError("ProxyGetter: %s does not exists" % "/".join(illegal_getter))


# Validate the settings as soon as the module is imported.
checkConfig()

View File

@ -0,0 +1,111 @@
# -*- coding: utf-8 -*-
# !/usr/bin/env python
"""
-------------------------------------------------
File Name DbClient.py
Description : DB工厂类
Author : JHao
date 2016/12/2
-------------------------------------------------
Change Activity:
2016/12/2:
-------------------------------------------------
"""
__author__ = 'JHao'
import os
import sys
from Config.ConfigGetter import config
from Util import Singleton
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
class DbClient(object):
    """
    DB factory class. Forwards get/put/update/pop/delete/exists/getAll/clear/
    getNumber/changeTable to a backend client chosen from config.db_type.

    Proxies are kept in two places; use changeTable() to switch between them:
        raw_proxy       unverified proxies
        useful_proxy    verified proxies

    Methods a backend is expected to provide:
        get(proxy): return the information stored for the given proxy;
        put(proxy): store one proxy;
        pop(): return and remove one proxy;
        update(proxy): update the information of the given proxy;
        delete(proxy): remove the given proxy;
        exists(proxy): tell whether the given proxy is stored;
        getAll(): return all proxies;
        clear(): remove all proxies;
        getNumber(): return the number of proxies;
        changeTable(name): switch the target, raw_proxy/useful_proxy

    Backends:
        ssdb: SsdbClient.py
        redis: RedisClient.py
        mongodb: MongodbClient.py
    """

    # NOTE(review): ``__metaclass__`` is only honoured by Python 2; on Python 3
    # it is a plain class attribute -- confirm whether singleton behaviour is
    # expected there.
    __metaclass__ = Singleton

    def __init__(self):
        """
        init
        :return:
        """
        self.__initDbClient()

    def __initDbClient(self):
        """
        Import the backend module matching config.db_type and build the client.
        :raises AssertionError: if config.db_type names no known backend
        :return:
        """
        client_names = {
            "SSDB": "SsdbClient",
            "REDIS": "RedisClient",
            "MONGODB": "MongodbClient",
        }
        client_name = client_names.get(config.db_type)
        if not client_name:
            # Raised explicitly (same type as the former ``assert``) so the
            # check also runs under ``python -O``.
            raise AssertionError('type error, Not support DB type: {}'.format(config.db_type))
        # The backend module and its class share the same name.
        backend_module = __import__(client_name)
        self.client = getattr(backend_module, client_name)(name=config.db_name,
                                                           host=config.db_host,
                                                           port=config.db_port,
                                                           password=config.db_password)

    def get(self, key, **kwargs):
        """Forward to the backend's get()."""
        return self.client.get(key, **kwargs)

    def put(self, key, **kwargs):
        """Forward to the backend's put()."""
        return self.client.put(key, **kwargs)

    def update(self, key, *args, **kwargs):
        """Forward to the backend's update().

        Extra arguments are passed through untouched, so both the one-argument
        ``update(proxy_obj)`` of the Redis/SSDB clients and the two-argument
        ``update(key, value)`` of the MongoDB client can be reached.
        """
        return self.client.update(key, *args, **kwargs)

    def delete(self, key, **kwargs):
        """Forward to the backend's delete()."""
        return self.client.delete(key, **kwargs)

    def exists(self, key, **kwargs):
        """Forward to the backend's exists()."""
        return self.client.exists(key, **kwargs)

    def pop(self, **kwargs):
        """Forward to the backend's pop()."""
        return self.client.pop(**kwargs)

    def getAll(self):
        """Forward to the backend's getAll()."""
        return self.client.getAll()

    def clear(self):
        """Forward to the backend's clear()."""
        return self.client.clear()

    def changeTable(self, name):
        """Switch the backend to the given target (raw_proxy/useful_proxy)."""
        self.client.changeTable(name)

    def getNumber(self):
        """Forward to the backend's getNumber()."""
        return self.client.getNumber()

View File

@ -0,0 +1,74 @@
# coding: utf-8
"""
-------------------------------------------------
File Name MongodbClient.py
Description : 封装mongodb操作
Author : JHao netAir
date 2017/3/3
-------------------------------------------------
Change Activity:
2017/3/3:
2017/9/26:完成对mongodb的支持
-------------------------------------------------
"""
__author__ = 'Maps netAir'
from pymongo import MongoClient
class MongodbClient(object):
    """Wrapper around a MongoDB collection holding proxy documents.

    Documents have the form ``{'proxy': ..., 'num': ...}`` and live in the
    ``proxy`` database; ``self.name`` selects the collection.

    NOTE(review): ``insert``/``remove``/``update``/``count`` on a collection
    are deprecated pymongo APIs -- verify them against the installed version.
    """

    def __init__(self, name, host, port, **kwargs):
        """Connect to ``host:port`` and select collection ``name``."""
        self.name = name
        self.client = MongoClient(host, port, **kwargs)
        self.db = self.client.proxy

    def changeTable(self, name):
        """Switch the active collection."""
        self.name = name

    def get(self, proxy):
        """Return the ``num`` stored for ``proxy``, or None if it is unknown."""
        data = self.db[self.name].find_one({'proxy': proxy})
        return data['num'] if data is not None else None

    def put(self, proxy, num=1):
        """Insert ``proxy`` with ``num`` unless it is already stored."""
        if self.db[self.name].find_one({'proxy': proxy}):
            return None
        self.db[self.name].insert({'proxy': proxy, 'num': num})
        return None

    def pop(self):
        """Remove one randomly sampled proxy and return it.

        :return: ``{'proxy': ..., 'value': ...}`` or None when nothing is stored
        """
        data = list(self.db[self.name].aggregate([{'$sample': {'size': 1}}]))
        if data:
            data = data[0]
            value = data['proxy']
            self.delete(value)
            return {'proxy': value, 'value': data['num']}
        return None

    def delete(self, value):
        """Remove the documents whose ``proxy`` equals ``value``."""
        self.db[self.name].remove({'proxy': value})

    def getAll(self):
        """Return a ``{proxy: num}`` dict of every stored document."""
        return {p['proxy']: p['num'] for p in self.db[self.name].find()}

    def clean(self):
        """Drop the whole ``proxy`` database."""
        self.client.drop_database('proxy')

    def delete_all(self):
        """Remove every document from the active collection."""
        self.db[self.name].remove()

    def update(self, key, value):
        """Increment the ``num`` of proxy ``key`` by ``value``."""
        self.db[self.name].update({'proxy': key}, {'$inc': {'num': value}})

    def exists(self, key):
        """Return True when a document for proxy ``key`` is stored."""
        return self.db[self.name].find_one({'proxy': key}) is not None

    def getNumber(self):
        """Return the number of documents in the active collection."""
        return self.db[self.name].count()
if __name__ == "__main__":
    # Manual smoke test against a MongoDB on localhost.
    db = MongodbClient(name='first', host='localhost', port=27017)
    # db.put('127.0.0.1:1')
    # db2 = MongodbClient('second', 'localhost', 27017)
    # db2.put('127.0.0.1:2')
    popped = db.pop()
    print(popped)

View File

@ -0,0 +1,133 @@
# -*- coding: utf-8 -*-
"""
-------------------------------------------------
File Name RedisClient
Description : 封装Redis相关操作
Author : JHao
date 2019/8/9
-------------------------------------------------
Change Activity:
2019/8/9: 封装Redis相关操作
-------------------------------------------------
"""
__author__ = 'JHao'
from Config.setting import PY3
from redis.connection import BlockingConnectionPool
from redis import Redis
class RedisClient(object):
    """
    Redis client. Same protocol and data layout as SSDB, although some
    methods are not interchangeable.

    Proxies are stored in Redis hashes:
        raw_proxy:    unverified proxies; key is the proxy's ip:port, value is its attribute dict
        useful_proxy: verified proxies; key is the proxy's ip:port, value is its attribute dict
    """

    def __init__(self, name, **kwargs):
        """
        Build the connection.
        :param name: hash name
        :param host: host
        :param port: port
        :param password: password
        :return:
        """
        self.name = name
        pool = BlockingConnectionPool(**kwargs)
        self.__conn = Redis(connection_pool=pool)

    def get(self, proxy_str):
        """
        Fetch one proxy from the current hash (select it with changeTable() first).
        :param proxy_str: proxy str
        :return: the stored value (decoded on Python 3), or None when absent
        """
        raw = self.__conn.hget(name=self.name, key=proxy_str)
        if not raw:
            return None
        if PY3:
            return raw.decode('utf-8')
        return raw

    def put(self, proxy_obj):
        """
        Store a proxy in the current hash (select it with changeTable()).
        :param proxy_obj: Proxy obj
        :return: the result of hset
        """
        return self.__conn.hset(self.name, proxy_obj.proxy, proxy_obj.info_json)

    def delete(self, proxy_str):
        """
        Remove a proxy from the current hash (select it with changeTable()).
        :param proxy_str: proxy str
        :return:
        """
        self.__conn.hdel(self.name, proxy_str)

    def exists(self, proxy_str):
        """
        Tell whether a proxy is present in the current hash.
        :param proxy_str: proxy str
        :return: the result of hexists
        """
        return self.__conn.hexists(self.name, proxy_str)

    def update(self, proxy_obj):
        """
        Overwrite the stored attributes of a proxy.
        :param proxy_obj:
        :return:
        """
        self.__conn.hset(self.name, proxy_obj.proxy, proxy_obj.info_json)

    def pop(self):
        """
        Pop one proxy. The implementation is currently disabled (see the
        commented-out code), so this always returns None.
        :return: None
        """
        # proxies = self.__conn.hkeys(self.name)
        # if proxies:
        #     proxy = random.choice(proxies)
        #     value = self.__conn.hget(self.name, proxy)
        #     self.delete(proxy)
        #     return {'proxy': proxy.decode('utf-8') if PY3 else proxy,
        #             'value': value.decode('utf-8') if PY3 and value else value}
        return None

    def getAll(self):
        """
        Return every value of the current hash (select it with changeTable()).
        :return: the stored values, decoded on Python 3
        """
        stored = self.__conn.hgetall(self.name)
        if not PY3:
            return stored.values()
        return [raw.decode('utf8') for raw in stored.values()]

    def clear(self):
        """
        Remove every proxy by deleting the current hash.
        :return: the result of the delete command
        """
        return self.__conn.delete(self.name)

    def getNumber(self):
        """
        Return the number of proxies in the current hash.
        :return:
        """
        return self.__conn.hlen(self.name)

    def changeTable(self, name):
        """
        Switch the hash that subsequent calls operate on.
        :param name: raw_proxy/useful_proxy
        :return:
        """
        self.name = name

View File

@ -0,0 +1,135 @@
# -*- coding: utf-8 -*-
# !/usr/bin/env python
"""
-------------------------------------------------
File Name SsdbClient.py
Description : 封装SSDB操作
Author : JHao
date 2016/12/2
-------------------------------------------------
Change Activity:
2016/12/2:
2017/09/22: PY3中 redis-py返回的数据是bytes型
2017/09/27: 修改pop()方法 返回{proxy:value}字典
-------------------------------------------------
"""
__author__ = 'JHao'
from Config.setting import PY3
from redis.connection import BlockingConnectionPool
from redis import Redis
class SsdbClient(object):
    """
    SSDB client

    Proxies are stored in SSDB hashes:
        raw_proxy:    unverified proxies; key is the proxy's ip:port, value is its attribute dict
        useful_proxy: verified proxies; key is the proxy's ip:port, value is its attribute dict
    """

    def __init__(self, name, **kwargs):
        """
        Build the connection.
        :param name: hash name
        :param host: host
        :param port: port
        :param password: password
        :return:
        """
        self.name = name
        pool = BlockingConnectionPool(**kwargs)
        self.__conn = Redis(connection_pool=pool)

    def get(self, proxy_str):
        """
        Fetch one proxy from the current hash (select it with changeTable() first).
        :param proxy_str: proxy str
        :return: the stored value (decoded on Python 3), or None when absent
        """
        raw = self.__conn.hget(name=self.name, key=proxy_str)
        if not raw:
            return None
        if PY3:
            return raw.decode('utf-8')
        return raw

    def put(self, proxy_obj):
        """
        Store a proxy in the current hash (select it with changeTable()).
        :param proxy_obj: Proxy obj
        :return: the result of hset
        """
        return self.__conn.hset(self.name, proxy_obj.proxy, proxy_obj.info_json)

    def delete(self, proxy_str):
        """
        Remove a proxy from the current hash (select it with changeTable()).
        :param proxy_str: proxy str
        :return:
        """
        self.__conn.hdel(self.name, proxy_str)

    def exists(self, proxy_str):
        """
        Tell whether a proxy is present in the current hash.
        :param proxy_str: proxy str
        :return: the result of hexists
        """
        return self.__conn.hexists(self.name, proxy_str)

    def update(self, proxy_obj):
        """
        Overwrite the stored attributes of a proxy.
        :param proxy_obj:
        :return:
        """
        self.__conn.hset(self.name, proxy_obj.proxy, proxy_obj.info_json)

    def pop(self):
        """
        Pop one proxy. The implementation is currently disabled (see the
        commented-out code), so this always returns None.
        :return: None
        """
        # proxies = self.__conn.hkeys(self.name)
        # if proxies:
        #     proxy = random.choice(proxies)
        #     value = self.__conn.hget(self.name, proxy)
        #     self.delete(proxy)
        #     return {'proxy': proxy.decode('utf-8') if PY3 else proxy,
        #             'value': value.decode('utf-8') if PY3 and value else value}
        return None

    def getAll(self):
        """
        Return every value of the current hash (select it with changeTable()).
        :return: the stored values, decoded on Python 3
        """
        stored = self.__conn.hgetall(self.name)
        if not PY3:
            return stored.values()
        return [raw.decode('utf8') for raw in stored.values()]

    def clear(self):
        """
        Remove every proxy by issuing ``hclear`` on the current hash.
        :return: the result of the command
        """
        return self.__conn.execute_command("hclear", self.name)

    def getNumber(self):
        """
        Return the number of proxies in the current hash.
        :return:
        """
        return self.__conn.hlen(self.name)

    def changeTable(self, name):
        """
        Switch the hash that subsequent calls operate on.
        :param name: raw_proxy/useful_proxy
        :return:
        """
        self.name = name

View File

@ -0,0 +1,12 @@
# -*- coding: utf-8 -*-
"""
-------------------------------------------------
File Name __init__.py.py
Description :
Author : JHao
date 2016/12/2
-------------------------------------------------
Change Activity:
2016/12/2:
-------------------------------------------------
"""

View File

@ -0,0 +1,17 @@
FROM python:3.8.2-slim
ENV TZ Asia/Shanghai
WORKDIR /usr/src/app
COPY ./requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
COPY . .
EXPOSE 5010
WORKDIR /usr/src/app/cli
ENTRYPOINT [ "sh", "start.sh" ]

21
deploy/ProxyPool/LICENSE Normal file
View File

@ -0,0 +1,21 @@
MIT License
Copyright (c) 2017 J_hao104
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -0,0 +1,113 @@
# -*- coding: utf-8 -*-
# !/usr/bin/env python
"""
-------------------------------------------------
File Name ProxyManager.py
Description :
Author : JHao
date 2016/12/3
-------------------------------------------------
Change Activity:
2016/12/3:
-------------------------------------------------
"""
__author__ = 'JHao'
import random
import json
from ProxyHelper import Proxy
from DB.DbClient import DbClient
from Config.ConfigGetter import config
from Util.LogHandler import LogHandler
from Util.utilFunction import verifyProxyFormat
from ProxyGetter.getFreeProxy import GetFreeProxy
class ProxyManager(object):
    """
    ProxyManager: fetches proxies into the raw queue and serves, lists,
    deletes and counts the proxies stored in the DB.
    """

    def __init__(self):
        self.db = DbClient()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        # self.useful_proxy_queue = 'ProxyPool:useful_proxy_3'
        self.useful_proxy_queue = 'ProxyPool:useful_proxy_63'

    def fetch(self):
        """
        fetch proxy into db by ProxyGetter

        Every configured getter is expected to yield JSON strings with the
        keys ``proxy``, ``proxy_type`` and ``anonimity``. An error raised by a
        getter is logged and ends that getter only.
        :return:
        """
        self.db.changeTable(self.raw_proxy_queue)
        proxy_set = set()
        self.log.info("ProxyFetch : start")
        for proxyGetter in config.proxy_getter_functions:
            self.log.info("ProxyFetch - {func}: start".format(func=proxyGetter))
            try:
                for proxy_str in getattr(GetFreeProxy, proxyGetter.strip())():
                    # No ``encoding`` argument: Python 3 ignores it and 3.9
                    # removed it, where passing it raises TypeError.
                    proxy_dict = json.loads(proxy_str)
                    proxy = proxy_dict['proxy']
                    proxy_type = proxy_dict['proxy_type']
                    anonimity = proxy_dict['anonimity']
                    if not proxy or not verifyProxyFormat(proxy):
                        # ``proxy`` may be None/empty here, so do not call
                        # ``.ljust`` on it directly.
                        self.log.error('ProxyFetch - {func}: '
                                       '{proxy} illegal'.format(func=proxyGetter, proxy=(proxy or '').ljust(20)))
                        continue
                    if proxy in proxy_set:
                        self.log.info('ProxyFetch - {func}: '
                                      '{proxy} exist'.format(func=proxyGetter, proxy=proxy.ljust(20)))
                        continue
                    self.db.put(Proxy(proxy, source=proxyGetter, proxy_type=proxy_type, anonimity=anonimity))
                    proxy_set.add(proxy)
                    self.log.info('ProxyFetch - {func}: '
                                  '{proxy} added'.format(func=proxyGetter, proxy=proxy.ljust(20)))
            except Exception as e:
                # Best effort: one failing getter must not stop the others.
                self.log.error("ProxyFetch - {func}: error".format(func=proxyGetter))
                self.log.error(str(e))

    def get(self):
        """
        return a useful proxy
        :return: a randomly chosen Proxy, or None when the pool is empty
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_list = self.db.getAll()
        if item_list:
            random_choice = random.choice(item_list)
            return Proxy.newProxyFromJson(random_choice)
        return None

    def delete(self, proxy_str):
        """
        delete proxy from pool
        :param proxy_str:
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy_str)

    def getAll(self):
        """
        get all proxy from pool as list
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_list = self.db.getAll()
        return [Proxy.newProxyFromJson(_) for _ in item_list]

    def getNumber(self):
        """
        count the proxies in the raw and the useful queue
        :return: dict with the keys ``raw_proxy`` and ``useful_proxy``
        """
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_queue = self.db.getNumber()
        return {'raw_proxy': total_raw_proxy, 'useful_proxy': total_useful_queue}
if __name__ == '__main__':
    # Run one fetch round when executed as a script.
    pp = ProxyManager()
    fetch_round = pp.fetch
    fetch_round()

View File

@ -0,0 +1,15 @@
# -*- coding: utf-8 -*-
"""
-------------------------------------------------
File Name __init__.py.py
Description :
Author : JHao
date 2016/12/3
-------------------------------------------------
Change Activity:
2016/12/3:
-------------------------------------------------
"""
__author__ = 'JHao'
from Manager.ProxyManager import ProxyManager

View File

@ -0,0 +1,70 @@
# -*- coding: utf-8 -*-
"""
-------------------------------------------------
File Name CheckProxy
Description : used for check getFreeProxy.py
Author : JHao
date 2018/7/10
-------------------------------------------------
Change Activity:
2018/7/10: CheckProxy
-------------------------------------------------
"""
__author__ = 'JHao'
from getFreeProxy import GetFreeProxy
from Util.utilFunction import verifyProxyFormat
from Util.LogHandler import LogHandler
log = LogHandler('check_proxy', file=False)
class CheckProxy(object):
    """Diagnostics that run the proxy getter functions of GetFreeProxy."""

    @staticmethod
    def _proxyOf(item):
        """Return the ``ip:port`` text carried by one item yielded by a getter.

        The getters yield JSON objects with a ``proxy`` key; anything that is
        not such an object (e.g. a plain ``ip:port`` string) is returned
        unchanged.
        """
        import json
        try:
            decoded = json.loads(item)
        except (TypeError, ValueError):
            return item
        if isinstance(decoded, dict):
            return decoded.get('proxy') or ''
        return item

    @staticmethod
    def checkAllGetProxyFunc():
        """
        Run every proxy getter function of GetFreeProxy and log how many
        well-formed proxies each one produced.
        Returns:
            None
        """
        import inspect
        member_list = inspect.getmembers(GetFreeProxy, predicate=inspect.isfunction)
        proxy_count_dict = dict()
        for func_name, func in member_list:
            log.info(u"开始运行 {}".format(func_name))
            try:
                # Validate the proxy field, not the raw JSON record.
                proxy_list = [_ for _ in func() if verifyProxyFormat(CheckProxy._proxyOf(_))]
                proxy_count_dict[func_name] = len(proxy_list)
            except Exception as e:
                # Best effort: a failing getter is logged and counted as 0.
                log.info(u"代理获取函数 {} 运行出错!".format(func_name))
                log.error(str(e))
        log.info(u"所有函数运行完毕 " + "***" * 5)
        for func_name, _ in member_list:
            log.info(u"函数 {n}, 获取到代理数: {c}".format(n=func_name, c=proxy_count_dict.get(func_name, 0)))

    @staticmethod
    def checkGetProxyFunc(func):
        """
        Run one getter function and log every well-formed proxy it produced.
        Args:
            func: a callable getter from getFreeProxy
        Returns:
            None
        """
        func_name = getattr(func, '__name__', "None")
        log.info("start running func: {}".format(func_name))
        count = 0
        for item in func():
            proxy = CheckProxy._proxyOf(item)
            if verifyProxyFormat(proxy):
                log.info("{} fetch proxy: {}".format(func_name, proxy))
                count += 1
        log.info("{n} completed, fetch proxy number: {c}".format(n=func_name, c=count))
if __name__ == '__main__':
    CheckProxy.checkAllGetProxyFunc()
    # freeProxy01 is commented out in getFreeProxy, so check a getter that
    # still exists.
    CheckProxy.checkGetProxyFunc(GetFreeProxy.freeProxy05)

View File

@ -0,0 +1,12 @@
# -*- coding: utf-8 -*-
"""
-------------------------------------------------
File Name __init__.py.py
Description :
Author : JHao
date 2016/11/25
-------------------------------------------------
Change Activity:
2016/11/25:
-------------------------------------------------
"""

View File

@ -0,0 +1,452 @@
# -*- coding: utf-8 -*-
# !/usr/bin/env python
"""
-------------------------------------------------
File Name GetFreeProxy.py
Description : 抓取免费代理
Author : JHao
date 2016/11/25
-------------------------------------------------
Change Activity:
2016/11/25:
-------------------------------------------------
"""
import re
import sys
import json
import requests
from time import sleep
sys.path.append('..')
from Util.WebRequest import WebRequest
from Util.utilFunction import getHtmlTree
import requests
from DB.DbClient import DbClient
from Config.setting import USEFUL_PROXY_COUNT
from Config.setting import ZHIMA_PROXY_API
# for debug to disable insecureWarning
requests.packages.urllib3.disable_warnings()
# Shared record template: the getters reset it with init_proxy_dict(), fill it
# in, and yield it serialized with json.dumps.
proxy_dict = dict.fromkeys(
    ("proxy", "region", "anonimity", "proxy_type", "source"),
    "",
)
db = DbClient()
def init_proxy_dict():
    """Reset every field of the shared ``proxy_dict`` to an empty string."""
    proxy_dict.update(dict.fromkeys(proxy_dict, ""))
class GetFreeProxy(object):
"""
proxy getter
"""
# @staticmethod
# def freeProxy01():
# """
# 无忧代理 http://www.data5u.com/
# 几乎没有能用的
# :return:
# """
# url_list = [
# 'http://www.data5u.com/',
# 'http://www.data5u.com/free/gngn/index.shtml',
# 'http://www.data5u.com/free/gnpt/index.shtml'
# ]
# key = 'ABCDEFGHIZ'
# for url in url_list:
# html_tree = getHtmlTree(url)
# ul_list = html_tree.xpath('//ul[@class="l2"]')
# for ul in ul_list:
# try:
# ip = ul.xpath('./span[1]/li/text()')[0]
# classnames = ul.xpath('./span[2]/li/attribute::class')[0]
# classname = classnames.split(' ')[1]
# port_sum = 0
# for c in classname:
# port_sum *= 10
# port_sum += key.index(c)
# port = port_sum >> 3
# yield '{}:{}'.format(ip, port)
# except Exception as e:
# print(e)
@staticmethod
def freeProxy02(count=20):
    """
    66ip proxy list http://www.66ip.cn/

    Evaluates the landing page's inline JavaScript with execjs to obtain the
    ``__jsl_clearance`` cookie value, then queries the two export URLs and
    yields one JSON-encoded record per ``ip:port`` match found in the reply.

    :param count: number of proxies to request from each URL
    :return: generator of JSON strings; yields nothing if the cookie step fails
    """
    urls = [
        "http://www.66ip.cn/mo.php?sxb=&tqsl={}&port=&export=&ktip=&sxa=&submit=%CC%E1++%C8%A1&textarea=",
        "http://www.66ip.cn/nmtq.php?getnum={}&isp=0&anonymoustype=0&s"
        "tart=&ports=&export=&ipaddress=&area=0&proxytype=2&api=66ip"
    ]
    try:
        import execjs
        import requests
        headers = {
            'User-Agent':
                'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0',
            'Accept': '*/*',
            'Connection': 'keep-alive',
            'Accept-Language': 'zh-CN,zh;q=0.8'
        }
        session = requests.session()
        src = session.get("http://www.66ip.cn/", headers=headers).text
        # Keep only the first <script> body and wrap it as ``function test() {...}``.
        src = src.split("</script>")[0] + '}'
        src = src.replace("<script>", "function test() {")
        # Rewrite the page's eval loop so that the generated code is returned
        # (after at most 10 rounds) instead of being evaluated.
        src = src.replace("while(z++)try{eval(",
                          ';var num=10;while(z++)try{var tmp=')
        src = src.replace(
            ");break}",
            ";num--;if(tmp.search('cookie') != -1 | num<0){return tmp}}")
        ctx = execjs.compile(src)
        src = ctx.call("test")
        # Cut out the expression assigned to document.cookie and evaluate it.
        src = src[src.find("document.cookie="):src.find("};if((")]
        src = src.replace("document.cookie=", "")
        src = "function test() {var window={}; return %s }" % src
        cookie = execjs.compile(src).call('test')
        # First ``name=value`` pair of the cookie string; keep the value only.
        js_cookie = cookie.split(";")[0].split("=")[-1]
    except Exception as e:
        # Without the cookie the export URLs are not queried at all.
        print(e)
        return
    for url in urls:
        try:
            html = session.get(url.format(count),
                               cookies={
                                   "__jsl_clearance": js_cookie
                               },
                               headers=headers).text
            ips = re.findall(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d{1,5}",
                             html)
            for ip in ips:
                init_proxy_dict()
                proxy_dict["proxy"] = ip.strip()
                yield json.dumps(proxy_dict)
        except Exception as e:
            # Best effort: report the failure and move on to the next URL.
            print(e)
            pass
@staticmethod
def freeProxy03(page_count=1):
    """
    xicidaili http://www.xicidaili.com
    :param page_count: number of list pages to read per URL
    :return: generator of JSON-encoded proxy records
    """
    url_list = [
        'http://www.xicidaili.com/nn/',  # high anonymity
        # 'http://www.xicidaili.com/nt/',  # transparent
    ]
    for base_url in url_list:
        for page_no in range(1, page_count + 1):
            tree = getHtmlTree(base_url + str(page_no))
            # Every table row except the first one.
            rows = tree.xpath('.//table[@id="ip_list"]//tr[position()>1]')
            for row in rows:
                try:
                    init_proxy_dict()
                    cells = row.xpath('./td/text()')
                    proxy_dict["proxy"] = ':'.join(cells[0:2])
                    proxy_dict["proxy_type"] = cells[5]
                    yield json.dumps(proxy_dict)
                except Exception as e:
                    # Rows that cannot be parsed are skipped silently.
                    pass
@staticmethod
def freeProxy04():
    """
    goubanjia http://www.goubanjia.com/

    Pairs every ``td.ip`` cell with the table row at the same position and
    yields one JSON-encoded proxy record per pair.
    :return: generator of JSON-encoded proxy records
    """
    url = "http://www.goubanjia.com/"
    tree = getHtmlTree(url)
    proxy_list = tree.xpath('//td[@class="ip"]')
    proxy_attr = tree.xpath('//tr[@class="success" or @class="warning"]')
    # The site mixes hidden decoy digits into the address, so elements styled
    # with display:none (and the port element) have to be filtered out.
    xpath_str = (".//*[not(contains(@style, 'display: none'))\n"
                 "and not(contains(@style, 'display:none'))\n"
                 "and not(contains(@class, 'port'))\n"
                 "]/text()\n")
    # zip() walks both lists in lockstep; iterating the bare tuple
    # ``proxy_list, proxy_attr`` would try to unpack each whole list instead.
    for each_proxy, each_attr in zip(proxy_list, proxy_attr):
        try:
            # The ':' sits directly under the td while the other parts are in
            # div/span/p children: read the ip first, then decode the port.
            ip_addr = ''.join(each_proxy.xpath(xpath_str))
            # The port shown in the HTML is a random number; the real port is
            # encoded in the letters after "port" in the class attribute, e.g.
            #   <span class="port CFACE">9054</span>
            # where CFACE decodes to 3128.
            port_class = each_proxy.xpath(".//span[contains(@class, 'port')]"
                                          "/attribute::class")[0]
            port = 0
            for _ in port_class.replace("port ", ""):
                port *= 10
                port += (ord(_) - ord('A'))
            port /= 8
            init_proxy_dict()
            proxy_dict["proxy"] = '{}:{}'.format(ip_addr, int(port))
            proxy_dict["anonimity"] = each_attr.xpath(".//td/text()")[2]
            proxy_dict["proxy_type"] = each_attr.xpath(".//td/text()")[3]
            yield json.dumps(proxy_dict)
        except Exception:
            # Best effort: rows that cannot be parsed are skipped.
            pass
@staticmethod
def freeProxy05():
    """
    kuaidaili https://www.kuaidaili.com
    :return: generator of JSON-encoded proxy records
    """
    url_list = [
        'https://www.kuaidaili.com/free/inha/'
        # 'https://www.kuaidaili.com/free/intr/'
    ]
    for page_url in url_list:
        rows = getHtmlTree(page_url).xpath('.//table//tr')
        sleep(1)  # a pause is required, otherwise the second request returns no data
        for row in rows[1:]:
            init_proxy_dict()
            cells = row.xpath('./td/text()')
            proxy_dict['proxy'] = ':'.join(cells[0:2])
            proxy_dict['proxy_type'] = cells[3]
            proxy_dict['anonimity'] = cells[2]
            yield json.dumps(proxy_dict)
# @staticmethod
# def freeProxy06():
# """
# 码农代理 https://proxy.coderbusy.com/
# :return:
# """
# urls = ['https://proxy.coderbusy.com/']
# for url in urls:
# tree = getHtmlTree(url)
# proxy_list = tree.xpath('.//table//tr')
# for tr in proxy_list[1:]:
# init_proxy_dict()
# proxy_dict['proxy'] = ':'.join(tr.xpath('./td/text()')[0:2])
# yield json.dumps(proxy_dict)
@staticmethod
def freeProxy07():
    """
    ip3366 http://www.ip3366.net/free/
    :return: generator of JSON-encoded proxy records
    """
    urls = [
        'http://www.ip3366.net/free/?stype=1',
        "http://www.ip3366.net/free/?stype=2"
    ]
    for url in urls:
        tree = getHtmlTree(url)
        proxy_list = tree.xpath('.//table//tr')
        sleep(1)
        # The first row is skipped.
        for tr in proxy_list[1:]:
            init_proxy_dict()
            cells = tr.xpath('./td/text()')
            proxy_dict['proxy'] = ':'.join(cells[0:2])
            proxy_dict['proxy_type'] = cells[3]
            proxy_dict['anonimity'] = cells[2]
            yield json.dumps(proxy_dict)
# r = request.get(url, timeout=10)
# proxies = re.findall(
# r'<td>(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})</td>[\s\S]*?<td>(\d+)</td>',
# r.text)
# for proxy in proxies:
# init_proxy_dict()
# proxy_dict['proxy'] = ":".join(proxy)
# yield json.dumps(proxy_dict)
# @staticmethod
# def freeProxy08():
# """
# IP海 http://www.iphai.com/free/ng
# :return:
# """
# urls = [
# 'http://www.iphai.com/free/ng', 'http://www.iphai.com/free/np',
# 'http://www.iphai.com/free/wg', 'http://www.iphai.com/free/wp'
# ]
# request = WebRequest()
# for url in urls:
# r = request.get(url, timeout=10)
# proxies = re.findall(
# r'<td>\s*?(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\s*?</td>[\s\S]*?<td>\s*?(\d+)\s*?</td>',
# r.text)
# for proxy in proxies:
# init_proxy_dict()
# proxy_dict['proxy'] = ":".join(proxy)
# yield json.dumps(proxy_dict)
@staticmethod
def freeProxy09(page_count=1):
    """
    Scrape the jiangxianli free proxy library (http://ip.jiangxianli.com/?page=).

    :param page_count: number of listing pages to fetch, starting at page 1
    :return: generator of JSON strings; each record carries ``proxy``
             (columns 0 and 1 joined with ":" and stripped), ``proxy_type``
             (column 3) and ``anonimity`` (column 2)
    """
    for i in range(1, page_count + 1):
        # The original query string was "...&?page={}", which names the
        # parameter "?page" so the requested page number was never applied.
        url = 'http://ip.jiangxianli.com/?country=中国&page={}'.format(i)
        html_tree = getHtmlTree(url)
        for index, tr in enumerate(html_tree.xpath("//table//tr")):
            if index == 0:
                # The first row is skipped (presumably the table header).
                continue
            # Evaluate the XPath once per row instead of once per field.
            cells = tr.xpath("./td/text()")
            if len(cells) < 4:
                # A short row must not abort the generator with IndexError.
                continue
            init_proxy_dict()
            proxy_dict['proxy'] = ":".join(cells[0:2]).strip()
            proxy_dict['proxy_type'] = cells[3]
            proxy_dict['anonimity'] = cells[2]
            yield json.dumps(proxy_dict)
# @staticmethod
# def freeProxy10():
# """
# 墙外网站 cn-proxy
# :return:
# """
# urls = ['http://cn-proxy.com/', 'http://cn-proxy.com/archives/218']
# request = WebRequest()
# for url in urls:
# r = request.get(url, timeout=10)
# proxies = re.findall(r'<td>(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})</td>[\w\W]<td>(\d+)</td>', r.text)
# for proxy in proxies:
# yield ':'.join(proxy)
# @staticmethod
# def freeProxy11():
# """
# https://proxy-list.org/english/index.php
# :return:
# """
# urls = ['https://proxy-list.org/english/index.php?p=%s' % n for n in range(1, 10)]
# request = WebRequest()
# import base64
# for url in urls:
# r = request.get(url, timeout=10)
# proxies = re.findall(r"Proxy\('(.*?)'\)", r.text)
# for proxy in proxies:
# yield base64.b64decode(proxy).decode()
# @staticmethod
# def freeProxy12():
# urls = ['https://list.proxylistplus.com/Fresh-HTTP-Proxy-List-1']
# request = WebRequest()
# for url in urls:
# r = request.get(url, timeout=10)
# proxies = re.findall(r'<td>(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})</td>[\s\S]*?<td>(\d+)</td>', r.text)
# for proxy in proxies:
# yield ':'.join(proxy)
@staticmethod
def freeProxy13(max_page=2):
    """
    Scrape the qydaili free proxy list
    (http://www.qydaili.com/free/?action=china&page=1).

    :param max_page: number of listing pages to fetch, starting at page 1
    :return: generator of JSON strings; each record carries ``proxy``
             (columns 0 and 1 joined with ":"), ``proxy_type`` (column 3)
             and ``anonimity`` (column 2)
    """
    base_url = 'http://www.qydaili.com/free/?action=china&page='
    for page in range(1, max_page + 1):
        url = base_url + str(page)
        tree = getHtmlTree(url)
        proxy_list = tree.xpath('.//table//tr')
        sleep(1)
        # The first row is skipped (presumably the table header).
        for tr in proxy_list[1:]:
            # Evaluate the XPath once per row instead of once per field.
            cells = tr.xpath('./td/text()')
            if len(cells) < 4:
                # A short row must not abort the generator with IndexError.
                continue
            init_proxy_dict()
            proxy_dict['proxy'] = ':'.join(cells[0:2])
            proxy_dict['proxy_type'] = cells[3]
            proxy_dict['anonimity'] = cells[2]
            yield json.dumps(proxy_dict)
# r = request.get(url, timeout=10)
# proxies = re.findall(
# r'<td.*?>(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})</td>[\s\S]*?<td.*?>(\d+)</td>',
# r.text)
# for proxy in proxies:
# init_proxy_dict()
# proxy_dict['proxy'] = ':'.join(proxy)
# yield json.dumps(proxy_dict)
# @staticmethod
# def freeProxy14(max_page=2):
# """
# http://www.89ip.cn/index.html
# 89免费代理
# :param max_page:
# :return:
# """
# base_url = 'http://www.89ip.cn/index_{}.html'
# request = WebRequest()
# for page in range(1, max_page + 1):
# url = base_url.format(page)
# r = request.get(url, timeout=10)
# proxies = re.findall(
# r'<td.*?>[\s\S]*?(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})[\s\S]*?</td>[\s\S]*?<td.*?>[\s\S]*?(\d+)[\s\S]*?</td>',
# r.text)
# for proxy in proxies:
# init_proxy_dict()
# proxy_dict['proxy'] = ':'.join(proxy)
# yield json.dumps(proxy_dict)
@staticmethod
def freeProxy15():
    """
    Scrape the xiladaili proxy list (only the "gaoni" page is enabled).

    :return: generator of JSON strings; each record carries ``proxy``
             (column 0 as-is), ``proxy_type`` (column 1 with the characters
             matched by the pattern below removed) and ``anonimity``
             (column 2)
    """
    urls = [
        # 'http://www.xiladaili.com/putong/',
        "http://www.xiladaili.com/gaoni/"
        # "http://www.xiladaili.com/http/",
        # "http://www.xiladaili.com/https/"
    ]
    # Compile once instead of once per table row.
    cjk_chars = re.compile(r'[\u4e00-\u9fa5]')
    for url in urls:
        tree = getHtmlTree(url)
        proxy_list = tree.xpath('.//table//tr')
        sleep(1)
        # The first row is skipped (presumably the table header).
        for tr in proxy_list[1:]:
            # Evaluate the XPath once per row instead of once per field.
            cells = tr.xpath('./td/text()')
            if len(cells) < 3:
                # A short row must not abort the generator with IndexError.
                continue
            init_proxy_dict()
            proxy_dict['proxy'] = cells[0]
            proxy_dict['proxy_type'] = cjk_chars.sub('', cells[1])
            proxy_dict['anonimity'] = cells[2]
            yield json.dumps(proxy_dict)
@staticmethod
def zhimaProxy():
    """
    Top up the pool from the ZHIMA_PROXY_API endpoint.

    The endpoint is only queried while the table selected below holds fewer
    than half of USEFUL_PROXY_COUNT entries, and proxies are only yielded
    when the decoded response has a truthy ``success`` field.

    :return: generator of JSON strings built from the module-level
             ``proxy_dict`` with ``proxy`` set to "<ip>:<port>"
    """
    # db.changeTable('ProxyPool:useful_proxy_3')
    db.changeTable('ProxyPool:useful_proxy_63')
    if not db.getNumber() < USEFUL_PROXY_COUNT / 2:
        return
    payload = json.loads(requests.get(ZHIMA_PROXY_API).text)
    if not payload['success']:
        return
    for entry in payload['data']:
        proxy_dict['proxy'] = entry['ip'] + ':' + str(entry['port'])
        yield json.dumps(proxy_dict)
if __name__ == '__main__':
    # Manual check entry point: run every proxy getter through CheckProxy.
    # The commented calls below check a single getter at a time.
    from CheckProxy import CheckProxy
    # CheckProxy.checkGetProxyFunc(GetFreeProxy.freeProxy01)
    # CheckProxy.checkGetProxyFunc(GetFreeProxy.freeProxy02)
    # CheckProxy.checkGetProxyFunc(GetFreeProxy.freeProxy03)
    # CheckProxy.checkGetProxyFunc(GetFreeProxy.freeProxy04)
    # CheckProxy.checkGetProxyFunc(GetFreeProxy.freeProxy05)
    # CheckProxy.checkGetProxyFunc(GetFreeProxy.freeProxy06)
    # CheckProxy.checkGetProxyFunc(GetFreeProxy.freeProxy07)
    # CheckProxy.checkGetProxyFunc(GetFreeProxy.freeProxy08)
    # CheckProxy.checkGetProxyFunc(GetFreeProxy.freeProxy09)
    # CheckProxy.checkGetProxyFunc(GetFreeProxy.freeProxy13)
    # CheckProxy.checkGetProxyFunc(GetFreeProxy.freeProxy14)
    # CheckProxy.checkGetProxyFunc(GetFreeProxy.freeProxy15)
    CheckProxy.checkAllGetProxyFunc()

View File

@ -0,0 +1,145 @@
# -*- coding: utf-8 -*-
"""
-------------------------------------------------
File Name Proxy
Description : 代理对象类型封装
Author : JHao
date 2019/7/11
-------------------------------------------------
Change Activity:
2019/7/11: 代理对象类型封装
-------------------------------------------------
"""
__author__ = 'JHao'
import json
class Proxy(object):
    """
    Record describing one proxy together with its check bookkeeping.

    ``proxy`` is read-only after construction; every other attribute is
    exposed as a read/write property.
    """

    def __init__(self, proxy, fail_count=0, region="", anonimity="", proxy_type="",
                 source="", check_count=0, last_status="", last_time=""):
        """
        :param proxy: proxy address, "ip:port"
        :param fail_count: number of failed checks
        :param region: geographic location (country/city)
        :param anonimity: anonymity level (transparent / high-anonymity)
        :param proxy_type: protocol, e.g. HTTP/HTTPS
        :param source: where the proxy came from
        :param check_count: number of checks performed
        :param last_status: result of the last check (1 usable, 0 unusable)
        :param last_time: time of the last check
        """
        self._proxy = proxy
        self._fail_count = fail_count
        self._region = region
        self._anonimity = anonimity
        self._type = proxy_type
        self._source = source
        self._check_count = check_count
        self._last_status = last_status
        self._last_time = last_time

    @classmethod
    def newProxyFromJson(cls, proxy_json):
        """
        Build a Proxy instance from a JSON object of proxy attributes.

        Missing keys fall back to the constructor defaults.

        :param proxy_json: JSON text (str or UTF-8 encoded bytes)
        :return: Proxy instance
        """
        # json.loads() no longer accepts the ``encoding`` keyword on
        # Python 3.9+ (TypeError), so decode bytes explicitly instead.
        if isinstance(proxy_json, bytes):
            proxy_json = proxy_json.decode('utf-8')
        proxy_dict = json.loads(proxy_json)
        # ``info_dict`` serialises the protocol under "type", while the
        # scraper functions in this project emit it as "proxy_type";
        # accept both, preferring "type".
        proxy_type = proxy_dict.get("type", proxy_dict.get("proxy_type", ""))
        return cls(proxy=proxy_dict.get("proxy", ""),
                   fail_count=proxy_dict.get("fail_count", 0),
                   region=proxy_dict.get("region", ""),
                   anonimity=proxy_dict.get("anonimity", ""),
                   proxy_type=proxy_type,
                   source=proxy_dict.get("source", ""),
                   check_count=proxy_dict.get("check_count", 0),
                   last_status=proxy_dict.get("last_status", ""),
                   last_time=proxy_dict.get("last_time", "")
                   )

    @property
    def proxy(self):
        """ Proxy address, ip:port (read-only) """
        return self._proxy

    @property
    def fail_count(self):
        """ Number of failed checks """
        return self._fail_count

    @fail_count.setter
    def fail_count(self, value):
        self._fail_count = value

    @property
    def region(self):
        """ Geographic location (country/city) """
        return self._region

    @region.setter
    def region(self, value):
        self._region = value

    @property
    def anonimity(self):
        """ Anonymity level: transparent / high-anonymity """
        return self._anonimity

    @anonimity.setter
    def anonimity(self, value):
        self._anonimity = value

    @property
    def type(self):
        """ Protocol: HTTP/HTTPS etc. """
        return self._type

    @type.setter
    def type(self, value):
        self._type = value

    @property
    def source(self):
        """ Where the proxy came from """
        return self._source

    @source.setter
    def source(self, value):
        self._source = value

    @property
    def check_count(self):
        """ Number of checks performed """
        return self._check_count

    @check_count.setter
    def check_count(self, value):
        self._check_count = value

    @property
    def last_status(self):
        """ Result of the last check: 1 -> usable; 0 -> unusable """
        return self._last_status

    @last_status.setter
    def last_status(self, value):
        self._last_status = value

    @property
    def last_time(self):
        """ Time of the last check """
        return self._last_time

    @last_time.setter
    def last_time(self, value):
        self._last_time = value

    @property
    def info_dict(self):
        """ All attributes as a dict """
        return {"proxy": self._proxy,
                "fail_count": self._fail_count,
                "region": self._region,
                "anonimity": self._anonimity,
                "type": self._type,
                "source": self._source,
                "check_count": self.check_count,
                "last_status": self.last_status,
                "last_time": self.last_time}

    @property
    def info_json(self):
        """ All attributes as a JSON string (non-ASCII kept as-is) """
        return json.dumps(self.info_dict, ensure_ascii=False)

View File

@ -0,0 +1,40 @@
# -*- coding: utf-8 -*-
"""
-------------------------------------------------
File Name ProxyHelper
Description :
Author : JHao
date 2019/8/8
-------------------------------------------------
Change Activity:
2019/8/8:
-------------------------------------------------
"""
__author__ = 'JHao'
from Util import validUsefulProxy
from datetime import datetime
def checkProxyUseful(proxy_obj):
    """
    Validate a proxy and record the outcome on the Proxy object.

    Every call increments ``check_count`` and stamps ``last_time``. A passing
    check sets ``last_status`` to 1 and lowers a positive ``fail_count`` by
    one; a failing check sets ``last_status`` to 0 and raises ``fail_count``
    by one.

    :param proxy_obj: Proxy object
    :return: (proxy_obj, True) when the proxy is usable, else (proxy_obj, False)
    """
    usable = bool(validUsefulProxy(proxy_obj.proxy))
    proxy_obj.check_count += 1
    proxy_obj.last_status = 1 if usable else 0
    proxy_obj.last_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    if not usable:
        proxy_obj.fail_count += 1
    elif proxy_obj.fail_count > 0:
        proxy_obj.fail_count -= 1
    return proxy_obj, usable

View File

@ -0,0 +1,16 @@
# -*- coding: utf-8 -*-
"""
-------------------------------------------------
File Name __init__.py
Description :
Author : JHao
date 2019/7/11
-------------------------------------------------
Change Activity:
2019/7/11:
-------------------------------------------------
"""
__author__ = 'JHao'
from ProxyHelper.Proxy import Proxy
from ProxyHelper.ProxyUtil import checkProxyUseful

239
deploy/ProxyPool/README.md Normal file
View File

@ -0,0 +1,239 @@
爬虫IP代理池
=======
[![Build Status](https://travis-ci.org/jhao104/proxy_pool.svg?branch=master)](https://travis-ci.org/jhao104/proxy_pool)
[![](https://img.shields.io/badge/Powered%20by-@j_hao104-green.svg)](http://www.spiderpy.cn/blog/)
[![Requirements Status](https://requires.io/github/jhao104/proxy_pool/requirements.svg?branch=master)](https://requires.io/github/jhao104/proxy_pool/requirements/?branch=master)
[![Packagist](https://img.shields.io/packagist/l/doctrine/orm.svg)](https://github.com/jhao104/proxy_pool/blob/master/LICENSE)
[![GitHub contributors](https://img.shields.io/github/contributors/jhao104/proxy_pool.svg)](https://github.com/jhao104/proxy_pool/graphs/contributors)
[![](https://img.shields.io/badge/language-Python-green.svg)](https://github.com/jhao104/proxy_pool)
______ ______ _
| ___ \_ | ___ \ | |
| |_/ / \__ __ __ _ __ _ | |_/ /___ ___ | |
| __/| _// _ \ \ \/ /| | | || __// _ \ / _ \ | |
| | | | | (_) | > < \ |_| || | | (_) | (_) || |___
\_| |_| \___/ /_/\_\ \__ |\_| \___/ \___/ \_____\
__ / /
/___ /
##### [介绍文档](https://github.com/jhao104/proxy_pool/blob/master/doc/introduce.md)
* 支持版本: ![](https://img.shields.io/badge/Python-2.x-green.svg) ![](https://img.shields.io/badge/Python-3.x-blue.svg)
* 测试地址: http://118.24.52.95 (单机勿压, 感谢。 恶意访问关[小黑屋](https://github.com/jhao104/proxy_pool/blob/bff423dffe6e2881ee45d5b66d8a6ad682c8e4ab/doc/block_ips.md)哦)
### 下载安装
* 下载源码:
```shell
git clone git@github.com:jhao104/proxy_pool.git
或者直接到https://github.com/jhao104/proxy_pool/releases 下载zip文件
```
* 安装依赖:
```shell
pip install -r requirements.txt
```
* 配置Config/setting.py:
```shell
# Config/setting.py 为项目配置文件
# 配置DB
DATABASES = {
"default": {
"TYPE": "SSDB", # 目前支持SSDB或REDIS数据库
"HOST": "127.0.0.1", # db host
"PORT": 8888, # db port例如SSDB通常使用8888REDIS通常默认使用6379
"NAME": "proxy", # 默认配置
"PASSWORD": "" # db password
}
}
# 配置 ProxyGetter
PROXY_GETTER = [
"freeProxy01", # 这里是启用的代理抓取函数名可在ProxyGetter/getFreeProxy.py 扩展
"freeProxy02",
....
]
# 配置 API服务
SERVER_API = {
"HOST": "0.0.0.0", # 监听ip, 0.0.0.0 监听所有IP
"PORT": 5010 # 监听端口
}
# 上面配置启动后,代理池访问地址为 http://127.0.0.1:5010
```
* 启动:
```shell
# 如果你的依赖已经安装完成并且具备运行条件,可以在cli目录下通过proxyPool.py启动。
# 程序分为: schedule 调度程序 和 webserver Api服务
# 首先启动调度程序
>>>python proxyPool.py schedule
# 然后启动webApi服务
>>>python proxyPool.py webserver
```
### Docker
```bash
docker pull jhao104/proxy_pool
# 远程数据库
docker run --env db_type=REDIS --env db_host=x.x.x.x --env db_port=6379 --env db_password=pwd_str -p 5010:5010 jhao104/proxy_pool
# 宿主机上的数据库
docker run --env db_type=REDIS --env db_host=host.docker.internal --env db_port=6379 --env db_password=pwd_str -p 5010:5010 jhao104/proxy_pool
```
### 使用
  启动过几分钟后就能看到抓取到的代理IP，你可以直接到数据库中查看，推荐一个[SSDB可视化工具](https://github.com/jhao104/SSDBAdmin)。
  也可以通过api访问http://127.0.0.1:5010 查看。
* Api
| api | method | Description | arg|
| ----| ---- | ---- | ----|
| / | GET | api介绍 | None |
| /get | GET | 随机获取一个代理 | None|
| /get_all | GET | 获取所有代理 |None|
| /get_status | GET | 查看代理数量 |None|
| /delete | GET | 删除代理 |proxy=host:port|
* 爬虫使用
  如果要在爬虫代码中使用的话, 可以将此api封装成函数直接使用，例如：
```python
import requests
def get_proxy():
return requests.get("http://127.0.0.1:5010/get/").json()
def delete_proxy(proxy):
requests.get("http://127.0.0.1:5010/delete/?proxy={}".format(proxy))
# your spider code
def getHtml():
# ....
retry_count = 5
proxy = get_proxy().get("proxy")
while retry_count > 0:
try:
html = requests.get('http://www.example.com', proxies={"http": "http://{}".format(proxy)})
# 使用代理访问
return html
except Exception:
retry_count -= 1
# 出错5次, 删除代理池中代理
delete_proxy(proxy)
return None
```
### 扩展代理
  项目默认包含几个免费的代理获取方法,但是免费的毕竟质量不好,所以如果直接运行可能拿到的代理质量不理想。所以,提供了代理获取的扩展方法。
  添加一个新的代理获取方法如下:
* 1、首先在[GetFreeProxy](https://github.com/jhao104/proxy_pool/blob/b9ccdfaada51b57cfb1bbd0c01d4258971bc8352/ProxyGetter/getFreeProxy.py#L32)类中添加你的获取代理的静态方法,
该方法需要以生成器(yield)形式返回`host:port`格式的代理,例如:
```python
class GetFreeProxy(object):
# ....
# 你自己的方法
@staticmethod
def freeProxyCustom(): # 命名不和已有重复即可
# 通过某网站或者某接口或某数据库获取代理 任意你喜欢的姿势都行
# 假设你拿到了一个代理列表
proxies = ["139.129.166.68:3128", "139.129.166.61:3128", ...]
for proxy in proxies:
yield proxy
# 确保每个proxy都是 host:port 正确的格式就行
```
* 2、添加好方法后修改Config/setting.py文件中的`PROXY_GETTER`项:
  在`PROXY_GETTER`下添加自定义的方法的名字:
```shell
PROXY_GETTER = [
"freeProxy01",
"freeProxy02",
....
"freeProxyCustom" # # 确保名字和你添加方法名字一致
]
```
  `ProxySchedule`会每隔一段时间抓取一次代理,下次抓取时会自动识别调用你定义的方法。
### 代理采集
目前实现的采集免费代理网站有(排名不分先后, 下面仅是对其发布的免费代理情况, 付费代理测评可以参考[这里](https://zhuanlan.zhihu.com/p/33576641)):
| 厂商名称 | 状态 | 更新速度 | 可用率 | 是否被墙 | 地址 |
| ----- | ---- | -------- | ------ | --------- | ----- |
| 无忧代理 | 可用 | 几分钟一次 | * | 否 | [地址](http://www.data5u.com/free/index.html) |
| 66代理 | 可用 | 更新很慢 | * | 否 | [地址](http://www.66ip.cn/) |
| 西刺代理 | 可用 | 几分钟一次 | * | 否 | [地址](http://www.xicidaili.com)|
| 全网代理 | 可用 | 几分钟一次 | * | 否 | [地址](http://www.goubanjia.com/)|
| 训代理 | 已关闭免费代理 | * | * | 否 | [地址](http://www.xdaili.cn/)|
| 快代理 | 可用 |几分钟一次| * | 否 | [地址](https://www.kuaidaili.com/)|
| 云代理 | 可用 |几分钟一次| * | 否 | [地址](http://www.ip3366.net/)|
| IP海 | 可用 |几小时一次| * | 否 | [地址](http://www.iphai.com/)|
| 免费IP代理库 | 可用 |快| * | 否 | [地址](http://ip.jiangxianli.com/)|
| 中国IP地址 | 可用 |几分钟一次| * | 是 | [地址](http://cn-proxy.com/)|
| Proxy List | 可用 |几分钟一次| * | 是 | [地址](https://proxy-list.org/chinese/index.php)|
| ProxyList+ | 可用 |几分钟一次| * | 是 | [地址](https://list.proxylistplus.com/Fresh-HTTP-Proxy-List-1)|
如果还有其他好的免费代理网站, 可以提交在[issues](https://github.com/jhao104/proxy_pool/issues/71), 下次更新时会考虑在项目中支持。
### 问题反馈
  任何问题欢迎在[Issues](https://github.com/jhao104/proxy_pool/issues) 中反馈,如果没有账号可以去 我的[博客](http://www.spiderpy.cn/blog/message)中留言。
  你的反馈会让此项目变得更加完美。
### 贡献代码
  本项目仅作为基本的通用的代理池架构,不接收特有功能(当然,不限于特别好的idea)。
  本项目依然不够完善，如果发现bug或有新的功能添加，请在[Issues](https://github.com/jhao104/proxy_pool/issues)中提交bug(或新功能)描述，在确认后提交你的代码。
  这里感谢以下contributor的无私奉献
  [@kangnwh](https://github.com/kangnwh)| [@bobobo80](https://github.com/bobobo80)| [@halleywj](https://github.com/halleywj)| [@newlyedward](https://github.com/newlyedward)| [@wang-ye](https://github.com/wang-ye)| [@gladmo](https://github.com/gladmo)| [@bernieyangmh](https://github.com/bernieyangmh)| [@PythonYXY](https://github.com/PythonYXY)| [@zuijiawoniu](https://github.com/zuijiawoniu)| [@netAir](https://github.com/netAir)| [@scil](https://github.com/scil)| [@tangrela](https://github.com/tangrela)| [@highroom](https://github.com/highroom)| [@luocaodan](https://github.com/luocaodan)| [@vc5](https://github.com/vc5)| [@1again](https://github.com/1again)| [@obaiyan](https://github.com/obaiyan)
### Release Notes
[release notes](https://github.com/jhao104/proxy_pool/blob/master/doc/release_notes.md)

View File

@ -0,0 +1,61 @@
# -*- coding: utf-8 -*-
"""
-------------------------------------------------
File Name ProxyScheduler
Description :
Author : JHao
date 2019/8/5
-------------------------------------------------
Change Activity:
2019/8/5: ProxyScheduler
-------------------------------------------------
"""
__author__ = 'JHao'
import sys
from apscheduler.schedulers.blocking import BlockingScheduler
sys.path.append('../')
from Schedule import doRawProxyCheck, doUsefulProxyCheck
from Manager import ProxyManager
from Util import LogHandler
class DoFetchProxy(ProxyManager):
    """ ProxyManager subclass that runs one fetch pass with start/finish logging """
    def __init__(self):
        ProxyManager.__init__(self)
        # Dedicated logger named 'fetch_proxy'.
        self.log = LogHandler('fetch_proxy')
    def main(self):
        """ Log, call the inherited fetch(), log again """
        self.log.info("start fetch proxy")
        self.fetch()
        self.log.info("finish fetch proxy")
def rawProxyScheduler():
    """ Scheduled job: fetch new proxies, then run the raw-proxy check """
    DoFetchProxy().main()
    doRawProxyCheck()
def usefulProxyScheduler():
    """ Scheduled job: run the useful-proxy check """
    doUsefulProxyCheck()
def runScheduler():
    """
    Run both jobs once immediately, then repeat each of them every five
    minutes on a blocking APScheduler instance (this call does not return
    while the scheduler is running).
    """
    rawProxyScheduler()
    usefulProxyScheduler()

    scheduler = BlockingScheduler(logger=LogHandler("scheduler_log"))
    interval_jobs = (
        (rawProxyScheduler, "raw_proxy_check", "raw_proxy定时采集"),
        (usefulProxyScheduler, "useful_proxy_check", "useful_proxy定时检查"),
    )
    for job_func, job_id, job_name in interval_jobs:
        scheduler.add_job(job_func, 'interval', minutes=5, id=job_id, name=job_name)
    scheduler.start()
if __name__ == '__main__':
    # Start the scheduler when this module is executed as a script.
    runScheduler()

View File

@ -0,0 +1,81 @@
# -*- coding: utf-8 -*-
"""
-------------------------------------------------
File Name RawProxyCheck
Description : check raw_proxy to useful
Author : JHao
date 2019/8/6
-------------------------------------------------
Change Activity:
2019/8/6: check raw_proxy to useful
-------------------------------------------------
"""
__author__ = 'JHao'
from threading import Thread
try:
from Queue import Empty, Queue # py2
except:
from queue import Empty, Queue # py3
from Util import LogHandler
from Manager import ProxyManager
from ProxyHelper import Proxy, checkProxyUseful
class RawProxyCheck(ProxyManager, Thread):
    """
    Worker thread that drains a queue of raw proxy JSON strings, validates
    each proxy and stores the ones that pass in the useful-proxy table.
    """

    def __init__(self, queue, thread_name):
        """
        :param queue: queue of proxy JSON strings shared by all workers
        :param thread_name: name given to the thread (used in log lines)
        """
        ProxyManager.__init__(self)
        Thread.__init__(self, name=thread_name)
        self.log = LogHandler('raw_proxy_check')
        self.queue = queue

    def run(self):
        """ Process queue items until the queue is empty, then exit """
        self.log.info("RawProxyCheck - {} : start".format(self.name))
        self.db.changeTable(self.useful_proxy_queue)
        while True:
            try:
                # Non-blocking get: an empty queue ends this worker.
                raw_item = self.queue.get(block=False)
            except Empty:
                self.log.info("RawProxyCheck - {} : exit".format(self.name))
                break

            proxy_obj, passed = checkProxyUseful(Proxy.newProxyFromJson(raw_item))
            if not passed:
                self.log.info('RawProxyCheck - {} : {} validation fail'.format(self.name, proxy_obj.proxy.ljust(20)))
            elif self.db.exists(proxy_obj.proxy):
                # Already stored: leave the existing entry untouched.
                self.log.info('RawProxyCheck - {} : {} validation exists'.format(self.name,
                                                                                 proxy_obj.proxy.ljust(20)))
            else:
                self.db.put(proxy_obj)
                self.log.info(
                    'RawProxyCheck - {} : {} validation pass'.format(self.name, proxy_obj.proxy.ljust(20)))
            self.queue.task_done()
def doRawProxyCheck():
    """
    Move every entry of the raw-proxy table into a queue, clear that table,
    and validate the queued proxies with 20 RawProxyCheck threads, waiting
    for all of them to finish.
    """
    manager = ProxyManager()
    manager.db.changeTable(manager.raw_proxy_queue)

    pending = Queue()
    for raw_proxy in manager.db.getAll():
        pending.put(raw_proxy)
    manager.db.clear()

    workers = [RawProxyCheck(pending, "thread_%s" % number) for number in range(20)]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
if __name__ == '__main__':
    # Run one raw-proxy check pass when executed as a script.
    doRawProxyCheck()

View File

@ -0,0 +1,83 @@
# -*- coding: utf-8 -*-
"""
-------------------------------------------------
File Name UsefulProxyCheck
Description : check useful proxy
Author : JHao
date 2019/8/7
-------------------------------------------------
Change Activity:
2019/8/7: check useful proxy
-------------------------------------------------
"""
__author__ = 'JHao'
from threading import Thread
try:
from Queue import Queue, Empty # py2
except:
from queue import Queue, Empty # py3
from Util import LogHandler
from Manager import ProxyManager
from ProxyHelper import checkProxyUseful, Proxy
FAIL_COUNT = 0
class UsefulProxyCheck(ProxyManager, Thread):
    """
    Worker thread that re-validates proxies taken from the useful-proxy
    table: proxies that pass are written back, the others are deleted.
    """

    def __init__(self, queue, thread_name):
        """
        :param queue: queue of proxy JSON strings shared by all workers
        :param thread_name: name given to the thread (used in log lines)
        """
        ProxyManager.__init__(self)
        Thread.__init__(self, name=thread_name)
        self.queue = queue
        self.log = LogHandler('useful_proxy_check')

    def run(self):
        """ Process queue items until the queue is empty, then exit """
        self.log.info("UsefulProxyCheck - {} : start".format(self.name))
        self.db.changeTable(self.useful_proxy_queue)
        while True:
            try:
                # Non-blocking get: an empty queue ends this worker.
                raw_item = self.queue.get(block=False)
            except Empty:
                self.log.info("UsefulProxyCheck - {} : exit".format(
                    self.name))
                break

            proxy_obj, passed = checkProxyUseful(Proxy.newProxyFromJson(raw_item))
            # A failing proxy is still kept while its fail_count is below
            # the module-level FAIL_COUNT threshold.
            keep = passed or proxy_obj.fail_count < FAIL_COUNT
            if not keep:
                self.log.info(
                    'UsefulProxyCheck - {} : {} validation fail'.format(
                        self.name, proxy_obj.proxy.ljust(20)))
                self.db.delete(proxy_obj.proxy)
            else:
                self.db.put(proxy_obj)
                self.log.info(
                    'UsefulProxyCheck - {} : {} validation pass'.format(
                        self.name, proxy_obj.proxy.ljust(20)))
            self.queue.task_done()
def doUsefulProxyCheck():
    """
    Queue every entry of the useful-proxy table and re-validate them with
    10 UsefulProxyCheck threads, waiting for all of them to finish.
    """
    manager = ProxyManager()
    manager.db.changeTable(manager.useful_proxy_queue)

    pending = Queue()
    for stored_proxy in manager.db.getAll():
        pending.put(stored_proxy)

    workers = [UsefulProxyCheck(pending, "thread_%s" % number) for number in range(10)]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
if __name__ == '__main__':
    # Run one useful-proxy check pass when executed as a script.
    doUsefulProxyCheck()

View File

@ -0,0 +1,16 @@
# -*- coding: utf-8 -*-
"""
-------------------------------------------------
File Name __init__.py.py
Description :
Author : JHao
date 2016/12/3
-------------------------------------------------
Change Activity:
2016/12/3:
-------------------------------------------------
"""
__author__ = 'JHao'
from Schedule.RawProxyCheck import doRawProxyCheck
from Schedule.UsefulProxyCheck import doUsefulProxyCheck

View File

@ -0,0 +1,13 @@
# -*- coding: utf-8 -*-
"""
-------------------------------------------------
File Name __init__
Description :
Author : JHao
date 2019/2/15
-------------------------------------------------
Change Activity:
2019/2/15:
-------------------------------------------------
"""
__author__ = 'JHao'

View File

@ -0,0 +1,33 @@
# -*- coding: utf-8 -*-
"""
-------------------------------------------------
File Name testGetConfig
Description : testGetConfig
Author : J_hao
date 2017/7/31
-------------------------------------------------
Change Activity:
2017/7/31:
-------------------------------------------------
"""
__author__ = 'J_hao'
from Config.ConfigGetter import config
# noinspection PyPep8Naming
def testConfig():
    """
    Print the database settings exposed by ``config`` and check that the
    configured proxy getter functions are a list.
    :return:
    """
    for setting in ("db_type", "db_name", "db_host", "db_port", "db_password"):
        print(getattr(config, setting))
    getters = config.proxy_getter_functions
    assert isinstance(getters, list)
    print(config.proxy_getter_functions)
if __name__ == '__main__':
    # Allow running this test module directly.
    testConfig()

View File

@ -0,0 +1,37 @@
# -*- coding: utf-8 -*-
"""
-------------------------------------------------
File Name testGetFreeProxy
Description : test model ProxyGetter/getFreeProxy
Author : J_hao
date 2017/7/31
-------------------------------------------------
Change Activity:
2017/7/31:function testGetFreeProxy
-------------------------------------------------
"""
__author__ = 'J_hao'
from ProxyGetter.getFreeProxy import GetFreeProxy
from Config.ConfigGetter import config
def testGetFreeProxy():
    """
    Call every getter listed in ``config.proxy_getter_functions`` on
    GetFreeProxy and print each non-empty proxy it yields together with a
    running count.
    :return:
    """
    for getter_name in config.proxy_getter_functions:
        fetch = getattr(GetFreeProxy, getter_name.strip())
        proxy_count = 0
        for proxy in fetch():
            if not proxy:
                continue
            print('{func}: fetch proxy {proxy},proxy_count:{proxy_count}'.format(func=getter_name, proxy=proxy,
                                                                                 proxy_count=proxy_count))
            proxy_count += 1
        # assert proxy_count >= 20, '{} fetch proxy fail'.format(proxyGetter)
if __name__ == '__main__':
    # Allow running this test module directly.
    testGetFreeProxy()

View File

@ -0,0 +1,35 @@
# -*- coding: utf-8 -*-
"""
-------------------------------------------------
File Name testLogHandler
Description :
Author : J_hao
date 2017/8/2
-------------------------------------------------
Change Activity:
2017/8/2:
-------------------------------------------------
"""
__author__ = 'J_hao'
from Util.LogHandler import LogHandler
# noinspection PyPep8Naming
def testLogHandler():
    """
    Exercise LogHandler from Util/LogHandler: log one message, then rename
    the logger twice with resetName() and log after each rename.
    :return:
    """
    log = LogHandler('test')
    log.info('this is a log from test')
    log.resetName(name='test1')
    log.info('this is a log from test1')
    log.resetName(name='test2')
    log.info('this is a log from test2')
if __name__ == '__main__':
    # Allow running this test module directly.
    testLogHandler()

View File

@ -0,0 +1,33 @@
# -*- coding: utf-8 -*-
"""
-------------------------------------------------
File Name testProxyClass
Description :
Author : JHao
date 2019/8/8
-------------------------------------------------
Change Activity:
2019/8/8:
-------------------------------------------------
"""
__author__ = 'JHao'
import json
from ProxyHelper import Proxy
def testProxyClass():
    """
    Round-trip a Proxy through JSON: print its attribute dict, set the
    source, serialise the dict, and print the dict of the Proxy rebuilt
    from that JSON string.
    """
    proxy = Proxy("127.0.0.1:8080")
    print(proxy.info_dict)
    proxy.source = "test"
    proxy_str = json.dumps(proxy.info_dict, ensure_ascii=False)
    print(proxy_str)
    print(Proxy.newProxyFromJson(proxy_str).info_dict)
# NOTE(review): unlike the other test modules in this project, this call is
# not wrapped in an `if __name__ == '__main__':` guard, so it also runs when
# the module is imported - confirm whether that is intended.
testProxyClass()

View File

@ -0,0 +1,30 @@
# -*- coding: utf-8 -*-
"""
-------------------------------------------------
File Name testWebRequest
Description : test class WebRequest
Author : J_hao
date 2017/7/31
-------------------------------------------------
Change Activity:
2017/7/31: function testWebRequest
-------------------------------------------------
"""
__author__ = 'J_hao'
from Util.WebRequest import WebRequest
# noinspection PyPep8Naming
def testWebRequest():
    """
    Exercise WebRequest from Util/WebRequest.py: fetch the Baidu home page
    and check that the returned response has status code 200.
    :return:
    """
    wr = WebRequest()
    request_object = wr.get('https://www.baidu.com/')
    assert request_object.status_code == 200
if __name__ == '__main__':
    # Allow running this test module directly.
    testWebRequest()

View File

@ -0,0 +1,102 @@
# -*- coding: utf-8 -*-
"""
-------------------------------------------------
File Name LogHandler.py
Description : 日志操作模块
Author : JHao
date 2017/3/6
-------------------------------------------------
Change Activity:
2017/3/6: log handler
2017/9/21: 屏幕输出/文件输出 可选(默认屏幕和文件均输出)
-------------------------------------------------
"""
__author__ = 'JHao'
import os
import logging
from logging.handlers import TimedRotatingFileHandler
# Log levels
CRITICAL = 50
FATAL = CRITICAL
ERROR = 40
WARNING = 30
WARN = WARNING
INFO = 20
DEBUG = 10
NOTSET = 0
# Directory containing this module, its parent directory, and the "log"
# directory under that parent where log files are written.
CURRENT_PATH = os.path.dirname(os.path.abspath(__file__))
ROOT_PATH = os.path.join(CURRENT_PATH, os.pardir)
LOG_PATH = os.path.join(ROOT_PATH, 'log')
# Create the log directory at import time if it does not exist yet.
if not os.path.exists(LOG_PATH):
    os.mkdir(LOG_PATH)
class LogHandler(logging.Logger):
    """
    Logger that can write to the console and/or to a daily-rotated file
    named ``<name>.log`` inside LOG_PATH.
    """

    def __init__(self, name, level=DEBUG, stream=True, file=True):
        """
        :param name: logger name, also used as the log file base name
        :param level: level applied to the logger and, by default, to its handlers
        :param stream: attach a console (stream) handler when true
        :param file: attach a rotating file handler when true
        """
        self.name = name
        self.level = level
        logging.Logger.__init__(self, self.name, level=level)
        # Always defined so resetName() also works when file=False.
        self.file_handler = None
        if stream:
            self.__setStreamHandler__()
        if file:
            self.__setFileHandler__()

    def __setFileHandler__(self, level=None):
        """
        Attach the rotating file handler.
        :param level: handler level; the logger's own level when falsy
        :return:
        """
        import re

        file_name = os.path.join(LOG_PATH, '{name}.log'.format(name=self.name))
        # Rotate once a day inside the log directory and keep 15 backups.
        file_handler = TimedRotatingFileHandler(filename=file_name, when='D', interval=1, backupCount=15)
        file_handler.suffix = '%Y%m%d.log'
        # The handler only prunes backups whose suffix matches ``extMatch``;
        # the stock pattern expects YYYY-MM-DD, so it has to follow the
        # custom suffix or backupCount never deletes anything.
        file_handler.extMatch = re.compile(r'^\d{8}\.log$')
        if not level:
            file_handler.setLevel(self.level)
        else:
            file_handler.setLevel(level)
        formatter = logging.Formatter('%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s')
        file_handler.setFormatter(formatter)
        self.file_handler = file_handler
        self.addHandler(file_handler)

    def __setStreamHandler__(self, level=None):
        """
        Attach the console (stream) handler.
        :param level: handler level; the logger's own level when falsy
        :return:
        """
        stream_handler = logging.StreamHandler()
        formatter = logging.Formatter('%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s')
        stream_handler.setFormatter(formatter)
        if not level:
            stream_handler.setLevel(self.level)
        else:
            stream_handler.setLevel(level)
        self.addHandler(stream_handler)

    def resetName(self, name):
        """
        Rename the logger and, when a file handler is attached, replace it
        with one that writes to the file derived from the new name.
        :param name: new logger name
        :return:
        """
        self.name = name
        previous = self.file_handler
        if previous is None:
            # Created with file=False: there is no file handler to replace.
            return
        self.removeHandler(previous)
        # Release the old log file instead of leaking its handle.
        previous.close()
        self.__setFileHandler__()
if __name__ == '__main__':
    # Quick manual check: emit one message through a 'test' logger.
    log = LogHandler('test')
    log.info('this is a test msg')

View File

@ -0,0 +1,85 @@
# -*- coding: utf-8 -*-
"""
-------------------------------------------------
File Name WebRequest
Description : Network Requests Class
Author : J_hao
date 2017/7/31
-------------------------------------------------
Change Activity:
2017/7/31:
-------------------------------------------------
"""
__author__ = 'J_hao'
from requests.models import Response
import requests
import random
import time
class WebRequest(object):
    """
    Thin wrapper around ``requests.get`` that adds a random User-Agent,
    default headers and a retry loop.
    """

    def __init__(self, *args, **kwargs):
        pass

    @property
    def user_agent(self):
        """
        return an User-Agent at random
        :return:
        """
        ua_list = [
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95',
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71',
            'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)',
            'Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50',
            'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0',
        ]
        return random.choice(ua_list)

    @property
    def header(self):
        """
        basic header (a new dict on every access)
        :return:
        """
        return {'User-Agent': self.user_agent,
                'Accept': '*/*',
                'Connection': 'keep-alive',
                'Accept-Language': 'zh-CN,zh;q=0.8'}

    def get(self, url, header=None, retry_time=5, timeout=30,
            retry_flag=None, retry_interval=5, *args, **kwargs):
        """
        get method
        :param url: target url
        :param header: extra headers (dict) merged over the basic header
        :param retry_time: number of attempts before giving up
        :param timeout: network timeout
        :param retry_flag: iterable of markers (str or bytes); if any of them
                           occurs in the response body the request is retried
        :param retry_interval: retry interval(second)
        :param args: unused
        :param kwargs: forwarded to ``requests.get``
        :return: the response; after ``retry_time`` failed attempts an empty
                 ``Response`` whose status_code is set to 200
        """
        headers = self.header
        if header and isinstance(header, dict):
            headers.update(header)
        # ``html.content`` is bytes, so text markers are encoded up front;
        # otherwise the membership test raises TypeError on Python 3 and the
        # request is wrongly treated as failed.
        flags = [flag if isinstance(flag, bytes) else flag.encode('utf-8')
                 for flag in (retry_flag or ())]
        while True:
            try:
                html = requests.get(url, headers=headers, timeout=timeout, **kwargs)
                if any(f in html.content for f in flags):
                    raise Exception
                return html
            except Exception as e:
                print(e)
                retry_time -= 1
                if retry_time <= 0:
                    # All attempts failed: hand back a placeholder response.
                    resp = Response()
                    resp.status_code = 200
                    return resp
                time.sleep(retry_interval)

View File

@ -0,0 +1,16 @@
# -*- coding: utf-8 -*-
"""
-------------------------------------------------
File Name __init__.py.py
Description :
Author : JHao
date 2016/11/25
-------------------------------------------------
Change Activity:
2016/11/25:
-------------------------------------------------
"""
from Util.utilFunction import validUsefulProxy
from Util.LogHandler import LogHandler
from Util.utilClass import Singleton

View File

@ -0,0 +1,45 @@
# -*- coding: utf-8 -*-
# !/usr/bin/env python
"""
-------------------------------------------------
File Name utilClass.py
Description : tool class
Author : JHao
date 2016/12/3
-------------------------------------------------
Change Activity:
2016/12/3: Class LazyProperty
-------------------------------------------------
"""
__author__ = 'JHao'
class LazyProperty(object):
    """
    Descriptor that computes a value on first access and then stores it on
    the instance under the wrapped function's name.
    explain: http://www.spiderpy.cn/blog/5/
    """

    def __init__(self, func):
        self.func = func

    def __get__(self, instance, owner):
        if instance is not None:
            computed = self.func(instance)
            # Store the result on the instance under the function's name.
            setattr(instance, self.func.__name__, computed)
            return computed
        # Accessed on the class itself: hand back the descriptor.
        return self
class Singleton(type):
    """
    Singleton Metaclass

    The first call of a class using this metaclass creates its instance;
    every later call returns that same instance and ignores its arguments.
    """

    # Maps each class using this metaclass to its single instance.
    _inst = {}

    def __call__(cls, *args, **kwargs):
        if cls not in cls._inst:
            # Forward keyword arguments too; they used to be dropped, so
            # keyword constructor arguments were silently ignored.
            cls._inst[cls] = super(Singleton, cls).__call__(*args, **kwargs)
        return cls._inst[cls]

View File

@ -0,0 +1,95 @@
# -*- coding: utf-8 -*-
# !/usr/bin/env python
"""
-------------------------------------------------
File Name utilFunction.py
Description : tool function
Author : JHao
date 2016/11/25
-------------------------------------------------
Change Activity:
2016/11/25: 添加robustCrawlverifyProxygetHtmlTree
-------------------------------------------------
"""
import requests
from lxml import etree
from Util.WebRequest import WebRequest
def robustCrawl(func):
    """
    Decorator that makes a crawl function best-effort: any Exception raised
    by ``func`` is swallowed and ``None`` is returned instead.

    :param func: function to wrap
    :return: wrapper that keeps ``func``'s name and docstring
    """
    from functools import wraps

    @wraps(func)
    def decorate(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except Exception:
            # Deliberately silent: a failing crawl must not stop the caller.
            return None
    return decorate
def verifyProxyFormat(proxy):
    """
    Check that a proxy string has the ``ip:port`` shape.

    The whole string must be exactly one match of four dot-separated groups
    of 1-3 digits, a colon, and 1-5 digits. Numeric ranges are not checked.

    :param proxy: candidate proxy string
    :return: True when the string is exactly one such match, else False
    """
    import re
    verify_regex = r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d{1,5}"
    matches = re.findall(verify_regex, proxy)
    # Exactly one match, and that match is the entire input.
    return matches == [proxy]
def getHtmlTree(url, **kwargs):
    """
    Fetch a page and parse it into an lxml HTML tree.

    :param url: address of the page to download
    :param kwargs: accepted for call compatibility; not used in this function
    :return: the value of ``etree.HTML`` applied to the response content
    """
    browser_headers = {
        'Connection': 'keep-alive',
        'Cache-Control': 'max-age=0',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko)',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'zh-CN,zh;q=0.8',
    }
    # TODO: take a proxy server and send the request through it
    response = WebRequest().get(url=url, header=browser_headers)
    return etree.HTML(response.content)
def tcpConnect(proxy):
    """
    Probe a proxy with a plain TCP connect (three-way handshake).

    :param proxy: ``"ip:port"`` string
    :return: True when ``connect_ex`` reports success (0), else False
    :raises ValueError: if ``proxy`` is not of the form ``ip:port`` or the
        port is not an integer
    """
    from socket import socket, AF_INET, SOCK_STREAM
    # Parse first so a malformed proxy string never allocates a socket.
    ip, port = proxy.split(':')
    port = int(port)
    s = socket(AF_INET, SOCK_STREAM)
    try:
        return s.connect_ex((ip, port)) == 0
    finally:
        # Always release the descriptor; the socket was previously leaked
        # on every call.
        s.close()
def validUsefulProxy(proxy):
    """
    Check whether a proxy is usable.

    Sends a GET to http://www.baidu.com through the proxy (10 second
    timeout) and treats an HTTP 200 response as success. Any exception
    raised by the request counts as failure.

    :param proxy: ``ip:port`` as str, or bytes (decoded as UTF-8)
    :return: True if the request returned status 200, else False
    """
    proxy_addr = proxy.decode("utf8") if isinstance(proxy, bytes) else proxy
    proxy_map = {"http": "http://{proxy}".format(proxy=proxy_addr)}
    try:
        response = requests.get('http://www.baidu.com', proxies=proxy_map, timeout=10, verify=False)
        usable = response.status_code == 200
    except Exception:
        # Any failure while using the proxy means it is not usable.
        usable = False
    return usable

View File

@ -0,0 +1,13 @@
# -*- coding: utf-8 -*-
"""
-------------------------------------------------
File Name __init__.py
Description :
Author : JHao
date 2016/12/3
-------------------------------------------------
Change Activity:
2016/12/3:
-------------------------------------------------
"""
__author__ = 'JHao'

View File

@ -0,0 +1 @@
theme: jekyll-theme-time-machine

View File

@ -0,0 +1,52 @@
# -*- coding: utf-8 -*-
"""
-------------------------------------------------
File Name proxy_pool
Description :
Author : JHao
date 2019/8/2
-------------------------------------------------
Change Activity:
2019/8/2:
-------------------------------------------------
"""
__author__ = 'JHao'
import sys
import click
import platform
sys.path.append('../')
from Config.setting import HEADER
from Schedule.ProxyScheduler import runScheduler
from Api.ProxyApi import runFlask,runFlaskWithGunicorn
CONTEXT_SETTINGS = dict(help_option_names=['-h', '--help'])
# Root click command group; subcommands attach to it with @cli.command.
# CONTEXT_SETTINGS makes -h an alias of --help, and --version reports 2.0.0.
# The docstring below is the help text click prints, so it is left as is.
@click.group(context_settings=CONTEXT_SETTINGS)
@click.version_option(version='2.0.0')
def cli():
"""ProxyPool cli工具"""
# "schedule" subcommand: print the HEADER banner, then call runScheduler().
# The docstring below is the help text click prints, so it is left as is.
@cli.command(name="schedule")
def schedule():
""" 启动调度程序 """
click.echo(HEADER)
runScheduler()
@cli.command(name="webserver")
def webserver():
    """ 启动web服务 """
    # "webserver" subcommand: print the banner, then start the web API.
    # On Windows the plain Flask runner is used; elsewhere the Gunicorn one.
    click.echo(HEADER)
    start_server = runFlask if platform.system() == "Windows" else runFlaskWithGunicorn
    start_server()
if __name__ == '__main__':
cli()

View File

@ -0,0 +1,3 @@
#!/usr/bin/env bash
python proxyPool.py webserver &
python proxyPool.py schedule

View File

@ -0,0 +1,8 @@
| block IP | block 日期 | msg |
| ----- | ---- | -------- |
| 144.52.45.149 | 20190815 | 恶意访问 |
| 39.100.153.226 | 20190816 | 恶意访问 |
| 47.102.47.42 | 20190819 | 恶意访问 |
| 125.71.211.125 | 20190820 | 恶意访问 |
如需正常访问请提issues说明

View File

@ -0,0 +1,173 @@
## 代理池介绍
本项目通过爬虫方式持续抓取代理网站公布的免费代理IP实时校验维护部分可以使用的代理并通过api的形式提供外部使用。
### 1、问题
构建一个代理IP池可能有下面这些问题
* 代理IP从何而来
  许多刚接触爬虫的,都试过去西刺、快代理之类有免费代理的网站去抓些免费代理,还是有一些代理能用。
当然,如果你有更好的代理接口也可以自己接入。
  免费代理的采集也很简单,无非就是:`访问页面` —> `正则/xpath提取` —> `保存`
* 如何保证代理质量?
  可以肯定免费的代理IP大部分都是不能用的不然别人还提供付费接口干嘛(不过事实上很多代理商的付费IP也不稳定也有很多是不能用)。
所以采集回来的代理IP不能直接使用检测的办法也很简单可以写个程序不断的用代理访问一个稳定的网站看是否可以正常访问即可。
这个过程可以使用多线/进程或异步的方式,因为检测代理是个很慢的过程。
* 采集回来的代理如何存储?
  这里不得不推荐一个国人开发的高性能支持多种数据结构的NoSQL数据库[SSDB](http://ssdb.io/docs/zh_cn/)用于替代Redis。支持队列、hash、set、k-v对支持T级别数据。是做分布式爬虫很好中间存储工具。
* 如何让爬虫更方便的用到这些代理?
  答案肯定是做成服务咯Python有这么多的web框架随便拿一个来写个api供爬虫调用。这样代理和爬虫架构分离有很多好处
比如:当爬虫完全不用考虑如何校验代理,如何保证拿到的代理可用,这些都由代理池来完成。这样只需要安静的码爬虫代码就行啦。
### 2、代理池设计
  代理池由四部分组成:
* ProxyGetter:
  代理获取接口目前有5个免费代理源每调用一次就会抓取这个5个网站的最新代理放入DB支持自定义扩展额外的代理获取接口
* DB:
  用于存放代理IP目前支持SSDB和Redis(推荐SSDB)。至于为什么选择SSDB大家可以参考这篇[文章](https://www.sdk.cn/news/2684),个人觉得SSDB是个不错的Redis替代方案如果你没有用过SSDB安装起来也很简单可以参考[这里](https://github.com/jhao104/memory-notes/blob/master/SSDB/SSDB%E5%AE%89%E8%A3%85%E9%85%8D%E7%BD%AE%E8%AE%B0%E5%BD%95.md)
* Schedule:
  计划任务定时去检测DB中的代理可用性删除不可用的代理。同时也会主动通过ProxyGetter去获取最新代理放入DB
* ProxyApi:
  代理池的外部接口,由[Flask](http://flask.pocoo.org/)实现,功能是给爬虫提供与代理池交互的接口。
<!--#### 功能图纸-->
![设计](https://pic2.zhimg.com/v2-f2756da2986aa8a8cab1f9562a115b55_b.png)
### 3、代码模块
  Python中高层次的数据结构,动态类型和动态绑定,使得它非常适合于快速应用开发,也适合于作为胶水语言连接已有的软件部件。用Python来搞这个代理IP池也很简单代码分为6个模块
* Api:
  api接口相关代码目前api是由Flask实现代码也非常简单。客户端请求传给FlaskFlask调用`ProxyManager`中的实现,包括`get/delete/refresh/get_all`
* DB:
  数据库相关代码目前数据库是支持SSDB/Redis。代码用工厂模式实现方便日后扩展其他类型数据库
* Manager:
  `get/delete/refresh/get_all`等接口的具体实现类目前代理池只负责管理proxy日后可能会有更多功能比如代理和爬虫的绑定代理和账号的绑定等等
* ProxyGetter:
  代理获取的相关代码,目前抓取了[快代理](http://www.kuaidaili.com)、[代理66](http://www.66ip.cn/)、[有代理](http://www.youdaili.net/Daili/http/)、[西刺代理](http://api.xicidaili.com/free2016.txt)、[goubanjia](http://www.goubanjia.com/free/gngn/index.shtml)这五个网站的免费代理,经测试这5个网站每天更新的可用代理只有六七十个,当然也支持自己扩展代理接口;
* Schedule:
  定时任务相关代码,现在只是实现定时去刷新代理,并验证可用代理,采用多进程方式;
* Util:
  存放一些公共的模块方法或函数,包含`GetConfig`:读取配置文件config.ini的类`ConfigParse`: 扩展ConfigParser的类使其对大小写敏感 `Singleton`:实现单例,`LazyProperty`:实现类属性惰性计算。等等;
* 其他文件:
  配置文件:`Config.ini`,数据库配置和代理获取接口配置,可以在GetFreeProxy中添加新的代理获取方法,并在Config.ini中注册即可使用;
### 4、安装
下载代码:
```
git clone git@github.com:jhao104/proxy_pool.git
或者直接到https://github.com/jhao104/proxy_pool 下载zip文件
```
安装依赖:
```
pip install -r requirements.txt
```
启动:
```
如果你的依赖已经安装完成并且具备运行条件,可以直接在Run下运行main.py
到Run目录下:
>>>python main.py
如果运行成功你应该可以看到有4个main.py进程在
你也可以分别运行他们,依次到Api下启动ProxyApi.py,Schedule下启动ProxyRefreshSchedule.py和ProxyValidSchedule.py即可
```
docker:
```
git clone git@github.com:jhao104/proxy_pool.git
cd proxy_pool
docker build -t proxy:latest -f Dockerfile .
docker run -p 5010:5010 -d proxy:latest
# Wait a few minutes
curl localhost:5010/get/
# result: xxx.xxx.xxx.xxx:xxxx
curl localhost:5010/get_all/
```
### 5、使用
  定时任务启动后会通过GetFreeProxy中的方法抓取代理存入数据库并验证。此后默认每10分钟会重复执行一次。定时任务启动大概一两分钟后便可在[SSDB](https://github.com/jhao104/SSDBAdmin)中看到刷新出来的可用的代理:
![useful_proxy](https://pic2.zhimg.com/v2-12f9b7eb72f60663212f317535a113d1_b.png)
  启动ProxyApi.py后即可在浏览器中使用接口获取代理,以下是浏览器中的截图:
  index页面:
![index](https://pic3.zhimg.com/v2-a867aa3db1d413fea8aeeb4c693f004a_b.png)
  get
![get](https://pic1.zhimg.com/v2-f54b876b428893235533de20f2edbfe0_b.png)
  get_all
![get_all](https://pic3.zhimg.com/v2-5c79f8c07e04f9ef655b9bea406d0306_b.png)
  爬虫中使用,如果要在爬虫代码中使用的话, 可以将此api封装成函数直接使用例如:
```
import requests
def get_proxy():
return requests.get("http://127.0.0.1:5010/get/").content
def delete_proxy(proxy):
requests.get("http://127.0.0.1:5010/delete/?proxy={}".format(proxy))
# your spider code
def spider():
# ....
requests.get('https://www.example.com', proxies={"http": "http://{}".format(get_proxy())})
# ....
```
  测试地址http://123.207.35.36:5010 单机勿压测。谢谢
### 6、最后
  时间仓促功能和代码都比较简陋以后有时间再改进。喜欢的在github上给个star。感谢

View File

@ -0,0 +1,50 @@
## Release Notes
* master
1. 新增免费代理源 `西拉代理` 2020-03-30
* 2.0.1
1. 新增免费代理源 `89免费代理`;
2. 新增免费代理源 `齐云代理`
* 2.0.0 (201908)
1. WebApi集成Gunicorn方式启动, Windows平台暂不支持;
2. 优化Proxy调度程序;
3. 扩展Proxy属性;
4. 提供cli工具, 更加方便启动proxyPool
* 1.14 (2019.07)
1. 修复`ProxyValidSchedule`假死bug,原因是Queue阻塞;
2. 修改代理源 `云代理` 抓取;
3. 修改代理源 `码农代理` 抓取;
4. 修改代理源 `代理66` 抓取, 引入 `PyExecJS` 模块破解加速乐动态Cookies加密;
* 1.13 (2019.02)
1.使用.py文件替换.ini作为配置文件
2.更新代理采集部分;
* 1.12 (2018.4)
1.优化代理格式检查;
2.增加代理源;
3.fix bug [#122](https://github.com/jhao104/proxy_pool/issues/122) [#126](https://github.com/jhao104/proxy_pool/issues/126)
* 1.11 (2017.8)
  1.使用多线程验证useful_pool;
* 1.10 (2016.11)
  1. 第一版;
  2. 支持PY2/PY3;
  3. 代理池基本功能;

View File

@ -0,0 +1,11 @@
```
docker run -itd --name proxy-pool \
-p 6800:5010 \
-e TZ=Asia/Shanghai \
-e db_type=REDIS \
-e db_host=107.182.191.3 \
-e db_port=7379 \
-e db_password=jlkj-841-2-redis \
-m 256m --memory-swap -1 \
proxypool:latest
```

View File

@ -0,0 +1,20 @@
APScheduler==3.2.0
certifi==2020.6.20
chardet==3.0.4
Click==7.0
Flask==1.0
gunicorn==19.9.0
idna==2.7
itsdangerous==1.1.0
Jinja2==2.11.2
lxml==4.4.2
MarkupSafe==1.1.1
PyExecJS==1.5.1
pymongo==3.11.0
pytz==2020.1
redis==3.5.3
requests==2.20.0
six==1.15.0
tzlocal==2.1
urllib3==1.24.3
Werkzeug==0.15.5

20
deploy/ProxyPool/test.py Normal file
View File

@ -0,0 +1,20 @@
# -*- coding: utf-8 -*-
"""
-------------------------------------------------
File Name test.py
Description :
Author : JHao
date 2017/3/7
-------------------------------------------------
Change Activity:
2017/3/7:
-------------------------------------------------
"""
# __author__ = 'JHao'
# from Test import testConfig
# if __name__ == '__main__':
# testConfig.testConfig()
print(0<0)

1
deploy/README.md Normal file
View File

@ -0,0 +1 @@
# 安装部署

View File

@ -0,0 +1,32 @@
version: '2'
services:
zookeeper:
container_name: kafka-zk
image: zookeeper:3.7.0
ports:
- "2181:2181"
- "2888:2888"
- "3888:3888"
restart: always
environment:
TZ: CST-8
kafka:
container_name: kafka-server
image: wurstmeister/kafka:2.13-2.7.0
ports:
- "9092:9092"
- "1099:1099"
- "9999:9999"
environment:
TZ: CST-8
HOSTNAME: B144
KAFKA_ADVERTISED_HOST_NAME: 104.225.146.144
KAFKA_CREATE_TOPICS: "test:1:1"
KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
KAFKA_LOG_RETENTION_HOURS: 72
KAFKA_JMX_OPTS: "-Djava.rmi.server.hostname=104.225.146.144 -Dcom.sun.management.jmxremote=true -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.port=1099 -Dcom.sun.management.jmxremote.rmi.port=9999"
JMX_PORT: 1099
restart: always
volumes:
- /usr/local/dockerfs/kafka/logs:/kafka
- /usr/local/dockerfs/kafka/docker.sock:/var/run/docker.sock

3
deploy/proto/README.md Normal file
View File

@ -0,0 +1,3 @@
## 操作方法
`generate-py.bat` 中的 protoc 可执行程序在 [protobuf-3.11.4.zip](http://39.98.151.140:28080/software/protobuf-3.11.4.zip) 解压后的压缩包里
脚本后面的定义文件目录改成 proto definition 文件所在的目录,输出目录可以自定

View File

@ -0,0 +1,30 @@
syntax = "proto3";
message AirportInfoSets //
{
repeated AirportInfo AirportInfo = 1;
}
message AirportInfo
{
string ID = 1; //ID
string Name = 2; //
string ICAO = 3; //ICAO码
string IATA = 4; //IATA码
string GPS = 5; //GPS代码
string Type = 6; //
string UsageType = 7; //
string Continent = 8; //
string Nation = 9; ///
string City = 10; //
string Height = 11; //
string Longitude = 12; //
string Latitude = 13; //
string Image = 14; //
string UpdateTime = 15; //
string LastTime = 16; //
string Sensitive = 17; //0/1/
string DataSource = 18; //
}

View File

@ -0,0 +1,159 @@
syntax = "proto3";
message ArgoInfoSets //
{
repeated ArgoInfo ArgoInfo = 1;
}
message ArgoFirstDeployInfoSets //
{
repeated ArgoFirstDeployInfo ArgoFirstDeployInfo = 1;
}
message ArgoCommunicationInfoSets //
{
repeated ArgoCommunicationInfo ArgoCommunicationInfo = 1;
}
message ArgoSensorInfoSets //
{
repeated ArgoSensorInfo ArgoSensorInfo = 1;
}
message ArgoTechnicalInfoSets //
{
repeated ArgoTechnicalInfo ArgoTechnicalInfo = 1;
}
message ArgoDataManipulationInfoSets //
{
repeated ArgoDataManipulationInfo ArgoDataManipulationInfo = 1;
}
message ArgoMeasureInfoSets //
{
repeated ArgoMeasureInfo ArgoMeasureInfo = 1;
}
message ArgoInfo
{
string ID = 1; //ID
string PlatformNum = 2; //
string FloatSerialNum = 3; //
string PlatformMaker = 4; //
string PlatformType = 5; //
string PlatformModel = 6; //
string ProjectName = 7; //
string PiName = 8; //
string DataCenter = 9; //
string HistoryInstitution = 10; //
string PositioningSys = 11; //
string InstReference = 12; //
string WMOInstType = 13; //WMO编码仪器类型
string Image = 14; //
string Nation = 15; //
string DataSource = 16; //
string UpdateTime = 17; //
string LastTime = 18; //
string Sensitive = 19; //0/1/
}
message ArgoFirstDeployInfo
{
string ID = 1; //ID
string PlatformNum = 2; //
string LaunchDate = 3; //
string LaunchLatitude = 4; //
string LaunchLongitude = 5; //
string StartDate = 6; //
string LaunchQC = 7; //
string DeployedPlatform = 8; //
string DeployedMission = 9; //
string UpdateTime = 10; //
string LastTime = 11; //
}
message ArgoCommunicationInfo
{
string ID = 1; //ID
string PlatformNum = 2; //
string PTT = 3; //线
string TransSys = 4; //线
string TransFreq = 5; //线
string TransRepetition = 6; //线
string UpdateTime = 7; //
string LastTime = 8; //
}
message ArgoSensorInfo
{
string ID = 1; //ID
string PlatformNum = 2; //
string SensorSerial = 3; //
string SensorName = 4; //
string SensorMaker = 5; //
string SensorModel = 6; //
string SensorUnits = 7; //
string SensorResolution = 8; //
string SensorAccuracy = 9; //
string FirmwareVersion = 10; //
string UpdateTime = 11; //
string LastTime = 12; //
}
message ArgoTechnicalInfo
{
string ID = 1; //ID
string PlatformNum = 2; //
string DataType = 3; //
string FormatVersion = 4; //
string HandbookVersion = 5; //
string DataCenter = 6; //
string CreationDate = 7; //nc文件创建日期
string UpdateDate = 8; //nc文件更新日期
string UpdateTime = 9; //
string LastTime = 10; //
}
message ArgoDataManipulationInfo
{
string ID = 1; //ID
string PlatformNum = 2; //
string HistoryStep = 3; //
string HistorySoftware = 4; //
string HistorySoftwareRelease = 5; //
string HistoryReference = 6; //
string HistoryDate = 7; //
string HistoryAction = 8; //
string HistoryParameter = 9; //
string HistoryStartPres = 10; //
string HistoryStopPres = 11; //
string HistoryPreviousValue = 12; ///
string HistoryQctest = 13; //
string EndMissionDate = 14; //
string EndMissionStatus = 15; //
string UpdateTime = 16; //
string LastTime = 17; //
}
message ArgoMeasureInfo
{
string ID = 1; //ID
string PlatformNum = 2; //ID
string Sensor = 3; //使
string Longitude = 4; //
string Latitude = 5; //
string Pres = 6; //
string PresQC = 7; //
string Temp = 8; //
string TempQC = 9; //
string Psal = 10; //
string PsalQC = 11; //
string ScientificCalibEquation = 12; //
string ScientificCalibCoefficient = 13; //
string UpdateTime = 14; //
string LastTime = 15; //
string Status = 16; //:1线2线
}

View File

@ -0,0 +1,85 @@
syntax = "proto3";
// Wrapper message holding a repeated list of Es records.
// NOTE(review): the original trailing comment was mis-encoded and unreadable.
message EsSets // es set (reconstructed)
{
repeated Es Es = 1;
}
// One Es record; every field is declared as string.
// NOTE(review): the original per-field comments were corrupted by an encoding
// mismatch and could not be recovered. The notes below are reconstructed from
// the field names and the few ASCII fragments that survived; each one is an
// assumption to be confirmed against the producer of this data.
message Es
{
string es_sid = 1; // presumably record/serial id - confirm
string es_subjectId = 2; // presumably subject id ("id" survived in the original comment)
string es_hkey = 3; // presumably unique key of the URL ("URL" survived) - confirm
string es_pkey = 4; // presumably key of the parent URL ("URL" survived) - confirm
string es_startid = 5; // presumably id of the start/seed entry - confirm
string es_urlname = 6; // presumably the URL itself ("URL" survived) - confirm
string es_sitename = 7; // presumably site name - confirm
string es_extname = 8; // presumably extension/suffix name - confirm
string es_channel = 9; // presumably channel the page belongs to - confirm
string es_groupname = 10; // presumably group name - confirm
string es_urltitle = 11; // presumably page title - confirm
string es_urltopic = 12; // presumably title taken from the page <title> tag ("<title>" survived) - confirm
string es_lasttime = 13; // presumably collection date/time - confirm
string es_loadtime = 14; // presumably time the record was loaded into ES ("ES" survived) - confirm
string es_urldate = 15; // presumably article publication date - confirm
string es_urltime = 16; // presumably article publication date/time - confirm
string es_srcname = 17; // presumably article source name - confirm
string es_authors = 18; // presumably article author(s) - confirm
string es_district = 19; // presumably district/region related to the article - confirm
string es_catalog = 20; // presumably category - confirm
string es_catalog1 = 21; // presumably first-level category - confirm
string es_catalog2 = 22; // presumably second-level category - confirm
string es_keywords = 23; // presumably article keywords - confirm
string es_abstract = 24; // presumably article abstract - confirm
string es_simflag = 25; // presumably duplicate marker referring to another page's HKEY ("HKEY" survived) - confirm
string es_simrank = 26; // presumably similarity value - confirm
string es_urlimage = 27; // presumably image address - confirm
string es_imageflag = 28; // presumably number of images in the page - confirm
string es_tableflag = 29; // presumably number of tables in the page - confirm
string es_doclength = 30; // presumably document length - confirm
string es_content = 31; // presumably page content; original comment was unreadable - confirm
string es_urlcontent = 32; // presumably page content; how it differs from es_content is unknown - confirm
string es_bbsnum = 33; // meaning unknown from here - confirm
string es_pagelevel = 34; // presumably depth counted from the start page - confirm
string es_urllevel = 35; // presumably directory depth of the link - confirm
string es_simhash = 36; // presumably simhash value ("simhash" survived) - confirm
string es_ip = 37; // presumably an ip address ("ip" survived) - confirm
string es_heat = 38; // presumably heat/popularity score - confirm
string es_similaritycount = 39; // presumably count of similar records - confirm
string es_similarity = 40; // presumably id(s) of similar records ("id" survived) - confirm
string es_similaritytime = 41; // presumably time the similarity was computed - confirm
string es_emotion = 42; // presumably sentiment - confirm
string es_warningtime = 43; // presumably warning time - confirm
string es_carriertype = 44; // presumably carrier/media type - confirm
string es_commentcount = 45; // presumably number of comments - confirm
string es_forwardcount = 46; // presumably number of forwards - confirm
string es_positiveWords = 47; // presumably positive words - confirm
string es_negativeWords = 48; // presumably negative words - confirm
string es_negativeProbability = 49; // presumably negative-sentiment probability - confirm
string es_reportinfo = 50; // presumably whether the record was reported - confirm
string es_attention = 51; // presumably whether the record is followed - confirm
string es_warning = 52; // presumably whether a warning was raised - confirm
string es_readsign = 53; // presumably whether the record was read - confirm
string es_briefing = 54; // presumably whether the record is in a briefing - confirm
string es_warning_word = 55; // presumably warning word(s) - confirm
string es_attentiontime = 56; // presumably time the record was followed - confirm
string es_collection = 57; // presumably whether the record is bookmarked - confirm
string es_attachment = 58; // presumably attachment(s) - confirm
string es_userid = 59;// original comment tagged this "number"; presumably user id ("id" survived) - confirm
string es_contenttype = 60;// original comment tagged this "string" and listed: status, link, photo, video, event, music, note, offer, album; presumably the Post type - confirm
string es_likecount = 61;// original comment tagged this "number"; presumably like count - confirm
string es_links = 62;// original comment tagged this "string"; presumably link address(es) contained in the content - confirm
string es_reactioncount = 63;// original comment tagged this "number"; presumably reaction count - confirm
string es_linkdesc = 64;// original comment tagged this "string" and mentioned "post"; presumably description of a linked post - confirm
string es_repostuid = 65;// original comment tagged this "number"; presumably ID of the original author of a repost ("ID" survived) - confirm
string es_repostuname =66;// original comment tagged this "string"; presumably name of the original author of a repost ("name" survived) - confirm
string es_repostid = 67;// original comment tagged this "string"; presumably ID of the reposted original ("ID" survived) - confirm
string es_tags = 68;// original comment tagged this "string"; presumably tags - confirm
string es_mentionsaccount = 69;// original comment tagged this "string"; presumably mentioned account(s) - confirm
string es_video = 70;// original comment tagged this "string"; presumably video(s) contained in the content - confirm
string es_isrepost = 71;// original comment tagged this "boolean"; presumably whether this is a repost - confirm
string es_lang = 72;// original comment tagged this "string"; presumably language - confirm
string es_client = 73;// original comment tagged this "string"; presumably client - confirm
string es_snapshot = 74;
}

View File

@ -0,0 +1,21 @@
syntax = "proto3";
message FaceBookUserInfo
{
string ID = 1; //id主键
string NAME = 2; //
string GENDER = 3; //
string BRITHDAY = 4; //
repeated string EDUCATION = 5; //["学历:学校"]
repeated string WORK = 6; // ["工作单位"]
repeated string LIVING = 7; // ["家乡:地址","现住地:地址"]
repeated string CONTACT = 8; // ["类别:地址"]
repeated string YEAR_OVERVIEW = 9; // ["日期:事件"]
string RELATIONSHIP = 10; //
repeated string FRIEND_IDS = 11;
}
message FaceBookUserInfoSets
{
repeated FaceBookUserInfo SETS = 1;
}

View File

@ -0,0 +1,37 @@
syntax = "proto3";
message LaunchSiteInfoSets //
{
repeated LaunchSiteInfo LaunchSiteInfo = 1;
}
message LaunchPlaneInfoSets //
{
repeated LaunchPlaneInfo LaunchPlaneInfo = 1;
}
message LaunchSiteInfo
{
string ID = 1; //ID
string Name_Zh = 2; //
string Name_En = 3; //
string Position = 4; //
string Nation = 5; //
string BuilderTime = 6; //
string Affiliates = 7; //
string Longitude = 8; //
string Latitude = 9; //
string Image = 10; //
string UpdateTime = 11; //
string LastTime = 12; //
string Sensitive = 13; //0/1/
}
message LaunchPlaneInfo
{
string ID = 1; //ID
string postid =2; //id
string date =3; //
string title =4; //
string excerpt = 5; //
}

View File

@ -0,0 +1,29 @@
syntax = "proto3";
message NotamInfoSets
{
repeated NotamInfo NotamInfo =1;
}
message NotamInfo
{
string ID = 1;
string NotamNumber = 2;
string IssueDate = 3;
string Location = 4;
string BeginningDatetime =5;
string EndingDateTime =6;
string Reason = 7;
string Type =8;
string AffectedAreaType = 9;
string AffectedAreas = 10;
string EffectiveDates = 11;
string Image = 12;
string DelFlag = 13;
string UpdateTime = 14;
string Lasttime = 15;
string EditStatus = 16;
}

View File

@ -0,0 +1,163 @@
syntax = "proto3";
message PlaneInfoSets //
{
repeated PlaneInfo PlaneInfo = 1;
}
message FligthInfoSets //
{
repeated FligthInfo FligthInfo = 1;
}
message FligthTrackpointInfoSets //
{
repeated FligthTrackpointInfo FligthTrackpointInfo = 1;
}
message FligthTrackpointAllInfoSets //
{
repeated FligthTrackpointAllInfo FligthTrackpointAllInfo = 1;
}
message PlaneBaseInfoSets //
{
repeated PlaneBaseInfo PlaneBaseInfo = 1;
}
message PlaneInfo
{
string ID = 1; //ID
string MSN = 2; //
string Reg = 3; //
string ICAO = 4; //ICAO
string Callsign = 5; //
string Owner = 6; //
string Nation = 7; //
string UsageType = 8; //
string Model = 9; //
string Image = 10; //
string Age = 11; //
string UpdateTime = 12; //
string DataSource = 13; //
string LastTime = 14; //
string Type = 15; //
string DisplayModel = 16; //
string Sensitive = 17; //0/1/
}
message FligthInfo
{
string ID = 1; //ID
string FlightID = 2; //ID
string ICAO = 3; //ICAO
string IATA = 4; //IATA
string PlaneReg = 5; //
string TakeoffBase = 6; //
string TakeoffBaseICAO = 7; //ICAO
string TakeoffBaseIATA = 8; //IATA
string LandBase = 9; //
string LandBaseICAO = 10; //ICAO
string LandBaseIATA = 11; //IATA
string ScheduleBegTime = 12; //
string ActualBegTime = 13; //
string ScheduleArrTime = 14; //
string ActualArrTime = 15; //
string UpdateTime = 16; //
string LastTime = 17; //
string Status = 18; //:12
}
message FligthTrackpointInfo
{
string ID = 1; //ID
string FlightID = 2; //ID
string FlightIATA = 3; //IATA
string FlightICAO = 4; //ICAO
string PlaneREG = 5; //
string Longitude = 6; //
string Latitude = 7; //
string Height = 8; //
string Speed = 9; //
string Angle = 10; //
string UpdateTime = 11; //
string LastTime = 12; //
string Status = 13; //:1线2线
}
message FligthTrackpointAllInfo
{
string ID = 1; //ID
string FlightID = 2; //ID
string FlightIATA = 3; //IATA
string FlightICAO = 4; //ICAO
string PlaneREG = 5; //
string Longitude = 6; //
string Latitude = 7; //
string Height = 8; //
string Speed = 9; //
string Angle = 10; //
string UpdateTime = 11; //
string LastTime = 12; //
string Status = 13; //:1线2线
string TakeoffBase = 14; //
string TakeoffBaseICAO = 15; //ICAO
string TakeoffBaseIATA = 16; //IATA
string LandBase = 17; //
string LandBaseICAO = 18; //ICAO
string LandBaseIATA = 19; //IATA
string ScheduleBegTime = 20; //
string ActualBegTime = 21; //
string ScheduleArrTime = 22; //
string ActualArrTime = 23; //
}
message PlaneBaseInfo
{
string ID = 1; // id
string Type = 2; //
string BaseInfo = 3; //
string BaseInfoSource = 4; //
string Crew = 5; //
string CrewSource = 6; //
string PracticalCeiling = 7; //
string PracticalCeilingSource = 8; //
string Length = 9; //
string LengthSource = 10; //
string MaxRange = 11; //
string MaxRangeSource = 12; //
string WingSpan = 13; //
string WingSpanSource = 14; //
string EmptyWeight = 15; //
string EmptyWeightSource = 16; //
string ZeroFuelWeight = 17; //
string ZeroFuelWeightSource = 18; //
string MaxTakeoffWeight = 19; //
string MaxTakeoffWeightSource = 20; //
string Height = 21; //
string HeightSource = 22; //
string MaxSpeed = 23; //
string MaxSpeedSource = 24; //
string CruiseSpeed = 25; //
string CruiseSpeedSource = 26; //
string CombatRange = 27; //
string CombatRangeSource = 28; //
string Endurance = 29; //
string EnduranceSource = 30; //
string Radar = 31; //
string RadarSource = 32; //
string ElectronicWarfare = 33; //
string ElectronicWarfareSource = 34; //
string AntiSubmarine = 35; //
string AntiSubmarineSource = 36; //
string Missile = 37; //
string MissileSource = 38; //
string TorpedoMine = 39; // /
string TorpedoMineSource = 40; // /
string CommandAndControl = 41; //
string CommandAndControlSource = 42; //
string Communication = 43; //
string CommunicationSource = 44; //
string UpdateTime = 45; //
string LastTime = 46; //
}

View File

@ -0,0 +1,27 @@
syntax = "proto3";
message PortInfoSets //
{
repeated PortInfo PortInfo = 1;
}
message PortInfo
{
string ID = 1; //ID
string GlobalCode = 2; //id
string Name_En = 3; //
string Code = 4; //
string UsageType = 5; //
string Nation = 6; //
string Image = 7; //
string Route = 8; //线
string DataSource = 9; //
string Longitude = 10; //
string Latitude = 11; //
string UpdateTime = 12; //
string LastTime = 13; //
string Name_Zh = 14; //
string Sensitive = 15; //0/1/
}

View File

@ -0,0 +1,55 @@
syntax = "proto3";
message SatelliteInfoSets //
{
repeated SatelliteInfo SatelliteInfo = 1;
}
message SatelliteTrackpointInfoSets //
{
repeated SatelliteTrackpointInfo SatelliteTrackpointInfo = 1;
}
message SatelliteInfo
{
string ID = 1; //ID
string Image = 2; //
string NORAD = 3; //NORAD编号
string NSSDC = 4; //
string Nation = 5; //
string Name = 6; //
string Type = 7; //
string LaunchTime = 8; //
string RCS = 9; //RCS
string Perigee = 10; //km
string Apogee = 11; //(km)
string Inclination = 12; //()
string MonitorDiam = 13; //km
string Eccentricity = 14; //
string Period = 15; //
string LaunchSite = 16; //
string LaunchTimes = 17; //
string CarrierCode = 18; //
string CarrierName = 19; //
string DataSource = 20; //
string IsOnRail = 21; //
string UpdateTime = 22; //
string LastTime = 23; //
string Sensitive = 24; //0/1/
}
message SatelliteTrackpointInfo
{
string ID = 1; //ID
string SatelliteNORAD = 2; //NORAD
string Longitude = 3; //
string Latitude = 4; //
string Height = 5; //
string Speed = 6; //km/s
string UpdateTime = 7; //
string LastTime = 8; //
string Status = 9; //:1线2线
}

View File

@ -0,0 +1,145 @@
syntax = "proto3";
message ShipInfoSets //
{
repeated ShipInfo ShipInfo = 1;
}
message VoyageTrackpointInfoSets //
{
repeated VoyageTrackpointInfo VoyageTrackpointInfo = 1;
}
message VoyageInfoSets //
{
repeated VoyageInfo VoyageInfo = 1;
}
message ShipBaseInfoSets //
{
repeated ShipBaseInfo ShipBaseInfo = 1;
}
message ShipInfo
{
string ID = 1; //ID
string Name = 2; //
string IMO = 3; //IMO编号
string MMSI = 4; //MMSI编号
string Callsign = 5; //
string Image = 6; //
string RegCountry = 7; //
string Owner = 8; //
string Builder = 9; //
string BuilderTime = 10; //
string Type = 11; //
string SourceType = 12; //
string UsageType = 13; //
string Nettonnage = 14; //
string Width = 15; //
string Length = 16; //chang
string LeftPost = 17; //
string Trail = 18; //
string Draught = 19; //
string CrewNum = 20; //
string DataSource = 21; //
string UpdateTime = 22; //
string LastTime = 23; //
string Sensitive = 24; //0/1/
}
message VoyageTrackpointInfo
{
string ID = 1; //ID
string VoyageID = 2; //ID
string ShipMMSI = 3; //MMSI
string Head = 4; //
string Trace = 5; //
string Speed = 6; //
string Status = 7; ///
string Longitude = 8; //
string Latitude = 9; //
string FromPort = 10; //
string FromGlobalCode = 11; //id
string DestPort = 12; //
string DestGlobalCode = 13; //id
string UpdateTime = 14; //
string LastTime = 15; //
string DepTime = 16; //
string DestTime = 17; //
string Position = 18; //
}
message VoyageInfo
{
string ID = 1; //ID
string VoyageID = 2; //ID
string NaviDistance = 3; //
string DepAtbTime = 4; //
string Sog1 = 5; //1
string DepTime = 6; //
string DestPortName_En = 7; //
string DestPortZone = 8; //
string Sog2 = 9; //2
string DestTime = 10; //
string MMSI = 11; // MMSI
string DepCountryCode = 12; //
string DepPortName_En = 13; //
string DestCountryCode = 14; //
string NaviTime = 15; //
string DestPortName_Cn = 16; //
string DepPortName_Cn = 17; //
string Position = 18; //
string Status = 19; //
string DepPortZone = 20; //
string UpdateTime = 21; //
string LastTime = 22; //
}
message ShipBaseInfo
{
string ID = 1; // ID
string DepthType = 2; //
string BaseInfo = 3; //
string BaseInfoSource = 4; //
string Crew = 5; //
string CrewSource = 6; //
string ShipNumber = 7; //
string ShipNumberSource = 8; //
string Length = 9; //
string LengthSource = 10; //
string MaxSpeed = 11; //
string MaxSpeedSource = 12; //
string Width = 13; //
string WidthSource = 14; //
string CruisingSpeed = 15; //
string CruisingSpeedSource = 16; //
string Displacement = 17; //
string DisplacementSource = 18; //
string Draft = 19; //
string DraftSource = 20; //
string Endurance = 21; //
string EnduranceSource = 22; //
string PowerPlant = 23; //
string PowerPlantSource = 24; //
string NavalGun = 25; //
string NavalGunSource = 26; //
string Missile = 27; //
string MissileSource = 28; //
string Torpedo = 29; //
string TorpedoSource = 30; //
string CarrierAircraft = 31; //
string CarrierAircraftSource = 32; //
string Radar = 33; //
string RadarSource = 34; //
string CommandAndControl = 35; //
string CommandAndControlSource = 36; //
string Sonar = 37; //
string SonarSource = 38; //
string ElectricWarfare = 39; //
string ElectricWarfareSource = 40; //
string UpdateTime = 41; //
string LastTime = 42; //
}

View File

@ -0,0 +1,59 @@
syntax = "proto3";
// Container message: a repeated list of PersonInfo records.
message PersonInfoSets //
{
repeated PersonInfo PersonInfo = 1;
}
// Container message: a repeated list of AirportInfoS records.
message AirportInfoSets //
{
repeated AirportInfoS AirportInfoS = 1;
}
// Container message: a repeated list of PortInfo records.
message PortInfoSets //
{
repeated PortInfo PortInfo = 1;
}
// Container message: a repeated list of OrganizationInfo records.
message OrganizationInfoSets //
{
repeated OrganizationInfo OrganizationInfo = 1;
}
// PersonInfo: a single person record (identifier, name, age, nation and a
// free-text introduction). All values, including Age, are carried as strings.
message PersonInfo
{
string ID = 1; //ID
string NAME = 2; //
string Age = 3; //
string Nation = 4; //
string Introduction = 5; //
}
// AirportInfoS: a single airport record (identifier, name, location text and
// coordinates). Longitude/Latitude are carried as strings, not numbers.
// It has the same field layout as PortInfo.
message AirportInfoS
{
string ID = 1; //ID
string NAME = 2; //
string Location = 3; //
string Longitude = 4; //
string Latitude = 5; //
}
// PortInfo: a single port record (identifier, name, location text and
// coordinates). Longitude/Latitude are carried as strings, not numbers.
// It has the same field layout as AirportInfoS.
message PortInfo
{
string ID = 1; //ID
string NAME = 2; //
string Location = 3; //
string Longitude = 4; //
string Latitude = 5; //
}
// OrganizationInfo: a single organization record (identifier, name, nation,
// commander, type and a free-text introduction). All fields are strings.
message OrganizationInfo
{
string ID = 1; //ID
string NAME = 2; //
string Nation = 3;
string Commander = 4;
string Type = 5;
string Introduction = 6;
}

View File

@ -0,0 +1,2 @@
REM Compile Es.proto with protoc and write the generated Python and Java
REM sources to C:\Users\DELL-1\Downloads\proto.
REM NOTE: every path below is an absolute path on the original author's
REM machine; adjust them before running this script anywhere else.
C:\Users\DELL-1\Downloads\protobuf-3.11.4\protoc.exe -I=D:\git\osc\devops\deploy\proto\definition --python_out=C:\Users\DELL-1\Downloads\proto --java_out=C:\Users\DELL-1\Downloads\proto D:\git\osc\devops\deploy\proto\definition\Es.proto
REM Keep the console window open so the protoc output can be read.
pause

View File

@ -0,0 +1,3 @@
# Persist writes to an append-only file.
appendonly yes
# Flush the append-only file to disk once per second.
appendfsync everysec
# Clients must authenticate with this password.
# NOTE(review): the password is committed in plain text; consider injecting
# it at deploy time instead of keeping it in version control.
requirepass jlkj-841-2-redis

View File

@ -0,0 +1,13 @@
# redis_clear.py removes the Redis queues that are left behind when a
# single-site crawl task finishes. It is meant to be started periodically by
# the crontab service on the server.
from redisbloom.client import Client
import re

# NOTE(review): host, port and password are hard-coded (and the password is
# committed in plain text); consider reading them from the environment.
redis_client = Client(host='107.182.191.3', port=7379, password='jlkj-841-2-redis')

# Abandoned queue keys look like "WebSite_<domain>_<uuid>:<suffix>".
# A raw string keeps the regex escapes (\w, \., \-) from being parsed as
# invalid Python string escapes, which newer interpreters warn about.
pattern = r"WebSite_\w+(\.\w+)+_\w{8}\-\w{4}\-\w{4}\-\w{4}\-\w{12}:\w+"
key_regex = re.compile(pattern)

# SCAN walks the keyspace incrementally instead of blocking the server the
# way KEYS does. The glob pre-filters candidates on the server side and the
# regex performs the exact check.
for key in redis_client.scan_iter(match="WebSite_*"):
    key_str = key.decode()
    if key_regex.match(key_str):
        redis_client.delete(key)
        print(key_str)

11
deploy/scrapyd/Dockerfile Normal file
View File

@ -0,0 +1,11 @@
# Image with Python 3.8.2, the pinned crawler requirements and a Scrapyd
# configuration file installed at /etc/scrapyd/scrapyd.conf.
FROM python:3.8.2
ENV TZ=Asia/Shanghai
ENV PATH=/usr/local/bin:$PATH
# COPY is preferred over ADD for plain local files: no archive extraction or
# URL fetching is needed here.
COPY ./requirements.txt /usr/local/
COPY ./default_scrapyd.conf /etc/scrapyd/scrapyd.conf
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir -r /usr/local/requirements.txt
EXPOSE 6800
WORKDIR /root
# NOTE(review): no CMD/ENTRYPOINT is defined in this file; presumably the
# start command is supplied by the deployment — confirm.

2
deploy/scrapyd/MANIFEST Normal file
View File

@ -0,0 +1,2 @@
# file GENERATED by distutils, do NOT edit
setup.py

View File

@ -0,0 +1,29 @@
# Scrapyd daemon settings.
[scrapyd]
eggs_dir = eggs
logs_dir = logs
# Left empty on purpose in this file (no items directory configured).
items_dir =
jobs_to_keep = 5
dbs_dir = dbs
max_proc = 0
max_proc_per_cpu = 4
finished_to_keep = 100
poll_interval = 5.0
# Listens on all network interfaces. No username/password is configured in
# this file.
# NOTE(review): confirm that access to port 6800 is restricted at the
# network level.
bind_address = 0.0.0.0
http_port = 6800
debug = off
runner = scrapyd.runner
application = scrapyd.app.application
launcher = scrapyd.launcher.Launcher
webroot = scrapyd.website.Root
# JSON web-service endpoints, each mapped to the class that handles it.
[services]
schedule.json = scrapyd.webservice.Schedule
cancel.json = scrapyd.webservice.Cancel
addversion.json = scrapyd.webservice.AddVersion
listprojects.json = scrapyd.webservice.ListProjects
listversions.json = scrapyd.webservice.ListVersions
listspiders.json = scrapyd.webservice.ListSpiders
delproject.json = scrapyd.webservice.DeleteProject
delversion.json = scrapyd.webservice.DeleteVersion
listjobs.json = scrapyd.webservice.ListJobs
daemonstatus.json = scrapyd.webservice.DaemonStatus

View File

@ -0,0 +1,84 @@
async-timeout==4.0.2
attrs==23.1.0
Automat==20.2.0
autopep8==1.5
cachetools==5.3.1
certifi==2020.4.5.1
cffi==1.14.0
chardet==5.1.0
colorama==0.4.6
constantly==15.1.0
coverage==7.2.7
cryptography==41.0.1
cssselect==1.1.0
decorator==4.4.2
distlib==0.3.6
exceptiongroup==1.1.1
exif==1.6.0
filelock==3.12.1
flake8==6.0.0
global-land-mask==1.0.0
hiredis==1.0.1
hyperlink==19.0.0
idna==2.9
incremental==22.10.0
iniconfig==2.0.0
itemadapter==0.8.0
itemloaders==1.1.0
jmespath==1.0.1
kafka-python==2.0.1
lxml==4.5.0
mccabe==0.7.0
mock==5.0.2
msedge-selenium-tools==3.141.4
numpy==1.18.3
packaging==23.1
parsel==1.5.2
Pillow==7.1.2
platformdirs==3.5.3
pluggy==1.0.0
plum-py==0.8.6
Protego==0.1.16
protobuf==3.12.2
pyaes==1.6.1
pyasn1==0.4.8
pyasn1-modules==0.2.8
pycodestyle==2.10.0
pycparser==2.20
PyDispatcher==2.0.5
pyflakes==3.0.1
pyhamcrest==2.0.4
PyMySQL==1.0.3
pyOpenSSL==23.2.0
pyproject-api==1.5.1
pytest==7.3.2
pytest-cov==4.1.0
queuelib==1.5.0
redis==3.5.3
redisbloom==0.3.2
requests==2.23.0
requests-file==1.5.1
rmtest==0.7.0
rsa==4.9
Scrapy==2.9.0
scrapy-redis==0.7.3
scrapy-selenium==0.0.7
scrapy-splash==0.9.0
scrapyd==1.4.2
selenium==3.141.0
service-identity==18.1.0
simhash==2.0.0
six==1.16.0
Telethon==1.28.5
tldextract==3.4.4
tomli==2.0.1
tox==4.6.0
Twisted==22.10.0
typing-extensions==4.6.3
urllib3==1.25.9
validators==0.15.0
virtualenv==20.23.0
w3lib==1.21.0
wget==3.2
xlrd==1.2.0
zope.interface==5.1.0

View File

@ -0,0 +1,11 @@
# Image for the shipxy decoder Flask service.
FROM python:3.8.2-slim
ENV TZ=Asia/Shanghai
ENV PATH=/usr/local/bin:$PATH
WORKDIR /usr/local
COPY ./requirements.txt .
# COPY creates the destination directory itself, so no separate mkdir layer
# is needed.
COPY shipxy ./shipxy
# Install dependencies from the Tsinghua PyPI mirror (without keeping pip's
# cache in the layer), then unpack the bundled static assets.
RUN pip install --no-cache-dir -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple && cd shipxy && tar -zxvf static.tar.gz
EXPOSE 5000
WORKDIR /usr/local/shipxy
ENTRYPOINT [ "sh", "start.sh" ]

View File

@ -0,0 +1,3 @@
selenium~=3.141.0
Flask~=2.2.3
urllib3~=1.25.8

View File

@ -0,0 +1,86 @@
import selenium
from flask import Flask, render_template, request
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.firefox.options import Options
# Browser options for the remote Selenium session: headless, no sandbox.
driver_options = Options()
for _flag in ('--headless', '--no-sandbox'):
    driver_options.add_argument(_flag)

# One shared remote WebDriver session, created at import time and reused by
# every request handler in this module. webdriver.Remote is Selenium's alias
# for selenium.webdriver.remote.webdriver.WebDriver.
browser = webdriver.Remote(
    command_executor="http://172.18.0.2:4444",
    desired_capabilities=DesiredCapabilities.EDGE,
    options=driver_options,
)

app = Flask(__name__)
@app.route('/')
def hello_world():
    """Return a fixed banner string identifying this service."""
    banner = 'shipxy decoder'
    return banner
@app.route('/decode')
def decode():
    """Render the decode page that the remote browser drives.

    NOTE(review): Flask resolves template names relative to the app's
    template folder; confirm that './templates/decode.html' resolves to the
    intended file in the deployed layout.
    """
    template_name = './templates/decode.html'
    return render_template(template_name)
def _decode_in_browser(page_url):
    """Decode the current request body by driving *page_url* in the browser.

    The page is loaded in the shared remote browser, the raw request body is
    written into the ``#encode`` input, the ``#decode`` button is clicked, and
    the input's resulting value is returned.

    Returns the decoded text, or the string ``'500'`` when the page cannot be
    loaded even after restarting the browser session. (The ``'500'`` marker
    is returned as a normal response body, as callers have always received.)
    """
    try:
        browser.get(page_url)
    except Exception:
        # The remote session may have expired: start a fresh one and retry
        # the SAME page once.
        try:
            browser.start_session(capabilities=DesiredCapabilities.EDGE)
            browser.get(page_url)
        except Exception:
            return '500'
    payload = request.get_data().decode()
    # Pass the payload as a script argument instead of formatting it into the
    # JavaScript source, so quotes/backslashes in the request body can neither
    # break nor inject script code.
    browser.execute_script(
        'document.getElementById("encode").value = arguments[0];', payload)
    browser.find_element_by_id('decode').click()
    return browser.find_element_by_id('encode').get_attribute('value')


@app.route('/api/decode', methods=['post'])
def decodeApi():
    """Decode the POSTed payload with the page served at ``/decode``."""
    # NOTE: the URL given to the browser must use the container-internal IP
    # this service has once deployed; otherwise the emulated browser cannot
    # load the page. This service and the emulated browser run in different
    # containers, so both containers must be created on the same docker
    # network.
    return _decode_in_browser('http://172.18.0.2:5000/decode')


@app.route('/decode_track')
def decode_track():
    """Render the track-decode page that the remote browser drives."""
    return render_template('./templates/decode_track.html')


@app.route('/api/decode_track', methods=['post'])
def decodeTrackApi():
    """Decode the POSTed payload with the page served at ``/decode_track``.

    The retry after a session restart reloads ``/decode_track`` as well; the
    previous code reloaded ``/decode`` there, which ran the wrong decoder.
    """
    return _decode_in_browser('http://172.18.0.2:5000/decode_track')
@app.route('/api/getvalue')
def decodeApi2():
    """Return the current value of the ``#encode`` input in the shared browser.

    ``WebElement.text`` is the element's rendered inner text, which is always
    empty for an ``<input>``; the content lives in its ``value`` attribute,
    which is what the other handlers in this module read.
    """
    inputEle = browser.find_element_by_id('encode')
    return inputEle.get_attribute('value')
# Script entry point: serve on every interface, port 5000.
if __name__ == '__main__':
    app.run(port=5000, host='0.0.0.0')

View File

@ -0,0 +1,2 @@
#!/usr/bin/env bash
# Run server.py in the foreground with the default python interpreter.
python server.py

View File

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a47344e460415d23794fcf429f544d8409c3bfdb2ac660541757783953b89427
size 1124476

View File

@ -0,0 +1,19 @@
<!DOCTYPE HTML>
<html>
<head>
<script src="{{url_for('static',filename='jquery-1.8.2.min.js')}}"></script>
<script src="{{url_for('static',filename='ElaneMap.min.f.js')}}"></script>
</head>
<body>
<input type="text" id="encode">
<input type="button" value="decode" id="decode" onclick="decode()">
<script>
// Replace the text in the #encode input with the JSON-serialized `data`
// of the result that dedecode() returns for it.
function decode(){
    var field = document.getElementById('encode');
    var decoded = dedecode(field.value, "1");
    field.value = JSON.stringify(decoded.data);
}
</script>
</body>
</html>

View File

@ -0,0 +1,20 @@
<!DOCTYPE HTML>
<html>
<head>
<script src="{{url_for('static',filename='jquery-1.8.2.min.js')}}"></script>
<script src="{{url_for('static',filename='ElaneMap.min.f.js')}}"></script>
<script src="{{url_for('static',filename='DeAnalyseManager.js')}}"></script>
</head>
<body>
<input type="text" id="encode">
<input type="button" value="decode" id="decode" onclick="decode_track()">
<script>
// Replace the text in the #encode input with the JSON-serialized `data`
// of the result that analyseAisTrack() returns for it.
function decode_track(){
    var field = document.getElementById('encode');
    var track = analyseAisTrack(field.value);
    field.value = JSON.stringify(track.data);
}
</script>
</body>
</html>

View File

@ -0,0 +1,78 @@
# WGCLOUD介绍
WGCLOUD是一款轻量高效的运维监控系统专注Linux、Windows等服务器主机性能监测,故障发送告警通知。WGCLOUD部署使用简单、轻量、分布式、开源、免注册、也可内网使用。
支持主机各种指标监测cpu使用率cpu温度内存使用率磁盘容量磁盘IO硬盘SMART健康状态系统负载连接数量网卡流量硬件系统信息等。支持监测服务器上的进程应用、文件、端口、日志、DOCKER容器、数据库、数据表等资源。支持监测服务接口API、数通设备如交换机、路由器、打印机等。自动生成网络拓扑图大屏可视化web SSH统计分析图表巡检报告指令下发批量执行FTP/SFTP监测告警信息推送如邮件、钉钉、微信、短信等
WGCLOUD的唯一官网https://www.wgstart.com
# 如何安装
## 在服务器上创建docker-compose.yml文件
```
version: '3'
services:
wgcloud:
image: tianshiyeben/wgcloud-server:v3.4.6
container_name: wgcloud-server
restart: unless-stopped
privileged: true
environment:
- TZ=Asia/Shanghai
ports:
- 9997:9997
- 9998:9998
- 9999:9999
volumes:
- ./config:/wgcloud-server/config
- ./logo:/wgcloud-sever/logo
```
## 运行docker-compose up -d
运行如上指令后会在docker-compose.yml文件同级目录下生成一个config文件夹需要在里面存入server的配置文件application.yml和daemon.properties这两个配置文件在安装包wgcloud-server/config/下,也可以下载 https://www.wgstart.com/download/3.4.6/wgcloud-v3.4.6.tar.gz 解压后在wgcloud-v3.4.6/server/config下找到这两个配置文件这两个文件一定要放入config下我们配置好config/application.yml比如数据库连接信息然后重新运行指令`docker-compose up -d`
注意把config/application.yml中的守护进程url中的localhost改为宿主机ip有时候默认用localhost也可以看具体环境如下
```
#守护进程访问urlserver服务使用agent不使用一般保持默认即可
daemonUrl: http://localhost:9997
```
如果没有生效,那先停止容器:`docker stop wgcloud-server`,再启动:`docker start wgcloud-server`
至此wgcloud服务端就算启动完成了这里的server是v3.4.6版本
## 解析授权文件license.txt付费功能普通版可跳过
我们获取到授权文件license.txt先把授权文件license.txt放到config/下然后进入容器把license.txt复制到wgcloud-server/目录下即可,如下
```
root@vultr:/docker# docker container exec -it wgcloud-server /bin/bash
root@981a404efc50:/wgcloud-server# cd config/
root@981a404efc50:/wgcloud-server/config# ls
application.yml license.txt
root@981a404efc50:/wgcloud-server/config# cp license.txt ../
root@981a404efc50:/wgcloud-server/config# rm -rf license.txt
```
然后重启容器就好了
## 当server部署在armmacOS、龙芯mips等平台时需要替换守护进程下wgcloud-daemon-release
> 参考 https://www.wgstart.com/help/docs60.html
下载对应版本的守护进程后 先把wgcloud-daemon-release放到config/下然后进入容器先备份或删除wgcloud-server/wgcloud-daemon-release把config/wgcloud-daemon-release复制到wgcloud-server/目录下即可记得给wgcloud-daemon-release加可执行权限如下
```
root@vultr:/docker# docker container exec -it wgcloud-server /bin/bash
root@981a404efc50:/wgcloud-server# mv wgcloud-daemon-release wgcloud-daemon-release_0312
root@981a404efc50:/wgcloud-server# cd config/
root@981a404efc50:/wgcloud-server/config# ls
application.yml wgcloud-daemon-release
root@981a404efc50:/wgcloud-server/config# cp wgcloud-daemon-release ../
root@981a404efc50:/wgcloud-server/config# rm -rf wgcloud-daemon-release
root@981a404efc50:/wgcloud-server/config# chmod +x ../wgcloud-daemon-release
```
然后重启容器就好了
## 什么是agent
agent是探针端(agent和server需要保持相同版本,参见 https://www.wgstart.com/docs.html ),需要部署在被监控的服务器上:把agent目录拷贝到需要被监控的服务器上,运行start.sh脚本即可
# web页面访问地址
`http://[ip]:9999/wgcloud`
docker 部署时,上面的 9999 替换成映射的宿主机端口
**默认登录账号密码admin/111111**
> 本 README 整理自 https://hub.docker.com/r/tianshiyeben/wgcloud-server

View File

@ -0,0 +1,191 @@
server:
port: 9999
servlet:
session:
timeout: 120m
context-path: /wgcloud
#日志文件输出路径
logging:
file:
path: ./log
# 数据库 相关设置
spring:
application:
name: wgcloud-server
datasource:
driver-class-name: org.postgresql.Driver
url: jdbc:postgresql://39.98.151.140:23306/wgcloud
username: postgres
password: passok123A
hikari:
validationTimeout: 3000
connectionTimeout: 60000
idleTimeout: 60000
minimumIdle: 10
maximumPoolSize: 10
maxLifeTime: 60000
connectionTestQuery: select 1
mvc:
static-path-pattern: /static/**
thymeleaf:
cache: false
mybatis:
config-location: classpath:mybatis/mybatis-config.xml
mapper-locations: classpath:mybatis/mapper/*.xml
#自定义配置参数
base:
#管理员登录账号
account: admin
#管理员登录账号的密码
accountPwd: 111111
#只读账号(只有浏览权限,没有修改、删除、添加权限,此功能需升级到专业版才生效)
guestAccount: guest
#只读账号的密码
guestAccountPwd: 111111
#通信tokenagent端的wgToken和此保持一致
wgToken: wgcloud
#每页显示多少条数据建议不小于10
pageSize: 20
#是否开启web ssh客户端yes开启no关闭
webSsh: yes
#web ssh客户端的服务端口
webSshPort: 9998
#守护进程访问urlserver服务使用agent不使用若守护端口修改那此处的端口也要同步修改下一般保持默认即可
daemonUrl: http://localhost:9997
#是否开启公众看板yes开启no关闭开启后看板页面无需登陆
dashView: yes
#公众看板页面主机IP是否开启脱敏显示yes开启no关闭
dashViewIpHide: yes
#是否开启大屏展示看板yes开启no关闭开启后看板页面无需登陆
dapingView: yes
#是否开启数据开放接口yes开启no关闭
openDataAPI: no
#是否自动闭合左侧菜单yes是no否
sidebarCollapse: yes
#是否在列表页面显示告警次数yes是no否这个会对性能有些影响不建议开启
showWarnCount: no
#是否开启指令下发(若关闭,将不能再新增和下发指令)yes是no否
shellToRun: yes
#linux指令下发不能包含的敏感字符小写即可多个用逗号隔开
shellToRunLinuxBlock: 'rm ,mkfs, /dev/,:(){:|:&};:,mv ,wget , install '
#windows指令下发不能包含的敏感字符小写即可多个用逗号隔开
shellToRunWinBlock: 'del ,delete ,format ,ren ,rd ,rd/s/q ,rmdir '
#数据监控的sql语句不能编写出现的敏感字符即sql可能注入的关键字小写即可多个用逗号隔开
sqlInKeys: 'execute ,update ,delete ,insert ,create ,drop ,alter ,rename ,modify '
#数据表监控间隔,单位秒默认60分钟
dbTableTimes: 3600
#服务接口监控间隔单位秒默认10分钟
heathTimes: 120
#ftp/sftp服务监控间隔单位秒默认10分钟
ftpTimes: 600
#数通设备PING监控间隔单位秒默认15分钟
dceTimes: 600
#数通设备snmp监测间隔单位秒默认20分钟
snmpTimes: 1200
#告警缓存时间间隔(即告警静默时间,此时间段内同一告警通知不再重复发)单位秒默认120分钟
warnCacheTimes: 7200
#监控数据保留天数默认30天
historyDataOut: 10
#节点类型集群才会用到一般保持默认即可master或slave一个集群只能有一个master和N(最多31)个slaveslave1slave2
nodeType: master
#是否开启使用标签yes开启no关闭
hostGroup: no
#是否开启成员账号管理即每个成员可管理自己的资源yes开启no关闭关闭后新增的成员不能再登录管理员和只读账号可以登录此功能需升级到专业版
userInfoManage: no
#server/logo/下的ico图标名称建议32*32如favicon.png此功能需升级到专业版
icoUrl:
#server/logo/下的logo图标名称建议120*120如logo.png此功能需升级到专业版
logoUrl:
#系统全称如wgcloud运维监控系统此功能需升级到专业版
wgName:
#系统简称如wgcloud此功能需升级到专业版
wgShortName:
#告警邮件标题前缀,此功能需升级到专业版
mailTitlePrefix: '[WGCLOUD]'
#告警邮件内容后缀,此功能需升级到专业版
mailContentSuffix: '<p><p><p>WGCLOUD敬上'
#是否显示页面底部版权、网址信息yes显示no不显示此功能需升级到专业版
copyRight: yes
#告警配置,策略优先级按照前后顺序执行,比如告警总开关关闭时,那下面所有告警开关都会失效
mail:
#告警总开关yes开启no关闭。总开关开启后以下子开关设置才会生效。以下开关均遵循此规则。
allWarnMail: yes
#告警时间段cron表达式设置(在该时间段内发送告警,其他时间不发),默认为空会持续发送,如'* * 8-20 ? * MON-FRI'表示周1到周5的8点-20点发送告警'* * 8-20 * * ?'表示每天的8点-20点发送告警带单引号
warnCronTime:
#不需要告警ip集合多个用逗号,隔开,此主机所有监控资源都不会再告警
blockIps:
#主机上行传输速率bytes sent告警开关yes开启no关闭
upSpeedMail: no
#主机上行传输速率告警值单位KB/s默认10MB超过此值即发送告警
upSpeedVal: 10240
#主机上行传输速率低于此值时发送告警此配置项一般不用默认即可单位KB/s默认0MB
upSpeedMinVal: 0
#主机下行传输速率bytes received告警开关yes开启no关闭
downSpeedMail: no
#主机下行传输速率告警值单位KB/s默认10MB超过此值即发送告警
downSpeedVal: 10240
#主机下行传输速率低于此值时发送告警此配置项一般不用默认即可单位KB/s默认0MB
downSpeedMinVal: 0
#内存告警开关yes开启no关闭
memWarnMail: yes
#主机内存使用率%告警值,超过此值即发送告警
memWarnVal: 90
#主机系统负载告警开关yes开启no关闭
sysLoadWarnMail: no
#主机系统负载告警值以5分钟系统负载值为准进行告警可以为小数如1.2,超过此值即发送告警
sysLoadWarnVal: 20
#主机CPU使用率告警开关yes开启no关闭
cpuWarnMail: no
#主机cpu使用率%告警值可大于100超过此值即发送告警
cpuWarnVal: 99
#主机CPU温度告警开关yes开启no关闭
cpuTemperatureWarnMail: no
#主机CPU温度告警值℃超过此值即发送告警
cpuTemperatureWarnVal: 92
#主机磁盘使用率告警开关yes开启no关闭
diskWarnMail: yes
#主机磁盘使用率%告警值,超过此值即发送告警
diskWarnVal: 96
#主机不需要告警磁盘在此屏蔽,多个盘符用,隔开,如/boot,/dev。支持Ant路径匹配规则如/dev/**。特殊符号用单引号,如'C:'
diskBlock: /dev,/snap/**,'C:','E:'
#主机磁盘SMART健康检测告警开关yes开启no关闭
smartWarnMail: yes
#主机下线告警开关yes开启no关闭
hostDownWarnMail: yes
#进程下线告警开关yes开启no关闭
appDownWarnMail: yes
#DOCKER下线告警开关yes开启no关闭
dockerDownWarnMail: yes
#服务接口告警开关yes开启no关闭
heathWarnMail: yes
#服务接口监测失败连续几次后发送告警通知默认2次
heathWarnCount: 3
#ftp/sftp告警开关yes开启no关闭
ftpWarnMail: yes
#数通设备PING告警yes开启no关闭
dceWarnMail: yes
#数通设备PING监测失败连续几次后发送告警通知默认2次
dceWarnCount: 2
#数通设备SNMP监测告警yes开启no关闭
snmpWarnMail: yes
#数据源、数据表告警开关yes开启no关闭
dbDownWarnMail: yes
#日志文件监控告警开关yes开启no关闭
fileLogWarnMail: yes
#端口telnet不通告警开关yes开启no关闭
portWarnMail: yes
#文件防篡改告警开关yes开启no关闭
fileSafeWarnMail: yes
#指令下发通知开关yes开启no关闭
shellWarnMail: yes
#自定义监控项告警开关yes开启no关闭
customInfoWarnMail: yes
#服务器登录提醒通知开关yes开启no关闭此功能需升级到专业版
hostLoginWarnMail: yes
#告警脚本完整路径(若配置脚本,无论是否配置过邮件,都会执行该脚本),可以为空
warnScript:
#是否将告警内容转为unicode(针对告警脚本生效)yes是no否钉钉微信等告警建议设置为yeswindows部署server需设置为yes否则接受到会是乱码
warnToUnicode: no

View File

@ -0,0 +1,2 @@
#守护进程端口agent配置的守护进程端口需和此处保持一致
port=9997

View File

@ -0,0 +1,16 @@
# docker-compose definition for the WGCLOUD monitoring server container.
version: '3'
services:
wgcloud:
image: tianshiyeben/wgcloud-server:v3.4.6
container_name: wgcloud-server
restart: unless-stopped
# The container runs in privileged mode.
privileged: true
environment:
- TZ=Asia/Shanghai
# Host ports 28084/28085/28086 are mapped to container ports 9997/9998/9999.
ports:
- 28084:9997
- 28085:9998
- 28086:9999
volumes:
- ./config:/wgcloud-server/config
# NOTE(review): "wgcloud-sever" looks like a typo for "wgcloud-server" (the
# config mount above uses /wgcloud-server). Confirm the intended container
# path before changing it: a corrected mount would overlay whatever the image
# ships at that location.
- ./logo:/wgcloud-sever/logo

View File

@ -0,0 +1,15 @@
# Image for the yt-dlp download service (Flask front end around yt-dlp).
FROM python:3.8.2-slim
ENV TZ=Asia/Shanghai
ENV PATH=/usr/local/bin:$PATH
WORKDIR /usr/local
# Install dependencies before copying the sources: this layer does not depend
# on the project files, so source edits no longer invalidate it.
# NOTE(review): both yt-dlp (master tarball) and Flask are unpinned, so
# rebuilds are not reproducible — consider pinning versions.
RUN python -m pip install --no-cache-dir --force-reinstall https://github.com/yt-dlp/yt-dlp/archive/master.tar.gz && pip install --no-cache-dir Flask
COPY . .
EXPOSE 5000
ENTRYPOINT [ "sh", "start.sh" ]

View File

@ -0,0 +1,96 @@
worker_processes auto;
#error_log logs/error.log;
#error_log logs/error.log notice;
#error_log logs/error.log info;
#pid logs/nginx.pid;
events {
worker_connections 1024;
}
http {
include mime.types;
default_type application/octet-stream;
#log_format main '$remote_addr - $remote_user [$time_local] "$request" '
# '$status $body_bytes_sent "$http_referer" '
# '"$http_user_agent" "$http_x_forwarded_for"';
#access_log logs/access.log main;
sendfile on;
#tcp_nopush on;
#keepalive_timeout 0;
keepalive_timeout 65;
#gzip on;
client_max_body_size 20m;
server {
listen 8080;
server_name localhost;
#charset koi8-r;
charset utf-8;
#access_log logs/host.access.log main;
# Serve files from /usr/share/nginx/html. autoindex is enabled, so a
# directory without an index file is shown as a browsable listing.
location / {
root /usr/share/nginx/html;
index index.html index.htm;
autoindex on;
# Listing format: approximate (human-readable) sizes, local-time timestamps.
autoindex_exact_size off;
autoindex_localtime on;
}
#error_page 404 /404.html;
# redirect server error pages to the static page /50x.html
#
error_page 500 502 503 504 /50x.html;
# Serve the static error page from the same absolute document root as
# "location /". A relative "root html" is resolved against the nginx prefix,
# which is not necessarily /usr/share/nginx.
location = /50x.html {
    root /usr/share/nginx/html;
}
# proxy the PHP scripts to Apache listening on 127.0.0.1:80
#
#location ~ \.php$ {
# proxy_pass http://127.0.0.1;
#}
# pass the PHP scripts to FastCGI server listening on 127.0.0.1:9000
#
#location ~ \.php$ {
# root html;
# fastcgi_pass 127.0.0.1:9000;
# fastcgi_index index.php;
# fastcgi_param SCRIPT_FILENAME /scripts$fastcgi_script_name;
# include fastcgi_params;
#}
# deny access to .htaccess files, if Apache's document root
# concurs with nginx's one
#
#location ~ /\.ht {
# deny all;
#}
}
# another virtual host using mix of IP-, name-, and port-based configuration
#
#server {
# listen 8000;
# listen somename:8080;
# server_name somename alias another.alias;
# location / {
# root html;
# index index.html index.htm;
# }
#}
}

View File

@ -0,0 +1,63 @@
# -*- coding: UTF-8 -*-
import json
import os
import platform
from flask import Flask, request
from threading import Thread
app = Flask(__name__)
def single_dl(url, options):
    """Download one video by running the ``yt-dlp`` command-line tool.

    Args:
        url: Video URL. Surrounding whitespace is ignored.
        options: Command-line options for yt-dlp as one string; it is split
            with shell-like quoting rules (``shlex``), so quoted values such
            as ``--output "/path/%(id)s.%(ext)s"`` stay intact.

    Returns:
        None. Failures to parse the options or to launch yt-dlp are printed
        and swallowed so that a batch download continues with the next URL.
    """
    import shlex
    import subprocess

    try:
        option_args = shlex.split(str(options))
    except ValueError as exc:  # e.g. unbalanced quotes in the options string
        print('[Download failed] invalid options %r: %r' % (options, exc))
        return

    # Build an argument vector instead of a shell command line: url and
    # options come from the HTTP request, and shell metacharacters in them
    # must never be interpreted.
    # SECURITY NOTE(review): options are still caller-controlled and yt-dlp
    # itself has options that run commands (e.g. --exec); consider an
    # allow-list if this endpoint is reachable by untrusted clients.
    command = ['yt-dlp', *option_args]
    target = str(url).strip()
    if target:
        # "--" ends option parsing so a URL starting with "-" is not taken
        # for an option.
        command += ['--', target]
    try:
        subprocess.run(command, check=False)
    except OSError as exc:  # yt-dlp missing or not executable
        print('[Download failed] %s: %r' % (target, exc))
def batch_dl(urls, options):
    """Download each URL in *urls* one after another with the same options.

    A start line is printed for every URL before it is handed to single_dl.
    """
    for video_url in urls:
        banner = '[Start download] Video url: %s' % video_url.strip()
        print(banner)
        single_dl(video_url, options)
@app.route('/ping')
def ping():
    """Liveness probe: answer with the fixed service name."""
    service_name = 'ytb-dl'
    return service_name
@app.route('/download', methods=['post'])
def download():
    """Start a background download for the URLs in the JSON request body.

    Expected body: a JSON object with
      * ``urls``    - a list of video URLs (a single URL string is accepted
                      and treated as a one-element list);
      * ``options`` - optional yt-dlp option string replacing the default.

    Returns a JSON string with ``code`` 200 (download started), 300 (nothing
    to download) or 500 (invalid request or unexpected error) and a
    ``message``.
    """
    try:
        data = request.get_json()
        if not isinstance(data, dict):
            raise TypeError('request body must be a JSON object')
        options = data.get(
            'options', '-f best --output "/usr/local/download/%(id)s.%(ext)s"')
        video_urls = data.get('urls') or []
        # A bare string would otherwise be iterated character by character
        # by batch_dl.
        if isinstance(video_urls, str):
            video_urls = [video_urls]
        if not isinstance(video_urls, (list, tuple)):
            raise TypeError("'urls' must be a list of URL strings")
        if video_urls:
            # Download in the background so the request returns immediately.
            dl_thread = Thread(target=batch_dl, args=(video_urls, options))
            dl_thread.start()
            return json.dumps({
                'code': 200,
                'message': f'开始下载 {len(video_urls)} 个视频'
            }, ensure_ascii=False)
        return json.dumps({
            'code': 300,
            'message': '未下载视频'
        }, ensure_ascii=False)
    except Exception as e:
        # Top-level boundary of the endpoint: report the error to the caller
        # instead of letting Flask answer with an HTML error page.
        return json.dumps({
            'code': 500,
            'message': repr(e)
        }, ensure_ascii=False)
# Script entry point: make sure the download directory exists, then serve on
# every interface, port 5000.
if __name__ == '__main__':
    output_path = '/usr/local/download/'
    if platform.system().upper() == 'WINDOWS':
        output_path = 'E:/youtube-dl/'
    # NOTE(review): the default options in download() always write to
    # '/usr/local/download/', so on Windows this directory is created but not
    # used unless the caller passes its own --output option.
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(output_path, exist_ok=True)
    app.run(host='0.0.0.0', port=5000)

View File

@ -0,0 +1,2 @@
#!/usr/bin/env bash
# Run server.py in the foreground with the default python interpreter.
python server.py

View File

@ -0,0 +1,183 @@
Usage: youtube-dl [OPTIONS] URL [URL...]
Options:
通用选项:
-h, --help 打印帮助文档
--version 打印版本信息
-U, --update 更新到最新版(需要权限)
-i, --ignore-errors 遇到下载错误时跳过
--abort-on-error 遇到下载错误时终止
--dump-user-agent 显示当前使用的浏览器(User-agent)
--list-extractors 列出所有的提取器(支持的网站)
--extractor-descriptions 同上
--force-generic-extractor 强制使用通用提取器下载
--default-search PREFIX 使用此前缀补充不完整的URLs例如"ytsearch2 yt-dl" 从youtube搜索并下载两个关于yt-dl视频. 使用"auto"youtube-dl就会猜一个一般效果等价于"ytsearch"("auto_warning"猜测时加入警告).我已知支持的PREFIXytsearch (youtube), ytsearchdate (youtube), yvsearch (yahoo videos), gvsearch (google videos)
--ignore-config 不读取配置文件。当在全局配置文件/etc/youtube-dl.conf中使用此选项时:不再读取用户配置文件 ~/.config/youtube-dl/config (Windows上为 %APPDATA%/youtube-dl/config.txt)
--config-location PATH 使用指定路径下的配置文件
--flat-playlist 列出列表视频但不下载
--mark-watched 标记看过此视频 (YouTube only)
--no-mark-watched 不标记看过此视频 (YouTube only)
--no-color 打印到屏幕上的代码不带色
网络选项:
--proxy URL 使用HTTP/HTTPS/SOCKS协议的代理.如socks5://127.0.0.1:1080/.
--socket-timeout SECONDS 放弃连接前等待时间
--source-address IP 绑定的客户端IP地址
-4, --force-ipv4 所有连接通过IPv4
-6, --force-ipv6 所有连接通过IPv6
地理限制:
--geo-verification-proxy URL 使用此代理地址测试一些有地理限制的地址
--geo-bypass 绕过地理限制通过伪装X-Forwarded-For HTTP头部的客户端ip (实验)
--no-geo-bypass 不 绕过地理限制通过伪装X-Forwarded-For HTTP头部的客户端ip (实验)
--geo-bypass-country CODE 强制绕过地理限制通过提供准确的ISO 3166-2标准的国别代码(实验)
视频选择:
--playlist-start NUMBER 指定列表中开始下载的视频(默认为1)
--playlist-end NUMBER 指定列表中结束的视频(默认为last)
--playlist-items ITEM_SPEC 指定列表中要下载的视频项目编号.如:"--playlist-items 1,2,5,8"或"--playlist-items 1-3,7,10-13"
--match-title REGEX 下载标题匹配的视频(正则表达式或区分大小写的字符串)
--reject-title REGEX 跳过下载标题匹配的视频(正则表达式或区分大小写的字符串)
--max-downloads NUMBER 下载NUMBER个视频后停止
--min-filesize SIZE 不下载小于SIZE的视频(e.g. 50k or 44.6m)
--max-filesize SIZE 不下载大于SIZE的视频(e.g. 50k or 44.6m)
--date DATE 仅下载上传日期在指定日期的视频
--datebefore DATE 仅下载上传日期在指定日期或之前的视频 (i.e. inclusive)
--dateafter DATE 仅下载上传日期在指定日期或之后的视频 (i.e. inclusive)
--min-views COUNT 不下载观影数小于指定值的视频
--max-views COUNT 不下载观影数大于指定值的视频
--match-filter FILTER 通用视频过滤器. Specify any key (see help for -o for a list of available keys) to match if the key is present, !key to check if the key is not present, key > NUMBER (like "comment_count > 12", also works with >=, <, <=, !=, =) to compare against a number,key = LITERAL (like "uploader = Mike Smith", also works with !=) to match against a string literal and & to require multiple matches. Values which are not known are excluded unless you put a question mark (?) after the operator. For example, to only match videos that have been liked more than 100 times and disliked less than 50 times (or the dislike functionality is not available at the given service), but who also have a description, use --match-filter "like_count > 100 & dislike_count <? 50 & description" .
--no-playlist 当视频链接到一个视频和一个播放列表时,仅下载视频
--yes-playlist 当视频链接到一个视频和一个播放列表时,下载视频和播放列表
--age-limit YEARS 仅下载适合指定年龄观看的视频
--download-archive FILE 仅下载档案文件中未列出的影片已下载的记录ID
--include-ads 同时下载广告(实验)
下载选项:
-r, --limit-rate RATE 最大bps (e.g. 50K or 4.2M)
-R, --retries RETRIES 重试次数 (默认10), or "infinite".
--fragment-retries RETRIES 一个分段的最大重试次数(default is 10), or "infinite" (DASH, hlsnative and ISM)
--skip-unavailable-fragments 跳过不可用分段(DASH, hlsnative and ISM)
--abort-on-unavailable-fragment 放弃某个分段当不可获取时
--keep-fragments 下载完成后,将下载的片段保存在磁盘上; 片段默认被删除
--buffer-size SIZE 设置缓冲区大小buffer (e.g. 1024 or 16K) (default is 1024)
--no-resize-buffer 不自动调整缓冲区大小.默认情况下自动调整
--playlist-reverse 以相反的顺序下载播放列表视频
--playlist-random 以随机的顺序下载播放列表视频
--xattr-set-filesize Set file xattribute ytdl.filesize with expected file size (experimental)
--hls-prefer-native 使用本机默认HLS下载器而不是ffmpeg
--hls-prefer-ffmpeg 使用ffmpeg而不是本机HLS下载器
--hls-use-mpegts 使用TS流容器来存放HLS视频,一些高级播放器允许在下载的同时播放视频
--external-downloader COMMAND 使用指定的第三方下载工具,当前支持aria2c,avconv,axel,curl,ffmpeg,httpie,wget
--external-downloader-args ARGS 给第三方下载工具指定参数,如:--external-downloader aria2c --external-downloader-args -j8
文件系统选项:
-a, --batch-file FILE 文件中包含需要下载的URL
--id 仅使用文件名中的视频ID
-o, --output TEMPLATE Output filename template, see the "OUTPUT TEMPLATE" for all the info
--autonumber-start NUMBER 指定%(autonumber)s的起始值(默认为1)
--restrict-filenames 将文件名限制为ASCII字符,并避免文件名中出现“&”和空格
-w, --no-overwrites 不要覆盖文件
-c, --continue 强制恢复部分下载的文件。 默认情况下youtube-dl仅在可能时将恢复下载。
--no-continue 不要恢复部分下载的文件(从头开始重新启动)
--no-part 不使用.part文件 - 直接写入输出文件
--no-mtime 不使用Last-modified header来设置文件最后修改时间
--write-description 将视频描述写入.description文件
--write-info-json 将视频元数据写入.info.json文件
--write-annotations 将视频注释写入.annotations.xml文件
--load-info-json FILE 包含视频信息的JSON文件(使用“--write-info-json”选项创建)
--cookies FILE 文件从中读取Cookie(经测试export cookies插件可以使用但firebug导出的cookies导致错误)
--cache-dir DIR 缓存文件的存储位置。youtube-dl需要永久保存一些下载的信息。默认为$XDG_CACHE_HOME/youtube-dl或~/.cache/youtube-dl。目前只缓存YouTube播放器文件(用于带有混淆签名的视频),但将来可能会发生变化。
--no-cache-dir 不用缓存
--rm-cache-dir 删除所有缓存文件
缩略图:
--write-thumbnail 把缩略图写入硬盘
--write-all-thumbnails 将所有缩略图写入磁盘
--list-thumbnails 列出所有可用的缩略图格式
详细/模拟选项:
-q, --quiet 激活安静模式
--no-warnings 忽略警告
-s, --simulate 不下载不存储任何文件到硬盘,模拟下载模式
--skip-download 不下载视频
-g, --get-url 模拟下载获取视频直连
-e, --get-title 模拟下载获取标题
--get-id 模拟下载获取id
--get-thumbnail 模拟下载获取缩略图URL
--get-description 模拟下载获取视频描述
--get-duration 模拟下载获取视频长度
--get-filename 模拟下载获取输出视频文件名
--get-format 模拟下载获取输出视频格式
-j, --dump-json 模拟下载获取JSON information.
-J, --dump-single-json 模拟下载获取每条命令行参数的JSON information.如果是个播放列表就获取整个播放列表的JSON
--print-json 下载的同时获取视频信息的JSON
--newline 进度条在新行输出
--no-progress 不打印进度条
--console-title 在控制台标题栏显示进度
-v, --verbose 打印各种调试信息
--dump-pages 打印下载下来的使用base64编码的页面来调试问题非常冗长
--write-pages 将下载的中间页以文件的形式写入当前目录中以调试问题
--print-traffic 显示发送和读取HTTP流量
-C, --call-home 联系youtube-dl服务器进行调试
--no-call-home 不联系youtube-dl服务器进行调试
解决方法:
--encoding ENCODING 强制指定编码(实验)
--no-check-certificate 禁止HTTPS证书验证
--prefer-insecure 使用未加密的连接来检索有关视频的信息(目前仅支持YouTube)
--user-agent UA 指定user agent
--referer URL 指定自定义的referer,仅限视频来源于同一网站
--add-header FIELD:VALUE 指定一个自定义值的HTTP头文件,使用分号分割,可以多次使用此选项
--bidi-workaround 围绕缺少双向文本支持的终端工作。需要在PATH中有bidiv或fribidi可执行文件
--sleep-interval SECONDS 每次下载之前休眠的秒数;与--max-sleep-interval一起使用时,表示每次下载前随机休眠范围的下限(最小可能的休眠秒数)。
--max-sleep-interval SECONDS 每次下载前随机睡眠范围的上限(最大可能睡眠秒数)。只能与--min-sleep-interval一起使用。
视频格式选项:
-f, --format FORMAT 视频格式代码,查看"FORMAT SELECTION"获取所有信息
--all-formats 获取所有视频格式
--prefer-free-formats 开源的视频格式优先,除非有特定的请求
-F, --list-formats 列出请求视频的所有可用格式
--youtube-skip-dash-manifest 不要下载关于YouTube视频的DASH清单和相关数据
--merge-output-format FORMAT 如果需要合并(例如bestvideo + bestaudio)则输出到给定的容器格式。mkvmp4oggwebmflv之一。如果不需要合并则忽略
字幕选项:
--write-sub 下载字幕文件
--write-auto-sub 下载自动生成的字幕文件 (YouTube only)
--all-subs 下载所有可用的字幕
--list-subs 列出所有字幕
--sub-format FORMAT 字幕格式,接受格式偏好,如:"srt" or "ass/srt/best"
--sub-lang LANGS 要下载的字幕的语言(可选)用逗号分隔,请使用--list-subs表示可用的语言标签
验证选项:
-u, --username USERNAME 使用ID登录
-p, --password PASSWORD 账户密码,如果此选项未使用,youtube-dl将交互式地询问。
-2, --twofactor TWOFACTOR 双因素认证码
-n, --netrc 使用.netrc认证数据
--video-password PASSWORD 视频密码(vimeo, smotri, youku)
Adobe Pass Options:
--ap-mso MSO Adobe Pass多系统运营商(电视提供商)标识符,使用--ap-list-mso列出可用的MSO
--ap-username USERNAME MSO账号登录
--ap-password PASSWORD 账户密码,如果此选项未使用,youtube-dl将交互式地询问。
--ap-list-mso 列出所有支持的MSO
后处理选项:
-x, --extract-audio 将视频文件转换为纯音频文件(需要ffmpeg或avconv和ffprobe或avprobe)
--audio-format FORMAT 指定音频格式: "best", "aac", "flac", "mp3", "m4a", "opus", "vorbis", or "wav"; "best" by default;未使用-x时无效
--audio-quality QUALITY 指定ffmpeg/avconv音频质量,为VBR插入一个0(best)-9(worse)的值(默认5),或者指定比特率
--recode-video FORMAT 必要时将视频转码为其他格式(当前支持: mp4|flv|ogg|webm|mkv|avi)
--postprocessor-args ARGS 给后处理器提供这些参数
-k, --keep-video 视频文件在后处理后保存在磁盘上; 该视频默认被删除
--no-post-overwrites 不要覆盖后处理文件; 默认情况下,后处理文件将被覆盖
--embed-subs 在视频中嵌入字幕(仅适用于mp4,webm和mkv视频)
--embed-thumbnail 将缩略图嵌入音频作为封面艺术
--add-metadata 将元数据写入视频文件
--metadata-from-title FORMAT 从视频标题中解析附加元数据,如歌曲标题/艺术家。格式语法和--output相似.也可以使用带有命名捕获组的正则表达式。解析的参数替换现有值。Example: --metadata-from-title "%(artist)s - %(title)s" matches a title like "Coldplay - Paradise". Example (regex): --metadata-from-title "(?P<artist>.+?) - (?P<title>.+)"
--xattrs 将元数据写入视频文件的xattrs(使用dublin core 和 xdg标准)
--fixup POLICY 自动更正文件的已知故障。never(不做警告), warn(只发出警告), detect_or_warn (默认;如果可以的话修复文件,否则警告)
--prefer-avconv 后处理时相较ffmpeg偏向于avconv
--prefer-ffmpeg 后处理优先使用ffmpeg
--ffmpeg-location PATH ffmpeg/avconv程序位置;PATH为二进制所在文件夹或者目录.
--exec CMD 在下载后对文件执行命令,类似于find的-exec语法。示例:--exec 'adb push {} /sdcard/Music/ && rm {}'
--convert-subs FORMAT 转换字幕格式(当前支持: srt|ass|vtt)

1
dsp/README.md Normal file
View File

@ -0,0 +1 @@
# 后处理服务

199
dsp/dsp.iml Normal file
View File

@ -0,0 +1,199 @@
<?xml version="1.0" encoding="UTF-8"?>
<module org.jetbrains.idea.maven.project.MavenProjectsManager.isMavenModule="true" type="JAVA_MODULE" version="4">
<component name="FacetManager">
<facet type="web" name="Web">
<configuration>
<webroots />
</configuration>
</facet>
<facet type="Spring" name="Spring">
<configuration />
</facet>
</component>
<component name="NewModuleRootManager" LANGUAGE_LEVEL="JDK_1_8">
<output url="file://$MODULE_DIR$/target/classes" />
<output-test url="file://$MODULE_DIR$/target/test-classes" />
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/src/main/java" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/src/main/resources" type="java-resource" />
<excludeFolder url="file://$MODULE_DIR$/target" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="library" name="Maven: org.jetbrains:annotations:24.1.0" level="project" />
<orderEntry type="library" name="Maven: org.apache.commons:commons-compress:1.18" level="project" />
<orderEntry type="library" name="Maven: com.alibaba:fastjson:1.2.75" level="project" />
<orderEntry type="library" name="Maven: org.projectlombok:lombok:1.18.16" level="project" />
<orderEntry type="library" name="Maven: org.springframework.boot:spring-boot-starter-web:2.4.1" level="project" />
<orderEntry type="library" name="Maven: org.springframework.boot:spring-boot-starter:2.4.1" level="project" />
<orderEntry type="library" name="Maven: org.springframework.boot:spring-boot-starter-logging:2.4.1" level="project" />
<orderEntry type="library" name="Maven: ch.qos.logback:logback-classic:1.2.3" level="project" />
<orderEntry type="library" name="Maven: ch.qos.logback:logback-core:1.2.3" level="project" />
<orderEntry type="library" name="Maven: org.apache.logging.log4j:log4j-to-slf4j:2.13.3" level="project" />
<orderEntry type="library" name="Maven: org.apache.logging.log4j:log4j-api:2.13.3" level="project" />
<orderEntry type="library" name="Maven: org.slf4j:jul-to-slf4j:1.7.30" level="project" />
<orderEntry type="library" name="Maven: jakarta.annotation:jakarta.annotation-api:1.3.5" level="project" />
<orderEntry type="library" name="Maven: org.yaml:snakeyaml:1.27" level="project" />
<orderEntry type="library" name="Maven: org.springframework.boot:spring-boot-starter-json:2.4.1" level="project" />
<orderEntry type="library" name="Maven: com.fasterxml.jackson.datatype:jackson-datatype-jdk8:2.11.3" level="project" />
<orderEntry type="library" name="Maven: com.fasterxml.jackson.datatype:jackson-datatype-jsr310:2.11.3" level="project" />
<orderEntry type="library" name="Maven: com.fasterxml.jackson.module:jackson-module-parameter-names:2.11.3" level="project" />
<orderEntry type="library" name="Maven: org.springframework.boot:spring-boot-starter-tomcat:2.4.1" level="project" />
<orderEntry type="library" name="Maven: org.apache.tomcat.embed:tomcat-embed-core:9.0.41" level="project" />
<orderEntry type="library" name="Maven: org.glassfish:jakarta.el:3.0.3" level="project" />
<orderEntry type="library" name="Maven: org.apache.tomcat.embed:tomcat-embed-websocket:9.0.41" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-web:5.3.2" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-beans:5.3.2" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-webmvc:5.3.2" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-aop:5.3.2" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-expression:5.3.2" level="project" />
<orderEntry type="library" name="Maven: org.springframework.boot:spring-boot-starter-data-elasticsearch:2.4.1" level="project" />
<orderEntry type="library" name="Maven: org.springframework.data:spring-data-elasticsearch:4.1.2" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-tx:5.3.2" level="project" />
<orderEntry type="library" name="Maven: org.springframework.data:spring-data-commons:2.4.2" level="project" />
<orderEntry type="library" name="Maven: org.elasticsearch.plugin:transport-netty4-client:7.7.0" level="project" />
<orderEntry type="library" name="Maven: io.netty:netty-buffer:4.1.55.Final" level="project" />
<orderEntry type="library" name="Maven: io.netty:netty-codec:4.1.55.Final" level="project" />
<orderEntry type="library" name="Maven: io.netty:netty-codec-http:4.1.55.Final" level="project" />
<orderEntry type="library" name="Maven: io.netty:netty-common:4.1.55.Final" level="project" />
<orderEntry type="library" name="Maven: io.netty:netty-handler:4.1.55.Final" level="project" />
<orderEntry type="library" name="Maven: io.netty:netty-resolver:4.1.55.Final" level="project" />
<orderEntry type="library" name="Maven: io.netty:netty-transport:4.1.55.Final" level="project" />
<orderEntry type="library" name="Maven: org.elasticsearch.client:elasticsearch-rest-high-level-client:7.7.0" level="project" />
<orderEntry type="library" name="Maven: org.elasticsearch:elasticsearch:7.7.0" level="project" />
<orderEntry type="library" name="Maven: org.elasticsearch:elasticsearch-core:7.7.0" level="project" />
<orderEntry type="library" name="Maven: org.elasticsearch:elasticsearch-secure-sm:7.7.0" level="project" />
<orderEntry type="library" name="Maven: org.elasticsearch:elasticsearch-x-content:7.7.0" level="project" />
<orderEntry type="library" name="Maven: com.fasterxml.jackson.dataformat:jackson-dataformat-smile:2.11.3" level="project" />
<orderEntry type="library" name="Maven: com.fasterxml.jackson.dataformat:jackson-dataformat-yaml:2.11.3" level="project" />
<orderEntry type="library" name="Maven: com.fasterxml.jackson.dataformat:jackson-dataformat-cbor:2.11.3" level="project" />
<orderEntry type="library" name="Maven: org.elasticsearch:elasticsearch-geo:7.7.0" level="project" />
<orderEntry type="library" name="Maven: org.apache.lucene:lucene-core:8.5.1" level="project" />
<orderEntry type="library" name="Maven: org.apache.lucene:lucene-analyzers-common:8.5.1" level="project" />
<orderEntry type="library" name="Maven: org.apache.lucene:lucene-backward-codecs:8.5.1" level="project" />
<orderEntry type="library" name="Maven: org.apache.lucene:lucene-grouping:8.5.1" level="project" />
<orderEntry type="library" name="Maven: org.apache.lucene:lucene-highlighter:8.5.1" level="project" />
<orderEntry type="library" name="Maven: org.apache.lucene:lucene-join:8.5.1" level="project" />
<orderEntry type="library" name="Maven: org.apache.lucene:lucene-memory:8.5.1" level="project" />
<orderEntry type="library" name="Maven: org.apache.lucene:lucene-misc:8.5.1" level="project" />
<orderEntry type="library" name="Maven: org.apache.lucene:lucene-queries:8.5.1" level="project" />
<orderEntry type="library" name="Maven: org.apache.lucene:lucene-queryparser:8.5.1" level="project" />
<orderEntry type="library" name="Maven: org.apache.lucene:lucene-sandbox:8.5.1" level="project" />
<orderEntry type="library" name="Maven: org.apache.lucene:lucene-spatial-extras:8.5.1" level="project" />
<orderEntry type="library" name="Maven: org.apache.lucene:lucene-spatial3d:8.5.1" level="project" />
<orderEntry type="library" name="Maven: org.apache.lucene:lucene-suggest:8.5.1" level="project" />
<orderEntry type="library" name="Maven: org.elasticsearch:elasticsearch-cli:7.7.0" level="project" />
<orderEntry type="library" name="Maven: com.carrotsearch:hppc:0.8.1" level="project" />
<orderEntry type="library" name="Maven: joda-time:joda-time:2.10.4" level="project" />
<orderEntry type="library" name="Maven: com.tdunning:t-digest:3.2" level="project" />
<orderEntry type="library" name="Maven: org.hdrhistogram:HdrHistogram:2.1.9" level="project" />
<orderEntry type="library" name="Maven: org.elasticsearch:jna:4.5.1" level="project" />
<orderEntry type="library" name="Maven: org.elasticsearch.client:elasticsearch-rest-client:7.7.0" level="project" />
<orderEntry type="library" name="Maven: org.apache.httpcomponents:httpclient:4.5.13" level="project" />
<orderEntry type="library" name="Maven: org.apache.httpcomponents:httpcore:4.4.14" level="project" />
<orderEntry type="library" name="Maven: org.apache.httpcomponents:httpasyncclient:4.1.4" level="project" />
<orderEntry type="library" name="Maven: org.apache.httpcomponents:httpcore-nio:4.4.14" level="project" />
<orderEntry type="library" name="Maven: commons-codec:commons-codec:1.15" level="project" />
<orderEntry type="library" name="Maven: org.elasticsearch.plugin:mapper-extras-client:7.7.0" level="project" />
<orderEntry type="library" name="Maven: org.elasticsearch.plugin:parent-join-client:7.7.0" level="project" />
<orderEntry type="library" name="Maven: org.elasticsearch.plugin:aggs-matrix-stats-client:7.7.0" level="project" />
<orderEntry type="library" name="Maven: org.elasticsearch.plugin:rank-eval-client:7.7.0" level="project" />
<orderEntry type="library" name="Maven: org.elasticsearch.plugin:lang-mustache-client:7.7.0" level="project" />
<orderEntry type="library" name="Maven: com.github.spullara.mustache.java:compiler:0.9.6" level="project" />
<orderEntry type="library" name="Maven: com.fasterxml.jackson.core:jackson-core:2.11.3" level="project" />
<orderEntry type="library" name="Maven: org.slf4j:slf4j-api:1.7.30" level="project" />
<orderEntry type="library" name="Maven: org.springframework.cloud:spring-cloud-stream:3.0.7.RELEASE" level="project" />
<orderEntry type="library" name="Maven: org.springframework.boot:spring-boot-starter-validation:2.4.1" level="project" />
<orderEntry type="library" name="Maven: org.hibernate.validator:hibernate-validator:6.1.6.Final" level="project" />
<orderEntry type="library" name="Maven: jakarta.validation:jakarta.validation-api:2.0.2" level="project" />
<orderEntry type="library" name="Maven: org.jboss.logging:jboss-logging:3.4.1.Final" level="project" />
<orderEntry type="library" name="Maven: com.fasterxml:classmate:1.5.1" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-messaging:5.3.2" level="project" />
<orderEntry type="library" name="Maven: org.springframework.integration:spring-integration-core:5.4.2" level="project" />
<orderEntry type="library" name="Maven: io.projectreactor:reactor-core:3.4.1" level="project" />
<orderEntry type="library" name="Maven: org.reactivestreams:reactive-streams:1.0.3" level="project" />
<orderEntry type="library" name="Maven: org.springframework.integration:spring-integration-jmx:5.4.2" level="project" />
<orderEntry type="library" name="Maven: org.springframework.retry:spring-retry:1.3.0" level="project" />
<orderEntry type="library" name="Maven: javax.annotation:javax.annotation-api:1.3.2" level="project" />
<orderEntry type="library" name="Maven: org.springframework.cloud:spring-cloud-function-context:3.0.9.RELEASE" level="project" />
<orderEntry type="library" name="Maven: net.jodah:typetools:0.6.2" level="project" />
<orderEntry type="library" name="Maven: org.springframework.cloud:spring-cloud-function-core:3.0.9.RELEASE" level="project" />
<orderEntry type="library" name="Maven: org.springframework.cloud:spring-cloud-stream-binder-kafka:3.0.7.RELEASE" level="project" />
<orderEntry type="library" name="Maven: org.springframework.cloud:spring-cloud-stream-binder-kafka-core:3.0.7.RELEASE" level="project" />
<orderEntry type="library" name="Maven: org.springframework.integration:spring-integration-kafka:5.4.2" level="project" />
<orderEntry type="library" name="Maven: org.apache.kafka:kafka-clients:2.6.0" level="project" />
<orderEntry type="library" name="Maven: com.github.luben:zstd-jni:1.4.4-7" level="project" />
<orderEntry type="library" name="Maven: org.lz4:lz4-java:1.7.1" level="project" />
<orderEntry type="library" name="Maven: org.xerial.snappy:snappy-java:1.1.7.3" level="project" />
<orderEntry type="library" name="Maven: org.springframework.kafka:spring-kafka:2.6.4" level="project" />
<orderEntry type="library" name="Maven: com.google.protobuf:protobuf-java:3.11.4" level="project" />
<orderEntry type="library" name="Maven: com.squareup.okhttp3:okhttp:4.8.1" level="project" />
<orderEntry type="library" name="Maven: com.squareup.okio:okio:2.7.0" level="project" />
<orderEntry type="library" name="Maven: org.jetbrains.kotlin:kotlin-stdlib-common:1.4.21" level="project" />
<orderEntry type="library" name="Maven: org.jetbrains.kotlin:kotlin-stdlib:1.4.21" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.springframework.boot:spring-boot-starter-test:2.4.1" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.springframework.boot:spring-boot-test:2.4.1" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.springframework.boot:spring-boot-test-autoconfigure:2.4.1" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: com.jayway.jsonpath:json-path:2.4.0" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: net.minidev:json-smart:2.3" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: net.minidev:accessors-smart:1.2" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.ow2.asm:asm:5.0.4" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: jakarta.xml.bind:jakarta.xml.bind-api:2.3.3" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: jakarta.activation:jakarta.activation-api:1.2.2" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.assertj:assertj-core:3.18.1" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.hamcrest:hamcrest:2.2" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.junit.jupiter:junit-jupiter:5.7.0" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.junit.jupiter:junit-jupiter-params:5.7.0" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.junit.jupiter:junit-jupiter-engine:5.7.0" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.junit.platform:junit-platform-engine:1.7.0" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.mockito:mockito-core:3.6.28" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: net.bytebuddy:byte-buddy:1.10.18" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: net.bytebuddy:byte-buddy-agent:1.10.18" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.objenesis:objenesis:3.1" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.mockito:mockito-junit-jupiter:3.6.28" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.skyscreamer:jsonassert:1.5.0" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: com.vaadin.external.google:android-json:0.0.20131108.vaadin1" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-core:5.3.2" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-jcl:5.3.2" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.springframework:spring-test:5.3.2" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.xmlunit:xmlunit-core:2.7.0" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.springframework.cloud:spring-cloud-stream-test-support:3.0.7.RELEASE" level="project" />
<orderEntry type="library" name="Maven: org.springframework.boot:spring-boot-autoconfigure:2.4.1" level="project" />
<orderEntry type="library" name="Maven: com.fasterxml.jackson.core:jackson-databind:2.11.3" level="project" />
<orderEntry type="library" name="Maven: com.fasterxml.jackson.core:jackson-annotations:2.11.3" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.springframework.kafka:spring-kafka-test:2.6.4" level="project" />
<orderEntry type="library" name="Maven: org.springframework:spring-context:5.3.2" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.apache.kafka:kafka-clients:test:2.6.0" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.apache.kafka:kafka-streams:2.6.0" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.apache.kafka:connect-json:2.6.0" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.apache.kafka:connect-api:2.6.0" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.rocksdb:rocksdbjni:5.18.4" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.apache.kafka:kafka-streams-test-utils:2.6.0" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.apache.kafka:kafka_2.13:2.6.0" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: com.fasterxml.jackson.module:jackson-module-scala_2.13:2.11.3" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: com.fasterxml.jackson.module:jackson-module-paranamer:2.11.3" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: com.thoughtworks.paranamer:paranamer:2.8" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: com.fasterxml.jackson.dataformat:jackson-dataformat-csv:2.11.3" level="project" />
<orderEntry type="library" name="Maven: net.sf.jopt-simple:jopt-simple:5.0.4" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: com.yammer.metrics:metrics-core:2.2.0" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.scala-lang.modules:scala-collection-compat_2.13:2.1.6" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.scala-lang.modules:scala-java8-compat_2.13:0.9.1" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.scala-lang:scala-library:2.13.2" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.scala-lang:scala-reflect:2.13.2" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: com.typesafe.scala-logging:scala-logging_2.13:3.9.2" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.apache.zookeeper:zookeeper:3.5.8" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.apache.zookeeper:zookeeper-jute:3.5.8" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.apache.yetus:audience-annotations:0.5.0" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: io.netty:netty-transport-native-epoll:4.1.55.Final" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: io.netty:netty-transport-native-unix-common:4.1.55.Final" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: commons-cli:commons-cli:1.4" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.apache.kafka:kafka_2.13:test:2.6.0" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.junit.jupiter:junit-jupiter-api:5.7.0" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.apiguardian:apiguardian-api:1.1.0" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.opentest4j:opentest4j:1.2.0" level="project" />
<orderEntry type="library" scope="TEST" name="Maven: org.junit.platform:junit-platform-commons:1.7.0" level="project" />
<orderEntry type="library" name="Maven: org.springframework.boot:spring-boot-devtools:2.4.1" level="project" />
<orderEntry type="library" name="Maven: org.springframework.boot:spring-boot:2.4.1" level="project" />
<orderEntry type="library" scope="RUNTIME" name="Maven: mysql:mysql-connector-java:8.0.22" level="project" />
</component>
</module>

310
dsp/mvnw vendored Normal file
View File

@ -0,0 +1,310 @@
#!/bin/sh
# ----------------------------------------------------------------------------
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# ----------------------------------------------------------------------------
# ----------------------------------------------------------------------------
# Maven Start Up Batch script
#
# Required ENV vars:
# ------------------
# JAVA_HOME - location of a JDK home dir
#
# Optional ENV vars
# -----------------
# M2_HOME - location of maven2's installed home dir
# MAVEN_OPTS - parameters passed to the Java VM when running Maven
# e.g. to debug Maven itself, use
# set MAVEN_OPTS=-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8000
# MAVEN_SKIP_RC - flag to disable loading of mavenrc files
# ----------------------------------------------------------------------------
# Source the system-wide and per-user mavenrc files, unless MAVEN_SKIP_RC is
# set to a non-empty value.
if [ -z "$MAVEN_SKIP_RC" ] ; then
if [ -f /etc/mavenrc ] ; then
. /etc/mavenrc
fi
if [ -f "$HOME/.mavenrc" ] ; then
. "$HOME/.mavenrc"
fi
fi
# OS specific support. $var _must_ be set to either true or false.
# (The flags are executed as commands later on, e.g. `if $cygwin ; then`.)
cygwin=false;
darwin=false;
mingw=false
# Classify the platform from the output of `uname`.
case "`uname`" in
CYGWIN*) cygwin=true ;;
MINGW*) mingw=true;;
Darwin*) darwin=true
# Use /usr/libexec/java_home if available, otherwise fall back to /Library/Java/Home
# See https://developer.apple.com/library/mac/qa/qa1170/_index.html
if [ -z "$JAVA_HOME" ]; then
if [ -x "/usr/libexec/java_home" ]; then
export JAVA_HOME="`/usr/libexec/java_home`"
else
export JAVA_HOME="/Library/Java/Home"
fi
fi
;;
esac
# When /etc/gentoo-release is readable and JAVA_HOME is still empty, ask
# java-config for the JRE home.
if [ -z "$JAVA_HOME" ] ; then
if [ -r /etc/gentoo-release ] ; then
JAVA_HOME=`java-config --jre-home`
fi
fi
# Derive M2_HOME from the location of this script when it is not preset.
if [ -z "$M2_HOME" ] ; then
## resolve links - $0 may be a link to maven's home
PRG="$0"
# need this for relative symlinks
while [ -h "$PRG" ] ; do
# Extract the link target from the "name -> target" tail of `ls -ld` output.
ls=`ls -ld "$PRG"`
link=`expr "$ls" : '.*-> \(.*\)$'`
# An absolute target replaces PRG; a relative one is resolved against PRG's directory.
if expr "$link" : '/.*' > /dev/null; then
PRG="$link"
else
PRG="`dirname "$PRG"`/$link"
fi
done
# Remember the working directory; the `cd` below restores it explicitly.
saveddir=`pwd`
M2_HOME=`dirname "$PRG"`/..
# make it fully qualified
M2_HOME=`cd "$M2_HOME" && pwd`
cd "$saveddir"
# echo Using m2 at $M2_HOME
fi
# For Cygwin, ensure paths are in UNIX format before anything is touched
if $cygwin ; then
[ -n "$M2_HOME" ] &&
M2_HOME=`cygpath --unix "$M2_HOME"`
[ -n "$JAVA_HOME" ] &&
JAVA_HOME=`cygpath --unix "$JAVA_HOME"`
[ -n "$CLASSPATH" ] &&
CLASSPATH=`cygpath --path --unix "$CLASSPATH"`
fi
# For Mingw, ensure paths are in UNIX format before anything is touched
if $mingw ; then
# Re-resolve each path by changing into it in a subshell and printing `pwd`.
[ -n "$M2_HOME" ] &&
M2_HOME="`(cd "$M2_HOME"; pwd)`"
[ -n "$JAVA_HOME" ] &&
JAVA_HOME="`(cd "$JAVA_HOME"; pwd)`"
fi
# Last resort: infer JAVA_HOME from the javac executable found by `which`.
if [ -z "$JAVA_HOME" ]; then
javaExecutable="`which javac`"
# Skip when the first word of the `which` output is "no" -- presumably the
# "no javac in ..." message some `which` implementations print (TODO confirm).
if [ -n "$javaExecutable" ] && ! [ "`expr \"$javaExecutable\" : '\([^ ]*\)'`" = "no" ]; then
# readlink(1) is not available as standard on Solaris 10.
readLink=`which readlink`
if [ ! `expr "$readLink" : '\([^ ]*\)'` = "no" ]; then
if $darwin ; then
# On Darwin, resolve the physical directory with `pwd -P` instead of `readlink -f`.
javaHome="`dirname \"$javaExecutable\"`"
javaExecutable="`cd \"$javaHome\" && pwd -P`/javac"
else
javaExecutable="`readlink -f \"$javaExecutable\"`"
fi
# JAVA_HOME is the part of javac's directory that precedes "/bin".
javaHome="`dirname \"$javaExecutable\"`"
javaHome=`expr "$javaHome" : '\(.*\)/bin'`
JAVA_HOME="$javaHome"
export JAVA_HOME
fi
fi
fi
# Choose the java launcher: a preset JAVACMD wins, then JAVA_HOME, then PATH.
if [ -z "$JAVACMD" ] ; then
if [ -n "$JAVA_HOME" ] ; then
if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
# IBM's JDK on AIX uses strange locations for the executables
JAVACMD="$JAVA_HOME/jre/sh/java"
else
JAVACMD="$JAVA_HOME/bin/java"
fi
else
JAVACMD="`which java`"
fi
fi
# Abort with exit status 1 when the chosen launcher is not executable.
if [ ! -x "$JAVACMD" ] ; then
echo "Error: JAVA_HOME is not defined correctly." >&2
echo " We cannot execute $JAVACMD" >&2
exit 1
fi
# A java found without JAVA_HOME only triggers a warning; the script continues.
if [ -z "$JAVA_HOME" ] ; then
echo "Warning: JAVA_HOME environment variable is not set."
fi
# NOTE(review): CLASSWORLDS_LAUNCHER is not referenced anywhere in the visible
# part of this script -- confirm whether it is still needed.
CLASSWORLDS_LAUNCHER=org.codehaus.plexus.classworlds.launcher.Launcher
# Traverses the directory structure from the given directory up to the
# filesystem root. The first directory containing a .mvn subdirectory is
# considered the project base directory and is printed on stdout; when no
# such directory exists, the starting directory itself is printed.
#
#   $1 - directory to start the search from (required)
#
# Returns 1 and prints a diagnostic on stderr when $1 is empty.
find_maven_basedir() {
  if [ -z "$1" ]
  then
    # Callers capture stdout as the result, so the diagnostic must go to
    # stderr; otherwise their "empty result" check could never trigger.
    echo "Path not specified to find_maven_basedir" >&2
    return 1
  fi

  basedir="$1"
  wdir="$1"
  while [ "$wdir" != '/' ] ; do
    if [ -d "$wdir"/.mvn ] ; then
      basedir=$wdir
      break
    fi
    # workaround for JBEAP-8937 (on Solaris 10/Sparc)
    if [ -d "${wdir}" ]; then
      wdir=`cd "$wdir/.."; pwd`
    else
      # wdir cannot advance when it is not a directory; stop instead of
      # looping forever and fall back to the starting directory.
      break
    fi
    # end of workaround
  done
  echo "${basedir}"
}
# Concatenates all lines of a file into a single line: newlines are replaced
# by spaces (runs are squeezed into one) and the result is printed followed
# by one newline. Prints nothing when $1 is not a regular file.
#
#   $1 - path of the file to flatten
concat_lines() {
  if [ -f "$1" ]; then
    # printf instead of echo: echo's treatment of backslashes and of a
    # leading "-n" differs between /bin/sh implementations, which could
    # corrupt file content such as Windows paths.
    printf '%s\n' "$(tr -s '\n' ' ' < "$1")"
  fi
}
# Locate the project base directory, starting from the current working
# directory; give up when the lookup yields nothing.
BASE_DIR=`find_maven_basedir "$(pwd)"`
if [ -z "$BASE_DIR" ]; then
  exit 1;
fi

##########################################################################################
# Extension to allow automatically downloading the maven-wrapper.jar from Maven-central
# This allows using the maven wrapper in projects that prohibit checking in binary data.
##########################################################################################
# Download order: wget, then curl, then the bundled MavenWrapperDownloader.java.
# The URL comes from (lowest to highest priority) the built-in default,
# MVNW_REPOURL, and the wrapperUrl entry of maven-wrapper.properties.
if [ -r "$BASE_DIR/.mvn/wrapper/maven-wrapper.jar" ]; then
  if [ "$MVNW_VERBOSE" = true ]; then
    echo "Found .mvn/wrapper/maven-wrapper.jar"
  fi
else
  if [ "$MVNW_VERBOSE" = true ]; then
    echo "Couldn't find .mvn/wrapper/maven-wrapper.jar, downloading it ..."
  fi
  if [ -n "$MVNW_REPOURL" ]; then
    jarUrl="$MVNW_REPOURL/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar"
  else
    jarUrl="https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar"
  fi
  wrapperProperties="$BASE_DIR/.mvn/wrapper/maven-wrapper.properties"
  # Only read the properties file when it exists, so a missing file does not
  # produce a redirection error.
  if [ -r "$wrapperProperties" ]; then
    # `read` is deliberately used without -r: it removes the backslash from
    # escaped values such as "https\://...".
    while IFS="=" read key value; do
      case "$key" in (wrapperUrl) jarUrl="$value"; break ;;
      esac
    done < "$wrapperProperties"
  fi
  if [ "$MVNW_VERBOSE" = true ]; then
    echo "Downloading from: $jarUrl"
  fi
  wrapperJarPath="$BASE_DIR/.mvn/wrapper/maven-wrapper.jar"
  if $cygwin; then
    wrapperJarPath=`cygpath --path --windows "$wrapperJarPath"`
  fi

  if command -v wget > /dev/null; then
    if [ "$MVNW_VERBOSE" = true ]; then
      echo "Found wget ... using wget"
    fi
    # On failure remove the partial file, otherwise the next run would treat
    # it as a valid wrapper jar.
    if [ -z "$MVNW_USERNAME" ] || [ -z "$MVNW_PASSWORD" ]; then
      wget "$jarUrl" -O "$wrapperJarPath" || rm -f "$wrapperJarPath"
    else
      # Credentials are quoted so that spaces or glob characters survive.
      wget --http-user="$MVNW_USERNAME" --http-password="$MVNW_PASSWORD" "$jarUrl" -O "$wrapperJarPath" || rm -f "$wrapperJarPath"
    fi
  elif command -v curl > /dev/null; then
    if [ "$MVNW_VERBOSE" = true ]; then
      echo "Found curl ... using curl"
    fi
    if [ -z "$MVNW_USERNAME" ] || [ -z "$MVNW_PASSWORD" ]; then
      curl -o "$wrapperJarPath" "$jarUrl" -f || rm -f "$wrapperJarPath"
    else
      curl --user "$MVNW_USERNAME:$MVNW_PASSWORD" -o "$wrapperJarPath" "$jarUrl" -f || rm -f "$wrapperJarPath"
    fi
  else
    if [ "$MVNW_VERBOSE" = true ]; then
      echo "Falling back to using Java to download"
    fi
    javaClass="$BASE_DIR/.mvn/wrapper/MavenWrapperDownloader.java"
    # For Cygwin, switch paths to Windows format before running javac
    if $cygwin; then
      javaClass=`cygpath --path --windows "$javaClass"`
    fi
    if [ -e "$javaClass" ]; then
      if [ ! -e "$BASE_DIR/.mvn/wrapper/MavenWrapperDownloader.class" ]; then
        if [ "$MVNW_VERBOSE" = true ]; then
          echo " - Compiling MavenWrapperDownloader.java ..."
        fi
        # Compiling the Java class
        ("$JAVA_HOME/bin/javac" "$javaClass")
      fi
      if [ -e "$BASE_DIR/.mvn/wrapper/MavenWrapperDownloader.class" ]; then
        # Running the downloader
        if [ "$MVNW_VERBOSE" = true ]; then
          echo " - Running MavenWrapperDownloader.java ..."
        fi
        # MAVEN_PROJECTBASEDIR is only assigned after this section, so the
        # base directory is computed here with the same fallback rule.
        # The subshell changes into BASE_DIR so that the relative class path
        # matches the directory where the .class file was checked above.
        (cd "$BASE_DIR" && "$JAVA_HOME/bin/java" -cp .mvn/wrapper MavenWrapperDownloader "${MAVEN_BASEDIR:-$BASE_DIR}")
      fi
    fi
  fi
fi
##########################################################################################
# End of extension
##########################################################################################
# Project base directory: MAVEN_BASEDIR when set, otherwise the detected
# BASE_DIR. Assignment and export are separate statements for portability to
# shells that do not accept `export VAR=value`.
MAVEN_PROJECTBASEDIR=${MAVEN_BASEDIR:-"$BASE_DIR"}
export MAVEN_PROJECTBASEDIR
if [ "$MVNW_VERBOSE" = true ]; then
  # Quoted so that paths containing spaces or glob characters print verbatim.
  echo "$MAVEN_PROJECTBASEDIR"
fi

# Prepend the options from .mvn/jvm.config (if any) to MAVEN_OPTS.
MAVEN_OPTS="$(concat_lines "$MAVEN_PROJECTBASEDIR/.mvn/jvm.config") $MAVEN_OPTS"

# For Cygwin, switch paths to Windows format before running java
if $cygwin; then
  [ -n "$M2_HOME" ] &&
    M2_HOME=`cygpath --path --windows "$M2_HOME"`
  [ -n "$JAVA_HOME" ] &&
    JAVA_HOME=`cygpath --path --windows "$JAVA_HOME"`
  [ -n "$CLASSPATH" ] &&
    CLASSPATH=`cygpath --path --windows "$CLASSPATH"`
  [ -n "$MAVEN_PROJECTBASEDIR" ] &&
    MAVEN_PROJECTBASEDIR=`cygpath --path --windows "$MAVEN_PROJECTBASEDIR"`
fi

# Provide a "standardized" way to retrieve the CLI args that will
# work with both Windows and non-Windows executions.
# "$*" joins the arguments into one string; "$@" is meant for lists and is
# not well defined inside a scalar assignment.
MAVEN_CMD_LINE_ARGS="$MAVEN_CONFIG $*"
export MAVEN_CMD_LINE_ARGS

WRAPPER_LAUNCHER=org.apache.maven.wrapper.MavenWrapperMain

# Replace this shell with the JVM running the wrapper main class.
# $MAVEN_OPTS and $MAVEN_CONFIG are intentionally unquoted so that each
# whitespace-separated option becomes its own argument.
exec "$JAVACMD" \
  $MAVEN_OPTS \
  -classpath "$MAVEN_PROJECTBASEDIR/.mvn/wrapper/maven-wrapper.jar" \
  "-Dmaven.home=${M2_HOME}" "-Dmaven.multiModuleProjectDirectory=${MAVEN_PROJECTBASEDIR}" \
  ${WRAPPER_LAUNCHER} $MAVEN_CONFIG "$@"

182
dsp/mvnw.cmd vendored Normal file
View File

@ -0,0 +1,182 @@
@REM ----------------------------------------------------------------------------
@REM Licensed to the Apache Software Foundation (ASF) under one
@REM or more contributor license agreements. See the NOTICE file
@REM distributed with this work for additional information
@REM regarding copyright ownership. The ASF licenses this file
@REM to you under the Apache License, Version 2.0 (the
@REM "License"); you may not use this file except in compliance
@REM with the License. You may obtain a copy of the License at
@REM
@REM https://www.apache.org/licenses/LICENSE-2.0
@REM
@REM Unless required by applicable law or agreed to in writing,
@REM software distributed under the License is distributed on an
@REM "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@REM KIND, either express or implied. See the License for the
@REM specific language governing permissions and limitations
@REM under the License.
@REM ----------------------------------------------------------------------------
@REM ----------------------------------------------------------------------------
@REM Maven Start Up Batch script
@REM
@REM Required ENV vars:
@REM JAVA_HOME - location of a JDK home dir
@REM
@REM Optional ENV vars
@REM M2_HOME - location of maven2's installed home dir
@REM MAVEN_BATCH_ECHO - set to 'on' to enable the echoing of the batch commands
@REM MAVEN_BATCH_PAUSE - set to 'on' to wait for a keystroke before ending
@REM MAVEN_OPTS - parameters passed to the Java VM when running Maven
@REM e.g. to debug Maven itself, use
@REM set MAVEN_OPTS=-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8000
@REM MAVEN_SKIP_RC - flag to disable loading of mavenrc files
@REM ----------------------------------------------------------------------------
@REM Begin all REM lines with '@' in case MAVEN_BATCH_ECHO is 'on'
@echo off
@REM set title of command window
title %0
@REM enable echoing by setting MAVEN_BATCH_ECHO to 'on'
@if "%MAVEN_BATCH_ECHO%" == "on" echo %MAVEN_BATCH_ECHO%
@REM set %HOME% to equivalent of $HOME
if "%HOME%" == "" (set "HOME=%HOMEDRIVE%%HOMEPATH%")
@REM Execute a user defined script before this one
if not "%MAVEN_SKIP_RC%" == "" goto skipRcPre
@REM check for pre script, once with legacy .bat ending and once with .cmd ending
if exist "%HOME%\mavenrc_pre.bat" call "%HOME%\mavenrc_pre.bat"
if exist "%HOME%\mavenrc_pre.cmd" call "%HOME%\mavenrc_pre.cmd"
:skipRcPre
@setlocal
set ERROR_CODE=0
@REM To isolate internal variables from possible post scripts, we use another setlocal
@setlocal
@REM ==== START VALIDATION ====
@REM JAVA_HOME must be set and must contain bin\java.exe; otherwise an error is
@REM written to stderr and control jumps to the error label.
if not "%JAVA_HOME%" == "" goto OkJHome
echo.
echo Error: JAVA_HOME not found in your environment. >&2
echo Please set the JAVA_HOME variable in your environment to match the >&2
echo location of your Java installation. >&2
echo.
goto error
:OkJHome
@REM JAVA_HOME is set; verify that it really points at a Java installation.
if exist "%JAVA_HOME%\bin\java.exe" goto init
echo.
echo Error: JAVA_HOME is set to an invalid directory. >&2
echo JAVA_HOME = "%JAVA_HOME%" >&2
echo Please set the JAVA_HOME variable in your environment to match the >&2
echo location of your Java installation. >&2
echo.
goto error
@REM ==== END VALIDATION ====
:init
@REM Find the project base dir, i.e. the directory that contains the folder ".mvn".
@REM Fallback to current working directory if not found.
@REM A non-empty MAVEN_BASEDIR is used as is and skips the search.
set MAVEN_PROJECTBASEDIR=%MAVEN_BASEDIR%
IF NOT "%MAVEN_PROJECTBASEDIR%"=="" goto endDetectBaseDir
@REM Remember the starting directory; the search below changes the current directory.
set EXEC_DIR=%CD%
set WDIR=%EXEC_DIR%
:findBaseDir
@REM Walk upwards one directory per iteration until a ".mvn" folder is found.
IF EXIST "%WDIR%"\.mvn goto baseDirFound
cd ..
@REM If "cd .." did not change the directory there is no parent left to try.
IF "%WDIR%"=="%CD%" goto baseDirNotFound
set WDIR=%CD%
goto findBaseDir
:baseDirFound
set MAVEN_PROJECTBASEDIR=%WDIR%
@REM Restore the directory the script was started from.
cd "%EXEC_DIR%"
goto endDetectBaseDir
:baseDirNotFound
set MAVEN_PROJECTBASEDIR=%EXEC_DIR%
cd "%EXEC_DIR%"
:endDetectBaseDir
@REM Read optional JVM arguments from .mvn\jvm.config in the project base dir.
IF NOT EXIST "%MAVEN_PROJECTBASEDIR%\.mvn\jvm.config" goto endReadAdditionalConfig
@REM Delayed expansion is needed so the variable can grow inside the loop.
@setlocal EnableExtensions EnableDelayedExpansion
@REM Append every line of the file, separated by a space, to JVM_CONFIG_MAVEN_PROPS.
for /F "usebackq delims=" %%a in ("%MAVEN_PROJECTBASEDIR%\.mvn\jvm.config") do set JVM_CONFIG_MAVEN_PROPS=!JVM_CONFIG_MAVEN_PROPS! %%a
@REM The value is expanded when this line is read, so it survives the endlocal.
@endlocal & set JVM_CONFIG_MAVEN_PROPS=%JVM_CONFIG_MAVEN_PROPS%
:endReadAdditionalConfig
@REM Java executable and wrapper jar locations. Both values keep their surrounding
@REM double quotes so that later lines can use them without quoting them again.
SET MAVEN_JAVA_EXE="%JAVA_HOME%\bin\java.exe"
set WRAPPER_JAR="%MAVEN_PROJECTBASEDIR%\.mvn\wrapper\maven-wrapper.jar"
set WRAPPER_LAUNCHER=org.apache.maven.wrapper.MavenWrapperMain

@REM Default location the wrapper jar is downloaded from.
set DOWNLOAD_URL="https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar"

@REM A wrapperUrl entry in maven-wrapper.properties overrides the default URL.
@REM The usebackq option is required here: without it FOR /F treats the
@REM double-quoted set as a literal string, i.e. it parses the path itself
@REM instead of reading the file, and the wrapperUrl entry is never seen.
FOR /F "usebackq tokens=1,2 delims==" %%A IN ("%MAVEN_PROJECTBASEDIR%\.mvn\wrapper\maven-wrapper.properties") DO (
    IF "%%A"=="wrapperUrl" SET DOWNLOAD_URL=%%B
)
@REM Extension to allow automatically downloading the maven-wrapper.jar from Maven-central
@REM This allows using the maven wrapper in projects that prohibit checking in binary data.

@REM MVNW_REPOURL, when set, replaces the repository the jar is fetched from.
@REM The override has to be applied before the parenthesized block below: cmd
@REM expands every percent variable of a block when the block is read, so a SET
@REM placed inside the block would not be seen by the echo and powershell lines
@REM that follow it in the same block.
if not "%MVNW_REPOURL%" == "" (
    SET DOWNLOAD_URL="%MVNW_REPOURL%/io/takari/maven-wrapper/0.5.6/maven-wrapper-0.5.6.jar"
)

if exist %WRAPPER_JAR% (
    if "%MVNW_VERBOSE%" == "true" (
        echo Found %WRAPPER_JAR%
    )
) else (
    if "%MVNW_VERBOSE%" == "true" (
        echo Couldn't find %WRAPPER_JAR%, downloading it ...
        echo Downloading from: %DOWNLOAD_URL%
    )

    @REM Download through PowerShell with TLS 1.2 enabled. Credentials are attached
    @REM only when MVNW_USERNAME and MVNW_PASSWORD are not both empty.
    powershell -Command "&{"^
        "$webclient = new-object System.Net.WebClient;"^
        "if (-not ([string]::IsNullOrEmpty('%MVNW_USERNAME%') -and [string]::IsNullOrEmpty('%MVNW_PASSWORD%'))) {"^
        "$webclient.Credentials = new-object System.Net.NetworkCredential('%MVNW_USERNAME%', '%MVNW_PASSWORD%');"^
        "}"^
        "[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12; $webclient.DownloadFile('%DOWNLOAD_URL%', '%WRAPPER_JAR%')"^
        "}"
    if "%MVNW_VERBOSE%" == "true" (
        echo Finished downloading %WRAPPER_JAR%
    )
)
@REM End of extension
@REM Provide a "standardized" way to retrieve the CLI args that will
@REM work with both Windows and non-Windows executions.
set MAVEN_CMD_LINE_ARGS=%*
@REM Start the wrapper launcher class on the JVM and forward all script arguments.
%MAVEN_JAVA_EXE% %JVM_CONFIG_MAVEN_PROPS% %MAVEN_OPTS% %MAVEN_DEBUG_OPTS% -classpath %WRAPPER_JAR% "-Dmaven.multiModuleProjectDirectory=%MAVEN_PROJECTBASEDIR%" %WRAPPER_LAUNCHER% %MAVEN_CONFIG% %*
@REM An exit code of 1 or higher from the JVM is treated as a failure.
if ERRORLEVEL 1 goto error
goto end
:error
set ERROR_CODE=1
:end
@REM Leave the inner setlocal scope while carrying ERROR_CODE into the outer one.
@endlocal & set ERROR_CODE=%ERROR_CODE%
@REM Any non-empty MAVEN_SKIP_RC value skips the post-script lookup.
if not "%MAVEN_SKIP_RC%" == "" goto skipRcPost
@REM check for post script, once with legacy .bat ending and once with .cmd ending
if exist "%HOME%\mavenrc_post.bat" call "%HOME%\mavenrc_post.bat"
if exist "%HOME%\mavenrc_post.cmd" call "%HOME%\mavenrc_post.cmd"
:skipRcPost
@REM pause the script if MAVEN_BATCH_PAUSE is set to 'on'
if "%MAVEN_BATCH_PAUSE%" == "on" pause
@REM With MAVEN_TERMINATE_CMD set to 'on' the whole cmd session exits with
@REM ERROR_CODE; otherwise only this script returns that code to its caller.
if "%MAVEN_TERMINATE_CMD%" == "on" exit %ERROR_CODE%
exit /B %ERROR_CODE%

138
dsp/pom.xml Normal file
View File

@ -0,0 +1,138 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor of the "dsp" Spring Boot application. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>2.4.1</version>
        <relativePath/> <!-- lookup parent from repository -->
    </parent>

    <groupId>com.jsc</groupId>
    <artifactId>dsp</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <name>dsp</name>

    <properties>
        <java.version>1.8</java.version>
        <!-- NOTE(review): the Hoxton release train is documented for Spring Boot 2.2.x/2.3.x,
             while the parent above is 2.4.1. Confirm that this combination is intended. -->
        <spring-cloud.version>Hoxton.SR7</spring-cloud.version>
        <!-- Overrides the Elasticsearch version managed by the Spring Boot parent. -->
        <elasticsearch.version>7.7.0</elasticsearch.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.jetbrains</groupId>
            <artifactId>annotations</artifactId>
            <!-- NOTE(review): RELEASE is a floating meta-version, so builds are not
                 reproducible. Consider pinning a concrete version. -->
            <version>RELEASE</version>
        </dependency>
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-compress</artifactId>
            <version>1.18</version>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.75</version>
        </dependency>
        <!-- Declared once only: the original listed lombok twice, which Maven reports
             as a duplicate dependency declaration. -->
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-data-elasticsearch</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.cloud</groupId>
            <artifactId>spring-cloud-stream</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.cloud</groupId>
            <artifactId>spring-cloud-stream-binder-kafka</artifactId>
        </dependency>
        <dependency>
            <groupId>com.google.protobuf</groupId>
            <artifactId>protobuf-java</artifactId>
            <version>3.11.4</version>
        </dependency>
        <dependency>
            <groupId>com.squareup.okhttp3</groupId>
            <artifactId>okhttp</artifactId>
            <version>4.8.1</version>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
            <exclusions>
                <exclusion>
                    <groupId>org.junit.vintage</groupId>
                    <artifactId>junit-vintage-engine</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.springframework.cloud</groupId>
            <artifactId>spring-cloud-stream-test-support</artifactId>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.springframework.kafka</groupId>
            <artifactId>spring-kafka-test</artifactId>
            <scope>test</scope>
        </dependency>
        <!-- devtools: hot reload during development -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-devtools</artifactId>
            <optional>true</optional>
            <!-- "true" is not a valid Maven scope; devtools is only needed at runtime. -->
            <scope>runtime</scope>
        </dependency>
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <scope>runtime</scope>
        </dependency>
    </dependencies>

    <dependencyManagement>
        <dependencies>
            <!-- Imports the Spring Cloud BOM so the spring-cloud-* artifacts above need no version. -->
            <dependency>
                <groupId>org.springframework.cloud</groupId>
                <artifactId>spring-cloud-dependencies</artifactId>
                <version>${spring-cloud.version}</version>
                <type>pom</type>
                <scope>import</scope>
            </dependency>
        </dependencies>
    </dependencyManagement>

    <repositories>
        <repository>
            <id>nexus-aliyun</id>
            <name>Nexus aliyun</name>
            <!-- HTTPS instead of HTTP: Maven 3.8.1 and later block plain-http repositories by default. -->
            <url>https://maven.aliyun.com/nexus/content/groups/public</url>
        </repository>
    </repositories>

    <build>
        <plugins>
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
                <configuration>
                    <includeSystemScope>true</includeSystemScope>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>

View File

@ -0,0 +1,17 @@
package com.jsc.dsp;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration;
import org.springframework.scheduling.annotation.EnableScheduling;
/**
 * Entry point of the DSP Spring Boot application.
 *
 * <p>{@code DataSourceAutoConfiguration} is excluded from auto-configuration and
 * scheduling support is switched on through {@code @EnableScheduling}.
 */
@EnableScheduling
@SpringBootApplication(exclude = {DataSourceAutoConfiguration.class})
public class DspApplication {

    /**
     * Starts the Spring application.
     *
     * @param args command-line arguments, handed to Spring Boot unchanged
     */
    public static void main(String[] args) {
        SpringApplication.run(DspApplication.class, args);
    }
}

View File

@ -0,0 +1,17 @@
package com.jsc.dsp.binding;
import org.springframework.cloud.stream.annotation.Input;
import org.springframework.cloud.stream.annotation.Output;
import org.springframework.messaging.MessageChannel;
import org.springframework.messaging.SubscribableChannel;
/**
 * Spring Cloud Stream binding interface declaring one input and one output
 * channel for the {@code file_dl} pipeline.
 */
public interface FileDlBinding {

    /** Binding name of the input channel. */
    String FILE_DL_PIPELINE_IN = "file_dl_pipeline_in";

    /** Binding name of the output channel. */
    String FILE_DL_PIPELINE_OUT = "file_dl_pipeline_out";

    /**
     * @return the channel bound under {@link #FILE_DL_PIPELINE_IN}
     */
    @Input(FILE_DL_PIPELINE_IN)
    SubscribableChannel fileDlInput();

    /**
     * @return the channel bound under {@link #FILE_DL_PIPELINE_OUT}
     */
    @Output(FILE_DL_PIPELINE_OUT)
    MessageChannel fileDlOutPut();
}

View File

@ -0,0 +1,18 @@
package com.jsc.dsp.binding;
import org.springframework.cloud.stream.annotation.Input;
import org.springframework.cloud.stream.annotation.Output;
import org.springframework.messaging.MessageChannel;
import org.springframework.messaging.SubscribableChannel;
/**
 * Spring Cloud Stream binding interface declaring one input and one output
 * channel for the {@code protobuf} pipeline.
 */
public interface ProtobufBinding {

    /** Binding name of the input channel. */
    String PROTOBUF_PIPELINE_IN = "protobuf_pipeline_in";

    /** Binding name of the output channel. */
    String PROTOBUF_PIPELINE_OUT = "protobuf_pipeline_out";

    /**
     * @return the channel bound under {@link #PROTOBUF_PIPELINE_IN}
     */
    @Input(PROTOBUF_PIPELINE_IN)
    SubscribableChannel protobufInput();

    /**
     * @return the channel bound under {@link #PROTOBUF_PIPELINE_OUT}
     */
    @Output(PROTOBUF_PIPELINE_OUT)
    MessageChannel protobufOutPut();
}

View File

@ -0,0 +1,20 @@
package com.jsc.dsp.binding;
import org.springframework.cloud.stream.annotation.Input;
import org.springframework.cloud.stream.annotation.Output;
import org.springframework.messaging.MessageChannel;
import org.springframework.messaging.SubscribableChannel;
/**
 * Spring Cloud Stream binding interface declaring one input and one output
 * channel for the {@code storage} pipeline.
 *
 * <p>The method names start with an upper-case letter; they are kept as they are
 * because callers depend on them.
 */
public interface StorageBinding {

    /** Binding name of the input channel. */
    String STORAGE_PIPELINE_IN = "storage_pipeline_in";

    /** Binding name of the output channel. */
    String STORAGE_PIPELINE_OUT = "storage_pipeline_out";

    /**
     * @return the channel bound under {@link #STORAGE_PIPELINE_IN}
     */
    @Input(STORAGE_PIPELINE_IN)
    SubscribableChannel StorageInput();

    /**
     * @return the channel bound under {@link #STORAGE_PIPELINE_OUT}
     */
    @Output(STORAGE_PIPELINE_OUT)
    MessageChannel StorageOutput();
}

View File

@ -0,0 +1,55 @@
package com.jsc.dsp.config;
import com.jsc.dsp.utils.EsUtils;
import org.elasticsearch.client.RestHighLevelClient;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.json.JacksonJsonParser;
import org.springframework.context.annotation.Bean;
import org.springframework.stereotype.Component;
import java.util.Base64;
import java.util.Base64.Decoder;
import java.util.Base64.Encoder;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
/**
 * Registers shared beans: a JSON parser, Base64 decoder and encoder, a fixed-size
 * thread pool and the Elasticsearch high-level client.
 *
 * <p>The Elasticsearch connection settings are injected from the {@code es.ip},
 * {@code es.port}, {@code es.username} and {@code es.password} properties.
 */
@Component
public class Configuration {

    /** Number of threads in the pool returned by {@link #getTheadPool()}. */
    private static final int THREAD_POOL_SIZE = 4;

    @Value("${es.ip}")
    String esIp;

    @Value("${es.port}")
    Integer esPort;

    @Value("${es.username}")
    String esUsername;

    @Value("${es.password}")
    String esPassword;

    /**
     * @return a new Jackson-backed JSON parser
     */
    @Bean
    public JacksonJsonParser getJacksonParser() {
        JacksonJsonParser parser = new JacksonJsonParser();
        return parser;
    }

    /**
     * @return the basic Base64 decoder of the JDK
     */
    @Bean
    public Decoder getBase64() {
        Decoder decoder = Base64.getDecoder();
        return decoder;
    }

    /**
     * @return the basic Base64 encoder of the JDK
     */
    @Bean
    public Encoder getEncoder() {
        Encoder encoder = Base64.getEncoder();
        return encoder;
    }

    /**
     * @return a thread pool with a fixed number of worker threads
     */
    @Bean
    public ExecutorService getTheadPool() {
        ExecutorService pool = Executors.newFixedThreadPool(THREAD_POOL_SIZE);
        return pool;
    }

    /**
     * @return the Elasticsearch client built by {@code EsUtils} from the injected settings
     */
    @Bean
    public RestHighLevelClient esClient() {
        RestHighLevelClient client =
                EsUtils.getElasticsearchClient(esIp, esPort, esUsername, esPassword);
        return client;
    }
}

View File

@ -0,0 +1,24 @@
package com.jsc.dsp.config;
import org.springframework.boot.web.embedded.tomcat.TomcatConnectorCustomizer;
import org.springframework.boot.web.embedded.tomcat.TomcatServletWebServerFactory;
import org.springframework.boot.web.servlet.server.ServletWebServerFactory;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
/**
 * Customizes the embedded Tomcat server.
 *
 * <p>Newer Tomcat versions (the exact starting version was not checked) parse
 * requests strictly according to RFC 3986. That RFC only allows ASCII letters
 * (a-zA-Z), digits (0-9), the four special characters {@code - _ . ~} and the
 * reserved characters {@code ! * ( ) ; : @ & = + $ , / ? # [ ]} in a URL.
 */
@Configuration
public class TomcatConfig {

    /**
     * Configures Tomcat so that its connectors get the {@code relaxedQueryChars}
     * property set to {@code []{}}.
     *
     * @return the Tomcat web server factory carrying the connector customizer
     */
    @Bean
    public ServletWebServerFactory webServerFactory() {
        TomcatConnectorCustomizer relaxQueryChars =
                connector -> connector.setProperty("relaxedQueryChars", "[]{}");
        TomcatServletWebServerFactory factory = new TomcatServletWebServerFactory();
        factory.addConnectorCustomizers(relaxQueryChars);
        return factory;
    }
}

View File

@ -0,0 +1,82 @@
package com.jsc.dsp.model;
import lombok.Data;
import java.io.Serializable;
/**
 * Serializable data holder with one field per {@code es_*} attribute.
 *
 * <p>Lombok's {@code @Data} generates the getters, setters, {@code equals},
 * {@code hashCode} and {@code toString} from the fields below, so field names
 * and their order are part of the generated API.
 *
 * <p>NOTE(review): the {@code es_} prefix suggests the fields mirror an
 * Elasticsearch index mapping; confirm against the index definition before
 * renaming anything.
 */
@Data
public class Indeximos implements Serializable {
String es_abstract;
String es_annex;
String es_attachment;
// NOTE(review): looks like a misspelled twin of es_attachment; confirm whether both are really needed.
String es_attchment;
String es_attention;
String es_attentiontime;
String es_author;
String es_authors;
String es_briefing;
String es_carriertype;
String es_catalog;
String es_catalog1;
String es_catalog2;
String es_client;
String es_collection;
String es_commentcount;
String es_content;
String es_contenttype;
String es_district;
Float es_doclength;
String es_emotion;
String es_extname;
String es_forwardcount;
String es_groupname;
String es_heat;
String es_hkey;
String es_hotkey;
String es_imageflag;
String es_images;
String es_ip;
String es_isrepost;
String es_keywords;
String es_lang;
String es_lasttime;
String es_likecount;
String es_links;
String es_loadtime;
String es_mentionsaccount;
Float es_negativeProbability;
String es_negativeWords;
String es_pkey;
String es_positiveWords;
String es_publisher;
String es_reactioncount;
String es_readsign;
String es_reportinfo;
String es_repostid;
String es_repostuid;
String es_repostuname;
String es_rultopic;
String es_sid;
String es_simhash;
String es_similarity;
String es_similaritycount;
String es_similaritytime;
Float es_simrank;
String es_sitename;
String es_srcname;
String es_subjectId;
String es_tableflag;
String es_tags;
String es_title;
String es_urlcontent;
String es_urlimage;
String es_urlname;
String es_urltime;
String es_urltitle;
String es_urltopic;
String es_userid;
String es_video;
String es_warning;
String es_warning_word;
String es_warningtime;
}

View File

@ -0,0 +1,16 @@
package com.jsc.dsp.model;
/**
 * Generic result envelope made of a code, a message and a typed payload.
 *
 * @param <T> type of the payload stored in {@link #content}
 */
public class ReturnT<T> {

    /** Result code. */
    public Integer code;

    /** Message accompanying the code. */
    public String message;

    /** Payload of the result. */
    public T content;

    /** Creates an empty envelope; every field is {@code null}. */
    public ReturnT() {
        this(null, null, null);
    }

    /**
     * Creates a fully populated envelope.
     *
     * @param code    result code
     * @param message message accompanying the code
     * @param content payload of the result
     */
    public ReturnT(Integer code, String message, T content) {
        this.content = content;
        this.message = message;
        this.code = code;
    }
}

View File

@ -0,0 +1,16 @@
package com.jsc.dsp.model;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import java.util.Date;
/**
 * Data holder with a name, a count and a last-time timestamp.
 *
 * <p>Lombok generates the accessors, {@code equals}/{@code hashCode}/{@code toString},
 * a no-argument constructor and a constructor taking all fields in declaration order.
 */
@NoArgsConstructor
@AllArgsConstructor
@Data
public class SearchAggregation {

    String name;

    Integer count;

    Date lastTime;
}

View File

@ -0,0 +1,25 @@
package com.jsc.dsp.model;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import java.util.Date;
/**
 * Data holder describing a social target: user identification, type and flag,
 * keywords, memo and check bookkeeping fields.
 *
 * <p>Lombok generates the accessors, {@code equals}/{@code hashCode}/{@code toString},
 * a no-argument constructor and a constructor taking all fields in declaration order.
 */
@NoArgsConstructor
@AllArgsConstructor
@Data
public class TargetSocial {

    private Integer id;

    private String userName;
    private String userUid;
    private String userType;
    private String userFlag;

    private String keywords;
    private Date updateTime;
    private String memo;

    private Integer checkTotalNum;
    private Date checkLastTime;
    private Date checkUpdateTime;
}

View File

@ -0,0 +1,30 @@
package com.jsc.dsp.model;
import lombok.Data;
import lombok.NoArgsConstructor;
import java.util.Date;
/**
 * Data holder describing a website target: start URL, site attributes,
 * configuration references, memo and check bookkeeping fields.
 *
 * <p>Lombok generates the accessors, {@code equals}/{@code hashCode}/{@code toString}
 * and a no-argument constructor.
 */
@NoArgsConstructor
@Data
public class TargetWebsite {

    private Integer id;

    private String startUrl;
    private String siteName;
    private String region;
    private String lang;
    private String project;
    private Integer weight;

    private String carrierType;
    private String siteType;
    private String resourceType;
    private String storageOption;

    private Integer status;
    private Integer parserConfig;
    private Integer filterConfig;
    private String memo;

    private Integer checkTotalNum;
    private Date checkLastTime;
    private Date checkUpdateTime;
}

Some files were not shown because too many files have changed in this diff Show More