# -*- coding: utf-8 -*-
# !/usr/bin/env python
"""
-------------------------------------------------
   File Name:     ProxyManager.py
   Description :  ProxyManager - fetch raw proxies into the DB and
                  serve useful proxies from the pool
   Author :       JHao
   date:          2016/12/3
-------------------------------------------------
   Change Activity:
                   2016/12/3:
-------------------------------------------------
"""
__author__ = 'JHao'
|
||
|
||
import random
|
||
import json
|
||
from ProxyHelper import Proxy
|
||
from DB.DbClient import DbClient
|
||
from Config.ConfigGetter import config
|
||
from Util.LogHandler import LogHandler
|
||
from Util.utilFunction import verifyProxyFormat
|
||
from ProxyGetter.getFreeProxy import GetFreeProxy
|
||
|
||
|
||
class ProxyManager(object):
    """
    Coordinate proxy collection and retrieval on top of ``DbClient``.

    Two tables of the underlying store are used: ``raw_proxy_queue`` receives
    everything collected by :meth:`fetch`, while ``useful_proxy_queue`` is the
    table read by :meth:`get`, :meth:`getAll` and modified by :meth:`delete`.
    Every method selects the table it needs through ``self.db.changeTable``
    before touching the store.
    """

    def __init__(self):
        self.db = DbClient()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        self.useful_proxy_queue = 'ProxyPool:useful_proxy_63'

    def fetch(self):
        """
        Collect proxies from every configured getter into the raw table.

        Each name in ``config.proxy_getter_functions`` is looked up on
        ``GetFreeProxy`` and called; it must yield JSON strings carrying the
        keys ``proxy``, ``proxy_type`` and ``anonimity``.  Records whose proxy
        is empty or fails ``verifyProxyFormat`` are logged and skipped, as are
        proxies already seen earlier in the same run.  Anything else is
        wrapped in ``Proxy`` and stored with ``self.db.put``.

        An exception raised while consuming a getter is logged and ends that
        getter only; the remaining getters still run.

        :return: None
        """
        self.db.changeTable(self.raw_proxy_queue)
        proxy_set = set()
        self.log.info("ProxyFetch : start")
        for proxyGetter in config.proxy_getter_functions:
            self.log.info("ProxyFetch - {func}: start".format(func=proxyGetter))
            try:
                for proxy_str in getattr(GetFreeProxy, proxyGetter.strip())():
                    # No ``encoding`` keyword: it was removed in Python 3.9
                    # and passing it raises TypeError there.
                    proxy_dict = json.loads(proxy_str)
                    proxy = proxy_dict['proxy']
                    proxy_type = proxy_dict['proxy_type']
                    anonimity = proxy_dict['anonimity']

                    # A missing proxy (e.g. JSON null) has no ``ljust``;
                    # stringify it so the record is reported as illegal
                    # instead of aborting the whole getter.
                    shown = proxy.ljust(20) if proxy else str(proxy).ljust(20)

                    if not proxy or not verifyProxyFormat(proxy):
                        self.log.error('ProxyFetch - {func}: '
                                       '{proxy} illegal'.format(func=proxyGetter, proxy=shown))
                        continue

                    if proxy in proxy_set:
                        self.log.info('ProxyFetch - {func}: '
                                      '{proxy} exist'.format(func=proxyGetter, proxy=shown))
                        continue

                    self.db.put(Proxy(proxy, source=proxyGetter, proxy_type=proxy_type, anonimity=anonimity))
                    proxy_set.add(proxy)
                    self.log.info('ProxyFetch - {func}: '
                                  '{proxy} added'.format(func=proxyGetter, proxy=shown))
            except Exception as e:
                # Best effort: a failing getter must not stop the others.
                self.log.error("ProxyFetch - {func}: error".format(func=proxyGetter))
                self.log.error(str(e))

    def get(self):
        """
        Return one randomly chosen proxy from the useful table.

        :return: the result of ``Proxy.newProxyFromJson`` for a random stored
                 item, or None when the table is empty
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_list = self.db.getAll()
        if not item_list:
            return None
        return Proxy.newProxyFromJson(random.choice(item_list))

    def delete(self, proxy_str):
        """
        Delete a proxy from the useful table.

        :param proxy_str: key of the proxy, passed unchanged to ``self.db.delete``
        :return: None
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy_str)

    def getAll(self):
        """
        Return every proxy in the useful table.

        :return: list with ``Proxy.newProxyFromJson`` applied to each stored item
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_list = self.db.getAll()
        return [Proxy.newProxyFromJson(item) for item in item_list]

    def getNumber(self):
        """
        Count the entries of both tables.

        :return: dict with the keys ``raw_proxy`` and ``useful_proxy`` holding
                 ``self.db.getNumber()`` for the respective table
        """
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_queue = self.db.getNumber()
        return {'raw_proxy': total_raw_proxy, 'useful_proxy': total_useful_queue}
|
||
|
||
|
||
if __name__ == '__main__':
    # Executed as a script: build a manager and run one fetch pass.
    manager = ProxyManager()
    manager.fetch()