# -*- coding: utf-8 -*-
# !/usr/bin/env python
"""
-------------------------------------------------
   File Name:     ProxyManager.py
   Description :
   Author :       JHao
   date:          2016/12/3
-------------------------------------------------
   Change Activity:
                   2016/12/3:
-------------------------------------------------
"""
__author__ = 'JHao'

import random
import json

from ProxyHelper import Proxy
from DB.DbClient import DbClient
from Config.ConfigGetter import config
from Util.LogHandler import LogHandler
from Util.utilFunction import verifyProxyFormat
from ProxyGetter.getFreeProxy import GetFreeProxy


class ProxyManager(object):
    """
    ProxyManager

    Thin facade over ``DbClient`` that works on two tables: the raw-proxy
    table (filled by :meth:`fetch`) and the useful-proxy table (read by
    :meth:`get` / :meth:`getAll`, pruned by :meth:`delete`).  Every method
    switches the shared ``self.db`` handle to the table it needs before
    touching it.
    """

    def __init__(self):
        self.db = DbClient()
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        # self.useful_proxy_queue = 'ProxyPool:useful_proxy_3'
        self.useful_proxy_queue = 'ProxyPool:useful_proxy_63'

    def fetch(self):
        """
        fetch proxy into db by ProxyGetter

        Calls every getter named in ``config.proxy_getter_functions`` (looked
        up as an attribute of ``GetFreeProxy``) and stores each new,
        well-formed proxy in the raw-proxy table.  A proxy is stored at most
        once per ``fetch`` call, even if several getters yield it.

        An exception raised by a getter is logged and ends that getter only;
        the remaining getters still run.
        :return:
        """
        self.db.changeTable(self.raw_proxy_queue)
        proxy_set = set()
        self.log.info("ProxyFetch : start")
        for proxyGetter in config.proxy_getter_functions:
            self.log.info("ProxyFetch - {func}: start".format(func=proxyGetter))
            try:
                for proxy_str in getattr(GetFreeProxy, proxyGetter.strip())():
                    self._storeFetchedProxy(proxy_str, proxyGetter, proxy_set)
            except Exception as e:
                # Boundary handler: one broken getter must not stop the others.
                self.log.error("ProxyFetch - {func}: error".format(func=proxyGetter))
                self.log.error(str(e))

    def _storeFetchedProxy(self, proxy_str, source, seen):
        """
        Validate one JSON record yielded by a getter and store it if new.

        :param proxy_str: JSON text with the keys ``proxy``, ``proxy_type``
            and ``anonimity``
        :param source: getter name, used in log lines and passed to ``Proxy``
        :param seen: set of proxies already stored during this fetch; updated
            in place when the record is stored
        :return:
        """
        try:
            # No ``encoding`` keyword: it was removed from json.loads in
            # Python 3.9 (TypeError), and UTF-8 was already the default.
            proxy_dict = json.loads(proxy_str)
            proxy = proxy_dict['proxy']
            proxy_type = proxy_dict['proxy_type']
            anonimity = proxy_dict['anonimity']
        except (ValueError, KeyError, TypeError) as e:
            # A single malformed record is skipped instead of aborting the
            # rest of this getter's output.
            self.log.error('ProxyFetch - {func}: '
                           'malformed record skipped ({err})'.format(func=source, err=e))
            return

        # str() keeps the log call safe when ``proxy`` is None or otherwise
        # not a string -- exactly the values the "illegal" branch reports.
        label = str(proxy).ljust(20)

        if not proxy or not verifyProxyFormat(proxy):
            self.log.error('ProxyFetch - {func}: '
                           '{proxy} illegal'.format(func=source, proxy=label))
            return

        if proxy in seen:
            self.log.info('ProxyFetch - {func}: '
                          '{proxy} exist'.format(func=source, proxy=label))
            return

        self.db.put(Proxy(proxy, source=source, proxy_type=proxy_type, anonimity=anonimity))
        seen.add(proxy)
        self.log.info('ProxyFetch - {func}: '
                      '{proxy} added'.format(func=source, proxy=label))

    def get(self):
        """
        return a useful proxy

        Picks one entry at random from the useful-proxy table and builds it
        with ``Proxy.newProxyFromJson``.
        :return: a ``Proxy``, or None when the table has no entries
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_list = self.db.getAll()
        if item_list:
            random_choice = random.choice(item_list)
            return Proxy.newProxyFromJson(random_choice)
        return None

    def delete(self, proxy_str):
        """
        delete proxy from pool
        :param proxy_str: value handed to ``DbClient.delete`` on the
            useful-proxy table
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy_str)

    def getAll(self):
        """
        get all proxy from pool as list
        :return: list of ``Proxy`` objects, one per entry in the useful-proxy
            table
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_list = self.db.getAll()
        return [Proxy.newProxyFromJson(item) for item in item_list]

    def getNumber(self):
        """
        count the entries of both tables
        :return: dict with the keys ``raw_proxy`` and ``useful_proxy``
        """
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_queue = self.db.getNumber()
        return {'raw_proxy': total_raw_proxy, 'useful_proxy': total_useful_queue}


if __name__ == '__main__':
    pp = ProxyManager()
    pp.fetch()