forked from jhao104/proxy_pool
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathProxyManager.py
104 lines (92 loc) · 3.22 KB
/
ProxyManager.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# -*- coding: utf-8 -*-
# !/usr/bin/env python
"""
-------------------------------------------------
File Name: ProxyManager.py
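Description :  proxy pool manager - fetches raw proxies into the DB and serves, lists, deletes and counts the stored proxies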
Author : JHao
date: 2016/12/3
-------------------------------------------------
Change Activity:
2016/12/3:
-------------------------------------------------
"""
__author__ = 'JHao'
import random
from Util import EnvUtil
from DB.DbClient import DbClient
from Config.ConfigGetter import config
from Util.LogHandler import LogHandler
from Util.utilFunction import verifyProxyFormat
from ProxyGetter.getFreeProxy import GetFreeProxy
class ProxyManager(object):
    """
    ProxyManager

    Facade over a single DbClient that works with two named tables:
    ``raw_proxy`` (freshly fetched proxies) and ``useful_proxy`` (the pool
    that ``get`` / ``getAll`` / ``delete`` operate on).

    Every method first switches the shared DbClient to the table it needs
    via ``changeTable``, so the client's current table after a call is
    whichever table that call selected last.
    """

    def __init__(self):
        # Storage client shared by every method of this manager.
        self.db = DbClient()
        # Table name that refresh() writes fetched proxies into.
        self.raw_proxy_queue = 'raw_proxy'
        self.log = LogHandler('proxy_manager')
        # Table name that get() / getAll() / delete() read from and modify.
        self.useful_proxy_queue = 'useful_proxy'

    def refresh(self):
        """
        fetch proxy into Db by ProxyGetter/getFreeProxy.py

        Runs every getter named in ``config.proxy_getter_functions`` (looked
        up as an attribute of ``GetFreeProxy``) and stores each well-formed
        proxy it yields in the raw proxy table. A getter that raises is
        logged together with the exception and skipped, so one failing
        source does not stop the remaining ones.
        :return:
        """
        self.db.changeTable(self.raw_proxy_queue)
        for proxyGetter in config.proxy_getter_functions:
            # fetch
            try:
                self.log.info("{func}: fetch proxy start".format(func=proxyGetter))
                fetch = getattr(GetFreeProxy, proxyGetter.strip())
                for proxy in fetch():
                    # Store the proxy directly; no de-duplication is done in
                    # code because (per the original author's note) the hash
                    # structure used for storage de-duplicates by itself.
                    proxy = proxy.strip()
                    if proxy and verifyProxyFormat(proxy):
                        self.log.info('{func}: fetch proxy {proxy}'.format(func=proxyGetter, proxy=proxy))
                        self.db.put(proxy)
                    else:
                        self.log.error('{func}: fetch proxy {proxy} error'.format(func=proxyGetter, proxy=proxy))
            except Exception as e:
                # Best-effort by design: keep going with the next getter, but
                # record why this one failed instead of discarding the cause.
                self.log.error("{func}: fetch proxy fail: {err!r}".format(func=proxyGetter, err=e))

    def get(self):
        """
        return a useful proxy

        Picks one key at random from the useful proxy table.
        :return: a randomly chosen proxy key, or None when the table is empty
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        if not item_dict:
            return None
        # list() makes the keys indexable for random.choice on Python 2 and 3.
        return random.choice(list(item_dict.keys()))

    def delete(self, proxy):
        """
        delete proxy from pool
        :param proxy: key of the proxy to remove from the useful proxy table
        :return:
        """
        self.db.changeTable(self.useful_proxy_queue)
        self.db.delete(proxy)

    def getAll(self):
        """
        get all proxy from pool as list
        :return: list of every key in the useful proxy table (empty list when
                 the table is empty)
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_dict = self.db.getAll()
        # list() yields a real list on both Python 2 and Python 3.
        return list(item_dict.keys()) if item_dict else []

    def getNumber(self):
        """
        count the proxies held in each table
        :return: dict with the keys 'raw_proxy' and 'useful_proxy', each
                 mapped to what DbClient.getNumber() reports for that table
        """
        self.db.changeTable(self.raw_proxy_queue)
        total_raw_proxy = self.db.getNumber()
        self.db.changeTable(self.useful_proxy_queue)
        total_useful_queue = self.db.getNumber()
        return {'raw_proxy': total_raw_proxy, 'useful_proxy': total_useful_queue}
if __name__ == '__main__':
    # Run as a script: build a manager and do one fetch pass into the raw proxy table.
    manager = ProxyManager()
    manager.refresh()