forked from jhao104/proxy_pool
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcheck.py
158 lines (138 loc) · 5.34 KB
/
check.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
# -*- coding: utf-8 -*-
"""
-------------------------------------------------
File Name: check
Description : 执行代理校验
Author : JHao
date: 2019/8/6
-------------------------------------------------
Change Activity:
2019/08/06: 执行代理校验
2021/05/25: 分别校验http和https
2022/08/16: 获取代理Region信息
-------------------------------------------------
"""
__author__ = 'JHao'
from util.six import Empty
from threading import Thread
from datetime import datetime
from util.webRequest import WebRequest
from handler.logHandler import LogHandler
from helper.validator import ProxyValidator
from handler.proxyHandler import ProxyHandler
from handler.configHandler import ConfigHandler
class DoValidator(object):
    """Run the registered proxy validators and record the outcome on the proxy."""

    # Shared configuration handle; this class only reads ``proxyRegion`` from it.
    conf = ConfigHandler()

    @classmethod
    def validator(cls, proxy, work_type):
        """
        Validation entry point.

        Runs the HTTP validators and, only if they pass, the HTTPS
        validators, then updates the proxy's bookkeeping fields in place.

        Args:
            proxy: Proxy object; its ``check_count``, ``last_time``,
                ``last_status``, ``fail_count``, ``https`` and ``region``
                attributes are read and/or written here.
            work_type: "raw" or "use"; the region is only looked up for "raw".
        Returns:
            The same Proxy object that was passed in, after mutation.
        """
        http_ok = cls.httpValidator(proxy)
        # HTTPS is only probed for proxies that already passed the HTTP check.
        https_ok = cls.httpsValidator(proxy) if http_ok else False

        proxy.check_count += 1
        proxy.last_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        proxy.last_status = bool(http_ok)

        if http_ok:
            # A success pays back one earlier failure, never going below zero.
            if proxy.fail_count > 0:
                proxy.fail_count -= 1
            proxy.https = bool(https_ok)
            if work_type == "raw":
                # Region lookup is optional and controlled by configuration.
                proxy.region = cls.regionGetter(proxy) if cls.conf.proxyRegion else ""
        else:
            proxy.fail_count += 1
        return proxy

    @classmethod
    def httpValidator(cls, proxy):
        """Return True when every registered HTTP validator accepts ``proxy.proxy``."""
        # all() stops at the first failing validator, like an early return.
        return all(func(proxy.proxy) for func in ProxyValidator.http_validator)

    @classmethod
    def httpsValidator(cls, proxy):
        """Return True when every registered HTTPS validator accepts ``proxy.proxy``."""
        return all(func(proxy.proxy) for func in ProxyValidator.https_validator)

    @classmethod
    def preValidator(cls, proxy):
        """
        Return True when every registered pre-validator accepts ``proxy``.

        Unlike the HTTP/HTTPS checks, the validators receive ``proxy`` itself
        rather than ``proxy.proxy``.
        """
        return all(func(proxy) for func in ProxyValidator.pre_validator)

    @classmethod
    def regionGetter(cls, proxy):
        """
        Look up the region of the proxy's host through a remote IP API.

        Args:
            proxy: Proxy object whose ``proxy`` attribute is "host:port".
        Returns:
            The ``data.address`` value from the API response, or the string
            'error' if the request or response parsing fails.
        """
        try:
            host = proxy.proxy.split(':')[0]
            url = 'https://searchplugin.csdn.net/api/v1/ip/get?ip=%s' % host
            r = WebRequest().get(url=url, retry_time=1, timeout=2).json
            return r['data']['address']
        except Exception:
            # Best-effort lookup: any ordinary failure yields 'error', but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            return 'error'
class _ThreadChecker(Thread):
    """Worker thread: takes proxies from a queue, validates them and stores the outcome."""

    def __init__(self, work_type, target_queue, thread_name):
        """
        Args:
            work_type: "raw" or "use"; selects how validated proxies are handled.
            target_queue: queue of proxy objects to check.
            thread_name: name given to the underlying thread, used in log lines.
        """
        super(_ThreadChecker, self).__init__(name=thread_name)
        self.work_type = work_type
        self.target_queue = target_queue
        self.log = LogHandler("checker")
        self.proxy_handler = ProxyHandler()
        self.conf = ConfigHandler()

    def run(self):
        """Validate queued proxies until the queue reports empty, then log completion."""
        kind = self.work_type.title()
        # The work type never changes for a worker, so choose the handler once.
        handle = self.__ifRaw if self.work_type == "raw" else self.__ifUse

        self.log.info("{}ProxyCheck - {}: start".format(kind, self.name))
        while True:
            try:
                candidate = self.target_queue.get(block=False)
            except Empty:
                # Non-blocking get: an empty queue ends this worker.
                break
            handle(DoValidator.validator(candidate, self.work_type))
            self.target_queue.task_done()
        self.log.info("{}ProxyCheck - {}: complete".format(kind, self.name))

    def __ifRaw(self, proxy):
        """Store a newly validated proxy unless it failed or is already stored."""
        if not proxy.last_status:
            self.log.info('RawProxyCheck - {}: {} fail'.format(self.name, proxy.proxy.ljust(23)))
            return
        if self.proxy_handler.exists(proxy):
            self.log.info('RawProxyCheck - {}: {} exist'.format(self.name, proxy.proxy.ljust(23)))
            return
        self.log.info('RawProxyCheck - {}: {} pass'.format(self.name, proxy.proxy.ljust(23)))
        self.proxy_handler.put(proxy)

    def __ifUse(self, proxy):
        """Re-store a re-checked proxy, or delete it once its fail count exceeds the limit."""
        if proxy.last_status:
            self.log.info('UseProxyCheck - {}: {} pass'.format(self.name, proxy.proxy.ljust(23)))
            self.proxy_handler.put(proxy)
            return

        # Failed check: drop the proxy only after too many failures.
        if proxy.fail_count > self.conf.maxFailCount:
            template = 'UseProxyCheck - {}: {} fail, count {} delete'
            store = self.proxy_handler.delete
        else:
            template = 'UseProxyCheck - {}: {} fail, count {} keep'
            store = self.proxy_handler.put
        self.log.info(template.format(self.name, proxy.proxy.ljust(23), proxy.fail_count))
        store(proxy)
def Checker(tp, queue, thread_num=20):
    """
    Run a pool of proxy checker threads and block until all of them finish.

    :param tp: work type passed to every worker, "raw" or "use"
    :param queue: queue of proxy objects; workers take items with non-blocking gets
    :param thread_num: number of worker threads to start (default 20)
    :return: None
    """
    thread_list = [
        _ThreadChecker(tp, queue, "thread_%s" % str(index).zfill(2))
        for index in range(thread_num)
    ]

    for thread in thread_list:
        # ``Thread.setDaemon()`` is deprecated since Python 3.10; setting the
        # ``daemon`` attribute is the supported equivalent.
        thread.daemon = True
        thread.start()

    # Wait for every worker so the caller resumes only after the whole batch.
    for thread in thread_list:
        thread.join()