Skip to content

Commit

Permalink
[update] 优化代理验证格式
Browse files Browse the repository at this point in the history
  • Loading branch information
jhao104 committed Apr 3, 2018
1 parent 6fbba5b commit 24a9f09
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 9 deletions.
21 changes: 16 additions & 5 deletions Manager/ProxyManager.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from DB.DbClient import DbClient
from Util.GetConfig import GetConfig
from Util.LogHandler import LogHandler
from Util.utilFunction import verifyProxyFormat
from ProxyGetter.getFreeProxy import GetFreeProxy


Expand All @@ -40,14 +41,23 @@ def refresh(self):
:return:
"""
for proxyGetter in self.config.proxy_getter_functions:
# fetch
proxy_set = set()
# fetch raw proxy
for proxy in getattr(GetFreeProxy, proxyGetter.strip())():
if proxy:
try:
self.log.info("{func}: fetch proxy start".format(func=proxyGetter))
proxy_iter = [_ for _ in getattr(GetFreeProxy, proxyGetter.strip())()]
except Exception as e:
self.log.error("{func}: fetch proxy fail".format(func=proxyGetter))
continue
for proxy in proxy_iter:
proxy = proxy.strip()
if proxy and verifyProxyFormat(proxy):
self.log.info('{func}: fetch proxy {proxy}'.format(func=proxyGetter, proxy=proxy))
proxy_set.add(proxy.strip())
proxy_set.add(proxy)
else:
self.log.error('{func}: fetch proxy {proxy} error'.format(func=proxyGetter, proxy=proxy))

# store raw proxy
# store
for proxy in proxy_set:
self.db.changeTable(self.useful_proxy_queue)
if self.db.exists(proxy):
Expand Down Expand Up @@ -97,6 +107,7 @@ def getNumber(self):
total_useful_queue = self.db.getNumber()
return {'raw_proxy': total_raw_proxy, 'useful_proxy': total_useful_queue}


# Script entry point: build a manager and run a single refresh pass.
if __name__ == '__main__':
    manager = ProxyManager()
    manager.refresh()
3 changes: 2 additions & 1 deletion Util/utilFunction.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ def verifyProxyFormat(proxy):
"""
import re
verify_regex = r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d{1,5}"
return True if re.findall(verify_regex, proxy) else False
_proxy = re.findall(verify_regex, proxy)
return True if len(_proxy) == 1 and _proxy[0] == proxy else False


# noinspection PyPep8Naming
Expand Down
16 changes: 13 additions & 3 deletions doc/release_notes.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,19 @@
## Release Notes

* newest
  1.使用多线程验证useful_pool
* dev

1.优化代理格式检查;

2.增加代理源;

* 1.11(2017.8)

  1.使用多线程验证useful_pool;

* 1.10(2016.11)

* 1.10
  1. 第一版;

  2. 支持PY2/PY3;

  3. 代理池基本功能;

0 comments on commit 24a9f09

Please sign in to comment.