Skip to content

Commit

Permalink
Merge pull request jhao104#157 from xrfinbupt/master
Browse files Browse the repository at this point in the history
  • Loading branch information
jhao104 authored May 23, 2018
2 parents b9f3e14 + 876b1ec commit 9f55c5c
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 2 deletions.
1 change: 1 addition & 0 deletions Config.ini
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ freeProxyEight = 1
freeProxyNinth = 1
freeProxyTen = 1
freeProxyEleven = 1
freeProxyTwelve = 1
;foreign websites, outside the firewall
freeProxyWallFirst = 1
freeProxyWallSecond = 1
Expand Down
37 changes: 37 additions & 0 deletions ProxyGetter/getFreeProxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import sys
import requests


try:
from importlib import reload # py3 实际不会实用,只是为了不显示语法错误
except:
Expand All @@ -25,6 +26,7 @@

from Util.utilFunction import robustCrawl, getHtmlTree
from Util.WebRequest import WebRequest
from Util.utilFunction import verifyProxyFormat

# for debug to disable insecureWarning
requests.packages.urllib3.disable_warnings()
Expand Down Expand Up @@ -251,6 +253,24 @@ def freeProxyEleven():
for proxy in proxies:
yield ":".join(proxy)

@staticmethod
def freeProxyTwelve(page_count=8):
    """
    Crawl the free proxy list at http://ip.jiangxianli.com/?page=N.

    The original author labels this source "guobanjia" and describes it as
    a free proxy library with a very large number of entries.

    :param page_count: number of listing pages to fetch, starting at page 1
    :return: generator yielding one "<td[2] text>:<td[3] text>" string per
             table row (presumably "ip:port" -- confirm against the page)
    """
    for page in range(1, page_count + 1):
        url = 'http://ip.jiangxianli.com/?page={}'.format(page)
        html_tree = getHtmlTree(url)
        rows = html_tree.xpath("/html/body/div[1]/div/div[1]/div[2]/table/tbody/tr")
        # An empty result simply means this page contributed no rows.
        for row in rows:
            host_text = row.xpath("./td[2]/text()")
            port_text = row.xpath("./td[3]/text()")
            # A row with an empty or missing cell must not raise IndexError,
            # which would terminate the generator and lose the remaining pages.
            if not host_text or not port_text:
                continue
            yield host_text[0].strip() + ":" + port_text[0].strip()

@staticmethod
def freeProxyWallFirst():
"""
Expand Down Expand Up @@ -314,6 +334,23 @@ def freeProxyWallThird():

# test_batch(gg.freeProxyEleven())

# Pull proxies from source twelve, trim surrounding whitespace, and keep
# only the non-empty, well-formed ones (the set removes duplicates).
proxy_iter = gg.freeProxyTwelve()
trimmed = (raw.strip() for raw in proxy_iter)
proxy_set = {item for item in trimmed if item and verifyProxyFormat(item)}

# store
for proxy in proxy_set:
    print(proxy)


# test_batch(gg.freeProxyTwelve())

# test_batch(gg.freeProxyWallFirst())

# test_batch(gg.freeProxyWallSecond())
Expand Down
16 changes: 14 additions & 2 deletions Test/testGetFreeProxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,18 @@
"""
__author__ = 'J_hao'

import re
import sys
import requests


try:
    # Python 3: `reload` is not actually used here; importing it only keeps
    # the name defined so the file does not show an error.
    from importlib import reload
except ImportError:
    # The import failed (Python 2, where `reload` is a builtin): re-load
    # `sys` and set the default string encoding to UTF-8.
    # Only ImportError is caught so that unrelated failures, including
    # KeyboardInterrupt/SystemExit, are no longer silently swallowed.
    reload(sys)
    sys.setdefaultencoding('utf-8')

sys.path.append('..')
from ProxyGetter.getFreeProxy import GetFreeProxy
from Util.GetConfig import GetConfig

Expand All @@ -28,9 +40,9 @@ def testGetFreeProxy():
proxy_count = 0
for proxy in getattr(GetFreeProxy, proxyGetter.strip())():
if proxy:
print('{func}: fetch proxy {proxy}'.format(func=proxyGetter, proxy=proxy))
print('{func}: fetch proxy {proxy},proxy_count:{proxy_count}'.format(func=proxyGetter, proxy=proxy,proxy_count=proxy_count))
proxy_count += 1
assert proxy_count >= 20, '{} fetch proxy fail'.format(proxyGetter)
#assert proxy_count >= 20, '{} fetch proxy fail'.format(proxyGetter)


if __name__ == '__main__':
Expand Down

0 comments on commit 9f55c5c

Please sign in to comment.