Skip to content

Commit

Permalink
[update] staticmethod下的robustCrawl不起作用 [translation: the robustCrawl decorator does not work when applied under @staticmethod]
Browse files Browse the repository at this point in the history
  • Loading branch information
jhao104 committed Nov 5, 2017
1 parent b1018a9 commit 200b279
Showing 1 changed file with 32 additions and 23 deletions.
55 changes: 32 additions & 23 deletions ProxyGetter/getFreeProxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ def __init__(self):
pass

@staticmethod
@robustCrawl # decoration print error if exception happen
def freeProxyFirst(page=10):
"""
抓取无忧代理 http://www.data5u.com/
Expand All @@ -53,10 +52,12 @@ def freeProxyFirst(page=10):
html_tree = getHtmlTree(url)
ul_list = html_tree.xpath('//ul[@class="l2"]')
for ul in ul_list:
yield ':'.join(ul.xpath('.//li/text()')[0:2])
try:
yield ':'.join(ul.xpath('.//li/text()')[0:2])
except Exception as e:
pass

@staticmethod
@robustCrawl
def freeProxySecond(proxy_number=100):
"""
抓取代理66 http://www.66ip.cn/
Expand All @@ -73,7 +74,6 @@ def freeProxySecond(proxy_number=100):
yield proxy

@staticmethod
@robustCrawl
def freeProxyThird(days=1):
"""
抓取ip181 http://www.ip181.com/
Expand All @@ -82,12 +82,14 @@ def freeProxyThird(days=1):
"""
url = 'http://www.ip181.com/'
html_tree = getHtmlTree(url)
tr_list = html_tree.xpath('//tr')[1:]
for tr in tr_list:
yield ':'.join(tr.xpath('./td/text()')[0:2])
try:
tr_list = html_tree.xpath('//tr')[1:]
for tr in tr_list:
yield ':'.join(tr.xpath('./td/text()')[0:2])
except Exception as e:
pass

@staticmethod
@robustCrawl
def freeProxyFourth():
"""
抓取西刺代理 http://api.xicidaili.com/free2016.txt
Expand All @@ -100,10 +102,12 @@ def freeProxyFourth():
tree = getHtmlTree(each_url)
proxy_list = tree.xpath('.//table[@id="ip_list"]//tr')
for proxy in proxy_list:
yield ':'.join(proxy.xpath('./td/text()')[0:2])
try:
yield ':'.join(proxy.xpath('./td/text()')[0:2])
except Exception as e:
pass

@staticmethod
@robustCrawl
def freeProxyFifth():
"""
抓取guobanjia http://www.goubanjia.com/free/gngn/index.shtml
Expand All @@ -122,13 +126,15 @@ def freeProxyFifth():
]/text()
"""
for each_proxy in proxy_list:
# :符号裸放在td下,其他放在div span p中,先分割找出ip,再找port
ip_addr = ''.join(each_proxy.xpath(xpath_str))
port = each_proxy.xpath(".//span[contains(@class, 'port')]/text()")[0]
yield '{}:{}'.format(ip_addr, port)
try:
# :符号裸放在td下,其他放在div span p中,先分割找出ip,再找port
ip_addr = ''.join(each_proxy.xpath(xpath_str))
port = each_proxy.xpath(".//span[contains(@class, 'port')]/text()")[0]
yield '{}:{}'.format(ip_addr, port)
except Exception as e:
pass

@staticmethod
@robustCrawl
def freeProxySixth():
"""
抓取讯代理免费proxy http://www.xdaili.cn/ipagent/freeip/getFreeIps?page=1&rows=10
Expand All @@ -147,16 +153,19 @@ def freeProxySixth():
if __name__ == '__main__':
gg = GetFreeProxy()
# for e in gg.freeProxyFirst():
# print e

# print(e)
#
# for e in gg.freeProxySecond():
# print e

# print(e)
#
# for e in gg.freeProxyThird():
# print e
# print(e)

# for e in gg.freeProxySixth():
# for e in gg.freeProxyFourth():
# print(e)

# for e in gg.freeProxyFifth():
# print(e)
for e in gg.freeProxyFifth():
print(e)

# for e in gg.freeProxySixth():
# print(e)

0 comments on commit 200b279

Please sign in to comment.