
Commit

Add proxy website https://hidemy.name/en/
awolfly9 committed Feb 20, 2017
1 parent 5886aca commit af71fe1
Showing 3 changed files with 62 additions and 3 deletions.
57 changes: 57 additions & 0 deletions ipproxytool/spiders/proxy/hidemy.py
@@ -0,0 +1,57 @@
#-*- coding: utf-8 -*-

import utils

from scrapy import Selector
from basespider import BaseSpider
from proxy import Proxy


class HidemySpider(BaseSpider):
    name = 'hidemy'

    def __init__(self, *a, **kw):
        super(HidemySpider, self).__init__(*a, **kw)

        # hidemy.name paginates its proxy list in steps of 64 rows; fetch the first five pages
        self.urls = ['https://hidemy.name/en/proxy-list/?start=%s' % n for n in range(0, 5 * 64, 64)]
        self.headers = {
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'en-US,en;q=0.5',
            'Connection': 'keep-alive',
            'Host': 'hidemy.name',
            'Referer': 'https://hidemy.name/en/proxy-list/?start=0',
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:51.0) Gecko/20100101 Firefox/51.0',
        }

        self.init()

    def parse_page(self, response):
        # debug logging: dump the response attributes and body types, then save the raw page
        utils.log(dir(response))
        utils.log('body type:%s' % type(response.body))
        utils.log('body_as_unicode type:%s' % type(response.body_as_unicode()))
        self.write(response.body)

        sel = Selector(response)
        infos = sel.xpath('//tbody/tr').extract()
        for i, info in enumerate(infos):
            # the first row is the table header, skip it
            if i == 0:
                continue

            val = Selector(text = info)
            ip = val.xpath('//td[1]/text()').extract_first()
            port = val.xpath('//td[2]/text()').extract_first()
            country = val.xpath('//td[3]/div/text()').extract_first()
            anonymity = val.xpath('//td[6]/text()').extract_first()

            proxy = Proxy()
            proxy.set_value(
                    ip = ip,
                    port = port,
                    country = country,
                    anonymity = anonymity,
                    source = self.name,
            )

            self.add_proxy(proxy = proxy)
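
For reference, a minimal standalone sketch of the row parsing used in parse_page above. The sample HTML row here is made up purely for illustration (it was not captured from the live site); it only mirrors the column order the XPath expressions assume.

# sketch only: exercises the same td[1]/td[2]/td[3]/td[6] extraction as parse_page above;
# the sample row below is a hypothetical stand-in for one hidemy.name table row
from scrapy import Selector

sample_row = ('<tr><td>1.2.3.4</td><td>8080</td>'
              '<td><div>Germany</div></td><td>1.2 sec</td>'
              '<td>HTTP</td><td>High</td></tr>')

val = Selector(text = sample_row)
print(val.xpath('//td[1]/text()').extract_first())      # 1.2.3.4
print(val.xpath('//td[2]/text()').extract_first())      # 8080
print(val.xpath('//td[3]/div/text()').extract_first())  # Germany
print(val.xpath('//td[6]/text()').extract_first())      # High
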
6 changes: 3 additions & 3 deletions proxy.py
@@ -44,11 +44,11 @@ def get_anonymity_type(self, anonymity):
        '''

        if anonymity == u'高匿代理' or anonymity == u'高匿名' or anonymity == 'elite proxy' or \
-                anonymity == u'超级匿名':
+                anonymity == u'超级匿名' or anonymity == u'High':
            return '1'
-        elif anonymity == u'匿名' or anonymity == 'anonymous' or anonymity == u'普通匿名':
+        elif anonymity == u'匿名' or anonymity == 'anonymous' or anonymity == u'普通匿名' or anonymity == u'Medium':
            return '2'
-        elif anonymity == u'透明' or anonymity == 'transparent':
+        elif anonymity == u'透明' or anonymity == 'transparent' or anonymity == u'No':
            return '3'
        else:
            return '3'
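
For clarity, the intended mapping of the English labels used by hidemy.name, assuming get_anonymity_type is a method of the Proxy class defined in this file. This is only a sketch of the expected behaviour after the change, not part of the commit.

# sketch: expected anonymity level codes for the hidemy.name labels
p = Proxy()
assert p.get_anonymity_type(u'High') == '1'    # elite / high anonymity
assert p.get_anonymity_type(u'Medium') == '2'  # anonymous
assert p.get_anonymity_type(u'No') == '3'      # transparent (also the default)
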
2 changes: 2 additions & 0 deletions runspider.py
@@ -14,6 +14,7 @@
from ipproxytool.spiders.proxy.ip181 import IpOneEightOneSpider
from ipproxytool.spiders.proxy.kuaidaili import KuaiDaiLiSpider
from ipproxytool.spiders.proxy.gatherproxy import GatherproxySpider
+from ipproxytool.spiders.proxy.hidemy import HidemySpider

scrapydo.setup()

@@ -45,6 +46,7 @@
items = scrapydo.run_spider(IpOneEightOneSpider)
items = scrapydo.run_spider(KuaiDaiLiSpider)
items = scrapydo.run_spider(GatherproxySpider)
+items = scrapydo.run_spider(HidemySpider)

utils.log('*******************run spider waiting...*******************')
time.sleep(300)
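
To try only the new spider, a minimal sketch along the lines of the existing runspider.py setup (assuming scrapydo is installed and the script is run from the project root; whether results come back as items or are written to storage depends on how BaseSpider.add_proxy is implemented).

# sketch: run only the hidemy spider, mirroring how runspider.py drives the other spiders
import scrapydo

from ipproxytool.spiders.proxy.hidemy import HidemySpider

scrapydo.setup()
items = scrapydo.run_spider(HidemySpider)
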
