Skip to content

Commit

Permalink
update bug
Browse files Browse the repository at this point in the history
  • Loading branch information
Germey committed Jul 13, 2017
1 parent e789be1 commit e0c9d4b
Show file tree
Hide file tree
Showing 6 changed files with 57 additions and 58 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ python3 run.py

利用requests获取方法如下

```
```python
import requests

PROXY_POOL_URL = 'http://localhost:5000/get'
Expand Down
2 changes: 2 additions & 0 deletions proxypool/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ def index():
def get_proxy():
    """
    Fetch one proxy from the pool.

    :return: the value returned by ``random()`` on the connection
        obtained from ``get_conn()``
    """
    return get_conn().random()
Expand All @@ -31,6 +32,7 @@ def get_proxy():
def get_counts():
    """
    Report how many proxies the pool currently holds.

    :return: the result of ``count()`` on the pool connection, converted
        to ``str``
    """
    pool = get_conn()
    total = pool.count()
    return str(total)
Expand Down
32 changes: 20 additions & 12 deletions proxypool/crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,29 +23,37 @@ def get_proxies(self, callback):
print('成功获取到代理', proxy)
proxies.append(proxy)
return proxies

def crawl_daxiang(self):
    """
    Crawl proxies from the Daxiang proxy API.

    The response body is fetched with ``get_page`` and split on newlines;
    each non-blank line is yielded as one proxy.

    :return: generator of proxy strings (nothing is yielded when the page
        could not be fetched)
    """
    url = 'http://vtp.daxiangdaili.com/ip/?tid=555546364094534&num=100'
    html = get_page(url)
    if not html:
        return
    for line in html.split('\n'):
        # Strip stray '\r'/spaces and drop blank lines (e.g. after a
        # trailing newline) so no empty "proxy" reaches the caller.
        proxy = line.strip()
        if proxy:
            yield proxy

def crawl_kuaidaili(self):
    """
    Crawl proxies from the Kuaidaili API.

    The response fetched with ``get_page`` is parsed as JSON and the
    entries found under ``data -> proxy_list`` are yielded one by one.

    :return: generator of proxies (nothing is yielded when the page could
        not be fetched or the expected keys are missing)
    """
    url = 'http://dev.kuaidaili.com/api/getproxy/?orderid=959961765125099&num=100&b_pcchrome=1&b_pcie=1&b_pcff=1&protocol=1&method=1&an_an=1&an_ha=1&quality=1&format=json&sep=2'
    html = get_page(url)
    if not html:
        return
    result = json.loads(html)
    # Fall back to empty containers so a response without 'data' or
    # 'proxy_list' ends the generator instead of raising on None.
    proxies = (result.get('data') or {}).get('proxy_list') or []
    for proxy in proxies:
        yield proxy

def crawl_daili66(self, page_count=4):
"""
获取代理66
:param page_count:
:return:
:param page_count: 页码
:return: 代理
"""
start_url = 'http://www.66ip.cn/{}.html'
urls = [start_url.format(page) for page in range(1, page_count + 1)]
Expand All @@ -59,11 +67,11 @@ def crawl_daili66(self, page_count=4):
ip = tr.find('td:nth-child(1)').text()
port = tr.find('td:nth-child(2)').text()
yield ':'.join([ip, port])

def crawl_proxy360(self):
"""
获取Proxy360
:return:
:return: 代理
"""
start_url = 'http://www.proxy360.cn/Region/China'
print('Crawling', start_url)
Expand All @@ -75,11 +83,11 @@ def crawl_proxy360(self):
ip = line.find('.tbBottomLine:nth-child(1)').text()
port = line.find('.tbBottomLine:nth-child(2)').text()
yield ':'.join([ip, port])

def crawl_goubanjia(self):
"""
获取Goubanjia
:return:
:return: 代理
"""
start_url = 'http://www.goubanjia.com/free/gngn/index.shtml'
html = get_page(start_url)
Expand Down
62 changes: 26 additions & 36 deletions proxypool/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,37 +9,26 @@ class RedisClient(object):
def __init__(self, host=REDIS_HOST, port=REDIS_PORT, password=REDIS_PASSWORD):
    """
    Set up the Redis connection used by this client.

    :param host: Redis host
    :param port: Redis port
    :param password: Redis password
    """
    connection = redis.StrictRedis(host=host, port=port, password=password)
    self.db = connection

def top(self):
    """
    Return the top-ranked proxy.

    :return: the single member returned by ``zrevrange(REDIS_KEY, 0, 0)``,
        decoded as UTF-8
    :raises PoolEmptyError: when that query returns nothing
    """
    best = self.db.zrevrange(REDIS_KEY, 0, 0)
    if not best:
        raise PoolEmptyError
    return best[0].decode('utf-8')


def add(self, proxy, score=INITIAL_SCORE):
    """
    Add a proxy with the given score, unless it is already stored.

    :param proxy: proxy to add
    :param score: score to store for the proxy (defaults to INITIAL_SCORE)
    :return: the result of ``zadd`` when the proxy was added, ``None`` when
        it was already present
    """
    # Only insert proxies that are not in the sorted set yet, so an existing
    # proxy keeps its current score. Compare against None explicitly: a
    # stored score of 0 is falsy but still means "already present".
    if self.db.zscore(REDIS_KEY, proxy) is None:
        return self.db.zadd(REDIS_KEY, score, proxy)

def random(self):
"""
随机获取有效代理,首先尝试获取最高分数代理,如果不存在,按照排名获取,否则异常
:return:
:return: 随机代理
"""
result = self.db.zrangebyscore(REDIS_KEY, MAX_SCORE, MAX_SCORE)
if len(result):
Expand All @@ -50,48 +39,49 @@ def random(self):
return choice(result).decode('utf-8')
else:
raise PoolEmptyError

def decrease(self, proxy):
    """
    Lower a proxy's score by one, or remove the proxy.

    The score is decremented only while it is truthy and greater than
    MIN_SCORE; otherwise (missing, zero, or at/below MIN_SCORE) the proxy
    is removed from the sorted set.

    :param proxy: proxy to penalise
    :return: the result of ``zincrby`` when the score was decremented,
        otherwise the result of ``zrem``
    """
    score = self.db.zscore(REDIS_KEY, proxy)
    if score and score > MIN_SCORE:
        print('代理', proxy, '当前分数', score, '减1')
        # Decrement exactly once and hand the result back to the caller.
        return self.db.zincrby(REDIS_KEY, proxy, -1)
    print('代理', proxy, '当前分数', score, '移除')
    return self.db.zrem(REDIS_KEY, proxy)

def exists(self, proxy):
    """
    Check whether a proxy is stored.

    :param proxy: proxy to look up
    :return: ``True`` when ``zscore`` returns a score for the proxy,
        ``False`` when it returns ``None``
    """
    # Identity comparison is the correct way to test for None.
    return self.db.zscore(REDIS_KEY, proxy) is not None

def max(self, proxy):
    """
    Set a proxy's score to MAX_SCORE.

    :param proxy: proxy to promote
    :return: the result of the ``zadd`` call
    """
    print('代理', proxy, '可用,设置为', MAX_SCORE)
    outcome = self.db.zadd(REDIS_KEY, MAX_SCORE, proxy)
    return outcome

def count(self):
    """
    Count the stored proxies.

    :return: the result of ``zcard`` on REDIS_KEY
    """
    size = self.db.zcard(REDIS_KEY)
    return size

def all(self):
    """
    Fetch every proxy whose score lies between MIN_SCORE and MAX_SCORE.

    :return: list of proxies, each decoded as UTF-8
    """
    members = self.db.zrangebyscore(REDIS_KEY, MIN_SCORE, MAX_SCORE)
    decoded = [member.decode('utf-8') for member in members]
    return decoded
Expand Down
4 changes: 2 additions & 2 deletions proxypool/setting.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
# Redis database host
# NOTE(review): REDIS_HOST is assigned twice below; the later 'localhost'
# assignment is the one that takes effect.
REDIS_HOST = 'DataCrawl-Pool.redis.cache.chinacloudapi.cn'
REDIS_HOST = 'localhost'

# Redis port
REDIS_PORT = 6379

# Redis password; use None if there is no password
# NOTE(review): REDIS_PASSWORD is assigned twice below; the later 'foobared'
# assignment is the one that takes effect.
REDIS_PASSWORD = None
REDIS_PASSWORD = 'foobared'

# Redis key name (presumably the key under which the proxies are stored
# -- confirm against the Redis client code)
REDIS_KEY = 'proxies'

Expand Down
13 changes: 6 additions & 7 deletions proxypool/tester.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,12 @@
class Tester(object):
def __init__(self):
    """
    Create the tester and give it its own ``RedisClient`` instance.
    """
    client = RedisClient()
    self.redis = client

async def test_single_proxy(self, proxy):
"""
测试单个代理
:param proxy:
:return:
:param proxy:
:return:
"""
conn = aiohttp.TCPConnector(verify_ssl=False)
async with aiohttp.ClientSession(connector=conn) as session:
Expand All @@ -28,16 +28,15 @@ async def test_single_proxy(self, proxy):
print('正在测试', proxy)
async with session.get(TEST_URL, proxy=real_proxy, timeout=15) as response:
if response.status in VALID_STATUS_CODES:
self.redis.add(proxy)
self.redis.max(proxy)
print('代理可用', proxy)
else:
self.redis.decrease(proxy)
print('请求响应码不合法,IP', proxy)
except (ClientError, aiohttp.client_exceptions.ClientConnectorError, asyncio.TimeoutError, AttributeError):
if self.redis.exists(proxy):
self.redis.decrease(proxy)
self.redis.decrease(proxy)
print('代理请求失败', proxy)

def run(self):
"""
测试主函数
Expand Down

0 comments on commit e0c9d4b

Please sign in to comment.