add success_content_parse when response success
宋晓楠 committed Sep 4, 2017
1 parent 8514a10 commit 81ee1cb
Showing 8 changed files with 36 additions and 10 deletions.
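
The commit replaces the shared substring test on `success_mark` (`if self.success_mark in response.text or self.success_mark is '':`) with an overridable `success_content_parse(response)` hook: the base validator accepts every response, and each site-specific validator overrides the hook with its own content check. Below is a minimal standalone sketch of that pattern; the class names `Validator`/`BbsValidator` and the `FakeResponse` stand-in are illustrative assumptions, since the repository's actual classes are Scrapy spiders that receive real Response objects.

from collections import namedtuple

# Stand-in for scrapy's Response; only the .text attribute is needed here.
FakeResponse = namedtuple('FakeResponse', ['text'])


class Validator(object):
    # Base behaviour (validator.py): accept any response body.
    def success_content_parse(self, response):
        return True


class BbsValidator(Validator):
    # Per-site behaviour (bbs.py): require the 'conmain' marker in the page.
    def success_content_parse(self, response):
        if 'conmain' in response.text:
            return True
        return False


validator = BbsValidator()
print(validator.success_content_parse(FakeResponse(text='<div class="conmain"></div>')))  # True
print(validator.success_content_parse(FakeResponse(text='blocked by proxy')))             # False
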
6 changes: 5 additions & 1 deletion ipproxytool/spiders/validator/bbs.py
@@ -29,6 +29,10 @@ def __init__(self, name = None, **kwargs):
             'Firefox/50.0',
         }

-        self.success_mark = 'conmain'
         self.is_record_web_page = False
         self.init()
+
+    def success_content_parse(self, response):
+        if 'conmain' in response.text:
+            return True
+        return False
6 changes: 5 additions & 1 deletion ipproxytool/spiders/validator/boss.py
@@ -25,6 +25,10 @@ def __init__(self, name = None, **kwargs):
             'Firefox/50.0',
         }

-        self.success_mark = '<!DOCTYPE html>'
         self.is_record_web_page = False
         self.init()
+
+    def success_content_parse(self, response):
+        if '<!DOCTYPE html>' in response.text:
+            return True
+        return False
2 changes: 1 addition & 1 deletion ipproxytool/spiders/validator/httpbin.py
@@ -84,7 +84,7 @@ def success_parse(self, response):

             self.save_page(proxy.ip, response.body)

-            if self.success_mark in response.text or self.success_mark is '':
+            if self.success_content_parse(response):
                 proxy.speed = time.time() - response.meta.get('cur_time')
                 proxy.vali_count += 1
                 self.log('proxy_info:%s' % (str(proxy)))
7 changes: 5 additions & 2 deletions ipproxytool/spiders/validator/jd.py
@@ -41,11 +41,14 @@ def __init__(self, name = None, **kwargs):
             'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:52.0) Gecko/20100101 Firefox/52.0',
         }

-        self.success_mark = 'comments'
         self.is_record_web_page = False
-
         self.init()
+
+    def success_content_parse(self, response):
+        if 'comments' in response.text:
+            return True
+        return False

     def start_requests(self):
         count = self.sql.get_proxy_count(self.name)
         count_httpbin = self.sql.get_proxy_count(config.httpbin_table)
6 changes: 5 additions & 1 deletion ipproxytool/spiders/validator/lagou.py
@@ -31,9 +31,13 @@ def __init__(self, name = None, **kwargs):
         }

         self.is_record_web_page = True
-        self.success_mark = 'success'
         self.init()
+
+    def success_content_parse(self, response):
+        if 'success' in response.text:
+            return True
+        return False

     def start_requests(self):
         count = self.sql.get_proxy_count(self.name)
         count_httpbin = self.sql.get_proxy_count(config.httpbin_table)
7 changes: 6 additions & 1 deletion ipproxytool/spiders/validator/liepin.py
@@ -27,6 +27,11 @@ def __init__(self, name = None, **kwargs):
             'Firefox/50.0',
         }

-        self.success_mark = 'sojob-list'
         self.is_record_web_page = False
         self.init()
+
+    def success_content_parse(self, response):
+        if 'sojob-list' in response.text:
+            return True
+        return False
+
6 changes: 4 additions & 2 deletions ipproxytool/spiders/validator/validator.py
@@ -20,7 +20,6 @@ def __init__(self, name = None, **kwargs):

         self.urls = []
         self.headers = None
-        self.success_mark = ''
         self.timeout = 10
         self.is_record_web_page = False

@@ -81,7 +80,7 @@ def success_parse(self, response):

             proxy.vali_count += 1
             proxy.speed = time.time() - response.meta.get('cur_time')
-            if self.success_mark in response.text or self.success_mark is '':
+            if self.success_content_parse(response):
                 if table == self.name:
                     if proxy.speed > self.timeout:
                         self.sql.del_proxy_with_id(table, proxy.id)
@@ -96,6 +95,9 @@ def success_parse(self, response):

         self.sql.commit()

+    def success_content_parse(self, response):
+        return True
+
     def error_parse(self, failure):
         request = failure.request
         self.log('error_parse value:%s url:%s meta:%s' % (failure.value, request.url, request.meta))
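
Design note: with the hook in place, the fragile `self.success_mark is ''` test disappears; `is` compares object identity rather than equality, so that empty-string check only worked because CPython happens to intern the literal (Python 3.8+ flags it with a SyntaxWarning). Validators that do not override `success_content_parse` inherit the base implementation above and keep the old default of accepting any successful response.
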
6 changes: 5 additions & 1 deletion ipproxytool/spiders/validator/zhilian.py
@@ -24,6 +24,10 @@ def __init__(self, name = None, **kwargs):
             'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.110 Safari/537.36',
         }

-        self.success_mark = '<!DOCTYPE html>'
         self.is_record_web_page = False
         self.init()
+
+    def success_content_parse(self, response):
+        if '<!DOCTYPE html>' in response.text:
+            return True
+        return False
