diff --git a/ipproxytool/spiders/validator/bbs.py b/ipproxytool/spiders/validator/bbs.py index 231e924..5c75ff2 100644 --- a/ipproxytool/spiders/validator/bbs.py +++ b/ipproxytool/spiders/validator/bbs.py @@ -29,6 +29,10 @@ def __init__(self, name = None, **kwargs): 'Firefox/50.0', } - self.success_mark = 'conmain' self.is_record_web_page = False self.init() + + def success_content_parse(self, response): + if 'conmain' in response.text: + return True + return False diff --git a/ipproxytool/spiders/validator/boss.py b/ipproxytool/spiders/validator/boss.py index 6642525..436476c 100644 --- a/ipproxytool/spiders/validator/boss.py +++ b/ipproxytool/spiders/validator/boss.py @@ -25,6 +25,10 @@ def __init__(self, name = None, **kwargs): 'Firefox/50.0', } - self.success_mark = '' self.is_record_web_page = False self.init() + + def success_content_parse(self, response): + if '' in response.text: + return True + return False \ No newline at end of file diff --git a/ipproxytool/spiders/validator/httpbin.py b/ipproxytool/spiders/validator/httpbin.py index b7a6c29..a57479e 100644 --- a/ipproxytool/spiders/validator/httpbin.py +++ b/ipproxytool/spiders/validator/httpbin.py @@ -84,7 +84,7 @@ def success_parse(self, response): self.save_page(proxy.ip, response.body) - if self.success_mark in response.text or self.success_mark is '': + if self.success_content_parse(response): proxy.speed = time.time() - response.meta.get('cur_time') proxy.vali_count += 1 self.log('proxy_info:%s' % (str(proxy))) diff --git a/ipproxytool/spiders/validator/jd.py b/ipproxytool/spiders/validator/jd.py index 98bdbd0..42ce7d4 100644 --- a/ipproxytool/spiders/validator/jd.py +++ b/ipproxytool/spiders/validator/jd.py @@ -41,11 +41,14 @@ def __init__(self, name = None, **kwargs): 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:52.0) Gecko/20100101 Firefox/52.0', } - self.success_mark = 'comments' self.is_record_web_page = False - self.init() + def success_content_parse(self, response): + if 'comments' in response.text: + return True + return False + def start_requests(self): count = self.sql.get_proxy_count(self.name) count_httpbin = self.sql.get_proxy_count(config.httpbin_table) diff --git a/ipproxytool/spiders/validator/lagou.py b/ipproxytool/spiders/validator/lagou.py index 2260303..115c0cc 100644 --- a/ipproxytool/spiders/validator/lagou.py +++ b/ipproxytool/spiders/validator/lagou.py @@ -31,9 +31,13 @@ def __init__(self, name = None, **kwargs): } self.is_record_web_page = True - self.success_mark = 'success' self.init() + def success_content_parse(self, response): + if 'success' in response.text: + return True + return False + def start_requests(self): count = self.sql.get_proxy_count(self.name) count_httpbin = self.sql.get_proxy_count(config.httpbin_table) diff --git a/ipproxytool/spiders/validator/liepin.py b/ipproxytool/spiders/validator/liepin.py index 8ef4e3a..1b291f5 100644 --- a/ipproxytool/spiders/validator/liepin.py +++ b/ipproxytool/spiders/validator/liepin.py @@ -27,6 +27,11 @@ def __init__(self, name = None, **kwargs): 'Firefox/50.0', } - self.success_mark = 'sojob-list' self.is_record_web_page = False self.init() + + def success_content_parse(self, response): + if 'sojob-list' in response.text: + return True + return False + diff --git a/ipproxytool/spiders/validator/validator.py b/ipproxytool/spiders/validator/validator.py index 6eb260c..6470855 100644 --- a/ipproxytool/spiders/validator/validator.py +++ b/ipproxytool/spiders/validator/validator.py @@ -20,7 +20,6 @@ def __init__(self, name = None, **kwargs): self.urls = [] self.headers = None - self.success_mark = '' self.timeout = 10 self.is_record_web_page = False @@ -81,7 +80,7 @@ def success_parse(self, response): proxy.vali_count += 1 proxy.speed = time.time() - response.meta.get('cur_time') - if self.success_mark in response.text or self.success_mark is '': + if self.success_content_parse(response): if table == self.name: if proxy.speed > self.timeout: self.sql.del_proxy_with_id(table, proxy.id) @@ -96,6 +95,9 @@ def success_parse(self, response): self.sql.commit() + def success_content_parse(self, response): + return True + def error_parse(self, failure): request = failure.request self.log('error_parse value:%s url:%s meta:%s' % (failure.value, request.url, request.meta)) diff --git a/ipproxytool/spiders/validator/zhilian.py b/ipproxytool/spiders/validator/zhilian.py index cf2628a..8dcafe5 100644 --- a/ipproxytool/spiders/validator/zhilian.py +++ b/ipproxytool/spiders/validator/zhilian.py @@ -24,6 +24,10 @@ def __init__(self, name = None, **kwargs): 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.110 Safari/537.36', } - self.success_mark = '' self.is_record_web_page = False self.init() + + def success_content_parse(self, response): + if '' in response.text: + return True + return False