Commit

Temporary commit (临时提交)
cwjokaka committed Sep 6, 2019
1 parent 6b51f91 commit 65c4798
Showing 3 changed files with 15 additions and 6 deletions.
setting.py (2 changes: 1 addition & 1 deletion)

@@ -1,4 +1,4 @@
-from src.spider import Spider66Ip
+from src.spider.spider_66_ip import Spider66Ip
 
 DB_TYPE = 'memory' # memory/redis
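The setting.py change swaps a package-level import for a direct module import: Spider66Ip is defined in src/spider/spider_66_ip.py, and importing it via the package only works if the package's __init__.py re-exports it. A minimal sketch of the distinction (the __init__.py behavior described here is an assumption, not something this commit shows):

# Package import: only resolves if src/spider/__init__.py re-exports the class,
# e.g. via a line like `from src.spider.spider_66_ip import Spider66Ip` (assumed).
# from src.spider import Spider66Ip   # raises ImportError without that re-export

# Module import: resolves from the file path alone, regardless of __init__.py.
from src.spider.spider_66_ip import Spider66Ip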
src/spider/abs_spider.py (11 changes: 8 additions & 3 deletions)

@@ -1,13 +1,18 @@
+from typing import List, Iterable
+
 from src.entity.proxy_entity import ProxyEntity
 
 
 class AbsSpider(object):
 
     def __init__(self, name='unknown') -> None:
         self._name = name
 
     def crawl(self):
-        print('开始爬取...')
+        print(f'{self._name}开始爬取...')
         res = self.do_crawl()
-        print(f'爬取完毕!共:{len(res)}个代理')
+        print(f'{self._name}爬取完毕!共:{len(res)}个代理')
         return res
 
-    def do_crawl(self):
+    def do_crawl(self) -> List[ProxyEntity]:
         raise RuntimeError('do_crawl方法没有实现!')
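The abs_spider.py change threads the spider's name into both log lines (the Chinese strings read roughly '{name} starting crawl...' and '{name} crawl finished! Total: N proxies') and annotates the do_crawl hook with its List[ProxyEntity] return type; the RuntimeError message means 'do_crawl is not implemented!'. The class is a template method: crawl() wraps fixed logging around the subclass-supplied do_crawl(). A runnable sketch of that pattern with the log strings translated and a stand-in ProxyEntity (the real one lives in src/entity/proxy_entity.py; its constructor here is assumed):

from typing import List


class ProxyEntity:
    """Stand-in for src.entity.proxy_entity.ProxyEntity (assumed shape)."""

    def __init__(self, ip: str, port: str) -> None:
        self.ip = ip
        self.port = port


class AbsSpider(object):

    def __init__(self, name: str = 'unknown') -> None:
        self._name = name

    def crawl(self) -> List[ProxyEntity]:
        # Template method: fixed logging around the subclass-supplied hook.
        print(f'{self._name} starting crawl...')                      # 开始爬取...
        print_res = res = self.do_crawl()
        print(f'{self._name} finished! {len(res)} proxies in total')  # 爬取完毕!
        return res

    def do_crawl(self) -> List[ProxyEntity]:
        raise RuntimeError('do_crawl is not implemented!')            # do_crawl方法没有实现!


class DummySpider(AbsSpider):
    """Minimal concrete spider, just to exercise the hook."""

    def __init__(self) -> None:
        super().__init__('dummy')

    def do_crawl(self) -> List[ProxyEntity]:
        return [ProxyEntity('127.0.0.1', '8080')]


if __name__ == '__main__':
    DummySpider().crawl()  # dummy starting crawl... / dummy finished! 1 proxies in total

NotImplementedError (or an abc.abstractmethod) would be the more idiomatic way to mark the unimplemented hook, but the commit keeps RuntimeError.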
src/spider/spider_66_ip.py (8 changes: 6 additions & 2 deletions)

@@ -1,3 +1,5 @@
+from typing import List, Iterable
+
 import requests
 
 from src.entity.proxy_entity import ProxyEntity
@@ -14,10 +16,10 @@ def __init__(self) -> None:
         super().__init__('66IP代理爬虫')
         self._base_url = 'http://www.66ip.cn'
 
-    def do_crawl(self):
+    def do_crawl(self) -> List[ProxyEntity]:
         result = []
         for page in range(1, 5):
-            print(f'第{page}页...')
+            # print(f'第{page}页...')
             resp = requests.get(f'{self._base_url}/{page}.html')
             resp.encoding = 'gb2312'
             soup = BeautifulSoup(resp.text, 'lxml')
@@ -34,3 +36,5 @@ def do_crawl(self):
                 print(f'{ip}:{port}/{region}/{proxy_type}/{check_time}')
                 result.append(ProxyEntity(ip, port, self._name, type=proxy_type, region=region))
         return result
+
+
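Spider66Ip fetches pages 1 through 4 of www.66ip.cn (the name passed to super().__init__, 66IP代理爬虫, means '66IP proxy spider'), decodes each response as gb2312, and parses the proxy table with BeautifulSoup; the commit adds the List[ProxyEntity] return annotation and comments out the per-page progress print ('第N页' = 'page N'). The diff collapses the table-parsing lines, so the following do_crawl body is a hedged reconstruction: the main-table lookup, the header-row skip, and the five-column layout (ip, port, region, type, check time) are assumptions about the site's markup, not code confirmed by this commit.

from typing import List

import requests
from bs4 import BeautifulSoup

from src.entity.proxy_entity import ProxyEntity
from src.spider.abs_spider import AbsSpider


class Spider66Ip(AbsSpider):

    def __init__(self) -> None:
        super().__init__('66IP代理爬虫')
        self._base_url = 'http://www.66ip.cn'

    def do_crawl(self) -> List[ProxyEntity]:
        result = []
        for page in range(1, 5):
            resp = requests.get(f'{self._base_url}/{page}.html')
            resp.encoding = 'gb2312'  # the site serves GB-encoded HTML
            soup = BeautifulSoup(resp.text, 'lxml')
            # Assumption: proxies sit in the rows of the page's main table,
            # first row is a header; columns: ip, port, region, type, check time.
            for row in soup.find('table').find_all('tr')[1:]:
                cells = [td.text.strip() for td in row.find_all('td')]
                if len(cells) < 5:
                    continue
                ip, port, region, proxy_type, check_time = cells[:5]
                result.append(ProxyEntity(ip, port, self._name,
                                          type=proxy_type, region=region))
        return result

Wired through the base class, a caller only ever invokes the template method: Spider66Ip().crawl().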