forked from awolfly9/IPProxyTool
-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_crawl_proxy.py
68 lines (55 loc) · 1.99 KB
/
run_crawl_proxy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#-*- coding: utf-8 -*-
import logging
import os
import sys
import scrapydo
import time
import utils
import config
from sql import SqlManager
from ipproxytool.spiders.proxy.xicidaili import XiCiDaiLiSpider
from ipproxytool.spiders.proxy.sixsixip import SixSixIpSpider
from ipproxytool.spiders.proxy.ip181 import IpOneEightOneSpider
from ipproxytool.spiders.proxy.kuaidaili import KuaiDaiLiSpider
from ipproxytool.spiders.proxy.gatherproxy import GatherproxySpider
from ipproxytool.spiders.proxy.hidemy import HidemySpider
from ipproxytool.spiders.proxy.proxylistplus import ProxylistplusSpider
from ipproxytool.spiders.proxy.freeproxylists import FreeProxyListsSpider
from ipproxytool.spiders.proxy.peuland import PeulandSpider
from ipproxytool.spiders.proxy.usproxy import UsProxySpider
from ipproxytool.spiders.proxy.proxydb import ProxyDBSpider
from ipproxytool.spiders.proxy.proxyrox import ProxyRoxSpider
# Initialise scrapydo once, at import time, before any spider is run.
scrapydo.setup()

if __name__ == '__main__':
    # Entry point: crawl every configured free-proxy site in an endless
    # loop, pruning old rows from the proxy table before each cycle.

    # Work from the script's own directory so the relative 'log/' path
    # below resolves next to this file regardless of the caller's cwd.
    os.chdir(sys.path[0])

    # Python 2 only: force the process-wide default encoding to UTF-8.
    # ``reload`` is not a builtin on Python 3 and ``setdefaultencoding``
    # does not exist there, so the hack is skipped on newer interpreters.
    if sys.version_info[0] == 2:
        reload(sys)
        sys.setdefaultencoding('utf-8')

    if not os.path.exists('log'):
        os.makedirs('log')

    logging.basicConfig(
        filename='log/crawl_proxy.log',
        format='%(levelname)s %(asctime)s: %(message)s',
        level=logging.DEBUG,
    )

    sql = SqlManager()

    # Spider classes to run, in order, on every cycle.
    spiders = [
        XiCiDaiLiSpider,
        SixSixIpSpider,
        IpOneEightOneSpider,
        KuaiDaiLiSpider,  # site serves a JS challenge before access (anti-crawling)
        GatherproxySpider,
        HidemySpider,
        ProxylistplusSpider,
        FreeProxyListsSpider,
        # PeulandSpider,  # target site no longer works
        UsProxySpider,
        ProxyDBSpider,
        ProxyRoxSpider,
    ]

    while True:
        utils.log('*******************run spider start...*******************')

        # NOTE(review): 0.5 is passed straight to SqlManager.delete_old;
        # presumably an age threshold -- confirm the unit against sql.py.
        sql.delete_old(config.free_ipproxy_table, 0.5)

        for spider in spiders:
            # Isolate each spider: a failure or timeout in one site must not
            # abort the remaining spiders or kill the endless crawl loop.
            try:
                scrapydo.run_spider(spider_cls=spider)
            except Exception:
                logging.exception(
                    'spider %s failed, continuing with the next one',
                    spider.__name__,
                )

        utils.log('*******************run spider waiting...*******************')

        # Pause 1200 seconds (20 minutes) between crawl cycles.
        time.sleep(1200)