This repository has been archived by the owner on Dec 17, 2018. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 69
/
main.py
67 lines (54 loc) · 1.69 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# -*- coding: utf-8 -*-
"""
Created on 2017/3/19
@author: will4906
"""
import configparser
import os
import sys
import click
from scrapy import cmdline
from config import base_settings as base
from config.base_settings import *
# from crawler.pipelines import CrawlerPipeline
from entity.models import Patents
from service.account import account
from service.info import init_crawler
from service.log import init_log
def init_config():
    """Load ``config/config.ini`` and initialise account, proxy, request,
    output and crawler settings from it.

    Raises:
        FileNotFoundError: if the config file is missing or unreadable.
            ``ConfigParser.read`` silently skips files it cannot open, so
            we check its return value (the list of files actually parsed)
            instead of letting the app continue with an empty config.
    """
    cfg = configparser.ConfigParser()
    config_path = os.path.join('config', 'config.ini')
    if not cfg.read(config_path, 'utf-8'):
        raise FileNotFoundError(config_path)
    # Validate credentials and settings sections, then hand the parsed
    # config to the crawler initialiser.
    account.check_username(cfg)
    account.check_password(cfg)
    base.check_proxy(cfg)
    base.check_request(cfg)
    base.check_output(cfg)
    init_crawler(cfg)
def init_base_path():
    """Ensure the output directories exist.

    ``os.makedirs(..., exist_ok=True)`` replaces the original
    check-then-``mkdir`` pair: it is race-free (no TOCTOU window between
    the existence test and creation) and also creates any missing
    intermediate directories in the configured paths.
    """
    os.makedirs(OUTPUT_PATH, exist_ok=True)
    os.makedirs(OUTPUT_GROUP_PATH, exist_ok=True)
def init_data_base():
    """Create the database table backing the ``Patents`` model."""
    # Schema creation is delegated entirely to the ORM model.
    model = Patents
    model.create_table()
if __name__ == '__main__':
    # Print the usage / contact banner before any initialisation runs.
    click.echo(
        '''
        ***************************************************************************
        * 使用说明:https://github.com/will4906/PatentCrawler/wiki
        * 代码更新:https://github.com/will4906/PatentCrawler
        * bug反馈、交流建议:
        * \t邮箱:[email protected]
        * \tgithub:https://github.com/will4906/PatentCrawler/issues
        ***************************************************************************
        '''
    )
    init_log()
    init_base_path()
    init_config()
    # init_data_base()  # one-off: create the DB table on first run
    # Build the scrapy argv as a list rather than str.split(): splitting
    # on whitespace would silently produce a broken command line if
    # LOG_FILENAME ever contained a space.
    if 'log' in base.OUTPUT_ITEMS:
        cmdline.execute(['scrapy', 'crawl', 'Patent',
                         '-s', 'LOG_FILE=' + LOG_FILENAME])
    else:
        cmdline.execute(['scrapy', 'crawl', 'Patent'])