-
Notifications
You must be signed in to change notification settings - Fork 35
/
check.py
90 lines (73 loc) · 2.9 KB
/
check.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# @author: [email protected]
# @site: https://chenjiehua.me
# @date: 2016-09-05
#
import os
import csv
import gzip
import os.path
import logging
import argparse
import multiprocessing
from lib.validate import Validator
# Absolute directory containing this script; used as the base for output paths.
root = os.path.split(os.path.abspath(__file__))[0]
def main():
    """Run the proxy check end to end.

    Parses the command line, configures logging, loads every row of the
    input CSV, hands the rows to ``Validator.run`` and writes the returned
    entries ordered by ascending ``speed`` value.
    """
    options = parse_args()
    set_loglevel(options.loglevel)

    checker = Validator(
        options.target, options.timeout, options.worker, options.thread
    )

    # Materialise the generator so the total can be logged before validation.
    candidates = list(read_csv(options.input))
    logging.info("Load proxy ip, total: %s", len(candidates))

    def _by_speed(entry):
        return entry["speed"]

    checked = checker.run(candidates)
    write_csv(sorted(checked, key=_by_speed))
def parse_args(argv=None):
    """Parse the command-line options of the proxy checker.

    Args:
        argv: Optional list of argument strings to parse. ``None`` (the
            default) lets argparse read ``sys.argv[1:]``, which keeps
            existing zero-argument callers working while allowing the
            parser to be driven programmatically.

    Returns:
        argparse.Namespace with the attributes ``input``, ``target``,
        ``timeout``, ``worker``, ``thread`` and ``loglevel``.
    """
    # Default to one worker process per CPU core.
    procs_num = multiprocessing.cpu_count()

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "input",
        help="the input proxy ip list, in csv format(support gz)",
    )
    parser.add_argument(
        "--target",
        default="http://www.baidu.com",
        help="target uri to validate proxy ip, default: http://www.baidu.com",
    )
    parser.add_argument(
        "--timeout",
        type=int,
        default=15,
        help="timeout of validating each ip, default: 15s",
    )
    parser.add_argument(
        "--worker",
        type=int,
        default=procs_num,
        help="run with multi workers, default: CPU cores",
    )
    parser.add_argument(
        "--thread",
        type=int,
        default=100,
        help="run with multi thread in each worker, default: 100",
    )
    parser.add_argument(
        "--loglevel",
        default="info",
        help="set log level, e.g. debug, info, warn, error; default: info",
    )
    return parser.parse_args(argv)
def set_loglevel(loglvl):
    """Configure the root logger from a textual level name.

    Args:
        loglvl: Level name; "debug", "info", "warn" and "error" are
            recognised (matched case-insensitively). Any other value is
            reported as an error and INFO is used instead.
    """
    levels = {
        "debug": logging.DEBUG,
        "info": logging.INFO,
        "warn": logging.WARNING,
        "error": logging.ERROR,
    }
    name = str(loglvl).lower()
    known = name in levels
    level = levels[name] if known else logging.INFO

    # Configure before emitting anything: logging through the module-level
    # helpers on an unconfigured root logger implicitly calls basicConfig()
    # with defaults, after which this explicit call would be a no-op and the
    # requested format/level would be silently dropped.
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(message)s", level=level
    )

    if not known:
        logging.error("Unknown logging level: %s", loglvl)
        name = "info"  # report the level that is actually in effect
    logging.info("Set log level: %s", name)
def read_csv(fpath):
    """Yield the rows of a CSV file as dictionaries keyed by the header row.

    Files whose name ends in ``.gz`` are decompressed on the fly. Rows that
    have fewer fields than the header get the missing fields filled with
    ``0`` (the reader's ``restval``).

    Args:
        fpath: Path of the CSV file, optionally gzip-compressed.

    Yields:
        One mapping per data row.
    """
    opener = gzip.open if fpath.endswith(".gz") else open
    # gzip.open defaults to binary mode, which the csv module rejects on
    # Python 3, so request text mode explicitly. newline="" lets the csv
    # module do its own newline handling (needed for quoted line breaks).
    with opener(fpath, "rt", newline="") as f:
        csvreader = csv.DictReader(
            f,
            restval=0,
            delimiter=",",
            quotechar="\"",
            quoting=csv.QUOTE_MINIMAL,
        )
        yield from csvreader
def write_csv(ip_avaliable, output=None):
    """Write proxy entries to a CSV file.

    Only the columns ``ip``, ``port``, ``anonymous``, ``info`` and ``speed``
    are written: extra keys in an entry are ignored and missing keys are
    written as empty strings.

    Args:
        ip_avaliable: Sized collection of dict-like proxy entries.
        output: Destination file path. Defaults to ``data/proxyip.csv``
            inside the directory stored in the module-level ``root``.
    """
    if output is None:
        output = os.path.join(root, "data", "proxyip.csv")

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    parent = os.path.dirname(output)
    if parent:
        os.makedirs(parent, exist_ok=True)

    logging.info("Save to file, total proxy ip: %s", len(ip_avaliable))
    header = ["ip", "port", "anonymous", "info", "speed"]
    # newline="" stops the text layer from translating the writer's "\r\n"
    # line terminator again (which yields "\r\r\n" on Windows).
    with open(output, "w", newline="") as fw:
        w = csv.DictWriter(
            fw,
            fieldnames=header,
            restval="",
            extrasaction="ignore",
            delimiter=",",
            quotechar="\"",
            quoting=csv.QUOTE_MINIMAL,
        )
        w.writeheader()
        w.writerows(ip_avaliable)
# Run the checker only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()