forked from queensun/Nyspider
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathiservice.py
68 lines (60 loc) · 2.08 KB
/
iservice.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
from util import *
import json
from bs4 import BeautifulSoup
import time
def load_city():
    """Load the province/city lookup table from ``./files/city.json``.

    Returns:
        The decoded JSON document. ``crawl_hall`` reads its ``'provinces'``
        and ``'citys'`` entries.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file does not contain valid JSON.
    """
    # The context manager closes the handle even when json.load() raises;
    # the previous version left the file open until garbage collection.
    with open('./files/city.json', 'r') as f:
        return json.load(f)
def get_city_hall(province_code, city_code, max_retries=3):
    """Collect every business-hall record listed for one city.

    Pages through the ``listHallByPropertyNew`` endpoint, starting at page 1,
    until a response contains an ``'errorMessage'`` key or an empty page.

    Args:
        province_code: Value substituted for ``provinceCode`` in the URL.
        city_code: Value substituted for ``cityCode`` in the URL.
        max_retries: How many consecutive responses lacking
            ``'BusinessHallList'`` are tolerated for a single page before
            giving up. Previously such a response was retried without limit,
            which could spin forever on the same page.

    Returns:
        A list of rows; each row is a list holding the values of
        ``epProvincename``, ``epCityname``, ``epName``, ``epAddress``,
        ``epLinkTelphone`` and ``epBusinessTime`` (in that order), with ``''``
        substituted for any field that is missing.

    Raises:
        RuntimeError: if a page keeps coming back without
            ``'BusinessHallList'`` after ``max_retries`` retries.
    """
    url = 'http://iservice.10010.com/e3/static/life/listHallByPropertyNew?provinceCode={}&cityCode={}&page={}'
    keys = ['epProvincename', 'epCityname', 'epName',
            'epAddress', 'epLinkTelphone', 'epBusinessTime']
    result = []
    page = 1
    failures = 0  # consecutive malformed responses for the current page
    while True:
        # build_request comes from util; its result must expose .json().
        req = build_request(url.format(province_code, city_code, page))
        res_data = req.json()
        if 'errorMessage' in res_data:
            break
        try:
            hall_list = res_data['BusinessHallList']
        except (KeyError, TypeError):
            # Retry the same page, but only a bounded number of times.
            failures += 1
            if failures > max_retries:
                raise RuntimeError(
                    'no BusinessHallList in response for province {} city {} page {}'.format(
                        province_code, city_code, page))
            continue
        failures = 0
        if not hall_list:
            # NOTE(review): an empty page is treated as the end of the
            # listing so the loop cannot run forever if the server never
            # sends 'errorMessage' -- confirm against the live API.
            break
        for hall in hall_list:
            if isinstance(hall, dict):
                line = [hall.get(key, '') for key in keys]
            else:
                # Entries that are not mappings yield an all-blank row, as
                # the former per-key fallback did.
                line = [''] * len(keys)
            result.append(line)
        print(current_time(), province_code, city_code, page, 'OK')
        page += 1
    return result
def crawl_hall():
    """Crawl business halls for every city in the city table.

    Reads the table via ``load_city()``; ``city_data['citys'][i]`` holds the
    cities belonging to ``city_data['provinces'][i]``. Each province and city
    entry is indexed as ``[code, name, ...]``.

    For each city, the rows returned by ``get_city_hall`` are appended to
    ``./files/result`` as one JSON array per line, prefixed with the province
    and city names. If ``get_city_hall`` raises, the province and city entries
    are appended to ``./files/fail`` as one JSON line and the crawl moves on
    to the next city.
    """
    city_data = load_city()
    provinces = city_data['provinces']
    city_list = city_data['citys']
    for index, province in enumerate(provinces):
        province_code, province_name = province[0], province[1]
        for city in city_list[index]:
            city_code, city_name = city[0], city[1]
            try:
                result = get_city_hall(province_code, city_code)
            except Exception:
                # Deliberately not a bare ``except``: KeyboardInterrupt and
                # SystemExit must still be able to stop the crawl instead of
                # being recorded as a failed city.
                with open('./files/fail', 'a') as fail_file:
                    fail_file.write(json.dumps(
                        province + city, ensure_ascii=False) + '\n')
                continue
            # NOTE(review): files use the platform default encoding, as
            # before; the result file is later read back by load_txt, so the
            # encoding is left unchanged here.
            with open('./files/result', 'a') as result_file:
                result_file.writelines(
                    json.dumps([province_name, city_name] + hall,
                               ensure_ascii=False) + '\n'
                    for hall in result)
            print(current_time(), province_name, city_name, 'OK')
# Script entry: run the whole crawl first, which appends rows to ./files/result.
crawl_hall()
# Then hand the accumulated result file to the util helpers for Excel export.
# NOTE(review): load_txt / write_to_excel come from util; presumably load_txt
# parses the JSON lines written above -- confirm against util.
result_rows = load_txt('./files/result')
write_to_excel(result_rows, '联通营业厅数据.xlsx')