Skip to content

Commit

Permalink
add iservice.10010.com
Browse files Browse the repository at this point in the history
  • Loading branch information
Nyloner committed May 27, 2018
1 parent 760b25a commit d35ab9c
Show file tree
Hide file tree
Showing 2 changed files with 174 additions and 0 deletions.
61 changes: 61 additions & 0 deletions iservice.10010.com/iservice.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
from util import *
import json
from bs4 import BeautifulSoup
import time


def load_city(path='./files/city.json'):
    """Load the province/city code mapping from a JSON file.

    Args:
        path: JSON file to read; defaults to the original hard-coded
            location so existing callers are unaffected.

    Returns:
        The parsed JSON object (callers expect keys 'provinces' and 'citys').
    """
    # 'with' guarantees the handle is closed; the original leaked it.
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)


def get_city_hall(province_code, city_code):
    """Fetch every business hall for one city, paging until the API
    signals exhaustion.

    Args:
        province_code: province code used by iservice.10010.com.
        city_code: city code within that province.

    Returns:
        A list of [province, city, hall name, address] rows.

    Raises:
        NetWorkError: propagated from build_request when all retries fail.
    """
    url = 'http://iservice.10010.com/e3/static/life/listHallByPropertyNew?provinceCode={}&cityCode={}&page={}'
    keys = ['epProvincename', 'epCityname', 'epName', 'epAddress']
    result = []
    page = 1
    while True:
        req = build_request(url.format(province_code, city_code, page))
        res_data = req.json()
        # The API reports end-of-data (or a bad request) via errorMessage.
        if 'errorMessage' in res_data:
            break
        # BUGFIX: the original did `continue` on a missing key without
        # bumping `page`, re-requesting the same page forever. A missing
        # or empty hall list now terminates the loop instead.
        hall_list = res_data.get('BusinessHallList')
        if not hall_list:
            break
        for hall in hall_list:
            result.append([hall[key] for key in keys])
        print(current_time(), province_code, city_code, page, 'OK')
        page += 1
    return result


def crawl_hall():
    """Crawl halls for every (province, city) pair.

    Successful rows are appended to ./files/result as JSON lines;
    failed (province, city) pairs are appended to ./files/fail so they
    can be retried later. Progress is printed per city.
    """
    city_data = load_city()
    provinces = city_data['provinces']
    city_list = city_data['citys']
    # provinces[i] pairs with city_list[i]: the two arrays are parallel.
    for index in range(len(provinces)):
        province_code = provinces[index][0]
        province_name = provinces[index][1]
        for city in city_list[index]:
            city_code = city[0]
            city_name = city[1]
            try:
                result = get_city_hall(province_code, city_code)
            except Exception:
                # Best-effort crawl: record the failure and move on.
                # (Narrowed from a bare except; 'with' closes the file
                # even if the write itself raises.)
                with open('./files/fail', 'a', encoding='utf-8') as f:
                    f.write(json.dumps(
                        provinces[index] + city, ensure_ascii=False) + '\n')
                continue
            # utf-8 is required: ensure_ascii=False emits raw Chinese text.
            with open('./files/result', 'a', encoding='utf-8') as f:
                for hall in result:
                    f.write(json.dumps(
                        [province_name, city_name] + hall,
                        ensure_ascii=False) + '\n')
            print(current_time(), province_name, city_name, 'OK')

# Export the accumulated JSON-lines result file into an Excel workbook.
# NOTE(review): runs at import time (no __main__ guard) and assumes
# crawl_hall() has already populated ./files/result.
write_to_excel(load_txt('./files/result'),'联通营业厅数据.xlsx')
113 changes: 113 additions & 0 deletions iservice.10010.com/util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
import requests
import time
import openpyxl
import random
import datetime
import json
import re
import csv
import os


def get_headers():
    """Build default desktop-browser request headers.

    X-Forwarded-For is randomized on every call to vary the apparent
    client IP seen by the target server.
    """
    spoofed_ip = '.'.join(str(random.randint(0, 255)) for _ in range(4))
    return {
        "X-Forwarded-For": spoofed_ip,
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        "Accept-Encoding": "gzip, deflate",
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36",
    }


class NetWorkError(Exception):
    """Raised when an HTTP request still fails after all retry attempts."""


def build_session_request(session, url, headers=None, data=None, json_data=None, timeout=15, try_times=3):
    """Issue an HTTP request on `session`, retrying on any error.

    Method selection mirrors the original: form POST if `data` is truthy,
    JSON POST if `json_data` is truthy, otherwise GET.

    Args:
        session: an object with requests-style .get/.post methods.
        headers: optional header dict; defaults to get_headers().
        data: form payload (POST when truthy).
        json_data: object to JSON-encode as the body (POST when truthy).
        timeout: per-attempt timeout in seconds.
        try_times: number of attempts before giving up.

    Returns:
        The response object from the first successful attempt.

    Raises:
        NetWorkError: when every attempt raised; chained to the last error.
    """
    if headers is None:
        headers = get_headers()
    last_error = None
    for _ in range(try_times):
        try:
            if data:
                response = session.post(
                    url, data=data, headers=headers, timeout=timeout)
            elif json_data:
                # BUGFIX: copy before adding Content-Type so the
                # caller's headers dict is never mutated.
                json_headers = dict(headers)
                json_headers['Content-Type'] = 'application/json'
                response = session.post(
                    url, data=json.dumps(json_data), headers=json_headers,
                    timeout=timeout)
            else:
                response = session.get(url, headers=headers, timeout=timeout)
            return response
        except Exception as e:
            # Keep the last failure so the final exception is diagnosable
            # (the original silently discarded it).
            last_error = e
    raise NetWorkError from last_error


def build_request(url, headers=None, data=None, json_data=None, timeout=15, try_times=3):
    """Issue a one-off HTTP request via the `requests` module, retrying
    on any error. Same contract as build_session_request but without a
    persistent session.

    Args:
        headers: optional header dict; defaults to get_headers().
        data: form payload (POST when truthy).
        json_data: object to JSON-encode as the body (POST when truthy).
        timeout: per-attempt timeout in seconds.
        try_times: number of attempts before giving up.

    Returns:
        The response from the first successful attempt.

    Raises:
        NetWorkError: when every attempt raised; chained to the last error.
    """
    if headers is None:
        headers = get_headers()
    last_error = None
    for _ in range(try_times):
        try:
            if data:
                response = requests.post(
                    url, data=data, headers=headers, timeout=timeout)
            elif json_data:
                # BUGFIX: copy before adding Content-Type so the
                # caller's headers dict is never mutated.
                json_headers = dict(headers)
                json_headers['Content-Type'] = 'application/json'
                response = requests.post(
                    url, data=json.dumps(json_data), headers=json_headers,
                    timeout=timeout)
            else:
                response = requests.get(url, headers=headers, timeout=timeout)
            return response
        except Exception as e:
            # Keep the last failure so the final exception is diagnosable.
            last_error = e
    raise NetWorkError from last_error


def write_to_excel(lines, filename, write_only=True):
    """Dump an iterable of row sequences into an .xlsx workbook.

    Args:
        lines: iterable of row sequences (one per worksheet row).
        filename: output path for the workbook.
        write_only: use openpyxl's streaming write-only mode (low memory
            for large dumps).

    Rows openpyxl cannot serialize are skipped so one bad row does not
    abort the whole export.
    """
    excel = openpyxl.Workbook(write_only=write_only)
    sheet = excel.create_sheet()
    for line in lines:
        try:
            sheet.append(line)
        except Exception:
            # Narrowed from a bare except: no longer swallows
            # KeyboardInterrupt/SystemExit.
            continue
    excel.save(filename)


def write_to_csv(lines, filename):
    """Write an iterable of row sequences to a CSV file.

    Args:
        lines: iterable of row sequences.
        filename: output path.

    BUGFIX: opens with newline='' as the csv module requires — without
    it, rows are double-spaced on Windows and embedded newlines inside
    quoted fields are corrupted. 'with' replaces the manual close.
    """
    with open(filename, 'w', encoding='utf-8', newline='') as csvfile:
        spamwriter = csv.writer(csvfile, delimiter=',',
                                quotechar='"', quoting=csv.QUOTE_MINIMAL)
        spamwriter.writerows(lines)


def get_next_date(current_date='2017-01-01'):
    """Return the ISO date string for the day after `current_date`.

    Args:
        current_date: a 'YYYY-MM-DD' date string.

    Returns:
        The following day, also formatted 'YYYY-MM-DD'.
    """
    day = datetime.datetime.strptime(current_date, '%Y-%m-%d')
    return (day + datetime.timedelta(days=1)).strftime('%Y-%m-%d')


def current_time():
    """Return the current local time formatted as 'YYYY-MM-DD HH:MM:SS'."""
    stamp_format = "%Y-%m-%d %H:%M:%S"
    return time.strftime(stamp_format, time.localtime())


def load_txt(filename):
    """Yield one parsed JSON object per line of `filename`.

    Args:
        filename: path to a JSON-lines file.

    Yields:
        Each successfully parsed line; unparseable lines are skipped.

    BUGFIX: the original iterated an unnamed open() whose handle was
    only closed by garbage collection; 'with' closes it deterministically
    when the generator is exhausted or closed.
    """
    with open(filename, 'r', encoding='utf-8') as f:
        for line in f:
            try:
                # json.JSONDecodeError subclasses ValueError, so this
                # replaces the bare except without losing coverage.
                item = json.loads(line)
            except ValueError:
                continue
            yield item


def sub_str(string, words=None, append=None):
    """Remove every occurrence of each entry of `words` (plus `append`)
    from `string`.

    Args:
        string: text to clean.
        words: substrings to delete; defaults to common whitespace
            characters. NOTE: entries are joined into a regex
            alternation, so they are interpreted as regex fragments.
        append: extra substrings to delete in addition to `words`.

    Returns:
        The cleaned string.

    BUGFIX: the original did `words += append`, mutating the caller's
    list in place whenever `words` was passed explicitly; a fresh list
    is built instead.
    """
    if words is None:
        words = ['\r', '\n', '\t', '\xa0']
    if append is not None:
        words = list(words) + list(append)
    return re.sub('|'.join(words), '', string)

0 comments on commit d35ab9c

Please sign in to comment.