forked from hustcc/PTHospital.chrome
-
Notifications
You must be signed in to change notification settings - Fork 0
/
read_from_README.py
65 lines (55 loc) · 1.58 KB
/
read_from_README.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
Created on 2016年5月4日
从 https://raw.githubusercontent.com/langhua9527/Hospital/master/README.md 中解析数据
@author: hustcc
'''
import requests
import re
import json
def get_readme_content():
    """Download the hospital README and return its body as text.

    Returns:
        The decoded response body (``Response.text``) of the README.

    Raises:
        requests.exceptions.RequestException: on connection failure,
            timeout, or a non-success HTTP status.
    """
    url = "https://raw.githubusercontent.com/langhua9527/Hospital/master/README.md"
    # Without a timeout requests may block forever on a stalled connection.
    response = requests.get(url, timeout=30)
    # Fail loudly instead of parsing an HTTP error page as if it were the README.
    response.raise_for_status()
    return response.text
def get_phone_num(content, i, max=4):
    """Find the phone number listed in the lines just above line ``i``.

    Scans upward starting at ``content[i - 1]`` and inspects at most ``max``
    lines, looking for a line shaped like `` - 电话 +86 21 5187 6888``.

    Args:
        content: list of text lines.
        i: index of the line to search above.
        max: maximum number of lines to inspect (the name shadows the
            builtin but is kept so existing keyword callers keep working).

    Returns:
        The text following the phone prefix, or ``None`` when no phone line
        is found within the window or before the start of ``content``.
    """
    prefix = u" - 电话 "
    inspected = 0
    idx = i - 1
    # Stop at the first line: a negative index would silently wrap around to
    # the end of the list and could match an unrelated entry.
    while idx >= 0 and inspected < max:
        line = content[idx]
        if line.startswith(prefix):
            return line[len(prefix):]
        inspected += 1
        idx -= 1
    return None
def get_website(line):
    """Extract the website from a line such as `` - 网址 www.tcmmh.com``.

    Returns the text after the website prefix, or ``None`` when ``line``
    does not start with that prefix.
    """
    marker = u" - 网址 "
    return line[len(marker):] if line.startswith(marker) else None
def get_name(content, i):
    """Find the nearest name line above line ``i``.

    A name line starts with ``"- "``, e.g. ``- 上海市闵行区中医院``.

    Args:
        content: list of text lines.
        i: index of the line to search above.

    Returns:
        A ``(name, steps)`` tuple. ``name`` is the text after the ``"- "``
        prefix and ``steps`` is how many lines above ``i`` it was found.
        When no name line exists above ``i``, returns ``(None, steps)``
        where ``steps`` is the number of lines that were inspected.
    """
    prefix = u"- "
    steps = 0
    idx = i - 1
    # Bound the scan at the first line: negative indices would wrap around
    # to the end of the list (wrong match) and eventually raise IndexError.
    while idx >= 0:
        steps += 1
        line = content[idx]
        if line.startswith(prefix):
            return line[len(prefix):], steps
        idx -= 1
    return None, steps
def process_readme(content):
    """Parse README text into a ``{website: [name, phone]}`` mapping.

    For every line recognised by ``get_website``, the nearest name line above
    it is located with ``get_name``; the distance it reports is then used as
    the search window for ``get_phone_num``, so the phone lookup does not
    reach above that name line.

    Args:
        content: the README text as a single string.

    Returns:
        dict mapping each website to a two-item list ``[name, phone]``;
        a missing name or phone is stored as ``""``. A website that occurs
        more than once keeps the values of its last occurrence.
    """
    hos = {}
    lines = content.split("\n")
    # enumerate replaces the Python-2-only xrange index loop.
    for index, line in enumerate(lines):
        website = get_website(line)
        if not website:
            continue
        # "window" rather than "max" so the builtin is not shadowed.
        name, window = get_name(lines, index)
        callnum = get_phone_num(lines, index, window)
        hos[website] = [name or "", callnum or ""]
    return hos
if __name__ == '__main__':
    # Fetch the README, parse it, then print the entry count and a JSON dump.
    content = get_readme_content()
    hos = process_readme(content)
    # Single-argument print(...) produces the same output on Python 2 and is
    # valid syntax on Python 3, unlike the bare print statement.
    print(len(hos))
    print(json.dumps(hos))