forked from ybogdanov/history-timeline
-
Notifications
You must be signed in to change notification settings - Fork 0
/
final.py
executable file
·69 lines (50 loc) · 1.77 KB
/
final.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#!/usr/bin/python
import sys
import json
import argparse
from operator import itemgetter
# Reference year used to decide whether a person without a death year could
# still be alive. NOTE(review): hard-coded -- confirm it matches the date of
# the dataset snapshot being finalized.
curr_year = 2016

# Largest accepted lifespan, in years; anything above is treated as bad data.
max_age = 150


def rejection_reason(p):
    """Return why person record *p* must be excluded, or None if it is usable.

    *p* is a dict read with the keys 'from' (birth year) and 'to' (death
    year). A 'to' that is absent, None or 0 means "no death year known".
    The checks run in this order and the first match wins:

    1. 'from' is absent, None or 0.
    2. No death year and ``curr_year - from`` exceeds ``max_age``.
    3. ``to - from`` exceeds ``max_age``.
    4. ``to - from`` is exactly 0.

    The record itself is not modified.
    """
    born = p.get('from')
    if born is None or born == 0:
        return "missing birth year (from)"

    died = p.get('to')
    if died is None:
        # JSON null is handled like an absent key: no known death year.
        died = 0

    if died == 0 and curr_year - born > max_age:
        return "is older then %d years and is missing death year" % max_age

    # For people without a death year this is negative, so neither of the
    # two checks below fires and the record is kept.
    age = died - born
    if age > max_age:
        return "is too old (%d years)" % age
    if age == 0:
        return "age is 0"
    return None


def finalize(people, limit=None):
    """Split *people* into accepted and rejected records.

    Each record is a dict and is updated in place:

    * 'to' is set to 0 when it is absent or None;
    * rejected records get a 'missing_reason' string;
    * accepted records get a 'link' built from 'name' (spaces replaced by
      underscores, appended to the English Wikipedia URL prefix).

    Among accepted records only the first one seen for a given 'name' is
    kept; later ones with the same name are dropped silently (they are not
    counted as rejected).

    Accepted records must have 'name' and 'rating' keys; a missing key
    raises KeyError.

    Args:
        people: iterable of person dicts.
        limit: if not None, the accepted list is cut to its first *limit*
            entries after sorting.

    Returns:
        A ``(accepted, rejected)`` tuple. ``accepted`` is sorted by 'rating',
        highest first; ``rejected`` is in input order.
    """
    accepted = []
    rejected = []
    seen_names = set()

    for p in people:
        if p.get('to') is None:
            p['to'] = 0

        reason = rejection_reason(p)
        if reason is not None:
            p['missing_reason'] = reason
            rejected.append(p)
            continue

        # Avoid dups
        # TODO: merge useful stuff from dups?
        if p['name'] in seen_names:
            continue
        seen_names.add(p['name'])

        p['link'] = "https://en.wikipedia.org/wiki/" + p['name'].replace(" ", "_")
        accepted.append(p)

    accepted = sorted(accepted, key=itemgetter('rating'), reverse=True)
    if limit is not None:
        accepted = accepted[:limit]
    return accepted, rejected


def main(argv=None):
    """Command-line entry point.

    Loads JSON from stdin (expected to be a list of person objects), runs
    ``finalize`` on it, writes the accepted records to stdout as indented
    JSON, and writes a summary plus the 20 highest-rated rejected records
    to stderr.

    Args:
        argv: argument list for argparse; None makes argparse use the
            process's command-line arguments.
    """
    parser = argparse.ArgumentParser(description='Finalizes the dataset, sorts by rating.')
    parser.add_argument('--limit', default=None, type=int,
                        help='limits the number of resulting people (default: None)')
    args = parser.parse_args(argv)

    sorted_result, missing = finalize(json.load(sys.stdin), args.limit)

    # sys.stdout.write instead of the print statement so the script parses
    # under both Python 2 and Python 3; the trailing newline matches print.
    sys.stdout.write(json.dumps(sorted_result, indent=4, separators=(',', ': ')) + "\n")

    sys.stderr.write("Written %d people, %d are missing\n" % (len(sorted_result), len(missing)))

    if missing:
        sorted_missing = sorted(missing, key=itemgetter('rating'), reverse=True)
        sys.stderr.write("Top 20 people missing:\n")
        for nf in sorted_missing[:20]:
            sys.stderr.write("%s (rating %f), reason: %s\n" % (nf['name'], nf['rating'], nf['missing_reason']))


if __name__ == "__main__":
    main()