forked from LC044/WeChatMsg
-
Notifications
You must be signed in to change notification settings - Fork 0
/
__init__.py
141 lines (110 loc) · 4.4 KB
/
__init__.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
import difflib
import os
import typing
import urllib.request
import simplejson as json
class FuzzyDict(dict):
    """Dictionary whose ``in`` and ``[]`` lookups fall back to fuzzy matching.

    An exact key always wins.  Otherwise every key is compared with the
    requested one using ``difflib.SequenceMatcher`` and the best-scoring key
    is used, provided its ratio reaches ``cutoff``.  All other ``dict``
    methods are inherited unchanged and are therefore exact.
    """

    def __init__(self, cutoff: float = 0.6):
        """Construct a new, empty FuzzyDict instance.

        :param cutoff: match ratio below which matches are not considered;
            a float between 0 and 1 (0 is no match, 1 is a perfect match).
        """
        super().__init__()
        self.cutoff = cutoff

    # Thin wrappers around the plain ``dict`` operations.  They are real
    # methods (not per-instance lambdas closing over ``self``) so that
    # copied or unpickled instances look things up in themselves rather
    # than in the object they were copied from.
    def _dict_contains(self, key: typing.Any) -> bool:
        """Exact (non-fuzzy) membership test."""
        return super().__contains__(key)

    def _dict_getitem(self, key: typing.Any) -> typing.Any:
        """Exact (non-fuzzy) item access."""
        return super().__getitem__(key)

    def _search(
        self, lookfor: typing.Any, stop_on_first: bool = False
    ) -> typing.Tuple[bool, typing.Any, typing.Any, float]:
        """Find the entry whose key best matches ``lookfor``.

        :param lookfor: the key to search for.
        :param stop_on_first: when True, stop scanning as soon as one key
            reaches the cutoff instead of looking for the best key.
        :return: ``(matched, key, value, ratio)`` where ``matched`` tells
            whether ``ratio`` reached the cutoff.  ``key`` and ``value`` are
            ``None`` when no key scored above zero.
        """
        # An exact hit needs no fuzzy matching at all.
        if self._dict_contains(lookfor):
            return True, lookfor, self._dict_getitem(lookfor), 1

        matcher = difflib.SequenceMatcher()
        matcher.set_seq1(lookfor)

        best_ratio = 0
        best_match = None
        best_key = None
        for key in self:
            try:
                matcher.set_seq2(key)
            except TypeError:
                # This key is not a sequence (e.g. an int): it cannot be
                # fuzzy matched, so skip it.
                continue

            try:
                ratio = matcher.ratio()
            except TypeError:
                # ``lookfor`` itself is not a sequence; no key can fuzzy
                # match it and the exact test above already failed.
                break

            if ratio > best_ratio:
                best_ratio = ratio
                best_key = key
                best_match = self._dict_getitem(key)

            if stop_on_first and ratio >= self.cutoff:
                break

        return best_ratio >= self.cutoff, best_key, best_match, best_ratio

    def __contains__(self, item: typing.Any) -> bool:
        """Return True when ``item`` matches a key exactly or fuzzily."""
        return self._search(item, True)[0]

    def __getitem__(self, lookfor: typing.Any) -> typing.Any:
        """Return the value of the key that best matches ``lookfor``.

        :raises KeyError: when no key reaches the cutoff; the message names
            the closest key and its ratio.
        """
        matched, key, item, ratio = self._search(lookfor)
        if not matched:
            raise KeyError(
                "'%s'. closest match: '%s' with ratio %.3f"
                % (str(lookfor), str(key), ratio)
            )
        return item
# Directory that holds this module; the JSON data files are read from it.
__HERE = os.path.abspath(os.path.dirname(__file__))

# Fuzzy-lookup table populated from map_filename.json.
with open(os.path.join(__HERE, "map_filename.json"), "r", encoding="utf8") as f:
    FILENAMES: FuzzyDict = FuzzyDict()
    FILENAMES.update(json.load(f).items())

# Fuzzy-lookup table populated from city_coordinates.json.
with open(os.path.join(__HERE, "city_coordinates.json"), "r", encoding="utf8") as f:
    COORDINATES: FuzzyDict = FuzzyDict()
    COORDINATES.update(json.load(f).items())

# Filled by register_url(): JS file prefix -> {name: [file_name, "js"]}.
EXTRA = {}
def register_url(asset_url: str):
    """Register the assets listed in ``<asset_url>/registry.json``.

    The registry is downloaded and parsed as JSON.  It must provide the keys
    ``PINYIN_MAP`` (name -> key into ``FILE_MAP``), ``FILE_MAP``
    (key -> file name) and ``JS_FOLDER``.  The resulting
    ``{name: [file_name, "js"]}`` mapping is stored in ``EXTRA`` under the
    URL prefix of the JS files.  Nothing happens when ``asset_url`` is empty.

    :param asset_url: base URL of the asset host, without a trailing slash.
    :raises KeyError: if the registry lacks one of the expected keys.
    :raises Exception: any error from opening the URL or decoding the JSON
        propagates unchanged to the caller.
    """
    if not asset_url:
        return

    registry = asset_url + "/registry.json"
    # Close the response deterministically instead of leaking the connection.
    with urllib.request.urlopen(registry) as response:
        raw = response.read()
    contents = json.loads(raw)

    files = {}
    pinyin_names = set()
    for name, pinyin in contents["PINYIN_MAP"].items():
        files[name] = [contents["FILE_MAP"][pinyin], "js"]
        pinyin_names.add(pinyin)

    # FILE_MAP keys that no PINYIN_MAP entry points at are registered under
    # their own key (the English names).
    for key, file_name in contents["FILE_MAP"].items():
        if key not in pinyin_names:
            files[key] = [file_name, "js"]

    js_folder_name = contents["JS_FOLDER"]
    if js_folder_name == "/":
        # "/" means the JS files sit directly under the asset URL.
        js_file_prefix = f"{asset_url}/"
    else:
        js_file_prefix = f"{asset_url}/{js_folder_name}/"
    EXTRA[js_file_prefix] = files
def register_files(asset_files: dict):
    """Merge ``asset_files`` into ``FILENAMES``; empty input is ignored."""
    if not asset_files:
        return
    FILENAMES.update(asset_files)
def register_coords(coords: dict):
    """Merge ``coords`` into ``COORDINATES``; empty input is ignored."""
    if not coords:
        return
    COORDINATES.update(coords)