-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathcache.py
188 lines (155 loc) · 6.43 KB
/
cache.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
"""
Caches derived data (exclusions, equivalents, handbook notes, faculty/school
mappings, program mappings, etc.) in local JSON files so that the algorithms
can load it quickly.
This should be run from the backend directory or via runprocessors.
"""
from functools import reduce
import operator
import re
from typing import Any, Literal
from algorithms.cache.cache_config import (CACHE_CONFIG, CACHED_EQUIVALENTS_FILE,
CACHED_EXCLUSIONS_FILE,
CACHED_WARNINGS_FILE,
CONDITIONS_PROCESSED_FILE,
COURSE_MAPPINGS_FILE,
COURSES_PROCESSED_FILE,
MAPPINGS_FILE,
PROGRAM_MAPPINGS_FILE,
PROGRAMS_FORMATTED_FILE)
from data.utility.data_helpers import read_data, write_data
def cache_equivalents():
    """
    Reads from processed courses and stores each course's equivalents in a
    map, keyed by course:
        COURSE: <that course's "equivalents" entry, copied as-is>

    The entry presumably has the shape
        {
            EQUIVALENT_1: 1,
            EQUIVALENT_2: 1
        }
    (assumed from the processed course data - TODO confirm).

    The result is written to CACHED_EQUIVALENTS_FILE.

    NOTE(review): this probably needs to run after all the conditions have
    been processed, like the exclusions cache - confirm.
    """
    processed_courses = read_data(COURSES_PROCESSED_FILE)
    equivalents_by_course = {
        code: details["equivalents"]
        for code, details in processed_courses.items()
    }
    write_data(equivalents_by_course, CACHED_EQUIVALENTS_FILE)
def cache_exclusions():
    """
    Reads from processed courses and stores, for every course, the union of
    its "exclusions" and "equivalents" entries in a map:
        COURSE: {
            EXCLUSION_1: 1,
            EXCLUSION_2: 1,
            EQUIVALENT_1: 1
        }

    The two entries are combined with `|`, so both must support that
    operator (presumably dicts of CODE: 1 - TODO confirm).

    The result is written to CACHED_EXCLUSIONS_FILE.

    NOTE: Should run this after all the conditions have been processed as
    sometimes exclusions are included inside the conditions text
    """
    processed_courses = read_data(COURSES_PROCESSED_FILE)
    exclusions_by_course = {
        code: details["exclusions"] | details["equivalents"]
        for code, details in processed_courses.items()
    }
    write_data(exclusions_by_course, CACHED_EXCLUSIONS_FILE)
def cache_handbook_note():
    """
    Reads from processed conditions and stores the handbook notes in a map
        COURSE: HANDBOOK_NOTE
    Courses whose condition data has no "handbook_note" key are left out.

    The result is written to CACHED_WARNINGS_FILE.

    NOTE: Condition warnings are created during the manual fix stage, so this
    will need to be re-run as more conditions are manually fixed.
    """
    processed_conditions = read_data(CONDITIONS_PROCESSED_FILE)
    notes_by_course = {
        code: details["handbook_note"]
        for code, details in processed_conditions.items()
        if "handbook_note" in details
    }
    write_data(notes_by_course, CACHED_WARNINGS_FILE)
def _tokenise_faculty(faculty: str) -> str:
    """
    Tokenise a faculty name, e.g. 'UNSW Business School' -> 'F Business'.

    The token is 'F ' followed by the first word after 'Faculty of ' or
    'UNSW ', or otherwise the first word of the name.

    Raises:
        ValueError: if no word can be extracted from the name.
    """
    if re.search(r"Faculty of.+", faculty):
        found = re.search(r"(?<=Faculty\sof\s)[^\s,]+", faculty)
    elif re.search(r"UNSW", faculty):
        found = re.search(r"(?<=UNSW\s)[^\s,]+", faculty)
    else:
        found = re.search(r"^\w+", faculty)

    if found is None:
        raise ValueError(f'no match found for faculty: {faculty}')
    return "F " + found.group()


def _tokenise_school(school: str) -> str:
    """
    Tokenise a school name, e.g. 'School of Psychology' -> 'S Psychology'.

    The token is 'S ' followed by the first word after 'School of the ',
    'School of ' or 'UNSW ', or otherwise the first word of the name.
    Names starting with 'UC' become 'S UC-<first word after "UC ">'.

    Raises:
        ValueError: if no word can be extracted from the name.
    """
    prefix = "S "
    # The guard requires whitespace after "the" so that names such as
    # "School of theology" fall through to the generic "School of" branch
    # instead of failing the lookbehind below.
    if re.search(r"School\sof\sthe\s.+", school):
        found = re.search(r"(?<=School\sof\sthe\s)[^\s,]+", school)
    elif re.search(r"School\sof\s.+", school):
        found = re.search(r"(?<=School\sof\s)[^\s,]+", school)
    elif re.search(r"^UC", school):
        found = re.search(r"(?<=UC\s)[^\s,]+", school)
        prefix = "S UC-"
    elif re.search(r"UNSW", school):
        found = re.search(r"(?<=UNSW\s)[^\s,]+", school)
    else:
        found = re.search(r"^\w+", school)

    if found is None:
        raise ValueError(f'no match found for school: {school}')
    return prefix + found.group()


def cache_mappings():
    """
    Writes to mappings.json and courseMappings.json (i.e maps courses to
    corresponding school/faculty)

    Two files are produced from the processed courses:
      - MAPPINGS_FILE: full faculty/school name -> short token
        (e.g. 'F Business', 'S Psychology')
      - COURSE_MAPPINGS_FILE: token -> { COURSE_CODE: 1, ... } for every
        course belonging to that faculty/school

    Every course must have 'code' and 'faculty' keys; 'school' is optional.

    Raises:
        ValueError: if a faculty or school name cannot be tokenised.
    """
    courses = read_data(COURSES_PROCESSED_FILE)
    mappings: dict[str, str] = {}
    course_mappings: dict[str, dict[str, Literal[1]]] = {}

    # add faculties to mappings.json (done before schools so that faculty
    # tokens come first in the written files)
    for course in courses.values():
        faculty = course['faculty']
        if faculty not in mappings:
            faculty_token = _tokenise_faculty(faculty)
            mappings[faculty] = faculty_token
            course_mappings[faculty_token] = {}

    # add schools to mappings.json
    for course in courses.values():
        if 'school' in course:
            school = course['school']
            if school not in mappings:
                school_token = _tokenise_school(school)
                mappings[school] = school_token
                course_mappings[school_token] = {}

    write_data(mappings, MAPPINGS_FILE)

    # finalise: register every course under its school (if any) and faculty
    for course in courses.values():
        course_code = course['code']
        if 'school' in course:
            course_mappings[mappings[course['school']]][course_code] = 1
        course_mappings[mappings[course['faculty']]][course_code] = 1

    write_data(course_mappings, COURSE_MAPPINGS_FILE)
def cache_program_mappings():
    """
    Maps CODE# to programs, e.g.
    {
        "ACTL#": {
            "3586": 1,
            "4520": 1,
        }
    }
    Achieves this by looking for a keyword in the program's title

    The keyword -> codes table is read from CACHE_CONFIG and the programs
    from PROGRAMS_FORMATTED_FILE; keyword matching is a case-insensitive
    substring test. Every code in the config gets an entry in the output,
    even when no program matches it. The result is written to
    PROGRAM_MAPPINGS_FILE.
    """
    keyword_codes: dict[str, list[str]] = read_data(CACHE_CONFIG)

    # Flatten the code lists directly: unlike reduce(operator.add, ...) this
    # also works for an empty config and does not build intermediate lists.
    mappings: dict[str, dict[str, Literal[1]]] = {
        code: {}
        for codes in keyword_codes.values()
        for code in codes
    }

    programs: dict[str, Any] = read_data(PROGRAMS_FORMATTED_FILE)
    for program in programs.values():
        title = program["title"].lower()  # lowered once per program
        for keyword, codes in keyword_codes.items():
            if keyword.lower() in title:
                for code in codes:
                    mappings[code][program["code"]] = 1

    write_data(mappings, PROGRAM_MAPPINGS_FILE)