-
Notifications
You must be signed in to change notification settings - Fork 1
/
data_collection.py
273 lines (221 loc) · 10.7 KB
/
data_collection.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
import os
import pprint
import requests
from bs4 import BeautifulSoup
from music_objects import Chord, ChordShape, Scale
from urllib.parse import urljoin
import json
import music_data as md
'''
Archive of functions I made to collect and/or format data from different sources. The
formatted data now lives in the music_data.py file or in JSON files.
'''
def create_chord_dictionary_from_file(file_path):
    """Build a ``{chord name: integer spelling}`` mapping from a text file.

    A data line holds exactly three fields separated by ``', '`` (comma plus
    one space). The first field is used as the chord name and the third is
    split on whitespace and converted to a set of ints; the second field is
    ignored. Every line that does not split into exactly three fields
    (section headers, blank lines, malformed rows) is skipped.

    Args:
        file_path: Path of the chord-spelling text file.

    Returns:
        dict mapping each chord name to a set of ints. When a name occurs on
        several lines, the last occurrence wins.

    Raises:
        ValueError: If the third field contains a token that is not an int.
    """
    chord_dict = {}
    # Explicit UTF-8 so the result does not depend on the platform's locale
    # encoding; this matches the JSON helpers elsewhere in this module.
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            parts = line.strip().split(', ')
            # Three ', '-separated fields already imply the line has a comma,
            # so no separate "',' in line" check is needed.
            if len(parts) != 3:
                continue
            name, _, integer_field = parts
            chord_dict[name] = {int(token) for token in integer_field.split()}
    return chord_dict
def create_chords_from_file(chord_spellings):
    """Create ``Chord`` objects from a chord-spelling text file.

    The file alternates between header lines (no comma), which name the chord
    type for the rows that follow, and data lines of the form
    ``name, standard spelling, integer spelling`` separated by ``', '``.
    Both spellings are split on single spaces and passed to ``Chord`` as sets
    of strings (the integer spelling is NOT converted to ints here).

    Args:
        chord_spellings: Path of the chord-spelling text file.

    Returns:
        list of ``Chord`` objects in file order.

    Raises:
        ValueError: If a data line appears before any chord-type header.
        IndexError: If a data line has fewer than three ', '-separated fields.
    """
    chords = []
    chord_type = None  # set by the most recent header line
    with open(chord_spellings, 'r', encoding='utf-8') as file:
        for raw_line in file:
            line = raw_line.strip()
            if not line:
                # Blank separator lines must not reset the current chord type.
                continue
            if ',' not in line:
                chord_type = line
                continue
            if chord_type is None:
                raise ValueError(
                    "chord data line found before any chord type header: "
                    + repr(line)
                )
            parts = line.split(', ')
            name = parts[0]
            standard_spelling = set(parts[1].split(' '))
            integer_spelling = set(parts[2].split(' '))
            chords.append(Chord(chord_type, name, standard_spelling, integer_spelling))
    return chords
def process_scale_lines(lines):
    """Map scale names to their tone tuples from fixed-width text lines.

    A ';' is inserted at index 23 (the 24th column) of every line and the
    result is split on ';'. Only lines that yield exactly three fields are
    used: the first is the scale number, the second is ignored, and the
    third is the scale name. The number is converted with
    ``decimal_to_scale_tones``.

    Args:
        lines: Iterable of text lines.

    Returns:
        dict mapping the stripped scale name to the tuple returned by
        ``decimal_to_scale_tones``. Later duplicates overwrite earlier ones.
    """
    scales = {}
    for raw in lines:
        # Add the missing column separator, then split into fields.
        fields = (raw[:23] + ';' + raw[23:]).split(';')
        if len(fields) != 3:
            continue
        number, _binary_notes, label = fields
        tones = decimal_to_scale_tones(number.strip())
        scales[label.strip()] = tones
    return scales
def decimal_to_scale_tones(decimal_number):
    """Convert a scale number into the 1-based positions of its set bits.

    The number is rendered as a zero-padded 12-digit binary string and read
    left to right (most significant bit first): position 1 is the leftmost
    digit and position 12 the rightmost.

    Args:
        decimal_number: An int, or anything ``int()`` accepts (for example a
            numeric string).

    Returns:
        tuple of ints in ascending order, each between 1 and 12.

    Raises:
        ValueError: If the value cannot be converted to an int, or does not
            fit in 12 bits (outside 0..4095). Such values would otherwise
            produce a longer or sign-prefixed string and bogus positions.
    """
    value = int(decimal_number)
    if not 0 <= value < (1 << 12):
        raise ValueError(
            "scale number must be between 0 and 4095, got " + repr(decimal_number)
        )
    bits = format(value, '012b')
    return tuple(position for position, bit in enumerate(bits, start=1) if bit == '1')
def scrape_chord_shapes(url):
    """Download every ``.txt`` file linked from *url* into ``chord_shapes.txt``.

    The page at *url* is fetched and parsed; for each ``<a>`` whose ``href``
    ends in ``.txt`` the linked file is downloaded, a ``"<name>:"`` header
    (the file name without extension) is printed and written to
    ``chord_shapes.txt`` in the current working directory, and the file's
    text is handed to ``process_chord_shapes`` together with the open output
    file.

    This is a plain module-level function: the ``@staticmethod`` decorator it
    used to carry has no purpose outside a class and makes the name
    uncallable on Python versions before 3.10.

    Args:
        url: Address of the page listing the chord text files. Relative
            links are resolved against it.

    Raises:
        requests.exceptions.RequestException: On connection errors or when a
            request exceeds the timeout.
    """
    request_timeout = 30  # seconds; requests would otherwise wait indefinitely

    print("scraping url...\t", url)
    response = requests.get(url, timeout=request_timeout)
    soup = BeautifulSoup(response.text, 'html.parser')

    with open('chord_shapes.txt', 'w', encoding='utf-8') as file:
        for link in soup.find_all('a'):
            href = link.get('href')
            if not (href and href.endswith('.txt')):
                continue

            txt_url = urljoin(url, href)  # resolve relative links
            shape_name = os.path.splitext(os.path.basename(txt_url))[0]
            print(shape_name + ":\n")

            # NOTE: the HTTP status is not checked; the response body is
            # processed whatever status the server returned.
            txt_response = requests.get(txt_url, timeout=request_timeout)
            file.write(shape_name + ":\n")
            process_chord_shapes(txt_response.text, file)
def process_chord_shapes(txt_content, file):
    """Write one coordinate list per chord-shape line of *txt_content*.

    A line counts as a chord shape when it has at least one
    whitespace-separated token and every token is either a run of digits or
    ``'x'``. Each ``'x'`` becomes 0 and each number ``n`` becomes ``n + 1``.
    A ``ChordShape`` is built from those coordinates (second argument 0), and
    ``[shape.root] + coords`` is written to *file* as its ``str()`` form,
    one list per line.

    Blank and whitespace-only lines are skipped, so an empty coordinate list
    is never passed to ``ChordShape``.

    Args:
        txt_content: Full text of one downloaded chord file.
        file: Writable text file object that receives the output lines.
    """
    for line in txt_content.split('\n'):
        tokens = line.split()
        # all() is True for an empty token list, so require at least one
        # token explicitly.
        if not tokens:
            continue
        if not all(token.isdigit() or token == 'x' for token in tokens):
            continue
        coords = [0 if token == 'x' else int(token) + 1 for token in tokens]
        shape = ChordShape(coords, 0)
        file.write(str([shape.root] + coords) + "\n")
# Page of A-chord shapes on hakwright.co.uk. Presumably the page that was
# passed to scrape_chord_shapes() - no call is visible in this file; confirm.
url = 'https://www.hakwright.co.uk/guitarchords/A_chords.html'
# file = 'C:\\Users\\socce\\Desktop\\Guitar Project (Python)\\old_chord_assignments.txt'
# with open(file, 'r') as file:
# lines = file.readlines()
# chords = []
# for line in lines:
# if '[' in line:
# parts = line.split('[')
# parts = parts[1].split(']')
# parts = parts[0].split(',')
# parts = [int(part.strip()) for part in parts]
# print("ChordShape(", parts, "),\n")
def convert_chord_json():
    """Merge three chord JSON files into ``guitar_data.json``.

    Reads ``guitar.json``, ``guitar-ly.json`` and ``guitar-legacy.json`` from
    the current working directory. Each must contain a top-level ``'chords'``
    list. An entry that has a ``'base'`` key becomes
    ``{'name': ..., 'coords': [0, fret + base, ...]}`` (a leading 0 followed
    by one value per fret); any other entry becomes
    ``{'name': ..., 'copy': ...}``.

    The result is written as valid JSON with one chord per line. There is no
    comma after the last entry, so the file can be read back with
    ``json.load``.

    NOTE(review): ``base`` is added to every fret value, including any
    sentinel values the input may use for open or muted strings - confirm
    against the input schema.

    Raises:
        FileNotFoundError: If one of the input files is missing.
        KeyError: If an entry lacks an expected key.
    """
    json_files = ['guitar.json', 'guitar-ly.json', 'guitar-legacy.json']
    new_file = 'guitar_data.json'

    new_data = []
    for json_file in json_files:
        with open(json_file, 'r', encoding='utf-8') as file:
            guitar_data = json.load(file)
        for chord in guitar_data['chords']:
            if 'base' in chord:
                base = chord['base']
                coords = [0] + [fret + base for fret in chord['frets']]
                new_data.append({'name': chord['name'], 'coords': coords})
            else:
                new_data.append({'name': chord['name'], 'copy': chord['copy']})

    with open(new_file, 'w', encoding='utf-8') as out_file:
        out_file.write('{"chords": [\n')
        # Join with ",\n" rather than terminating each entry with a comma:
        # a trailing comma before "]" is not valid JSON.
        out_file.write(",\n".join(
            json.dumps(item, ensure_ascii=False) for item in new_data
        ))
        out_file.write('\n]}')
def name_spelling_json():
    """Write chord names and spellings from ``music_data`` to ``chord_data.json``.

    For every ``name`` in ``md.chord_map`` that is also a key of
    ``md.alternate_chord_names``, one entry
    ``{'names': [name, *alternates], 'spelling': sorted(chord_map[name][1])}``
    is written. The output is valid JSON with one entry per line.

    NOTE(review): chords without an entry in ``md.alternate_chord_names`` are
    not written - confirm that this is intended.
    """
    chord_map = md.chord_map  # local name avoids shadowing the builtin map()
    alternates = md.alternate_chord_names

    entries = []
    for name, spelling in chord_map.items():
        # Direct membership test instead of scanning every key for equality.
        if name not in alternates:
            continue
        names = [name] + list(alternates[name])
        entries.append(json.dumps(
            {'names': names, 'spelling': sorted(spelling[1])},
            ensure_ascii=False,
        ))

    with open('chord_data.json', 'w', encoding='utf-8') as file:
        file.write('{"chords": [\n')
        # No comma after the last entry: a trailing comma is invalid JSON.
        file.write(",\n".join(entries))
        file.write('\n]}')
#add new spellings to names_spellings.json
# names_coords = './names_coords_parsed.json'
# with open(names_coords, 'r', encoding='utf-8') as file1:
# with open('names_spellings.json', 'r', encoding='utf-8') as file2:
# data1 = json.load(file1)
# data2 = json.load(file2)
# i = 0
# for item in data1['chords']:
# spellings = [chord['spelling'] for chord in data2['chords']]
# if item['names'][0] == 'NC':
# continue
# root_note = md.parse_chord_note(item['names'][0])
# print(root_note, item['coords'])
# shape = ChordShape( item['coords'], root_note)
# curr_spelling = sorted(list(shape.chord.integer_spelling)) if shape.chord else None
# if curr_spelling not in spellings:
# data2['chords'].append({'names': item['names'], 'spelling': curr_spelling})
# i += 1
# print(i)
# with open('names_spellings2.json', 'w', encoding='utf-8') as out_file:
# out_file.write('{"chords": [\n')
# for item in data2:
# out_file.write(json.dumps(item, ensure_ascii=False) + ",\n")
# out_file.write(']}')
def _write_json_entries(path, key, entries):
    """Write ``{key: [entries...]}`` to *path* as valid JSON, one entry per line."""
    with open(path, 'w', encoding='utf-8') as out_file:
        out_file.write('{' + json.dumps(key) + ': [\n')
        # Entries are joined, not comma-terminated, so there is no trailing
        # comma before the closing bracket.
        out_file.write(",\n".join(
            json.dumps(entry, ensure_ascii=False) for entry in entries
        ))
        out_file.write('\n]}\n')


def parse_specific_shapes(shape_file, spelling_file, new_shape_file):
    """Convert named chord shapes to generic shapes and collect new spellings.

    For every entry of ``shape_file['chords']`` a ``ChordShape`` is built from
    its ``'coords'`` and the root note derived from its first name. Invalid
    shapes are skipped. For each valid shape:

    * its names are mapped through ``md.get_generic_chord_name`` and stored,
      together with ``[shape.root] + shape.coords``, in the generic shape
      list;
    * a progress line is printed, numbered by the count of valid shapes;
    * its spelling is appended to the spelling list unless an entry with the
      same set of tones is already present.

    Two files are written: the extended spelling list goes to
    *spelling_file* with ``".json"`` replaced by ``"2.json"``, and the generic
    shapes go to *new_shape_file*. Both are valid JSON with one entry per
    line.

    Args:
        shape_file: JSON file with a ``'chords'`` list whose items have
            ``'names'`` and ``'coords'``.
        spelling_file: JSON file with a ``'chords'`` list whose items have
            ``'names'`` and ``'spelling'``.
        new_shape_file: Output path for the generic shapes.
    """
    with open(shape_file, 'r', encoding='utf-8') as shape_f:
        specific_shapes = json.load(shape_f)
    with open(spelling_file, 'r', encoding='utf-8') as spell_f:
        spellings = json.load(spell_f)

    # Each generic shape is a root coordinate followed by the shape's coords.
    generic_shapes = {'shapes': []}
    valid_count = 0
    for shape in specific_shapes['chords']:
        root_note = md.get_note_from_chord(shape['names'][0])
        shape_obj = ChordShape(shape['coords'], root_note)
        # assumes is_valid is a plain bool attribute - TODO confirm
        if not shape_obj.is_valid:
            continue
        valid_count += 1

        generic_names = [md.get_generic_chord_name(name) for name in shape['names']]
        coords7 = [shape_obj.root] + shape_obj.coords
        generic_shapes['shapes'].append({'names': generic_names, 'coords': coords7})
        print(str(valid_count) + ": " + generic_names[0], shape_obj.chord.name,
              shape_obj.spelling, shape_obj.root)

        # Only add the spelling when no existing entry has the same tones;
        # appending unconditionally would duplicate spellings.
        already_known = any(
            set(entry['spelling']) == shape_obj.spelling
            for entry in spellings['chords']
        )
        if not already_known:
            spellings['chords'].append(
                {'names': generic_names, 'spelling': sorted(shape_obj.spelling)}
            )

    # TODO: cross-compare the names of generic shape entries (left unfinished).

    _write_json_entries(
        spelling_file.replace(".json", "2.json"), 'chords', spellings['chords']
    )
    _write_json_entries(new_shape_file, 'shapes', generic_shapes['shapes'])
# Relative paths of JSON data files. Presumably the shape and spelling inputs
# for parse_specific_shapes() - no call is visible in this file; confirm.
names_coords = './names_coords_parsed.json'
names_spelling = './names_spellings.json'
# with open('generic_shapes3.json', 'r', encoding='utf-8') as file:
# data = json.load(file)
# shapes = data['shapes']
# shapes2 = []
# for shape in shapes:
# coords = shape['coords']
# coords = md.reduced_coords(coords)
# if len(shapes2) > 0 and coords not in [shape['coords'] for shape in shapes2]:
# shapes2.append({'names': shape['names'], 'coords': coords})
# else :
# shapes2.append({'names': shape['names'], 'coords': coords})
# with open('generic_shapes4.json', 'w', encoding='utf-8') as out_file:
# out_file.write("{\"shapes\": [" + "\n")
# for entry in shapes2:
# out_file.write(json.dumps(entry, ensure_ascii=False) + ",\n")
# out_file.write("]}" + "\n")
# #each shape has 'names' and 'coords' keys