-
Notifications
You must be signed in to change notification settings - Fork 127
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Matthew Murray
committed
Jul 11, 2017
1 parent
357e38b
commit ec18327
Showing
15 changed files
with
7,531 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,145 @@ | ||
# coding: utf-8 | ||
import pickle | ||
import sys | ||
import pandas as pd | ||
import requests | ||
import sox | ||
import numpy as np | ||
import os | ||
from subprocess import run, PIPE | ||
from PIL import Image | ||
import tempfile | ||
import re | ||
import time | ||
import string | ||
from boto.s3.connection import S3Connection | ||
from boto.s3.key import Key | ||
import glob | ||
|
||
conn = S3Connection('', '') | ||
s3bucket = conn.get_bucket('spectrograms') | ||
k = Key(s3bucket) | ||
|
||
df = pd.read_pickle('final_data.pkl') | ||
|
||
DEFAULT_IMG_SIZE = 256 | ||
DATA_DIR = '' | ||
|
||
# downloads the mp3 from juno | ||
def download(url, file_name): | ||
with open(file_name, "wb") as file: | ||
response = requests.get(url) | ||
file.write(response.content) | ||
|
||
# helper function to delete files no longer needed | ||
def delete_file(file_path): | ||
os.remove(file_path) | ||
|
||
# creates a mono version of the file | ||
# deletes original stero mp3 and renames the temp | ||
# mono file to the original stero filename | ||
def set_to_mono(input_file): | ||
tmp_name = 'tmp.mp3' | ||
command = "sox {} {} remix 1,2".format(input_file, tmp_name) | ||
run(command, shell=True, stdin=PIPE, stdout=PIPE) | ||
delete_file(input_file) | ||
os.rename(tmp_name, input_file) | ||
|
||
# converts the audio to spectrogram | ||
def audio_to_spect(input_file, output_file): | ||
command = "sox {} -n spectrogram -Y 300 -X 50 -m -r -o {}".format(input_file, output_file) | ||
run(command, shell=True, stdin=PIPE, stdout=PIPE) | ||
delete_file(input_file) | ||
|
||
# helper function - gets dimensions of the spectrogram | ||
def get_spect_dims(input_img): | ||
img_width, img_height = input_img.size | ||
return img_width, img_height | ||
|
||
# helper function - calculates the number of slices available from the full size spectrogram | ||
def get_num_slices(img_width): | ||
n_slices = img_width // DEFAULT_IMG_SIZE | ||
return n_slices | ||
|
||
# helper function - returns a list of coordinates/dimensions where to split the spectrogram | ||
def get_slice_dims(input_img): | ||
img_width, img_height = get_spect_dims(input_img) | ||
num_slices = get_num_slices(img_width) | ||
unused_size = img_width - (num_slices * DEFAULT_IMG_SIZE) | ||
start_px = 0 + unused_size | ||
image_dims = [] | ||
for i in range(num_slices): | ||
img_width = DEFAULT_IMG_SIZE | ||
image_dims.append((start_px, start_px + DEFAULT_IMG_SIZE)) | ||
start_px += DEFAULT_IMG_SIZE | ||
return image_dims | ||
|
||
# slices the spectrogram into individual sample images | ||
def slice_spect(input_file): | ||
input_file_cleaned = input_file.replace('.png','') | ||
img = Image.open(input_file) | ||
dims = get_slice_dims(img) | ||
counter = 0 | ||
for dim in dims: | ||
counter_formatted = str(counter).zfill(3) | ||
img_name = '{}__{}.png'.format(input_file_cleaned, counter_formatted) | ||
start_width = dim[0] | ||
end_width = dim[1] | ||
sliced_img = img.crop((start_width, 0, end_width, DEFAULT_IMG_SIZE)) | ||
sliced_img.save(DATA_DIR + img_name) | ||
counter += 1 | ||
delete_file(input_file) | ||
|
||
def create_file_names(id): | ||
genre_list = list(df['parent_genre']) | ||
genre_name = str(genre_list[id]).lower() | ||
genre_name = genre_name.replace('/','_') | ||
genre_name = genre_name.replace(' ','_') | ||
genre_name = genre_name.replace('&', 'n') | ||
id_list = list(df['id']) | ||
id_name = (id_list[id]) | ||
track_name = '{}__{}.mp3'.format(genre_name, id_name) | ||
spect_name = track_name.replace('.mp3','') | ||
spect_name = '{}.png'.format(spect_name) | ||
return track_name, spect_name, genre_name | ||
|
||
url_list = list(df['track_url']) | ||
for track_id in range(len(df)): | ||
url = url_list[track_id] | ||
track_name, spect_name, genre_name = create_file_names(track_id) | ||
print('Track: {}, Spect: {}, Genre: {}' | ||
.format(track_name, spect_name, genre_name)) | ||
|
||
try: | ||
download(url, track_name) | ||
set_to_mono(track_name) | ||
audio_to_spect(track_name, spect_name) | ||
slice_spect(spect_name) | ||
|
||
# all png files should now be in the working directory | ||
file_list = glob.glob('*.png') | ||
for file in file_list: | ||
# get genre from start of file name | ||
genre_name = file.split('__')[0] | ||
|
||
# set file name ready to upload to s3 | ||
full_key_name = '{}/{}'.format(genre_name, file) | ||
|
||
try: | ||
# send file to s3 | ||
k.key = full_key_name | ||
k.set_contents_from_filename(file) | ||
# once copied, delete from local | ||
delete_file(file) | ||
except: | ||
print('Problem copying file {}'.format(file)) | ||
pass | ||
|
||
time.sleep(5) | ||
|
||
except KeyboardInterrupt: | ||
sys.exit() | ||
except: | ||
print('Something went wrong. Moving to next file') | ||
pass | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
from keras import backend as K | ||
from keras.models import Sequential | ||
from keras.layers.core import Flatten, Dense, Dropout, Activation | ||
from keras.optimizers import rmsprop | ||
from keras.models import Sequential | ||
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img | ||
from keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D | ||
from keras.callbacks import EarlyStopping, ModelCheckpoint | ||
from keras.layers import Input, Dense | ||
import os | ||
import numpy as np | ||
import pandas as pd | ||
import numpy as np | ||
import pickle | ||
|
||
# Set values | ||
|
||
num_classes = 9 | ||
image_size = 256 | ||
nb_epoch = 20 | ||
batch_size = 128 | ||
|
||
train_data_dir = 'data/train' | ||
validation_data_dir = 'data/validation' | ||
|
||
nb_train_samples = 120000 | ||
nb_validation_samples = 42000 | ||
|
||
if K.image_data_format() == 'channels_first': | ||
input_shape = (3, image_size, image_size) | ||
else: | ||
input_shape = (image_size, image_size, 3) | ||
|
||
# Specify model | ||
|
||
# callbacks | ||
early_stopping = EarlyStopping(monitor='val_loss', patience=3) | ||
save_best_model = ModelCheckpoint(filepath='model_.{epoch:02d}_{val_loss:.2f}.hdf5', verbose=1, | ||
monitor='val_loss') | ||
|
||
# instantiate Sequential model | ||
model = Sequential() | ||
|
||
model.add(Conv2D(filters=64, kernel_size=2, strides=2, activation='elu', kernel_initializer='glorot_normal', input_shape=input_shape)) | ||
model.add(MaxPooling2D(pool_size=2, padding='same')) | ||
|
||
model.add(Conv2D(filters=128, kernel_size=2, strides=2, activation='elu', kernel_initializer='glorot_normal')) | ||
model.add(MaxPooling2D(pool_size=2, padding='same')) | ||
|
||
model.add(Conv2D(filters=256, kernel_size=2, strides=2, activation='elu', kernel_initializer='glorot_normal')) | ||
model.add(MaxPooling2D(pool_size=2, padding='same')) | ||
|
||
model.add(Conv2D(filters=512, kernel_size=2, strides=2, activation='elu', kernel_initializer='glorot_normal')) | ||
model.add(MaxPooling2D(pool_size=2, padding='same')) | ||
|
||
model.add(Flatten()) | ||
model.add(Dense(128)) | ||
|
||
model.add(Activation('elu')) | ||
model.add(Dropout(0.5)) | ||
|
||
model.add(Dense(num_classes)) | ||
model.add(Activation('softmax')) | ||
opt = rmsprop() | ||
|
||
model.compile(loss='categorical_crossentropy', | ||
optimizer = opt, | ||
metrics = ['accuracy']) | ||
|
||
# Image generators | ||
train_datagen = ImageDataGenerator(rescale= 1./255) | ||
validation_datagen = ImageDataGenerator(rescale=1./255) | ||
|
||
train_generator = train_datagen.flow_from_directory( | ||
train_data_dir, | ||
target_size=(image_size, image_size), | ||
shuffle=True, | ||
batch_size=batch_size, | ||
class_mode='categorical' | ||
) | ||
|
||
validation_generator = validation_datagen.flow_from_directory( | ||
validation_data_dir, | ||
target_size=(image_size, image_size), | ||
batch_size=batch_size, | ||
shuffle=True, | ||
class_mode='categorical' | ||
) | ||
|
||
# Fit model | ||
history = model.fit_generator(train_generator, | ||
steps_per_epoch=(nb_train_samples // batch_size), | ||
epochs=nb_epoch, | ||
validation_data=validation_generator, | ||
callbacks=[early_stopping, save_best_model], | ||
validation_steps=(nb_validation_samples // batch_size) | ||
) | ||
|
||
# Save model | ||
model.save_weights('full_model_weights.h5') | ||
model.save('model.h5') |
Oops, something went wrong.