Commit

AlexLacson committed May 28, 2019
2 parents 0ff0642 + 347bb7f commit 83750b5
Showing 3 changed files with 243 additions and 14 deletions.
151 changes: 151 additions & 0 deletions evaluation.py
@@ -0,0 +1,151 @@
import os

import matplotlib.pyplot as plt
import numpy as np
import torch
import torchvision.transforms as transforms
from scipy.interpolate import interp1d
from scipy.optimize import brentq
from sklearn.metrics import roc_curve, roc_auc_score
from sklearn.metrics.pairwise import cosine_similarity

import constants as c
from model import *
from utils import *


def get_and_plot_k_eer_auc(label, scores, k=1):
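    # Split the trials into k contiguous folds, compute EER/AUC per fold,
    # plot one ROC curve per fold, and report the mean EER and AUC in percent.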
step = int(label.shape[0] / float(k))
EER_VECTOR = np.zeros((k, 1))
AUC_VECTOR = np.zeros((k, 1))

fig = plt.figure()
ax = fig.gca()

for split_num in range(k):
index_start = split_num * step
index_end = (split_num + 1) * step

EER_temp, AUC_temp, fpr, tpr = get_eer_auc(label[index_start:index_end], scores[index_start:index_end])

EER_VECTOR[split_num] = EER_temp
AUC_VECTOR[split_num] = AUC_temp

plt.setp(plt.plot(fpr, tpr, label='{} split'.format(split_num)), linewidth=2)

print("EER=", np.mean(EER_VECTOR) * 100)
print("AUC=", np.mean(AUC_VECTOR) * 100)

ax.set_xticks(np.arange(0, 1.1, 0.1))
ax.set_yticks(np.arange(0, 1.1, 0.1))

plt.title('ROC with {}-fold cross validation'.format(k))
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.grid()
plt.savefig('eer_auc.png')

# plt.show()


def get_eer_auc(label, distance):
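    # The equal error rate (EER) is the operating point where the false
    # positive rate equals the false negative rate, i.e. fpr = 1 - tpr;
    # brentq finds the root of 1 - x - tpr(x) on the interpolated ROC curve.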
fpr, tpr, thresholds = roc_curve(label, distance, pos_label=1)
auc = roc_auc_score(label, distance)
eer = brentq(lambda x: 1. - x - interp1d(fpr, tpr)(x), 0., 1.)

return eer, auc, fpr, tpr
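
# Minimal usage sketch for get_eer_auc (synthetic trials, illustration only):
#   y_true = np.array([1, 0, 1, 1, 0])
#   y_score = np.array([0.9, 0.2, 0.8, 0.6, 0.4])
#   eer, auc, fpr, tpr = get_eer_auc(y_true, y_score)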


class Evaluation:
def __init__(self, background_model, speaker_models_path):
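        # Load one embedding per enrolled speaker from <speaker_models_path>/<id>.pt,
        # mapping GPU-saved tensors onto the CPU when CUDA is unavailable.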

self.model = background_model
self.speaker_models = {}
for file in os.listdir(speaker_models_path):
            if torch.cuda.is_available():
                self.speaker_models[file.replace('.pt', '')] = torch.load(os.path.join(speaker_models_path, file))
            else:
                self.speaker_models[file.replace('.pt', '')] = torch.load(
                    os.path.join(speaker_models_path, file), map_location=lambda storage, loc: storage)

    def compute_Similarity(self, utterance, metric='cosine_similarity'):
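        # Embed the utterance with the background model and score it against every
        # enrolled speaker model; the one-hot vector marks the best-scoring speaker.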
self.model.eval()
speaker_features = self.model.create_Speaker_Model(utterance).detach().numpy()

        if metric == 'cosine_similarity':

similarity_vec = np.zeros(len(self.speaker_models))
assigned_speaker_vec = np.zeros(len(self.speaker_models))

for index, (key, speaker_model) in enumerate(self.speaker_models.items()):
similarity_vec[index] = cosine_similarity(speaker_features, speaker_model.detach().numpy())

assigned_speaker_vec[np.argmax(similarity_vec)] = 1

# print('the speaker was closer to {}'.format(
# list(self.speaker_models.items())[np.argmax(assigned_speaker_vec)][0]))

return similarity_vec, assigned_speaker_vec


def create_dataset(indexed_labels, origin_file_path):
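    # Build the evaluation dataset with the same CMVN + feature-cube
    # preprocessing used for enrollment.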
from load_data import AudioDataset

cube_shape = (80, 40, 20)
cube = FeatureCube(cube_shape)
transform = transforms.Compose([CMVN(), cube, ToTensor()])

dataset = AudioDataset(
origin_file_path,
c.DATA_ORIGIN,
indexed_labels=indexed_labels,
transform=transform)

return dataset


def evaluate():
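    # Score every test utterance against all enrolled speaker models and
    # flatten the per-speaker similarities into verification trials for EER/AUC.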
model_path = '/Users/leonidas/Downloads/model_14_percent_best_so_far.pt'

if not torch.cuda.is_available():
        model = C3D2(100, 1).load_checkpoint(torch.load(model_path, map_location=lambda storage, loc: storage))
else:
model = C3D2(100, 1).load_checkpoint(torch.load(model_path))

dir_path = os.path.join(c.ROOT, 'speaker_models')
test_set = os.path.join(c.ROOT, '50_first_ids.txt')
indexed_labels = np.load(c.ROOT + '/50_first_ids.npy', allow_pickle=True).item()

dataset = create_dataset(indexed_labels=indexed_labels, origin_file_path=test_set)

    evaluator = Evaluation(model, dir_path)

    speaker_model_ids = list(evaluator.speaker_models.keys())
labels = []
scores = []

for i in range(len(dataset)):
        features = dataset[i][0]
        a, b, cc, d = features.shape
        s = torch.from_numpy(features.reshape((1, a, b, cc, d)))

        similarity_vec, _ = evaluator.compute_Similarity(s)
scores.append(similarity_vec)

current_id = dataset.sound_files[i][0:7]

print('correct speaker {} , the speaker was closer to {}'.format(current_id,
speaker_model_ids[np.argmax(similarity_vec)]))

        true_label = np.zeros_like(similarity_vec)
        true_label[speaker_model_ids.index(current_id)] = 1
        labels.append(true_label)


labels = np.array(labels)
scores = np.array(scores)

# labels = np.array([[1., 0.], [1., 0.]])
# scores = np.array([[0.7, 0.3], [0.2, 0.8]])
# print(labels.flatten())

# fpr, tpr, thresholds = roc_curve(labels[0], scores[0], pos_label=1)
get_and_plot_k_eer_auc(labels.flatten(), scores.flatten(), k=1)


if __name__ == '__main__':
evaluate()
29 changes: 16 additions & 13 deletions gcloud_wrappers.py
@@ -2,6 +2,10 @@
from googleapiclient import discovery
from oauth2client.client import GoogleCredentials

project = 'dt2119-speaker-verification'
zone = 'europe-west1-b'
instance = 'dt2119-speaker-verification-vm'


def start_speech_vm(compute=None):
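    # Start the Compute Engine instance and block until the operation finishes.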
if compute is None:
@@ -14,16 +18,16 @@ def start_speech_vm(compute=None):
)

start_request = compute.instances().start(
-        project='dt2119-speaker-verification',
-        zone='europe-west1-b',
-        instance='dt2119-speaker-verification-vm'
+        project=project,
+        zone=zone,
+        instance=instance
)
start_response = start_request.execute()

wait_for_operation(
compute=compute,
-        project='dt2119-speaker-verification',
-        zone='europe-west1-b',
+        project=project,
+        zone=zone,
operation=start_response['name']
)

@@ -39,17 +43,17 @@ def stop_speech_vm(compute=None):
)

stop_request = compute.instances().stop(
-        project='dt2119-speaker-verification',
-        zone='europe-west1-b',
-        instance='dt2119-speaker-verification-vm'
+        project=project,
+        zone=zone,
+        instance=instance
)

stop_response = stop_request.execute()

wait_for_operation(
compute=compute,
-        project='dt2119-speaker-verification',
-        zone='europe-west1-b',
+        project=project,
+        zone=zone,
operation=stop_response['name']
)

@@ -70,6 +74,7 @@ def wait_for_operation(compute, project, zone, operation):

time.sleep(1)


# from google.cloud import storage
# def upload_blob(bucket_name, source_file_name, destination_blob_name):
# """Uploads a file to the bucket."""
@@ -83,8 +88,6 @@ def wait_for_operation(compute, project, zone, operation):


if __name__ == "__main__":


-    # start_speech_vm()
+    start_speech_vm()
# stop_speech_vm(compute)
pass
77 changes: 76 additions & 1 deletion model.py
@@ -104,7 +104,7 @@ def forward(self, x):
class C3D2(torch.nn.Module):
def __init__(self, n_labels, num_channels):
super(C3D2, self).__init__()

self.n_labels, self.num_channels = n_labels, num_channels
print('tasos model')

self.conv1_1 = torch.nn.Conv3d(num_channels, 16, kernel_size=(3, 1, 5), stride=(1, 1, 1))
@@ -174,6 +174,19 @@ def forward(self, x, development=True):
x = F.softmax(x, dim=1)
return x

def load_checkpoint(self, checkpoint_dict):
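        # Rebuild the model and copy in the matching weights, stripping the
        # 'module.' prefix that torch.nn.DataParallel adds to state-dict keys.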
model = C3D2(n_labels=self.n_labels, num_channels=self.num_channels)
model_dict = model.state_dict()
pretrained_dict = {k.replace('module.', ''): v for k, v in checkpoint_dict["state_dict"].items() if
k.replace('module.', '') in model_dict}
model_dict.update(pretrained_dict)
        model.load_state_dict(model_dict)
return model

def create_Speaker_Model(self, utterance):
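        # A speaker model is the network's output for the utterance with
        # development=False (the embedding used for enrollment and scoring).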
Speaker_Model = self.forward(utterance, development=False)
return Speaker_Model


class C2D(torch.nn.Module):
"""
@@ -215,3 +228,65 @@ def forward(self, x):
        x = F.softmax(self.FC3(x), dim=1)

return x


def create_speaker_models():
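    # Enrollment: embed every utterance with the trained network and save the
    # result as <save_speaker_models_path>/<speaker_id>.pt.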
import constants as c
import torchvision.transforms as transforms
from load_data import AudioDataset
import numpy as np
from utils import FeatureCube3C, FeatureCube, CMVN, ToTensor, SubsetRandomSampler
import os

model_path = '/Users/leonidas/Downloads/model_best.pt'
save_speaker_models_path = '/Users/leonidas/PycharmProjects/Speaker_Verification/speaker_models'
indexed_labels = np.load(c.ROOT + '/50_first_ids.npy', allow_pickle=True).item()
origin_file_path = c.ROOT + '/50_first_ids.txt'

train_paths_origin = np.genfromtxt(origin_file_path, dtype='str')

id_per_wav = []
for index, train in enumerate(train_paths_origin):
id_per_wav.append(train[0:7])

    if c.DERIVATIVE:
        num_channels = 3
        cube_shape = (80, 40, 20, num_channels)
        cube = FeatureCube3C(cube_shape)
    else:
        num_channels = 1
        cube_shape = (80, 40, 20)
        cube = FeatureCube(cube_shape)

transform = transforms.Compose([CMVN(), cube, ToTensor()])

dataset = AudioDataset(
origin_file_path,
c.DATA_ORIGIN,
indexed_labels=indexed_labels,
transform=transform)

train_loader = torch.utils.data.DataLoader(
dataset,
batch_size=1)

if not os.path.exists(save_speaker_models_path):
os.mkdir(save_speaker_models_path)

if not torch.cuda.is_available():
        model = C3D2(100, 1).load_checkpoint(torch.load(model_path, map_location=lambda storage, loc: storage))
else:
model = C3D2(100, 1).load_checkpoint(torch.load(model_path))

speaker_models = {}
for i, data in enumerate(train_loader, 1):
# get the inputs
train_input, train_labels = data
speaker_model = model.create_Speaker_Model(train_input)
        speaker_models[id_per_wav[i - 1]] = speaker_model
        torch.save(speaker_model, '{}/{}.pt'.format(save_speaker_models_path, id_per_wav[i - 1]))


if __name__ == '__main__':
create_speaker_models()
