forked from AlexandaJerry/whisper-vits-japanese
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathaudio_metrics.py
77 lines (66 loc) · 3.64 KB
/
audio_metrics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# After running SRT_to_CSV_and_audio_split.py, this script prints basic metrics of the pre-processed dataset.
import pandas as pd
from glob import glob
import wave
import contextlib
import datetime
import time
import os
def audio_metrics():
    """Print file counts and audio-duration metrics for the pre-processed dataset.

    Reads ``./merged_csv/Filepath_Filesize.csv`` and the ``dev``, ``test`` and
    ``train`` CSVs under ``./final_csv/`` (all relative to the current working
    directory). Each subset has its ``wav_filesize`` and ``transcript`` columns
    dropped and is joined with the merged table on ``wav_filename``; the
    ``duration`` column of the joined rows is then averaged and summed.

    The report written to stdout contains:

    * the row count of each subset and of their concatenation,
    * average / total duration for the full dataset,
    * average / total duration for each subset on its own.

    Returns:
        None. The function only prints.

    Raises:
        FileNotFoundError: if one of the CSV files is missing (raised by
            ``pandas.read_csv``).
        KeyError: if an expected column is absent from one of the CSV files.
    """
    rule = '------------------------------------------------------------------------------------------------------'
    short_rule = '-------'

    # Per-file table; it supplies the 'duration' column used for every metric.
    # Loaded once and shared by the full-dataset and per-subset summaries.
    data = pd.read_csv('./merged_csv/Filepath_Filesize.csv')

    print(rule)
    print('COUNT OF FILES PER DATSET-SUBSET')
    print(rule)

    # Calculate length of subsets of dataset.
    dev = pd.read_csv('./final_csv/dev.csv')
    test = pd.read_csv('./final_csv/test.csv')
    train = pd.read_csv('./final_csv/train.csv')
    subsets = {'train': train, 'dev': dev, 'test': test}
    df_all = pd.concat([dev, test, train])

    counts_template = ('Length of dev-set: {} \nLength of test-set: {} \nLength of train-set: {}'
                       '\nLength of full-set: {}')
    print(counts_template.format(len(dev), len(test), len(train), len(df_all)))

    def summarise(frame):
        """Join *frame* with the per-file table and summarise its durations.

        Returns a tuple ``(matched_rows, average, total, total_as_clock)``
        where ``total_as_clock`` is ``str(datetime.timedelta(...))`` of the
        total number of seconds.
        """
        merged = frame.drop(['wav_filesize', 'transcript'], axis=1).merge(data, on='wav_filename')
        durations = merged['duration']
        total_seconds = durations.sum()
        # float() keeps timedelta happy when the column holds NumPy integers,
        # which timedelta does not accept directly.
        clock = str(datetime.timedelta(seconds=float(total_seconds)))
        return len(merged), durations.mean(), total_seconds, clock

    # Sum and average of audio length for the complete dataset.
    _, avg_length, total_length, length_of_audio = summarise(df_all)
    print(rule)
    print('AUDIO METRICS FOR FULL DATASET')
    print(rule)
    print('Average length of audio is: {} seconds.'.format(round(avg_length, 1)))
    print('Total length of audio in seconds is: {} seconds.'.format(round(total_length, 1)))
    print('Total length of audio is: {}'.format(length_of_audio))
    print(rule)

    def calculate_audio_metrics_subset(subset):
        """Print the duration metrics of one subset ('train', 'dev' or 'test')."""
        matched, subset_avg, subset_total, subset_clock = summarise(subsets[subset])
        # Lets the reader check that every file found a matching duration row.
        print('Length of dataframe {} that matches with the count of files in deepspeech csvs: {}'.format(subset, matched))
        print('Average length of {} audio is: {}'.format(subset, round(subset_avg, 1)))
        # Report the subset's own total, not the full-dataset total.
        print('Total length of {} audio in seconds is: {} seconds'.format(subset, round(subset_total, 1)))
        print('Total length of {} audio is: {} '.format(subset, subset_clock))

    print('AUDIO METRICS PER SUBSET:')
    print(rule)
    for position, subset_name in enumerate(('train', 'dev', 'test')):
        if position:
            # Separator between consecutive subset sections.
            print(short_rule)
        print(subset_name.upper())
        print(short_rule)
        calculate_audio_metrics_subset(subset_name)
    print(rule)