SoundLoader.py
import numpy as np
import torch
from torch.utils.data import Dataset
import librosa
import os
"""
--------------------------------------------------
Sound Dataset
--------------------------------------------------
"""
class LungSoundDS(Dataset):
    def __init__(self, df, data_path, transform=None):
        self.df = df
        self.data_path = data_path
        self.transform = transform
        self.sr = 22050
        self.n_bins = 96

    # ----------------------------
    # Number of items in dataset
    # ----------------------------
    def __len__(self):
        return len(self.df)

    # ----------------------------
    # Get i'th item in dataset
    # ----------------------------
    def __getitem__(self, idx):
        # Full path of the audio file
        audio_path = os.path.join(self.data_path, self.df.filename.iloc[idx])
        # Get the class ID
        class_id = self.df.id.iloc[idx]
        # Load the audio file, resampled to the target rate; keep the signal only
        x, _ = librosa.load(audio_path, sr=self.sr)
        # Three time-frequency features; hop_length=512 (melspectrogram's default)
        # keeps all three at the same (n_bins, frames) shape
        mfcc = librosa.power_to_db(
            librosa.feature.mfcc(y=x, sr=self.sr, n_mfcc=self.n_bins,
                                 hop_length=512, htk=True),
            ref=np.max)
        mel = librosa.power_to_db(
            librosa.feature.melspectrogram(y=x, sr=self.sr, n_mels=self.n_bins),
            ref=np.max)
        cqt_abs = np.abs(librosa.cqt(y=x, sr=self.sr, hop_length=512,
                                     n_bins=self.n_bins))
        cqt = librosa.amplitude_to_db(cqt_abs, ref=np.max)
        # Stack the three features as channels of a single 3-channel "image"
        image = np.stack((mfcc, mel, cqt), axis=0)
        image = torch.from_numpy(image)
        # Apply optional transformations
        if self.transform:
            image = self.transform(image)
        return image, class_id
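
A minimal usage sketch (not part of the original file), assuming a metadata DataFrame with the `filename` and `id` columns that `__getitem__` reads above; the CSV name "metadata.csv" and the directory "audio/" are hypothetical placeholders. Batching with a DataLoader additionally assumes all clips have the same duration, since the feature maps share a time axis of variable length otherwise.

if __name__ == "__main__":
    import pandas as pd
    from torch.utils.data import DataLoader

    # Hypothetical metadata file with "filename" and "id" columns
    df = pd.read_csv("metadata.csv")
    ds = LungSoundDS(df, data_path="audio/")  # hypothetical audio directory

    # Single item: a (3, n_bins, frames) float tensor and its class ID
    image, class_id = ds[0]
    print(image.shape, class_id)

    # Batching assumes equal-length clips (or a cropping/padding transform)
    loader = DataLoader(ds, batch_size=16, shuffle=True)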