Add assignment-2 related scripts

emrekgn · Apr 27, 2017 · 2acf175 · 2acf175
1 parent 3511b18
commit 2acf175
Show file tree

Hide file tree

Showing 12 changed files with 605 additions and 6 deletions.
diff --git a/assignment-2/SR.py b/assignment-2/SR.py
@@ -0,0 +1,129 @@
+#!/usr/bin/env python
+
+from __future__ import division
+from sys import byteorder
+from array import array
+import os
+from python_speech_features import mfcc
+from scipy.io import wavfile
+import matplotlib.pyplot as plt
+import numpy
+import pyaudio
+import wave
+
+FREQUENCY = 44100  # sampling rate in Hz; default only, main() replaces it with the rate read from INPUT_PATH
+INPUT_PATH = 'record/record.wav'  # WAV file loaded by main()
+OUTPUT_ZERO_CROSSING_RATE = 'output/zero-crossing-rate.png'  # zero-crossing plot saved by analyze()
+OUTPUT_ENERGY = 'output/short-time-energy.png'  # short-time energy plot saved by analyze()
+OUTPUT_SPECTROGRAM = 'output/spectrogram.png'  # not referenced by the functions visible in this file
+OUTPUT_RAW_SIGNAL = 'output/raw-signal.png'  # raw-signal plot saved by analyze()
+OUTPUT_MFCC_TXT = 'output/mfcc-features.txt'  # MFCC matrix saved as text by analyze()
+OUTPUT_MFCC_FIG = 'output/mfcc-features.png'  # MFCC matrix image saved by analyze()
+
+
+def analyze(signal):
+    log('Analyzing audio signal...\n')
+    signal = signal / max(abs(signal))  # scale for plotting and calculations
+    assert min(signal) >= -1 and max(signal) <= 1
+
+    # Prints some stats
+    log('Frequency ==> {} Hz\n'.format(FREQUENCY))  # sampling rate
+    log('Length of signal  ==> {} samples\n'.format(len(signal)))
+    log('Signal  ==> {}\n'.format(signal))
+
+    sampsPerMilli = int(FREQUENCY / 1000)
+    millisPerFrame = 20
+    sampsPerFrame = sampsPerMilli * millisPerFrame
+    nFrames = int(len(signal) / sampsPerFrame)  # number of non-overlapping _full_ frames
+
+    log('Samples/millisecond  ==> {}\n'.format(sampsPerMilli))
+    log('Samples/[%dms]frame  ==> % {} {}\n'.format(millisPerFrame, sampsPerFrame))
+    log('Number of frames     ==> {}\n'.format(nFrames))
+
+    # Raw signal
+    plt.figure()
+    plt.plot(signal)
+    plt.title('Raw Signal')
+    plt.xlabel('Sample')
+    plt.autoscale(tight='both')
+    plt.savefig(OUTPUT_RAW_SIGNAL)
+
+    # Short-time energy
+    STEs = []
+    for k in range(nFrames):
+        startIdx = k * sampsPerFrame
+        stopIdx = startIdx + sampsPerFrame
+        window = numpy.zeros(signal.shape)
+        window[startIdx:stopIdx] = 1  # rectangular window
+        STE = sum((signal ** 2) * (window ** 2))
+        STEs.append(STE)
+
+    plt.figure()
+    plt.plot(STEs)
+    plt.title('Energy')
+    plt.ylabel('ENERGY')
+    plt.xlabel('FRAME')
+    plt.autoscale(tight='both')
+    plt.savefig(OUTPUT_ENERGY)
+
+    # Zero-crossing rate
+    DC = numpy.mean(signal)
+    newSignal = signal - DC  # create a new signal, preserving old
+    log('DC               ==> {}\n'.format(DC))
+    log('mean(newSignal)  ==> {}\n'.format(numpy.mean(newSignal)))
+    ZCCs = []  # list of short-time zero crossing counts
+    for i in range(nFrames):
+        startIdx = i * sampsPerFrame
+        stopIdx = startIdx + sampsPerFrame
+        s = newSignal[startIdx:stopIdx]  # /s/ is the frame, named to correspond to the equation
+        ZCC = 0
+        for k in range(1, len(s)):
+            ZCC += 0.5 * abs(numpy.sign(s[k]) - numpy.sign(s[k - 1]))
+        ZCCs.append(ZCC)
+
+    plt.figure()
+    plt.plot(ZCCs)
+    plt.title('Zero Crossing Rate')
+    plt.ylabel('ZCC')
+    plt.xlabel('FRAME')
+    plt.autoscale(tight='both')
+    plt.savefig(OUTPUT_ZERO_CROSSING_RATE)
+
+    # Extract features
+    mfcc_features = mfcc(signal, FREQUENCY, nfilt=40, lowfreq=50)
+    numpy.savetxt(OUTPUT_MFCC_TXT, mfcc_features)
+    log('MFCC:\nNumber of windows = {}\n'.format(mfcc_features.shape[0]))
+    log('Length of each feature = {}\n'.format(mfcc_features.shape[1]))
+
+    # plt.figure()
+    # Transform the matrix so that the time domain is horizontal
+    mfcc_features = mfcc_features.T
+    plt.matshow(mfcc_features)
+    plt.title('MFCC')
+    plt.savefig(OUTPUT_MFCC_FIG)
+    #plt.show()
+
+    log("Done - results written to output directory.\n")
+
+
+def log(message):
+    """
+    Appends provided message into ./output/log.txt
+    :param message: 
+    :return: 
+    """
+    if not os.path.exists('./output'):
+        os.makedirs('./output')
+    with open('./output/log.txt', 'a+') as f:
+        f.write(message)
+
+
+def main():
+    global FREQUENCY
+    log('Reading endpointed audio file...\n')
+    FREQUENCY, signal = wavfile.read(INPUT_PATH)
+    analyze(signal)
+
+
+if __name__ == '__main__':  # run the analysis only when executed as a script, not on import
+    main()
diff --git a/assignment-2/output/log.txt b/assignment-2/output/log.txt
@@ -0,0 +1,15 @@
+Reading endpointed audio file...
+Analyzing audio signal...
+Frequency ==> 8000 Hz
+Length of signal  ==> 34122 samples
+Signal  ==> [ 0.0276996   0.01940453  0.02888461 ...,  0.13494297  0.13494297
+  0.12546289]
+Samples/millisecond  ==> 8
+Samples/[%dms]frame  ==> % 20 160
+Number of frames     ==> 213
+DC               ==> 0.031818708390729956
+mean(newSignal)  ==> 1.665887663701073e-18
+MFCC:
+Number of windows = 426
+Length of each feature = 13
+Done - results written to output directory.
diff --git a/assignment-2/output/mfcc-features.png b/assignment-2/output/mfcc-features.png