forked from microsoft/CNTK
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathComputeConfusion.m
184 lines (163 loc) · 6.08 KB
/
ComputeConfusion.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
function confusionData = ComputeConfusion(mlfFile)
% function confusionData = ComputeConfusion(mlfFile)
% Compute all the confusions for one experiment. Read in the TIMIT MLF file
% so we know which utterances we have. For each utterance, read in the
% CNTK output and compute the confusion matrix. Sum them all together.
if nargin < 1
mlfFile = 'TimitLabels.mlf';
end
%%
% Parse the Timit MLF file because it tells us the true phonetic labels
% for each segment of each utterance.
fp = fopen(mlfFile,'r');
segmentLabels = [];
scale=1e-7;
numberOfUtterances = 0;
confusionData = 0;
while 1
theLine = fgets(fp);
if isempty(theLine) || theLine(1) == -1
break;
end
if strncmp(theLine, '#!MLF!#', 7)
continue; % Ignore the header
end
if theLine(1) == '"' % Look for file name indication
numberOfUtterances = numberOfUtterances + 1;
fileName = strtok(theLine);
fileName = fileName(2:end-1);
segmentLabels = [];
end
if theLine(1) >= '0' && theLine(1) <= '9'
% Got a speech segment with times and phoneme label. Parse it.
c = textscan(theLine, '%d %d %s ');
b = double(c{1}(1)); e = double(c{2}); l = c{3}{1};
if isempty(segmentLabels)
clear segmentLabels;
segmentLabels(1000) = struct('begin', b, 'end', e, 'label', l);
segmentCount = 0;
end
segmentCount = segmentCount + 1;
% Add a new entry in the list of segments.
segmentLabels(segmentCount) = struct('begin', b*scale, 'end', e*scale, 'label', l);
end
if theLine(1) == '.'
% Found the end of the speech transcription. Process the new data.
c = ComputeConfusionOnce(fileName, segmentLabels(1:segmentCount));
confusionData = confusionData + c;
segmentLabels = [];
end
end
fclose(fp);
function Confusions = ComputeConfusionOnce(utteranceName, segmentLabels)
% function Confusions = ComputeConfusionOnce(utteranceName, labelData)
% Compute the confusion matrix for one TIMIT utterance. This routine takes
% the segment data (from the TIMIT label file) and a feature-file name. It
% transforms the feature file into a CNTK output file. It reads in the
% CNTK output file, and tabulates a confusion matrix. We do this one
% segment at a time, since TIMIT segments are variable length, and the CNTK
% output is sampled at regular intervals (10ms).
likelihoodName = strrep(strrep(utteranceName, 'Features/', 'Output/'), ...
'fbank_zda', 'log');
try
[likelihood,~] = htkread(likelihoodName);
catch me
fprintf('Can''t read %s using htkread. Ignoring.\n', likelihoodName);
Confusions = 0;
return
end
nStates = 183; % Preordained.
frameRate = 100; % Preordained
Confusions = zeros(nStates, nStates);
for i=1:size(segmentLabels, 2)
% Go through each entry in the MLF file for one utterance. Each entry
% lists the beginning and each of each speech state.
% Compare the true label with the winner of the maximum likelihood from
% CNTK.
beginIndex = max(1, round(segmentLabels(i).begin*frameRate));
endIndex = min(size(likelihood,1), round(segmentLabels(i).end*frameRate));
curIndices = beginIndex:endIndex;
[~,winners] = max(likelihood(curIndices,:),[], 2);
correctLabel = FindLabelNumber(segmentLabels(i).label);
for w=winners(:)' % increment one at a time
Confusions(correctLabel, w) = Confusions(correctLabel, w) + 1;
end
end
function labelNumber = FindLabelNumber(labelName)
% For each label name, turn the name into an index. The labels are listed,
% in order, in the TimitStateList file.
persistent stateList
if isempty(stateList)
stateList = ReadStateList('TimitStateList.txt');
end
for labelNumber=1:size(stateList,1)
if strcmp(labelName, stateList{labelNumber})
return;
end
end
labelNumber = [];
function stateList = ReadStateList(stateListFile)
% Read in the state list file. This file contains an ordered list of
% states, each corresponding to one label (and one output in the CNTK
% network.)
fp = fopen(stateListFile);
nStates = 183; % Preordained
stateList = cell(nStates, 1);
stateIndex = 1;
while true
theLine = fgets(fp);
if isempty(theLine) || theLine(1) == -1
break;
end
stateList{stateIndex} = theLine(1:end-1);
stateIndex = stateIndex + 1;
end
fclose(fp);
function [ DATA, HTKCode ] = htkread( Filename )
% [ DATA, HTKCode ] = htkread( Filename )
%
% Read DATA from possibly compressed HTK format file.
%
% Filename (string) - Name of the file to read from
% DATA (nSamp x NUMCOFS) - Output data array
% HTKCode - HTKCode describing file contents
%
% Compression is handled using the algorithm in 5.10 of the HTKBook.
% CRC is not implemented.
%
% Mark Hasegawa-Johnson
% July 3, 2002
% Based on function mfcc_read written by Alexis Bernard
% Found at: https://raw.githubusercontent.com/ronw/matlab_htk/master/htkread.m
%
fid=fopen(Filename,'r','b');
if fid<0,
error(sprintf('Unable to read from file %s',Filename));
end
% Read number of frames
nSamp = fread(fid,1,'int32');
% Read sampPeriod
sampPeriod = fread(fid,1,'int32');
% Read sampSize
sampSize = fread(fid,1,'int16');
% Read HTK Code
HTKCode = fread(fid,1,'int16');
%%%%%%%%%%%%%%%%%
% Read the data
if bitget(HTKCode, 11),
DIM=sampSize/2;
nSamp = nSamp-4;
%disp(sprintf('htkread: Reading %d frames, dim %d, compressed, from %s',nSamp,DIM,Filename));
% Read the compression parameters
A = fread(fid,[1 DIM],'float');
B = fread(fid,[1 DIM],'float');
% Read and uncompress the data
DATA = fread(fid, [DIM nSamp], 'int16')';
DATA = (repmat(B, [nSamp 1]) + DATA) ./ repmat(A, [nSamp 1]);
else
DIM=sampSize/4;
%disp(sprintf('htkread: Reading %d frames, dim %d, uncompressed, from %s',nSamp,DIM,Filename));
% If not compressed: Read floating point data
DATA = fread(fid, [DIM nSamp], 'float')';
end
fclose(fid);