comparison userProgramsASRforDummies/cHMM.m @ 38:c2204b18f4a2 tip

End nov big change
author Ray Meddis <rmeddis@essex.ac.uk>
date Mon, 28 Nov 2011 13:34:28 +0000
parents
children
comparison
equal deleted inserted replaced
37:771a643d5c29 38:c2204b18f4a2
1 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2 % This program is free software; you can redistribute it and/or modify
3 % it under the terms of the GNU General Public License as published by
4 % the Free Software Foundation; either version 2 of the License, or
5 % (at your option) any later version.
6 %
7 % This program is distributed in the hope that it will be useful,
8 % but WITHOUT ANY WARRANTY; without even the implied warranty of
9 % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 % GNU General Public License for more details.
11 %
12 % You can obtain a copy of the GNU General Public License from
13 % http://www.gnu.org/copyleft/gpl.html or by writing to
14 % Free Software Foundation, Inc.,675 Mass Ave, Cambridge, MA 02139, USA.
15 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
16
17 classdef cHMM
18 %HMMCLASS Handles all of the HTK related gubbins
19 % Please see the documentation located in a separate file for further
20 % information.
21
22 %% *********************************************************
23 % properties _ _
24 % | | (_)
25 % _ __ _ __ ___ _ __ ___ _ __| |_ _ ___ ___
26 % | '_ \| '__/ _ \| '_ \ / _ \ '__| __| |/ _ \/ __|
27 % | |_) | | | (_) | |_) | __/ | | |_| | __/\__ \
28 % | .__/|_| \___/| .__/ \___|_| \__|_|\___||___/
29 % | | | |
30 % |_| |_|
31 %************************************************************
32
33 %% **********************************************************
34 % Public properties - can be set by user
35 %************************************************************
properties (Access = public)
    % Root folder for the trained models; the methods of this class read
    % and write the sub-folders hmm0 ... hmm36 beneath it.
    hmmFolder

    % HTK parameter kind written into the prototype and macro files.
    paramType = 'USER_D_A'; %DELTAS and ACCELERATIONS

    % Feature vector length.
    numCoeff = 27; %9*3 THIS IS FOR PROBABILITY MODEL (not high spont+low spont which would be 18*3=54)

    % NOTE(review): the defaults below are built from pwd. MATLAB evaluates
    % property default expressions when the class definition is loaded, so
    % these presumably refer to the folder that was current at that moment
    % - confirm before relying on it.

    % Folder holding the HHEd edit scripts (sil1.hed, mix*_16.hed).
    HERestDataPath = fullfile(pwd, 'def', 'HERest_digit');

    % Not referenced by any method visible in this file.
    binPath = fullfile(pwd, 'def', 'bin');

    % HTK configuration file passed with -C to every tool.
    configFile = fullfile(pwd, 'def', 'config_STANDARD');

    % Model lists used while training and while testing respectively.
    trainWordListFile = fullfile(pwd, 'def', 'Grammar_digit', 'words3');
    testWordListFile = fullfile(pwd, 'def', 'Grammar_digit', 'wordsNoSevenZero');

    % Word network (-w) and dictionary handed to HVite by test().
    wordNetFile = fullfile(pwd, 'def', 'Grammar_digit', 'wdnetNoSP.slf');
    dictFile = fullfile(pwd, 'def', 'Grammar_digit', 'noSevenZeroDict');
end
49
50 %% **********************************************************
51 % Dependent - never set by user. Only calculated when needed
52 %************************************************************
properties (Dependent = true)
    % Path of the auto-generated prototype HMM; derived from hmmFolder by
    % get.protoFile every time it is read.
    protoFile % = fullfile(pwd, 'def', 'proto_RobANonly_9'); %probability only
end
56
57 %% *********************************************************
58 % methods _ _ _
59 % | | | | | |
60 % _ __ ___ ___| |_| |__ ___ __| |___
61 %| '_ ` _ \ / _ \ __| '_ \ / _ \ / _` / __|
62 %| | | | | | __/ |_| | | | (_) | (_| \__ \
63 %|_| |_| |_|\___|\__|_| |_|\___/ \__,_|___/
64 %************************************************************
65
66 methods
67 %% **********************************************************
68 % Constructor
69 %************************************************************
function obj = cHMM(hmmFolder)
%CHMM Construct an HMM handler.
%   obj = cHMM()           leaves hmmFolder at its (empty) default.
%   obj = cHMM(hmmFolder)  stores the given model folder on the object.
    folderWasGiven = (nargin >= 1);
    if folderWasGiven
        obj.hmmFolder = hmmFolder;
    end
end % ------ OF CONSTRUCTOR
75
76 %% **********************************************************
77 % genProto - generate task specific prototype
78 %************************************************************
function genProto(obj)
%GENPROTO Build the seed model set from the HCompV output.
%   Reads <hmmFolder>/hmm0/hmmdef and writes <hmmFolder>/hmm0/models by
%   delegating to models_1mixsilMat, which copies the definition once per
%   word model and appends a silence model.
    seedFolder = fullfile(obj.hmmFolder, 'hmm0');
    sourceDef  = fullfile(seedFolder, 'hmmdef');
    targetSet  = fullfile(seedFolder, 'models');
    obj.models_1mixsilMat(sourceDef, targetSet);
end % ------ OF GENPROTO
84
85 %% **********************************************************
86 % istrained
87 %************************************************************
function boolans = istrained(obj)
%ISTRAINED True when the final model set of the training recipe exists.
%   boolans = istrained(obj) returns a logical scalar that is true when
%   <hmmFolder>/hmm36/models is present on disk.
%
%   The directory listing is reduced to a real logical rather than
%   returning the raw entry count, so the result is always 0 or 1.
    finalModels = fullfile(obj.hmmFolder, 'hmm36', 'models');
    boolans = ~isempty(dir(finalModels));
end
91
92 %% **********************************************************
93 % Train
94 %************************************************************
function train(obj, trainFeatureFolder)
%TRAIN Run the complete HTK training recipe (hmm0 ... hmm36).
%   train(obj, trainFeatureFolder)
%
%   trainFeatureFolder must contain 'list.scp' (one feature file name per
%   line, relative to the folder) and 'labels.mlf'.
%
%   Steps, in order:
%     1. create <hmmFolder>/hmm0 ... hmm36 and the prototype HMM
%     2. HCompV -> hmm0 (hmmdef + vFloors), then build macros and models
%     3. three HERest passes with pruning (hmm1..hmm3)
%     4. copy hmm3 to hmm4 and append the sp model
%     5. for every row of the schedule below: one HHEd edit followed by a
%        run of HERest passes
%
%   HCompV, HERest and HHEd are invoked by bare name, so they have to be
%   reachable through the system path. Every path placed on a command
%   line is wrapped in double quotes so that folders containing spaces
%   work. A non-zero exit status from a tool is reported as a warning.
%
%   The short-pause model is abandoned in the current recipe, so the same
%   word list and label file are used before and after the sp step.

    quoted   = @(p) ['"' p '"'];
    hmmDir   = obj.hmmFolder;
    stageDir = @(k) fullfile(hmmDir, ['hmm' num2str(k)]);

    config    = obj.configFile;
    wordList  = obj.trainWordListFile;
    proto     = obj.protoFile; % does not exist on disk until makeProtoHmm runs below
    listFile  = fullfile(hmmDir, 'tmp.list');
    trainList = fullfile(trainFeatureFolder, 'list.scp');
    labels    = fullfile(trainFeatureFolder, 'labels.mlf');

    % One row per HHEd edit:
    %   edit script | stage edited | HERest stages that follow | messages
    schedule = { ...
        'sil1.hed',     4,  6:8,   'SP model fixed',          '6 iterations complete'; ...
        'mix2_16.hed',  8, 10:12,  '2 gaussians per mixture', '9 iterations completed'; ...
        'mix3_16.hed', 12, 14:20,  '3 gaussians per mixture', '16 iterations completed'; ...
        'mix5_16.hed', 20, 22:28,  '5 gaussians per mixture', '23 iterations completed'; ...
        'mix7_16.hed', 28, 30:36,  '7 gaussians per mixture', '30 iterations completed'};

    % ---- folders and prototype ----------------------------------------
    if ~exist(hmmDir, 'dir')
        mkdir(hmmDir);
    end
    for k = 0:36
        if ~exist(stageDir(k), 'dir')
            mkdir(stageDir(k));
        end
    end
    obj.makeProtoHmm(proto, obj.paramType, obj.numCoeff, 18);

    % ---- expand list.scp into a list of full paths ---------------------
    fid = fopen(trainList, 'r');
    if fid < 0
        error('cHMM:train:cannotOpen', 'Cannot open training list "%s".', trainList);
    end
    disp(trainList)
    entries = textscan(fid, '%s', 'Delimiter', '\n');
    fclose(fid);
    entries = entries{1};

    fid = fopen(listFile, 'w');
    if fid < 0
        error('cHMM:train:cannotOpen', 'Cannot create "%s".', listFile);
    end
    for k = 1:numel(entries)
        if isempty(entries{k})
            continue % a blank line would otherwise turn into the bare folder path
        end
        fprintf(fid, '%s\n', fullfile(trainFeatureFolder, entries{k}));
    end
    fclose(fid);

    % ---- flat start ------------------------------------------------------
    % HCompV just gets the vfloor stuff out so we can begin approximating
    cmd = ['HCompV -T 2 -D -C ' quoted(config) ' -o hmmdef -f 0.01 -m -S ' quoted(listFile) ...
           ' -M ' quoted(stageDir(0)) ' ' quoted(proto)];
    if system(cmd) ~= 0
        warning('cHMM:train:toolFailed', 'Command returned a non-zero status: %s', cmd);
    end

    obj.macroMat(obj.numCoeff, obj.paramType, ...
        fullfile(stageDir(0), 'vFloors'), fullfile(stageDir(0), 'macros'));

    %MAKE THE INITIAL MODEL PROTOTYPE
    genProto(obj);

    disp('Seed HMM successfully Produced.....');

    % ---- first three re-estimation passes (with pruning thresholds) ------
    for k = 1:3
        disp(k)
        cmd = ['HERest -D -C ' quoted(config) ' -I ' quoted(labels) ...
               ' -t 250.0 150.0 1000.0 -S ' quoted(listFile) ...
               ' -H ' quoted(fullfile(stageDir(k-1), 'macros')) ...
               ' -H ' quoted(fullfile(stageDir(k-1), 'models')) ...
               ' -M ' quoted(stageDir(k)) ' ' quoted(wordList)];
        if system(cmd) ~= 0
            warning('cHMM:train:toolFailed', 'Command returned a non-zero status: %s', cmd);
        end
    end

    disp('3 iterations complete');

    % ---- hmm4 = hmm3 + sp model ------------------------------------------
    rmdir(stageDir(4), 's')
    copyfile(stageDir(3), stageDir(4))

    % Append an "sp" model to hmm4/models, built from state 3 of the
    % silence model found in hmm3/models.
    obj.spmodel_genMat(fullfile(stageDir(3), 'models'), fullfile(stageDir(4), 'models'));

    % ---- HHEd edit followed by HERest passes, once per schedule row ------
    for s = 1:size(schedule, 1)
        [hedName, editedStage, reestStages, editMsg, doneMsg] = schedule{s, :};

        cmd = ['HHEd -T 2 -H ' quoted(fullfile(stageDir(editedStage), 'macros')) ...
               ' -H ' quoted(fullfile(stageDir(editedStage), 'models')) ...
               ' -M ' quoted(stageDir(editedStage + 1)) ...
               ' ' quoted(fullfile(obj.HERestDataPath, hedName)) ' ' quoted(wordList)];
        if system(cmd) ~= 0
            warning('cHMM:train:toolFailed', 'Command returned a non-zero status: %s', cmd);
        end
        disp(editMsg)

        for k = reestStages
            disp(k)
            cmd = ['HERest -C ' quoted(config) ' -I ' quoted(labels) ' -S ' quoted(listFile) ...
                   ' -H ' quoted(fullfile(stageDir(k-1), 'macros')) ...
                   ' -H ' quoted(fullfile(stageDir(k-1), 'models')) ...
                   ' -M ' quoted(stageDir(k)) ' ' quoted(wordList)];
            if system(cmd) ~= 0
                warning('cHMM:train:toolFailed', 'Command returned a non-zero status: %s', cmd);
            end
        end

        disp(doneMsg)
    end

end % ------ OF TRAIN
250
251 %% **********************************************************
252 % Test
253 %************************************************************
function test(obj, testFeatureFolder)
%TEST Recognise the test features with the hmm36 models using HVite.
%   test(obj, testFeatureFolder)
%
%   testFeatureFolder must contain 'list.scp' (one feature file name per
%   line, relative to the folder). The expanded list is written to
%   'tmp.list' and the recognition output to 'result.mlf', both inside
%   testFeatureFolder.
%
%   HVite is invoked by bare name, so it has to be reachable through the
%   system path. Every path placed on the command line is wrapped in
%   double quotes so that folders containing spaces work. A non-zero exit
%   status is reported as a warning.

    quoted = @(p) ['"' p '"'];
    flags  = '-p 0.0 -s 0.0';

    wordList = obj.testWordListFile;
    net      = obj.wordNetFile;
    dict     = obj.dictFile;
    config   = obj.configFile;

    disp ('Now testing with 7 mixture HMMs')
    modFile = fullfile(obj.hmmFolder, 'hmm36', 'models');
    macFile = fullfile(obj.hmmFolder, 'hmm36', 'macros');

    scpFile    = fullfile(testFeatureFolder, 'list.scp');
    listFile   = fullfile(testFeatureFolder, 'tmp.list');
    resultFile = fullfile(testFeatureFolder, 'result.mlf');

    % ---- expand list.scp into a list of full paths ---------------------
    ftest = fopen(scpFile, 'r');
    if ftest < 0
        error('cHMM:test:cannotOpen', 'Cannot open test list "%s".', scpFile);
    end
    entries = textscan(ftest, '%s', 'Delimiter', '\n');
    fclose(ftest);
    entries = entries{1};

    flist = fopen(listFile, 'w');
    if flist < 0
        error('cHMM:test:cannotOpen', 'Cannot create "%s".', listFile);
    end
    for k = 1:numel(entries)
        if isempty(entries{k})
            continue % a blank line would otherwise turn into the bare folder path
        end
        fprintf(flist, '%s\n', fullfile(testFeatureFolder, entries{k}));
    end
    fclose(flist);

    % ---- recognition -----------------------------------------------------
    cmd = ['HVite -D -H ' quoted(macFile) ' -H ' quoted(modFile) ' -S ' quoted(listFile) ...
           ' -C ' quoted(config) ' -w ' quoted(net) ' -l ''*'' -i ' quoted(resultFile) ...
           ' ' flags ' ' quoted(dict) ' ' quoted(wordList)];
    if system(cmd) ~= 0
        warning('cHMM:test:toolFailed', 'Command returned a non-zero status: %s', cmd);
    end
end % ------ OF TEST
288
289 %% **********************************************************
290 % Get methods determining feature vector related gubbins
291 %************************************************************
function value = get.protoFile(obj)
%GET.PROTOFILE Path of the auto-generated prototype inside hmmFolder.
    protoName = 'proto_AutoGen';
    value = fullfile(obj.hmmFolder, protoName);
end
295
296 end % ------ OF METHODS
297
298 %% *********************************************************
299 % _ _ _ _ _ _
300 % | | | | (_) | | | | | |
301 % ___| |_ __ _| |_ _ ___ _ __ ___ ___| |_| |__ ___ __| |___
302 % / __| __/ _` | __| |/ __| | '_ ` _ \ / _ \ __| '_ \ / _ \ / _` / __|
303 % \__ \ || (_| | |_| | (__ | | | | | | __/ |_| | | | (_) | (_| \__ \
304 % |___/\__\__,_|\__|_|\___| |_| |_| |_|\___|\__|_| |_|\___/ \__,_|___/
305 %************************************************************
306
307 methods(Static)
308 %% **********************************************************
309 % macroMat (matlab port of macro C code)
310 % ported by NC - Nov 2011
311 %************************************************************
function macroMat(VECSIZE, PARAMETER_TYPE, infile, outfile)
%MACROMAT Build an HTK macros file from a vFloors file.
%   macroMat(VECSIZE, PARAMETER_TYPE, infile, outfile)
%
%   VECSIZE         feature vector length written to <STREAMINFO>/<VECSIZE>
%   PARAMETER_TYPE  parameter kind, written as <PARAMETER_TYPE>
%   infile          vFloors file produced by HCompV
%   outfile         macros file to create (overwritten if present)
%
%   The output is a global options header followed by the unmodified
%   content of infile.
%
%   The input is copied verbatim with a '%s' format. Passing its lines
%   as the fprintf format itself would make any '%' or '\' in the file be
%   interpreted as a conversion or escape sequence.

    % Read first: fileread raises an error if infile is missing, before
    % the output file is created or truncated.
    floorsText = fileread(infile);

    ofp = fopen(outfile, 'w');
    if ofp < 0
        error('cHMM:macroMat:cannotOpen', 'Cannot open "%s" for writing.', outfile);
    end
    closeOutput = onCleanup(@() fclose(ofp));

    fprintf(ofp, '~o\n<STREAMINFO> 1 %d\n', VECSIZE);
    fprintf(ofp, '<VECSIZE> %d\n', VECSIZE);
    fprintf(ofp, '<NULLD>\n<%s>\n', PARAMETER_TYPE);
    fprintf(ofp, '%s', floorsText);
end %---- of MACROMAT
332
333 %% **********************************************************
334 % spmodel_genMat (matlab port of spmodel_gen C code)
335 % ported by NC - Nov 2011
336 %************************************************************
function spmodel_genMat(infile, outfile)
%SPMODEL_GENMAT Append a short-pause ("sp") model built from the sil model.
%   spmodel_genMat(infile, outfile)
%
%   infile   model file containing a '~h "sil"' definition
%   outfile  model file that the new "sp" definition is APPENDED to
%
%   The sp model has three states; its single emitting state (2) is a
%   copy of state 3 (the middle state) of the silence model.
%
%   Background, from
%   http://www.voxforge.org/home/dev/acousticmodels/linux/create/htkjulius/tutorial/monophones/step-7
%   The HTK book says that the sp model needs to have its "emitting state
%   tied to the centre state of the silence model": the centre state of
%   sil is copied into a new sp model, and HHEd is then used to tie the
%   two together.
%
%   The input file is opened once and scanned forward: first to the sil
%   header, then to the '<STATE> 3' that follows it. Re-opening the file
%   between the two searches would rewind it, so the first '<STATE> 3' in
%   the file would be copied, whichever model it belongs to.
%
%   Every read is checked for end-of-file, and an error is raised if a
%   marker is missing. The state is read completely before the output
%   file is touched, so a failed search leaves outfile unchanged.

    fp = fopen(infile, 'r');
    if fp < 0
        error('cHMM:spmodel_genMat:cannotOpen', 'Cannot open "%s" for reading.', infile);
    end
    closeInput = onCleanup(@() fclose(fp));

    %-- Advance to the sil MODEL, then to its STATE 3 (same file handle)
    markers = {'~h "sil"', '<STATE> 3'};
    for m = 1:numel(markers)
        lNow = fgetl(fp);
        while ischar(lNow) && ~strcmpi(lNow, markers{m})
            lNow = fgetl(fp);
        end
        if ~ischar(lNow)
            error('cHMM:spmodel_genMat:markerMissing', ...
                'Line %s not found in "%s".', markers{m}, infile);
        end
    end

    %-- Collect everything up to (not including) the start of state 4
    stateLines = {};
    tline = fgetl(fp);
    while ischar(tline) && ~strcmpi(tline, '<STATE> 4')
        stateLines{end+1} = tline; %#ok<AGROW>
        tline = fgetl(fp);
    end
    if ~ischar(tline)
        error('cHMM:spmodel_genMat:markerMissing', ...
            'Line <STATE> 4 not found after the sil state 3 in "%s".', infile);
    end

    %-- Append the sp model
    ofp = fopen(outfile, 'a+'); % we append this time
    if ofp < 0
        error('cHMM:spmodel_genMat:cannotOpen', 'Cannot open "%s" for appending.', outfile);
    end
    closeOutput = onCleanup(@() fclose(ofp));

    fprintf(ofp, '~h "sp"\n');
    fprintf(ofp, '<BEGINHMM>\n<NUMSTATES> 3\n<STATE> 2\n');
    for k = 1:numel(stateLines)
        fprintf(ofp, '%s\n', stateLines{k});
    end
    fprintf(ofp, '<TRANSP> 3\n0.000000e+00 1.000000e+00 0.000000e+00\n');
    fprintf(ofp, '0.000000e+00 5.000000e-01 5.000000e-01\n');
    fprintf(ofp, '0.000000e+00 0.000000e+00 0.000000e+00\n<ENDHMM>\n');
end %---- of spmodel_genMat
398
399 %% **********************************************************
400 % models_1mixsilMat (matlab port of models_1mixsil C code)
401 % ported by NC - Nov 2011
402 %************************************************************
function models_1mixsilMat(infile, outfile)
%MODELS_1MIXSILMAT Expand one hmmdef into the full seed model set.
%   models_1mixsilMat(infile, outfile)
%
%   infile   file containing a '~h "hmmdef"' definition
%   outfile  model file to create (overwritten if present)
%
%   Output layout:
%     * eleven word models ("one" ... "nine", "oh", "zero"), each being
%       the header '~h "<word>"' followed by every line that comes after
%       the '~h "hmmdef"' line of infile;
%     * a five-state "sil" model whose emitting states 2..4 are copies
%       of states 2..4 of infile (first occurrence of each <STATE> tag),
%       followed by a fixed transition matrix.
%
%   The input is read once and validated before the output file is
%   created; a missing marker raises an error. Lines are written with a
%   '%s' format so '%' or '\' in the input cannot be interpreted as
%   format directives.
%
%   Every copied line is terminated with '\n', so CR-LF terminators in
%   the input are normalised to LF in the output.

    wordNames = {'one', 'two', 'three', 'four', 'five', 'six', ...
                 'seven', 'eight', 'nine', 'oh', 'zero'};
    newline_ = sprintf('\n');

    %%% Read the whole definition once and split it into lines
    srcLines = regexp(fileread(infile), '\r?\n', 'split');
    if ~isempty(srcLines) && isempty(srcLines{end})
        srcLines(end) = []; % artefact of the final line terminator
    end

    %%% Body shared by every word model: all lines after the header
    headerIdx = find(strcmpi(srcLines, '~h "hmmdef"'), 1);
    if isempty(headerIdx)
        error('cHMM:models_1mixsilMat:markerMissing', ...
            'Line ~h "hmmdef" not found in "%s".', infile);
    end
    bodyText = '';
    for ln = headerIdx+1 : numel(srcLines)
        bodyText = [bodyText srcLines{ln} newline_]; %#ok<AGROW>
    end

    %%% Lines of states 2, 3 and 4 for the silence model
    silStates = cell(1, 3);
    for kk = 2:4
        openTag  = sprintf('<STATE> %d', kk);
        closeTag = sprintf('<STATE> %d', kk + 1);
        firstIdx = find(strcmpi(srcLines, openTag), 1);
        if isempty(firstIdx)
            error('cHMM:models_1mixsilMat:markerMissing', ...
                'Line %s not found in "%s".', openTag, infile);
        end
        lastOffset = find(strcmpi(srcLines(firstIdx+1:end), closeTag), 1);
        if isempty(lastOffset)
            error('cHMM:models_1mixsilMat:markerMissing', ...
                'Line %s not found after %s in "%s".', closeTag, openTag, infile);
        end
        silStates{kk-1} = srcLines(firstIdx+1 : firstIdx+lastOffset-1);
    end

    %%% Everything needed is in memory - now write the model file
    ofp = fopen(outfile, 'w');
    if ofp < 0
        error('cHMM:models_1mixsilMat:cannotOpen', 'Cannot open "%s" for writing.', outfile);
    end
    closeOutput = onCleanup(@() fclose(ofp));

    for ii = 1:numel(wordNames)
        fprintf(ofp, '~h "%s"\n', wordNames{ii});
        fprintf(ofp, '%s', bodyText);
    end

    %%% THIS IS THE SILENCE MODEL @ THE END
    fprintf(ofp, '~h "sil"\n');
    fprintf(ofp, '<BEGINHMM>\n<NUMSTATES> 5\n');
    for kk = 2:4
        fprintf(ofp, '<STATE> %d\n<NUMMIXES> 1\n', kk);
        stateLines = silStates{kk-1};
        for ln = 1:numel(stateLines)
            fprintf(ofp, '%s\n', stateLines{ln});
        end
    end

    fprintf(ofp, '<TRANSP> 5\n0.000000e+00 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00\n');
    fprintf(ofp, '0.000000e+00 6.000000e-01 4.000000e-01 0.000000e+00 0.000000e+00\n');
    fprintf(ofp, '0.000000e+00 0.000000e+00 6.000000e-01 4.000000e-01 0.000000e+00\n');
    fprintf(ofp, '0.000000e+00 0.000000e+00 0.000000e+00 7.000000e-01 3.000000e-01\n');
    fprintf(ofp, '0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00\n<ENDHMM>\n');
end %---- of models_1mixsilMat
475
476 %% **********************************************************
477 % makeProtoHmm (Make a prototype HMM)
478 %************************************************************
function makeProtoHmm(filename,featureType,numFeatures,numStates)
%MAKEPROTOHMM Write a left-to-right prototype HMM definition.
%   makeProtoHmm(filename, featureType, numFeatures, numStates)
%
%   filename     file to create (overwritten if present)
%   featureType  parameter kind written into the header, usually USER_D_A
%   numFeatures  number of features (including differences if used)
%   numStates    total number of states, usually 18
%
%   Every emitting state (2 .. numStates-1) gets one mixture with zero
%   mean and unit variance. The transition matrix lets state 1 move to
%   state 2 with probability 1; states 2 .. numStates-2 stay with 0.6 and
%   advance with 0.4; state numStates-1 stays with 0.9 and exits with 0.1.
%
%   The transition matrix is numStates x numStates. Sizing it from
%   numFeatures instead only works while numFeatures >= numStates;
%   otherwise printing the last row indexes outside the matrix.

    ofp = fopen(filename,'w');
    if ofp < 0
        error('cHMM:makeProtoHmm:cannotOpen', 'Cannot open "%s" for writing.', filename);
    end
    closeOutput = onCleanup(@() fclose(ofp));

    fprintf(ofp,'<BeginHMM>\n');
    fprintf(ofp,' <NumStates> %d <VecSize> %d <%s> <nullD> <diagC>\n',numStates,numFeatures,featureType);
    fprintf(ofp,' <StreamInfo> 1 %d\n',numFeatures);
    for state = 2:numStates-1
        fprintf(ofp,' <State> %d <NumMixes> 1\n',state);
        fprintf(ofp,' <Stream> 1\n');
        fprintf(ofp,' <Mixture> 1 1.0\n');
        fprintf(ofp,' <Mean> %d\n',numFeatures);
        fprintf(ofp,' ');
        fprintf(ofp,'%1.1f ',zeros(1,numFeatures));
        fprintf(ofp,'\n');
        fprintf(ofp,' <Variance> %d\n',numFeatures);
        fprintf(ofp,' ');
        fprintf(ofp,'%1.1f ',ones(1,numFeatures));
        fprintf(ofp,'\n');
    end

    fprintf(ofp,'<TransP> %d\n',numStates);
    transp = zeros(numStates); % one row and one column per state
    transp(1,2) = 1;
    for state = 2:numStates-2
        transp(state,state)   = 0.6;
        transp(state,state+1) = 0.4;
    end
    transp(numStates-1,numStates-1) = 0.9;
    transp(numStates-1,numStates)   = 0.1;
    for state = 1:numStates
        fprintf(ofp,'%1.3e ',transp(state,1:numStates));
        fprintf(ofp,'\n');
    end
    fprintf(ofp,'<EndHMM>\n');
end %---- of MAKEPROTOHMM
519
520 %% **********************************************************
521 % createMLF - master label file - belongs with hmm class
522 %************************************************************
function createMLF(mapFileFolder)
%CREATEMLF Write the master label file 'labels.mlf' for a feature folder.
%   createMLF(mapFileFolder)
%
%   For every '*.map' file in mapFileFolder one entry is written:
%       "*/<name>.lab"
%       sil
%       <one word per label character>
%       sil
%       .
%   The label characters are taken from the file name, characters 5 to
%   end-5. '1'..'9' map to 'one'..'nine', 'O' to 'oh' and 'Z' to 'zero'.
%
%   File names and words are written through '%s' so that a '%' or '\'
%   in a file name is not interpreted as a format directive. A label
%   character with no mapping is skipped with a warning; there is no
%   sensible word to write for it.

    keyChars = 'O123456789Z';
    keyWords = {'oh', 'one', 'two', 'three', 'four', 'five', ...
                'six', 'seven', 'eight', 'nine', 'zero'};

    mlfFileName = 'labels';
    d = dir(fullfile(mapFileFolder, '*.map'));

    mlfPath = [fullfile(mapFileFolder, mlfFileName) '.mlf'];
    fid = fopen(mlfPath, 'w');
    if fid < 0
        error('cHMM:createMLF:cannotOpen', 'Cannot open "%s" for writing.', mlfPath);
    end
    closeOutput = onCleanup(@() fclose(fid));

    fprintf(fid, '#!MLF!#\n');

    for I = 1:numel(d)
        mapName = d(I).name;
        fprintf(fid, '"*/%slab"\n', mapName(1:end-3)); % swap the 'map' extension for 'lab'
        fprintf(fid, 'sil\n');

        labels = mapName(5:end-5);
        for J = 1:length(labels)
            hit = find(keyChars == labels(J), 1);
            if isempty(hit)
                warning('cHMM:createMLF:unknownLabel', ...
                    'Skipping unmapped label character ''%s'' in file name "%s".', ...
                    labels(J), mapName);
                continue
            end
            fprintf(fid, '%s\n', keyWords{hit});
        end

        fprintf(fid, 'sil\n.\n');
    end
end % ------ OF CREATEMLF
565
566 %% **********************************************************
567 % createSCP - Just a file list
568 %************************************************************
function createSCP(mapFileFolder)
%CREATESCP Write 'list.scp', a plain list of the '*.map' files in a folder.
%   createSCP(mapFileFolder)
%
%   One file name (without folder) is written per line. The last three
%   characters of each name are replaced by 'map'.
%
%   Names are written through '%s' so that a '%' or '\' in a file name
%   is not interpreted as a format directive.

    scpFileName = 'list';
    d = dir(fullfile(mapFileFolder, '*.map'));

    scpPath = [fullfile(mapFileFolder, scpFileName) '.scp'];
    fid = fopen(scpPath, 'w');
    if fid < 0
        error('cHMM:createSCP:cannotOpen', 'Cannot open "%s" for writing.', scpPath);
    end
    closeOutput = onCleanup(@() fclose(fid));

    for I = 1:numel(d)
        fprintf(fid, '%smap\n', d(I).name(1:end-3));
    end
end% ------ OF createSCP
580
581 %% **********************************************************
582 % score - Simple non-dynamic scoring
583 %************************************************************
function score(testMLFpath)
%SCORE Simple non-dynamic scoring of the recognition results in a folder.
%   score(testMLFpath)
%
%   Every file in testMLFpath whose name ends in '.mlf' is parsed. In
%   each file:
%     * lines containing 'MLF' are treated as headers and ignored;
%     * a line containing '.rec' starts a new utterance and is kept as
%       its file name;
%     * any other line has digits, '-', '.' and white space removed; what
%       remains is a recognised word unless it is empty, 'sp' or 'sil'.
%   The three reference digits are read from the file name (characters
%   end-8 to end-6) and compared position by position with the first
%   three recognised words.
%
%   A table of sentence and word accuracy per file is displayed, and an
%   empty marker file 'score__<word accuracy of first scored file>__.txt'
%   is created in testMLFpath.
%
%   All per-file state is rebuilt for every file, so lines from a longer
%   file cannot leak into a shorter one processed after it. Words that
%   appear before any '.rec' line are ignored. An utterance with fewer
%   than three recognised words has its missing positions counted as
%   wrong. Files with no '.rec' entries are skipped with a warning.

    % First get the test result files
    listing = dir(testMLFpath);
    allNames = {listing.name};
    testMLFfiles = allNames(~cellfun('isempty', regexp(allNames, '\.mlf$', 'once')));
    if isempty(testMLFfiles)
        warning('cHMM:score:noMLF', 'No .mlf files found in "%s".', testMLFpath);
        return
    end

    stripPattern = '[0-9\-\.\s]';
    scoredFiles = {};
    pcSent = [];
    pcWord = [];

    for fileIdx = 1:numel(testMLFfiles)
        mlfPath = fullfile(testMLFpath, testMLFfiles{fileIdx});
        fmlf = fopen(mlfPath, 'r');
        if fmlf < 0
            warning('cHMM:score:cannotOpen', 'Cannot open "%s"; skipped.', mlfPath);
            continue
        end
        MLF = textscan(fmlf, '%s', 'delimiter', '\n');
        fclose(fmlf);
        rawLines = MLF{1};

        % Extract file names and recognised words -> recResults
        recResults = struct('fileName', {}, 'utterance', {});
        for I = 1:numel(rawLines)
            thisLine = rawLines{I};
            if ~isempty(strfind(thisLine, 'MLF'))
                continue % header line
            end
            if ~isempty(strfind(thisLine, '.rec'))
                % found FileName
                recResults(end+1).fileName = thisLine; %#ok<AGROW>
                recResults(end).utterance = {};
                continue
            end
            word = regexprep(thisLine, stripPattern, '');
            if isempty(word) || strcmpi(word, 'sp') || strcmpi(word, 'sil')
                continue
            end
            if isempty(recResults)
                continue % word with no utterance to attach it to
            end
            recResults(end).utterance{end+1} = word;
        end

        numUtterances = numel(recResults);
        if numUtterances == 0
            warning('cHMM:score:noResults', 'No .rec entries in "%s"; skipped.', mlfPath);
            continue
        end

        % Make separate 2D arrays of input and output digits.
        % NaN never compares equal, so a missing output counts as an error.
        inputDigits  = zeros(numUtterances, 3);
        outputDigits = nan(numUtterances, 3);
        for nn = 1:numUtterances
            ipStr = recResults(nn).fileName(end-8:end-6);
            heard = recResults(nn).utterance;
            for kk = 1:3
                inputDigits(nn,kk) = cHMM.htk_str2num(ipStr(kk));
                if kk <= numel(heard)
                    outputDigits(nn,kk) = cHMM.htk_str2num(heard{kk});
                end
            end
        end

        % Now do the scoring (simple when data in the right format)
        scoreArray = (inputDigits == outputDigits);
        scoredFiles{end+1} = testMLFfiles{fileIdx}; %#ok<AGROW>
        pcSent(end+1) = 100*sum(all(scoreArray,2)) / numUtterances; %#ok<AGROW>
        pcWord(end+1) = 100*sum(scoreArray(:)) / numel(scoreArray); %#ok<AGROW>
    end

    if isempty(scoredFiles)
        warning('cHMM:score:nothingScored', 'No recognition results could be scored in "%s".', testMLFpath);
        return
    end

    %will output table to console if used on *nix
    xlsdataFull = [{'-- File Name --', '%Sent', '%Word'}; scoredFiles' num2cell(pcSent') num2cell(pcWord')];
    disp(xlsdataFull)

    % Empty marker file whose name carries the word score of the first file
    fid = fopen(fullfile(testMLFpath, ['score__' num2str(pcWord(1), '%0.1f') '__.txt']), 'w');
    if fid >= 0
        fclose(fid);
    end
end% ------ OF SCORE
670
671 %% **********************************************************
672 % scoreWhole folder - make my life easier @ command line
673 %************************************************************
function scoreWholeFolder(folderToScore, searchString)
%SCOREWHOLEFOLDER Run cHMM.score on every matching entry of a folder.
%   scoreWholeFolder(folderToScore) scores each entry matching '*featR*'.
%   scoreWholeFolder(folderToScore, searchString) uses the given pattern.
%
%   The path of each entry is displayed before it is scored.
    if nargin < 2
        searchString = '*featR*';
    end

    matches = dir(fullfile(folderToScore, searchString));
    entryNames = {matches.name};

    for idx = 1:numel(entryNames)
        currentScoring = fullfile(folderToScore, entryNames{idx});
        disp('');
        disp(currentScoring);
        cHMM.score(currentScoring);
    end
end
687
688 %**************************************************************************
689 % htk_str2num - Convert strings to integers
690 %**************************************************************************
function opNum = htk_str2num(ipString)
%HTK_STR2NUM Convert a digit word or a file-name digit character to a number.
%   opNum = htk_str2num(ipString)
%
%   ipString  a recognised word ('oh', 'one', ... 'nine') or the matching
%             file-name character ('O', '1' ... '9')
%   opNum     the digit value, 0 .. 9
%
%   'seven'/'7' map to 7, so every digit word 'one' .. 'nine' is covered.
%   'zero'/'Z' is deliberately left unmapped: 'oh' already maps to 0, and
%   giving 'zero' the same value would score the two words as equal.
%
%   Any other input raises an error that names the offending token.
    switch ipString
        case {'oh' , 'O'}
            opNum = 0;
        case {'one' , '1'}
            opNum = 1;
        case {'two' , '2'}
            opNum = 2;
        case {'three' , '3'}
            opNum = 3;
        case {'four' , '4'}
            opNum = 4;
        case {'five' , '5'}
            opNum = 5;
        case {'six' , '6'}
            opNum = 6;
        case {'seven' , '7'}
            opNum = 7;
        case {'eight' , '8'}
            opNum = 8;
        case {'nine' , '9'}
            opNum = 9;
        otherwise
            error('cHMM:htk_str2num:unknownToken', ...
                'Cannot convert "%s" to a digit.', ipString);
    end
end % ------ OF htk_str2num
715
716 end % ------ OF STATIC METHODS
717
718 end % ------ OF CLASS