Mercurial > hg > map
comparison userProgramsASRforDummies/cHMM.m @ 38:c2204b18f4a2 tip
End nov big change
author | Ray Meddis <rmeddis@essex.ac.uk> |
---|---|
date | Mon, 28 Nov 2011 13:34:28 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
37:771a643d5c29 | 38:c2204b18f4a2 |
---|---|
1 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | |
2 % This program is free software; you can redistribute it and/or modify | |
3 % it under the terms of the GNU General Public License as published by | |
4 % the Free Software Foundation; either version 2 of the License, or | |
5 % (at your option) any later version. | |
6 % | |
7 % This program is distributed in the hope that it will be useful, | |
8 % but WITHOUT ANY WARRANTY; without even the implied warranty of | |
9 % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
10 % GNU General Public License for more details. | |
11 % | |
12 % You can obtain a copy of the GNU General Public License from | |
13 % http://www.gnu.org/copyleft/gpl.html or by writing to | |
14 % Free Software Foundation, Inc.,675 Mass Ave, Cambridge, MA 02139, USA. | |
15 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | |
16 | |
17 classdef cHMM | |
18 %HMMCLASS Handles all of the HTK related gubbins | |
19 % Please see the documentation located in a separate file for further | |
20 % information. | |
21 | |
22 %% ********************************************************* | |
23 % properties _ _ | |
24 % | | (_) | |
25 % _ __ _ __ ___ _ __ ___ _ __| |_ _ ___ ___ | |
26 % | '_ \| '__/ _ \| '_ \ / _ \ '__| __| |/ _ \/ __| | |
27 % | |_) | | | (_) | |_) | __/ | | |_| | __/\__ \ | |
28 % | .__/|_| \___/| .__/ \___|_| \__|_|\___||___/ | |
29 % | | | | | |
30 % |_| |_| | |
31 %************************************************************ | |
32 | |
33 %% ********************************************************** | |
34 % Public properties - can be set by user | |
35 %************************************************************ | |
36 properties(Access = public) | |
37 hmmFolder | |
38 paramType = 'USER_D_A'; %DELTAS and ACCELERATIONS | |
39 numCoeff = 27; %9*3 THIS IS FOR PROBABILITY MODEL (not high spont+low spont which would be 18*3=54) | |
40 | |
41 HERestDataPath = fullfile(pwd, 'def', 'HERest_digit'); | |
42 binPath = fullfile(pwd, 'def', 'bin'); | |
43 configFile = fullfile(pwd, 'def', 'config_STANDARD'); | |
44 trainWordListFile = fullfile(pwd, 'def', 'Grammar_digit', 'words3'); | |
45 testWordListFile = fullfile(pwd, 'def', 'Grammar_digit', 'wordsNoSevenZero'); | |
46 wordNetFile = fullfile(pwd, 'def', 'Grammar_digit', 'wdnetNoSP.slf'); | |
47 dictFile = fullfile(pwd, 'def', 'Grammar_digit', 'noSevenZeroDict'); | |
48 end | |
49 | |
50 %% ********************************************************** | |
51 % Dependent - never set by user. Only calculated when needed | |
52 %************************************************************ | |
53 properties(Dependent = true) | |
54 protoFile % = fullfile(pwd, 'def', 'proto_RobANonly_9'); %probability only | |
55 end | |
56 | |
57 %% ********************************************************* | |
58 % methods _ _ _ | |
59 % | | | | | | | |
60 % _ __ ___ ___| |_| |__ ___ __| |___ | |
61 %| '_ ` _ \ / _ \ __| '_ \ / _ \ / _` / __| | |
62 %| | | | | | __/ |_| | | | (_) | (_| \__ \ | |
63 %|_| |_| |_|\___|\__|_| |_|\___/ \__,_|___/ | |
64 %************************************************************ | |
65 | |
66 methods | |
67 %% ********************************************************** | |
68 % Constructor | |
69 %************************************************************ | |
function obj = cHMM(hmmFolder)
    % CHMM Construct a cHMM object.
    %   obj = cHMM() keeps every property at its declared default.
    %   obj = cHMM(hmmFolder) additionally stores hmmFolder in
    %   obj.hmmFolder.
    if nargin == 0
        return
    end
    obj.hmmFolder = hmmFolder;
end % ------ OF CONSTRUCTOR
75 | |
76 %% ********************************************************** | |
77 % genProto - generate task specific prototype | |
78 %************************************************************ | |
function genProto(obj)
    % GENPROTO Build the initial model set from the seed definition.
    %   Passes <hmmFolder>/hmm0/hmmdef to models_1mixsilMat, which
    %   writes <hmmFolder>/hmm0/models.
    seedFolder = fullfile(obj.hmmFolder, 'hmm0');
    seedDefinition = fullfile(seedFolder, 'hmmdef');
    seedModels = fullfile(seedFolder, 'models');
    obj.models_1mixsilMat(seedDefinition, seedModels);
end % ------ OF GENPROTO
84 | |
85 %% ********************************************************** | |
86 % istrained | |
87 %************************************************************ | |
function boolans = istrained(obj)
    % ISTRAINED True when the final model file exists.
    %   boolans = istrained(obj) returns logical true when
    %   <hmmFolder>/hmm36/models shows up in a directory listing and
    %   logical false otherwise. (The previous implementation returned
    %   the number of directory entries rather than a logical.)
    finalModels = fullfile(obj.hmmFolder, 'hmm36', 'models');
    boolans = ~isempty(dir(finalModels));
end
91 | |
92 %% ********************************************************** | |
93 % Train | |
94 %************************************************************ | |
function train(obj, trainFeatureFolder)
    % TRAIN Train the HMM set by driving the HTK command line tools.
    %
    %   train(obj, trainFeatureFolder)
    %
    %   trainFeatureFolder must contain 'list.scp' (one feature file
    %   name per line, relative to that folder) and 'labels.mlf'.
    %
    %   Folders hmm0 ... hmm36 are created below obj.hmmFolder and
    %   filled as follows:
    %     hmm0        HCompV seed, macros file and initial model set
    %     hmm1-3      HERest re-estimation
    %     hmm4        copy of hmm3 with an "sp" model appended
    %     hmm5        HHEd with sil1.hed
    %     hmm6-8      HERest
    %     hmm9        HHEd with mix2_16.hed,  hmm10-12 HERest
    %     hmm13       HHEd with mix3_16.hed,  hmm14-20 HERest
    %     hmm21       HHEd with mix5_16.hed,  hmm22-28 HERest
    %     hmm29       HHEd with mix7_16.hed,  hmm30-36 HERest
    %
    %   HCompV, HERest and HHEd must be reachable by the system shell.
    %   Every path handed to them is wrapped in double quotes so that
    %   folders containing spaces work. A non-zero exit status from a
    %   tool produces a warning; training then carries on as before.

    hmmDir    = obj.hmmFolder;
    config    = obj.configFile;
    wordList  = obj.trainWordListFile;
    proto     = obj.protoFile; % usually not on disk yet - written by makeProtoHmm below
    listFile  = fullfile(hmmDir, 'tmp.list');
    trainList = fullfile(trainFeatureFolder, 'list.scp');
    labels    = fullfile(trainFeatureFolder, 'labels.mlf');

    % Small helpers: quote a path for the shell / build a path in hmm<k>
    q       = @(p) ['"' p '"'];
    hmmPath = @(k, varargin) fullfile(hmmDir, ['hmm' num2str(k)], varargin{:});

    % HERest command writing hmm<k> from hmm<k-1>. 'pre' and 'mid' carry
    % the extra switches that only the first three passes use.
    herestCmd = @(pre, mid, k) ['HERest' pre ' -C ' q(config) ' -I ' q(labels) mid ...
        ' -S ' q(listFile) ...
        ' -H ' q(hmmPath(k-1, 'macros')) ' -H ' q(hmmPath(k-1, 'models')) ...
        ' -M ' q(hmmPath(k)) ' ' q(wordList)];

    % HHEd command writing hmm<src+1> from hmm<src> using an edit script
    hhedCmd = @(src, edFile) ['HHEd -T 2' ...
        ' -H ' q(hmmPath(src, 'macros')) ' -H ' q(hmmPath(src, 'models')) ...
        ' -M ' q(hmmPath(src+1)) ' ' q(edFile) ' ' q(wordList)];

    % --- Create the folder tree. Asking mkdir for outputs suppresses the
    %     "already exists" warning on re-runs.
    [ok, msg] = mkdir(hmmDir);
    if ~ok
        error('cHMM:train:mkdirFailed', 'Cannot create %s: %s', hmmDir, msg);
    end
    for k = 0:36
        [ok, msg] = mkdir(hmmPath(k));
        if ~ok
            error('cHMM:train:mkdirFailed', 'Cannot create %s: %s', hmmPath(k), msg);
        end
    end

    obj.makeProtoHmm(proto, obj.paramType, obj.numCoeff, 18);

    % --- Expand list.scp into a list of full paths for the -S switch
    disp(trainList)
    fid = fopen(trainList, 'r');
    if fid == -1
        error('cHMM:train:cannotOpen', 'Cannot open training list: %s', trainList);
    end
    S = textscan(fid, '%s', 'Delimiter', '\n');
    fclose(fid);

    fid = fopen(listFile, 'w');
    if fid == -1
        error('cHMM:train:cannotOpen', 'Cannot write file list: %s', listFile);
    end
    for k = 1:size(S{1}, 1)
        fprintf(fid, '%s\n', fullfile(trainFeatureFolder, S{1}{k}));
    end
    fclose(fid);

    % --- HCompV just gets the vfloor stuff out so we can begin approximating
    cmd = ['"HCompV" -T 2 -D -C ' q(config) ' -o hmmdef -f 0.01 -m -S ' q(listFile) ...
        ' -M ' q(hmmPath(0)) ' ' q(proto)];
    status = system(cmd);
    if status ~= 0
        warning('cHMM:train:toolFailed', 'HCompV returned exit status %d.', status);
    end

    obj.macroMat(obj.numCoeff, obj.paramType, hmmPath(0, 'vFloors'), hmmPath(0, 'macros'));

    % MAKE THE INITIAL MODEL PROTOTYPE
    genProto(obj);

    disp('Seed HMM successfully Produced.....');

    % --- First three re-estimation passes (with -D and pruning thresholds)
    for k = 1:3
        disp(k)
        status = system(herestCmd(' -D', ' -t 250.0 150.0 1000.0', k));
        if status ~= 0
            warning('cHMM:train:toolFailed', ...
                'HERest returned exit status %d while building hmm%d.', status, k);
        end
    end

    disp('3 iterations complete');

    % --- hmm4 starts as a copy of hmm3; spmodel_genMat then appends an
    %     "sp" model to hmm4/models.
    rmdir(hmmPath(4), 's')
    copyfile(hmmPath(3), hmmPath(4))
    obj.spmodel_genMat(hmmPath(3, 'models'), hmmPath(4, 'models'));

    % --- Remaining stages: one HHEd edit followed by HERest passes.
    %     Columns: edit script, source hmm index, message after HHEd,
    %     HERest target indices, message after the HERest passes.
    stages = { ...
        'sil1.hed',     4, 'SP model fixed',           6:8,  '6 iterations complete';   ...
        'mix2_16.hed',  8, '2 gaussians per mixture', 10:12, '9 iterations completed';  ...
        'mix3_16.hed', 12, '3 gaussians per mixture', 14:20, '16 iterations completed'; ...
        'mix5_16.hed', 20, '5 gaussians per mixture', 22:28, '23 iterations completed'; ...
        'mix7_16.hed', 28, '7 gaussians per mixture', 30:36, '30 iterations completed'};

    for s = 1:size(stages, 1)
        [edName, src, editMsg, targets, doneMsg] = stages{s, :};

        status = system(hhedCmd(src, fullfile(obj.HERestDataPath, edName)));
        if status ~= 0
            warning('cHMM:train:toolFailed', ...
                'HHEd returned exit status %d while building hmm%d.', status, src + 1);
        end
        disp(editMsg)

        for k = targets
            disp(k)
            status = system(herestCmd('', '', k));
            if status ~= 0
                warning('cHMM:train:toolFailed', ...
                    'HERest returned exit status %d while building hmm%d.', status, k);
            end
        end

        disp(doneMsg)
    end

end % ------ OF TRAIN
250 | |
251 %% ********************************************************** | |
252 % Test | |
253 %************************************************************ | |
function test(obj, testFeatureFolder)
    % TEST Recognise a folder of feature files with the trained models.
    %
    %   test(obj, testFeatureFolder)
    %
    %   Reads 'list.scp' from testFeatureFolder, writes the expanded
    %   file list to 'tmp.list' in the same folder and runs HVite with
    %   the hmm36 models and macros from obj.hmmFolder. HVite is asked
    %   to write its output to 'result.mlf' in testFeatureFolder.
    %
    %   All paths on the HVite command line are double-quoted so that
    %   folders containing spaces work.

    flags = '-p 0.0 -s 0.0';
    q = @(p) ['"' p '"'];

    listFile   = fullfile(testFeatureFolder, 'tmp.list');
    scpFile    = fullfile(testFeatureFolder, 'list.scp');
    resultFile = fullfile(testFeatureFolder, 'result.mlf');

    disp ('Now testing with 7 mixture HMMs')
    modFile = fullfile(obj.hmmFolder, 'hmm36', 'models');
    macFile = fullfile(obj.hmmFolder, 'hmm36', 'macros');

    % --- Expand list.scp into a list of full paths for the -S switch
    ftest = fopen(scpFile, 'r');
    if ftest == -1
        error('cHMM:test:cannotOpen', 'Cannot open test list: %s', scpFile);
    end
    S = textscan(ftest, '%s', 'Delimiter', '\n');
    fclose(ftest);

    flist = fopen(listFile, 'w');
    if flist == -1
        error('cHMM:test:cannotOpen', 'Cannot write file list: %s', listFile);
    end
    for k = 1:size(S{1}, 1)
        fprintf(flist, '%s\n', fullfile(testFeatureFolder, S{1}{k}));
    end
    fclose(flist);

    cmd = ['HVite -D -H ' q(macFile) ' -H ' q(modFile) ' -S ' q(listFile) ...
        ' -C ' q(obj.configFile) ' -w ' q(obj.wordNetFile) ...
        ' -l ''*'' -i ' q(resultFile) ' ' flags ' ' ...
        q(obj.dictFile) ' ' q(obj.testWordListFile)];
    status = system(cmd);
    if status ~= 0
        warning('cHMM:test:toolFailed', 'HVite returned exit status %d.', status);
    end
end % ------ OF TEST
288 | |
289 %% ********************************************************** | |
290 % Get methods determining feature vector related gubbins | |
291 %************************************************************ | |
function value = get.protoFile(obj)
    % Dependent property: the prototype file always lives directly
    % inside the current hmmFolder under a fixed name.
    protoName = 'proto_AutoGen';
    value = fullfile(obj.hmmFolder, protoName);
end
295 | |
296 end % ------ OF METHODS | |
297 | |
298 %% ********************************************************* | |
299 % _ _ _ _ _ _ | |
300 % | | | | (_) | | | | | | | |
301 % ___| |_ __ _| |_ _ ___ _ __ ___ ___| |_| |__ ___ __| |___ | |
302 % / __| __/ _` | __| |/ __| | '_ ` _ \ / _ \ __| '_ \ / _ \ / _` / __| | |
303 % \__ \ || (_| | |_| | (__ | | | | | | __/ |_| | | | (_) | (_| \__ \ | |
304 % |___/\__\__,_|\__|_|\___| |_| |_| |_|\___|\__|_| |_|\___/ \__,_|___/ | |
305 %************************************************************ | |
306 | |
307 methods(Static) | |
308 %% ********************************************************** | |
309 % macroMat (matlab port of macro C code) | |
310 % ported by NC - Nov 2011 | |
311 %************************************************************ | |
function macroMat(VECSIZE, PARAMETER_TYPE, infile, outfile)
    % MACROMAT Build an HTK macros file from a vFloors file.
    %
    %   macroMat(VECSIZE, PARAMETER_TYPE, infile, outfile)
    %
    %   VECSIZE         number written after <STREAMINFO> 1 and <VECSIZE>
    %   PARAMETER_TYPE  text written between angle brackets, e.g. USER_D_A
    %   infile          vFloors file (created by HCompV in train)
    %   outfile         macros file to create (overwritten if present)
    %
    %   The output is a global options header followed by a verbatim
    %   copy of infile.
    %
    %   Raises cHMM:macroMat:cannotOpen if either file cannot be opened.

    % Open the input first so that a missing vFloors file does not leave
    % a truncated macros file behind.
    fp = fopen(infile, 'r');
    if fp == -1
        error('cHMM:macroMat:cannotOpen', 'Cannot open input file: %s', infile);
    end
    closeIn = onCleanup(@() fclose(fp)); %#ok<NASGU>

    ofp = fopen(outfile, 'w');
    if ofp == -1
        error('cHMM:macroMat:cannotOpen', 'Cannot open output file: %s', outfile);
    end
    closeOut = onCleanup(@() fclose(ofp)); %#ok<NASGU>

    fprintf(ofp, '~o\n<STREAMINFO> 1 %d\n', VECSIZE);
    fprintf(ofp, '<VECSIZE> %d\n', VECSIZE);
    fprintf(ofp, '<NULLD>\n<%s>\n', PARAMETER_TYPE);

    % Copy line by line. The text is passed as data ('%s'), never as the
    % format string, so '%' or '\' in the file cannot corrupt the copy.
    tline = fgets(fp);
    while ischar(tline)
        fprintf(ofp, '%s', tline);
        tline = fgets(fp);
    end
end %---- of MACROMAT
332 | |
333 %% ********************************************************** | |
334 % spmodel_genMat (matlab port of spmodel_gen C code) | |
335 % ported by NC - Nov 2011 | |
336 %************************************************************ | |
function spmodel_genMat(infile, outfile)
    % SPMODEL_GENMAT Append a short-pause ("sp") model to a model file.
    %
    %   spmodel_genMat(infile, outfile)
    %
    %   infile   model file containing a '~h "sil"' model
    %   outfile  model file to which the new "sp" model is appended
    %
    %   The lines between '<STATE> 3' and '<STATE> 4' of the silence
    %   model are copied and written as state 2 of a new three-state
    %   "sp" model, followed by a fixed 3x3 transition matrix.
    %
    %   Background (from the VoxForge HTK tutorial, monophones step 7):
    %   the HTK book says the sp model needs its emitting state tied to
    %   the centre state of the silence model. Copying the centre state
    %   here is the first half of that; the tying itself is done
    %   afterwards with HHEd.
    %
    %   Fixes relative to the earlier port:
    %     * the input was opened twice, which leaked a handle and threw
    %       away the search for '~h "sil"', so state 3 of the FIRST
    %       model in the file was copied instead;
    %     * the search loops never terminated when a marker was missing.
    %
    %   Raises cHMM:spmodel_genMat:cannotOpen or
    %   cHMM:spmodel_genMat:markerNotFound.

    fp = fopen(infile, 'r');
    if fp == -1
        error('cHMM:spmodel_genMat:cannotOpen', 'Cannot open input file: %s', infile);
    end
    closeIn = onCleanup(@() fclose(fp)); %#ok<NASGU>

    % Walk forward to the silence model, then to its state 3. Each
    % search continues from where the previous one stopped.
    markers = {'~h "sil"', ['<STATE> ' num2str(3)]};
    for m = 1:numel(markers)
        lNow = '';
        while ischar(lNow) && ~strcmpi(lNow, markers{m})
            lNow = fgetl(fp); % returns -1 (not char) at end of file
        end
        if ~ischar(lNow)
            error('cHMM:spmodel_genMat:markerNotFound', ...
                'Line ''%s'' not found in %s', markers{m}, infile);
        end
    end

    % Collect the body of state 3 (everything up to '<STATE> 4')
    endMarker = ['<STATE> ' num2str(4)];
    stateBody = {};
    tline = fgetl(fp);
    while ischar(tline) && ~strcmpi(tline, endMarker)
        stateBody{end+1} = tline; %#ok<AGROW>
        tline = fgetl(fp);
    end
    if ~ischar(tline)
        error('cHMM:spmodel_genMat:markerNotFound', ...
            'Line ''%s'' not found in %s', endMarker, infile);
    end

    % Only touch the output once the input has been read successfully
    ofp = fopen(outfile, 'a+'); % we append this time
    if ofp == -1
        error('cHMM:spmodel_genMat:cannotOpen', 'Cannot open output file: %s', outfile);
    end
    closeOut = onCleanup(@() fclose(ofp)); %#ok<NASGU>

    fprintf(ofp, '~h "sp"\n');
    fprintf(ofp, '<BEGINHMM>\n<NUMSTATES> 3\n<STATE> 2\n');
    for k = 1:numel(stateBody)
        fprintf(ofp, '%s\n', stateBody{k});
    end
    fprintf(ofp, '<TRANSP> 3\n0.000000e+00 1.000000e+00 0.000000e+00\n');
    fprintf(ofp, '0.000000e+00 5.000000e-01 5.000000e-01\n');
    fprintf(ofp, '0.000000e+00 0.000000e+00 0.000000e+00\n<ENDHMM>\n');
end %---- of spmodel_genMat
398 | |
399 %% ********************************************************** | |
400 % models_1mixsilMat (matlab port of models_1mixsil C code) | |
401 % ported by NC - Nov 2011 | |
402 %************************************************************ | |
function models_1mixsilMat(infile, outfile)
    % MODELS_1MIXSILMAT Expand a single hmmdef into a full model file.
    %
    %   models_1mixsilMat(infile, outfile)
    %
    %   infile   file containing a '~h "hmmdef"' definition
    %   outfile  model file to create (overwritten if present)
    %
    %   Everything that follows the '~h "hmmdef"' line is copied once
    %   for each of the eleven word models (one ... nine, oh, zero),
    %   each under its own '~h "<name>"' header. A five-state "sil"
    %   model is then appended; its states 2-4 are the lines found
    %   between '<STATE> k' and '<STATE> k+1' in infile, and its
    %   transition matrix is fixed.
    %
    %   Copied text is written as data, never used as a format string,
    %   and a missing marker raises an error instead of looping forever.
    %
    %   Raises cHMM:models_1mixsilMat:cannotOpen or
    %   cHMM:models_1mixsilMat:markerNotFound.

    wordNames = {'one', 'two', 'three', 'four', 'five', 'six', ...
                 'seven', 'eight', 'nine', 'oh', 'zero'};

    fp = fopen(infile, 'r');
    if fp == -1
        error('cHMM:models_1mixsilMat:cannotOpen', 'Cannot open input file: %s', infile);
    end
    closeIn = onCleanup(@() fclose(fp)); %#ok<NASGU>

    % --- Body shared by all word models: the bytes after '~h "hmmdef"'
    lNow = '';
    while ischar(lNow) && ~strcmpi(lNow, '~h "hmmdef"')
        lNow = fgetl(fp); % returns -1 (not char) at end of file
    end
    if ~ischar(lNow)
        error('cHMM:models_1mixsilMat:markerNotFound', ...
            'Line ''~h "hmmdef"'' not found in %s', infile);
    end
    protoBody = fread(fp, Inf, 'uint8=>char')'; % byte-exact remainder of the file

    % --- Bodies of states 2..4 for the silence model. Each search starts
    %     from the top of the file, as the earlier port did by re-opening.
    silStates = cell(1, 3);
    for kk = 2:4
        frewind(fp);
        startMarker = ['<STATE> ' num2str(kk)];
        endMarker   = ['<STATE> ' num2str(kk + 1)];

        lNow = '';
        while ischar(lNow) && ~strcmpi(lNow, startMarker)
            lNow = fgetl(fp);
        end
        if ~ischar(lNow)
            error('cHMM:models_1mixsilMat:markerNotFound', ...
                'Line ''%s'' not found in %s', startMarker, infile);
        end

        body = {};
        tline = fgetl(fp);
        while ischar(tline) && ~strcmpi(tline, endMarker)
            body{end+1} = tline; %#ok<AGROW>
            tline = fgetl(fp);
        end
        if ~ischar(tline)
            error('cHMM:models_1mixsilMat:markerNotFound', ...
                'Line ''%s'' not found in %s', endMarker, infile);
        end
        silStates{kk - 1} = body;
    end

    % --- Write the output only after the input parsed cleanly
    ofp = fopen(outfile, 'w');
    if ofp == -1
        error('cHMM:models_1mixsilMat:cannotOpen', 'Cannot open output file: %s', outfile);
    end
    closeOut = onCleanup(@() fclose(ofp)); %#ok<NASGU>

    for ii = 1:numel(wordNames)
        fprintf(ofp, '~h "%s"\n', wordNames{ii});
        fwrite(ofp, protoBody);
    end

    %%% THIS IS THE SILENCE MODEL @ THE END
    fprintf(ofp, '~h "sil"\n');
    fprintf(ofp, '<BEGINHMM>\n<NUMSTATES> 5\n');
    for kk = 2:4
        fprintf(ofp, '<STATE> %d\n<NUMMIXES> 1\n', kk);
        body = silStates{kk - 1};
        for k = 1:numel(body)
            fprintf(ofp, '%s\n', body{k});
        end
    end

    fprintf(ofp, '<TRANSP> 5\n0.000000e+00 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00\n');
    fprintf(ofp, '0.000000e+00 6.000000e-01 4.000000e-01 0.000000e+00 0.000000e+00\n');
    fprintf(ofp, '0.000000e+00 0.000000e+00 6.000000e-01 4.000000e-01 0.000000e+00\n');
    fprintf(ofp, '0.000000e+00 0.000000e+00 0.000000e+00 7.000000e-01 3.000000e-01\n');
    fprintf(ofp, '0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00\n<ENDHMM>\n');
end %---- of models_1mixsilMat
475 | |
476 %% ********************************************************** | |
477 % makeProtoHmm (Make a prototype HMM) | |
478 %************************************************************ | |
function makeProtoHmm(filename,featureType,numFeatures,numStates)
    % MAKEPROTOHMM Write a prototype HMM definition file.
    %
    %   makeProtoHmm(filename, featureType, numFeatures, numStates)
    %
    %   filename     file to create (overwritten if present)
    %   featureType  parameter kind written in the header, e.g. USER_D_A
    %   numFeatures  feature vector length (including differences if used)
    %   numStates    total number of states; train passes 18
    %
    %   States 2 .. numStates-1 each get one mixture with zero means
    %   and unit variances. The transition matrix is left-to-right:
    %   state 1 always moves to state 2, inner states stay with 0.6 and
    %   advance with 0.4, and the last emitting state stays with 0.9
    %   and exits with 0.1.
    %
    %   The transition matrix is sized by numStates. It used to be
    %   sized by numFeatures, which failed with an index error whenever
    %   numFeatures was smaller than numStates.
    %
    %   Raises cHMM:makeProtoHmm:cannotOpen if filename cannot be opened.

    ofp = fopen(filename, 'w');
    if ofp == -1
        error('cHMM:makeProtoHmm:cannotOpen', 'Cannot open output file: %s', filename);
    end
    closeOut = onCleanup(@() fclose(ofp)); %#ok<NASGU>

    fprintf(ofp, '<BeginHMM>\n');
    fprintf(ofp, ' <NumStates> %d <VecSize> %d <%s> <nullD> <diagC>\n', numStates, numFeatures, featureType);
    fprintf(ofp, ' <StreamInfo> 1 %d\n', numFeatures);

    for state = 2:numStates-1
        fprintf(ofp, ' <State> %d <NumMixes> 1\n', state);
        fprintf(ofp, ' <Stream> 1\n');
        fprintf(ofp, ' <Mixture> 1 1.0\n');
        fprintf(ofp, ' <Mean> %d\n', numFeatures);
        fprintf(ofp, ' ');
        fprintf(ofp, '%1.1f ', zeros(1, numFeatures));
        fprintf(ofp, '\n');
        fprintf(ofp, ' <Variance> %d\n', numFeatures);
        fprintf(ofp, ' ');
        fprintf(ofp, '%1.1f ', ones(1, numFeatures));
        fprintf(ofp, '\n');
    end

    fprintf(ofp, '<TransP> %d\n', numStates);
    transp = zeros(numStates); % one row and column per state
    transp(1, 2) = 1;
    for state = 2:numStates-2
        transp(state, state)     = 0.6;
        transp(state, state + 1) = 0.4;
    end
    transp(numStates-1, numStates-1) = 0.9;
    transp(numStates-1, numStates)   = 0.1;
    for state = 1:numStates
        fprintf(ofp, '%1.3e ', transp(state, 1:numStates));
        fprintf(ofp, '\n');
    end
    fprintf(ofp, '<EndHMM>\n');
end %---- of MAKEPROTOHMM
519 | |
520 %% ********************************************************** | |
521 % createMLF - master label file - belongs with hmm class | |
522 %************************************************************ | |
function createMLF(mapFileFolder)
    % CREATEMLF Write a master label file for every *.map file in a folder.
    %
    %   createMLF(mapFileFolder) creates 'labels.mlf' in mapFileFolder.
    %   Each *.map file contributes one entry:
    %
    %       "*/<name without 'map'>lab"
    %       sil
    %       <one word per label character>
    %       sil
    %       .
    %
    %   The label characters are taken from the file name, from the
    %   fifth character up to and including the sixth character from
    %   the end. 'O' maps to oh, 'Z' to zero and '1'..'9' to one..nine.
    %
    %   File names are written as data, never used as a format string.
    %   A label character outside the table raises
    %   cHMM:createMLF:badLabel instead of silently repeating the
    %   previous word.

    mlfFileName = 'labels';
    d = dir(fullfile(mapFileFolder, '*.map'));

    mlfPath = [fullfile(mapFileFolder, mlfFileName) '.mlf'];
    fid = fopen(mlfPath, 'w');
    if fid == -1
        error('cHMM:createMLF:cannotOpen', 'Cannot open output file: %s', mlfPath);
    end
    closeOut = onCleanup(@() fclose(fid)); %#ok<NASGU>

    fprintf(fid, '#!MLF!#\n');

    for I = 1:size(d, 1)
        mapName = d(I).name;
        fprintf(fid, '"*/%slab"\n', mapName(1:end-3));
        fprintf(fid, 'sil\n');

        labels = mapName(5:end-5);
        for J = 1:length(labels)
            switch labels(J)
                case 'O'
                    S = 'oh';
                case '1'
                    S = 'one';
                case '2'
                    S = 'two';
                case '3'
                    S = 'three';
                case '4'
                    S = 'four';
                case '5'
                    S = 'five';
                case '6'
                    S = 'six';
                case '7'
                    S = 'seven';
                case '8'
                    S = 'eight';
                case '9'
                    S = 'nine';
                case 'Z'
                    S = 'zero';
                otherwise
                    error('cHMM:createMLF:badLabel', ...
                        'Unexpected label character ''%s'' in file name %s', ...
                        labels(J), mapName);
            end
            fprintf(fid, '%s\n', S);
        end
        fprintf(fid, 'sil\n.\n');
    end
end % ------ OF CREATEMLF
565 | |
566 %% ********************************************************** | |
567 % createSCP - Just a file list | |
568 %************************************************************ | |
function createSCP(mapFileFolder)
    % CREATESCP Write the list of *.map files found in a folder.
    %
    %   createSCP(mapFileFolder) creates 'list.scp' in mapFileFolder
    %   with one *.map file name per line (names only, no folder).
    %
    %   File names are written as data, never used as a format string,
    %   so names containing '%' or '\' are listed correctly.
    %
    %   Raises cHMM:createSCP:cannotOpen if the list cannot be created.

    scpFileName = 'list';
    d = dir(fullfile(mapFileFolder, '*.map'));

    scpPath = [fullfile(mapFileFolder, scpFileName) '.scp'];
    fid = fopen(scpPath, 'w');
    if fid == -1
        error('cHMM:createSCP:cannotOpen', 'Cannot open output file: %s', scpPath);
    end
    closeOut = onCleanup(@() fclose(fid)); %#ok<NASGU>

    for I = 1:size(d, 1)
        fprintf(fid, '%smap\n', d(I).name(1:end-3));
    end
end% ------ OF createSCP
580 | |
581 %% ********************************************************** | |
582 % score - Simple non-dynamic scoring | |
583 %************************************************************ | |
function score(testMLFpath)
    % SCORE Simple non-dynamic scoring of recognition results.
    %
    %   score(testMLFpath) processes every file in testMLFpath whose
    %   name ends in '.mlf'. For each file it compares, position by
    %   position, the three digits encoded in each '.rec' entry name
    %   with the first three recognised words, and reports
    %
    %       %Sent  percentage of entries with all three positions right
    %       %Word  percentage of individual positions right
    %
    %   The table is shown in the command window and an empty marker
    %   file 'score__<%Word of first file>__.txt' is created in
    %   testMLFpath.
    %
    %   Changes relative to the earlier version:
    %     * per-file work lists are reset for every MLF file, so lines
    %       of a longer, earlier file can no longer leak into a later one;
    %     * a folder without '.mlf' files gives a warning and returns;
    %     * words that appear before the first '.rec' entry are ignored;
    %     * an entry with fewer than three recognised words has its
    %       missing positions counted as wrong.

    % First get the test result files and store the file names
    listing = dir(testMLFpath);
    testMLFfiles = {};
    for nn = 1:numel(listing)
        entryName = listing(nn).name;
        if length(entryName) > 3 && strcmp(entryName(end-3:end), '.mlf')
            testMLFfiles{end+1} = entryName; %#ok<AGROW>
        end
    end

    if isempty(testMLFfiles)
        warning('cHMM:score:noMLF', 'No .mlf files found in %s', testMLFpath);
        return
    end

    numFiles = numel(testMLFfiles);
    pcSent = zeros(1, numFiles);
    pcWord = zeros(1, numFiles);
    replaceDownPattern = '[0-9\-\.\s]';

    for fileIdx = 1:numFiles
        % Extract file name and recognised words as strings -> store in recResults.
        % This code block is based on Matt Robertson's original.
        mlfPath = fullfile(testMLFpath, testMLFfiles{fileIdx});
        fmlf = fopen(mlfPath, 'r');
        if fmlf == -1
            error('cHMM:score:cannotOpen', 'Cannot open result file: %s', mlfPath);
        end
        MLF = textscan(fmlf, '%s', 'delimiter', '\n');
        fclose(fmlf);
        rawLines = MLF{1};

        % Keep entry names and the MLF header as they are; reduce every
        % other line to its word by stripping digits, '-', '.' and
        % white space. Drop sp, sil and empty lines.
        tokens = {};
        for I = 1:size(rawLines, 1)
            thisLine = rawLines{I};
            isEntryName = ~isempty(strfind(thisLine, '.rec'));
            isHeader    = ~isempty(strfind(thisLine, 'MLF'));
            if ~isEntryName && ~isHeader
                thisLine = regexprep(thisLine, replaceDownPattern, '');
            end
            if ~any(strcmpi(thisLine, {'sp', 'sil', ''}))
                tokens{end+1} = thisLine; %#ok<AGROW>
            end
        end

        % Group the words under the entry name that precedes them
        recResults = struct('fileName', {}, 'utterance', {});
        for I = 1:length(tokens)
            tok = tokens{I};
            if ~isempty(strfind(tok, 'MLF'))
                continue
            end
            if ~isempty(strfind(tok, '.rec'))
                % found FileName
                recResults(end+1).fileName = tok; %#ok<AGROW>
                recResults(end).utterance = {};
            elseif ~isempty(recResults)
                recResults(end).utterance{end+1} = tok;
            end
        end

        % Make separate 2D arrays of input and output digits. NaN marks
        % a position with no recognised word; NaN never equals a digit.
        numEntries = numel(recResults);
        inputDigits = zeros(numEntries, 3);
        outputDigits = nan(numEntries, 3);
        for nn = 1:numEntries
            ipStr = recResults(nn).fileName(end-8:end-6);
            heard = recResults(nn).utterance;
            for kk = 1:3
                inputDigits(nn, kk) = cHMM.htk_str2num(ipStr(kk));
                if kk <= numel(heard)
                    outputDigits(nn, kk) = cHMM.htk_str2num(heard{kk});
                end
            end
        end

        % Now do the scoring (simple when the data is in the right format)
        scoreArray = (inputDigits == outputDigits);
        pcSent(fileIdx) = 100*sum(all(scoreArray, 2)) / numEntries;
        pcWord(fileIdx) = 100*sum(scoreArray(:)) / numel(scoreArray);
    end

    %will output table to console if used on *nix
    xlsdataFull = [{'-- File Name --', '%Sent', '%Word'}; testMLFfiles' num2cell(pcSent') num2cell(pcWord')];
    disp(xlsdataFull)

    % Empty marker file whose name carries the word score of the first file
    markerPath = fullfile(testMLFpath, ['score__' num2str(pcWord(1), '%0.1f') '__.txt']);
    fid = fopen(markerPath, 'w');
    if fid == -1
        warning('cHMM:score:cannotOpen', 'Cannot create score marker file: %s', markerPath);
    else
        fclose(fid);
    end
end% ------ OF SCORE
670 | |
671 %% ********************************************************** | |
672 % scoreWhole folder - make my life easier @ command line | |
673 %************************************************************ | |
function scoreWholeFolder(folderToScore, searchString)
    % SCOREWHOLEFOLDER Run cHMM.score on every matching sub-entry.
    %   scoreWholeFolder(folderToScore) scores every entry of
    %   folderToScore whose name matches '*featR*'.
    %   scoreWholeFolder(folderToScore, searchString) uses the given
    %   wildcard pattern instead.
    if ~exist('searchString', 'var')
        searchString = '*featR*';
    end

    % dir returns a column struct array; transpose it so the for loop
    % visits one entry per iteration (and zero times when empty).
    matches = dir(fullfile(folderToScore, searchString));
    for entry = matches'
        target = fullfile(folderToScore, entry.name);
        disp('');
        disp(target);
        cHMM.score(target);
    end
end
687 | |
688 %************************************************************************** | |
689 % htk_str2num - Convert strings to integers | |
690 %************************************************************************** | |
function opNum = htk_str2num(ipString)
    % HTK_STR2NUM Convert a digit word or a digit character to a number.
    %   Accepted spellings (case-sensitive) are listed in the table
    %   below. Anything else fails an assertion; note that 'seven'/'7'
    %   and 'zero'/'Z' are not in the table.
    spellings = {'oh', 'O', 'one', '1', 'two', '2', 'three', '3', ...
                 'four', '4', 'five', '5', 'six', '6', ...
                 'eight', '8', 'nine', '9'};
    numbers   = [0, 0, 1, 1, 2, 2, 3, 3, ...
                 4, 4, 5, 5, 6, 6, ...
                 8, 8, 9, 9];

    hit = find(strcmp(ipString, spellings), 1);
    if isempty(hit)
        assert(0) % throw error
    end
    opNum = numbers(hit);
end % ------ OF htk_str2num
715 | |
716 end % ------ OF STATIC METHODS | |
717 | |
718 end % ------ OF CLASS |