annotate userProgramsASRforDummies/cHMM.m @ 38:c2204b18f4a2 tip

End nov big change
author Ray Meddis <rmeddis@essex.ac.uk>
date Mon, 28 Nov 2011 13:34:28 +0000
parents
children
rev   line source
rmeddis@38 1 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
rmeddis@38 2 % This program is free software; you can redistribute it and/or modify
rmeddis@38 3 % it under the terms of the GNU General Public License as published by
rmeddis@38 4 % the Free Software Foundation; either version 2 of the License, or
rmeddis@38 5 % (at your option) any later version.
rmeddis@38 6 %
rmeddis@38 7 % This program is distributed in the hope that it will be useful,
rmeddis@38 8 % but WITHOUT ANY WARRANTY; without even the implied warranty of
rmeddis@38 9 % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
rmeddis@38 10 % GNU General Public License for more details.
rmeddis@38 11 %
rmeddis@38 12 % You can obtain a copy of the GNU General Public License from
rmeddis@38 13 % http://www.gnu.org/copyleft/gpl.html or by writing to
rmeddis@38 14 % Free Software Foundation, Inc.,675 Mass Ave, Cambridge, MA 02139, USA.
rmeddis@38 15 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
rmeddis@38 16
rmeddis@38 17 classdef cHMM
rmeddis@38 18 %HMMCLASS Handles all of the HTK related gubbins
rmeddis@38 19 % Please see the documentation located in a separate file for further
rmeddis@38 20 % information.
rmeddis@38 21
rmeddis@38 22 %% *********************************************************
rmeddis@38 23 % properties _ _
rmeddis@38 24 % | | (_)
rmeddis@38 25 % _ __ _ __ ___ _ __ ___ _ __| |_ _ ___ ___
rmeddis@38 26 % | '_ \| '__/ _ \| '_ \ / _ \ '__| __| |/ _ \/ __|
rmeddis@38 27 % | |_) | | | (_) | |_) | __/ | | |_| | __/\__ \
rmeddis@38 28 % | .__/|_| \___/| .__/ \___|_| \__|_|\___||___/
rmeddis@38 29 % | | | |
rmeddis@38 30 % |_| |_|
rmeddis@38 31 %************************************************************
rmeddis@38 32
rmeddis@38 33 %% **********************************************************
rmeddis@38 34 % Public properties - can be set by user
rmeddis@38 35 %************************************************************
properties(Access = public)
    % --- Model location and feature description --------------------
    hmmFolder                   % root folder under which the hmmN model sub-folders live
    paramType = 'USER_D_A';     % DELTAS and ACCELERATIONS
    numCoeff  = 27;             % 9*3 THIS IS FOR PROBABILITY MODEL (not high spont+low spont which would be 18*3=54)

    % --- Default resource locations, all relative to <pwd>/def -----
    % NOTE(review): these defaults call pwd inside the property block;
    % confirm that the working directory at evaluation time is the
    % intended one.
    HERestDataPath    = fullfile(pwd, 'def', 'HERest_digit');
    binPath           = fullfile(pwd, 'def', 'bin');
    configFile        = fullfile(pwd, 'def', 'config_STANDARD');
    trainWordListFile = fullfile(pwd, 'def', 'Grammar_digit', 'words3');
    testWordListFile  = fullfile(pwd, 'def', 'Grammar_digit', 'wordsNoSevenZero');
    wordNetFile       = fullfile(pwd, 'def', 'Grammar_digit', 'wdnetNoSP.slf');
    dictFile          = fullfile(pwd, 'def', 'Grammar_digit', 'noSevenZeroDict');
end
rmeddis@38 49
rmeddis@38 50 %% **********************************************************
rmeddis@38 51 % Dependent - never set by user. Only calculated when needed
rmeddis@38 52 %************************************************************
properties(Dependent = true)
    % Path of the prototype HMM file; computed on demand by get.protoFile.
    % Previously a fixed file: fullfile(pwd, 'def', 'proto_RobANonly_9') (probability only)
    protoFile
end
rmeddis@38 56
rmeddis@38 57 %% *********************************************************
rmeddis@38 58 % methods _ _ _
rmeddis@38 59 % | | | | | |
rmeddis@38 60 % _ __ ___ ___| |_| |__ ___ __| |___
rmeddis@38 61 %| '_ ` _ \ / _ \ __| '_ \ / _ \ / _` / __|
rmeddis@38 62 %| | | | | | __/ |_| | | | (_) | (_| \__ \
rmeddis@38 63 %|_| |_| |_|\___|\__|_| |_|\___/ \__,_|___/
rmeddis@38 64 %************************************************************
rmeddis@38 65
rmeddis@38 66 methods
rmeddis@38 67 %% **********************************************************
rmeddis@38 68 % Constructor
rmeddis@38 69 %************************************************************
function obj = cHMM(hmmFolder)
    % CHMM Construct a cHMM object.
    %   obj = cHMM()           leaves hmmFolder at its default value.
    %   obj = cHMM(hmmFolder)  stores hmmFolder in obj.hmmFolder.
    if nargin >= 1
        obj.hmmFolder = hmmFolder;
    end
end % ------ OF CONSTRUCTOR
rmeddis@38 75
rmeddis@38 76 %% **********************************************************
rmeddis@38 77 % genProto - generate task specific prototype
rmeddis@38 78 %************************************************************
function genProto(obj)
    % GENPROTO Build the initial multi-model file from the seed definition.
    %   Passes <hmmFolder>/hmm0/hmmdef to models_1mixsilMat, which writes
    %   <hmmFolder>/hmm0/models (one copy of the seed per word model plus
    %   a silence model).
    seedFolder = fullfile(obj.hmmFolder, 'hmm0');
    seedDefinition = fullfile(seedFolder, 'hmmdef');
    seedModels = fullfile(seedFolder, 'models');
    obj.models_1mixsilMat(seedDefinition, seedModels);
end % ------ OF GENPROTO
rmeddis@38 84
rmeddis@38 85 %% **********************************************************
rmeddis@38 86 % istrained
rmeddis@38 87 %************************************************************
function boolans = istrained(obj)
    % ISTRAINED Report whether the final model file exists.
    %   Returns the number of directory entries matching
    %   <hmmFolder>/hmm36/models (0 when absent), which callers can use
    %   as a truth value.
    finalModelFile = fullfile(obj.hmmFolder, 'hmm36', 'models');
    matches = dir(finalModelFile);
    boolans = numel(matches);
end
rmeddis@38 91
rmeddis@38 92 %% **********************************************************
rmeddis@38 93 % Train
rmeddis@38 94 %************************************************************
function train(obj, trainFeatureFolder)
    % TRAIN Run the complete HTK training recipe (hmm0 ... hmm36).
    %   train(obj, trainFeatureFolder) expects trainFeatureFolder to hold
    %   'list.scp' (one feature file name per line) and 'labels.mlf'.
    %   Results are written below obj.hmmFolder in sub-folders hmm0..hmm36.
    %
    %   Sequence of operations:
    %     1. create the folder tree and an auto-generated prototype HMM
    %     2. HCompV -> hmm0, then build the macros and models files
    %     3. three HERest passes (hmm1..hmm3)
    %     4. copy hmm3 to hmm4 and append an "sp" model
    %     5. five HHEd edits (sil1, mix2, mix3, mix5, mix7), each followed
    %        by a run of HERest passes, ending at hmm36
    %
    %   Every path placed on a command line is double-quoted (as the
    %   HCompV call always was) so that folders containing spaces work.
    %   A non-zero exit status from an HTK tool raises a warning; the
    %   recipe carries on, as it did before.

    quoted   = @(p) ['"' p '"'];
    hmmDir   = obj.hmmFolder;
    stageDir = @(k) fullfile(hmmDir, ['hmm' num2str(k)]);

    cfg       = obj.configFile;
    wordList  = obj.trainWordListFile;  % the sp-specific word list was abandoned; one list is used throughout
    protoPath = obj.protoFile;          % does not exist on disk until makeProtoHmm runs below
    scpFile   = fullfile(trainFeatureFolder, 'list.scp');
    labels    = fullfile(trainFeatureFolder, 'labels.mlf');
    listFile  = fullfile(hmmDir, 'tmp.list');

    % Arguments shared by every HERest pass that produces stage k from stage k-1.
    herestTail = @(k) [' -S ' quoted(listFile) ...
        ' -H ' quoted(fullfile(stageDir(k-1), 'macros')) ...
        ' -H ' quoted(fullfile(stageDir(k-1), 'models')) ...
        ' -M ' quoted(stageDir(k)) ' ' quoted(wordList)];

    % --- Folder tree and prototype ---------------------------------
    mkdir(hmmDir)
    for k = 0:36
        mkdir(stageDir(k));
    end
    obj.makeProtoHmm(protoPath, obj.paramType, obj.numCoeff, 18);

    % --- Expand list.scp into a list of full paths ------------------
    disp(scpFile)
    fid = fopen(scpFile, 'r');
    if fid < 0
        error('cHMM:train:cannotOpenList', 'Cannot open training list "%s".', scpFile);
    end
    entries = textscan(fid, '%s', 'Delimiter', '\n');
    fclose(fid);

    fid = fopen(listFile, 'w');
    if fid < 0
        error('cHMM:train:cannotWriteList', 'Cannot create "%s".', listFile);
    end
    for k = 1:numel(entries{1})
        fprintf(fid, '%s\n', fullfile(trainFeatureFolder, entries{1}{k}));
    end
    fclose(fid);

    % --- Seed model --------------------------------------------------
    % HCompV just gets the vfloor stuff out so we can begin approximating
    cmd = ['"HCompV" -T 2 -D -C ' quoted(cfg) ' -o hmmdef -f 0.01 -m -S ' quoted(listFile) ...
        ' -M ' quoted(stageDir(0)) ' ' quoted(protoPath)];
    if system(cmd) ~= 0
        warning('cHMM:train:toolFailed', 'HCompV returned a non-zero exit status.');
    end

    obj.macroMat(obj.numCoeff, obj.paramType, ...
        fullfile(stageDir(0), 'vFloors'), fullfile(stageDir(0), 'macros'));

    % MAKE THE INITIAL MODEL PROTOTYPE
    genProto(obj);

    disp('Seed HMM successfully Produced.....');

    % --- First three re-estimation passes (with pruning thresholds) --
    for k = 1:3
        disp(k)
        cmd = ['HERest -D -C ' quoted(cfg) ' -I ' quoted(labels) ...
            ' -t 250.0 150.0 1000.0' herestTail(k)];
        if system(cmd) ~= 0
            warning('cHMM:train:toolFailed', 'HERest returned a non-zero exit status (hmm%d).', k);
        end
    end

    disp('3 iterations complete');

    % --- hmm4: copy of hmm3 plus an appended "sp" model --------------
    rmdir(stageDir(4), 's')
    copyfile(stageDir(3), stageDir(4))

    % spmodel_genMat takes state 3 from the silence model and appends
    % it to the models file as state 2 of a short pause model.
    obj.spmodel_genMat(fullfile(stageDir(3), 'models'), fullfile(stageDir(4), 'models'));

    % --- HHEd edit followed by a run of HERest passes, five times ----
    hedNames   = {'sil1.hed', 'mix2_16.hed', 'mix3_16.hed', 'mix5_16.hed', 'mix7_16.hed'};
    editFrom   = [4 8 12 20 28];    % stage read by HHEd (output goes to the next stage)
    lastPass   = [8 12 20 28 36];   % final HERest stage of each run
    editNotice = {'SP model fixed', '2 gaussians per mixture', '3 gaussians per mixture', ...
                  '5 gaussians per mixture', '7 gaussians per mixture'};
    passNotice = {'6 iterations complete', '9 iterations completed', '16 iterations completed', ...
                  '23 iterations completed', '30 iterations completed'};

    for s = 1:numel(hedNames)
        src = stageDir(editFrom(s));
        dst = stageDir(editFrom(s) + 1);
        cmd = ['HHEd -T 2 -H ' quoted(fullfile(src, 'macros')) ...
            ' -H ' quoted(fullfile(src, 'models')) ...
            ' -M ' quoted(dst) ...
            ' ' quoted(fullfile(obj.HERestDataPath, hedNames{s})) ...
            ' ' quoted(wordList)];
        if system(cmd) ~= 0
            warning('cHMM:train:toolFailed', 'HHEd returned a non-zero exit status (%s).', hedNames{s});
        end
        disp(editNotice{s})

        for k = (editFrom(s) + 2):lastPass(s)
            disp(k)
            cmd = ['HERest -C ' quoted(cfg) ' -I ' quoted(labels) herestTail(k)];
            if system(cmd) ~= 0
                warning('cHMM:train:toolFailed', 'HERest returned a non-zero exit status (hmm%d).', k);
            end
        end
        disp(passNotice{s})
    end

end % ------ OF TRAIN
rmeddis@38 250
rmeddis@38 251 %% **********************************************************
rmeddis@38 252 % Test
rmeddis@38 253 %************************************************************
function test(obj, testFeatureFolder)
    % TEST Recognise a folder of features with the hmm36 models via HVite.
    %   test(obj, testFeatureFolder) reads 'list.scp' from
    %   testFeatureFolder, writes the expanded list to 'tmp.list' in the
    %   same folder and runs HVite; the recogniser output goes to
    %   <testFeatureFolder>/result.mlf.
    %
    %   All paths on the HVite command line are double-quoted so that
    %   folders containing spaces work. A non-zero HVite exit status
    %   raises a warning.

    quoted = @(p) ['"' p '"'];
    flags  = '-p 0.0 -s 0.0';

    scpFile    = fullfile(testFeatureFolder, 'list.scp');
    listFile   = fullfile(testFeatureFolder, 'tmp.list');
    resultFile = fullfile(testFeatureFolder, 'result.mlf');

    disp ('Now testing with 7 mixture HMMs')
    modelsFile = fullfile(obj.hmmFolder, 'hmm36', 'models');
    macrosFile = fullfile(obj.hmmFolder, 'hmm36', 'macros');

    % Expand the relative names in list.scp into full paths.
    fin = fopen(scpFile, 'r');
    if fin < 0
        error('cHMM:test:cannotOpenList', 'Cannot open test list "%s".', scpFile);
    end
    entries = textscan(fin, '%s', 'Delimiter', '\n');
    fclose(fin);

    fout = fopen(listFile, 'w');
    if fout < 0
        error('cHMM:test:cannotWriteList', 'Cannot create "%s".', listFile);
    end
    for k = 1:numel(entries{1})
        fprintf(fout, '%s\n', fullfile(testFeatureFolder, entries{1}{k}));
    end
    fclose(fout);

    cmd = ['HVite -D -H ' quoted(macrosFile) ' -H ' quoted(modelsFile) ...
        ' -S ' quoted(listFile) ' -C ' quoted(obj.configFile) ...
        ' -w ' quoted(obj.wordNetFile) ' -l ''*'' -i ' quoted(resultFile) ...
        ' ' flags ' ' quoted(obj.dictFile) ' ' quoted(obj.testWordListFile)];
    if system(cmd) ~= 0
        warning('cHMM:test:toolFailed', 'HVite returned a non-zero exit status.');
    end
end % ------ OF TEST
rmeddis@38 288
rmeddis@38 289 %% **********************************************************
rmeddis@38 290 % Get methods determining feature vector related gubbins
rmeddis@38 291 %************************************************************
function value = get.protoFile(obj)
    % Dependent-property getter: the prototype file always lives in
    % hmmFolder under the fixed name 'proto_AutoGen'.
    protoName = 'proto_AutoGen';
    value = fullfile(obj.hmmFolder, protoName);
end
rmeddis@38 295
rmeddis@38 296 end % ------ OF METHODS
rmeddis@38 297
rmeddis@38 298 %% *********************************************************
rmeddis@38 299 % _ _ _ _ _ _
rmeddis@38 300 % | | | | (_) | | | | | |
rmeddis@38 301 % ___| |_ __ _| |_ _ ___ _ __ ___ ___| |_| |__ ___ __| |___
rmeddis@38 302 % / __| __/ _` | __| |/ __| | '_ ` _ \ / _ \ __| '_ \ / _ \ / _` / __|
rmeddis@38 303 % \__ \ || (_| | |_| | (__ | | | | | | __/ |_| | | | (_) | (_| \__ \
rmeddis@38 304 % |___/\__\__,_|\__|_|\___| |_| |_| |_|\___|\__|_| |_|\___/ \__,_|___/
rmeddis@38 305 %************************************************************
rmeddis@38 306
rmeddis@38 307 methods(Static)
rmeddis@38 308 %% **********************************************************
rmeddis@38 309 % macroMat (matlab port of macro C code)
rmeddis@38 310 % ported by NC - Nov 2011
rmeddis@38 311 %************************************************************
function macroMat(VECSIZE, PARAMETER_TYPE, infile, outfile)
    % MACROMAT Build an HTK macros file from a vFloors file.
    %   macroMat(VECSIZE, PARAMETER_TYPE, infile, outfile) writes a global
    %   options header (~o, <STREAMINFO>, <VECSIZE>, <NULLD>,
    %   <PARAMETER_TYPE>) to outfile and then appends the content of
    %   infile (the vFloors file produced by HCompV) unchanged.
    %
    %   VECSIZE         feature vector length (integer)
    %   PARAMETER_TYPE  parameter kind string, e.g. 'USER_D_A'
    %   infile          path of the vFloors file to copy
    %   outfile         path of the macros file to create (overwritten)
    %
    %   Raises cHMM:macroMat:cannotOpenInput / cannotOpenOutput when a
    %   file cannot be opened.

    fp = fopen(infile, 'r');
    if fp < 0
        error('cHMM:macroMat:cannotOpenInput', 'Cannot open vFloors file "%s".', infile);
    end
    ofp = fopen(outfile, 'w');
    if ofp < 0
        fclose(fp);
        error('cHMM:macroMat:cannotOpenOutput', 'Cannot create macros file "%s".', outfile);
    end

    fprintf(ofp, '~o\n<STREAMINFO> 1 %d\n', VECSIZE);
    fprintf(ofp, '<VECSIZE> %d\n', VECSIZE);
    fprintf(ofp, '<NULLD>\n<%s>\n', PARAMETER_TYPE);

    % Copy line by line. The text is passed as DATA ('%s'), never as the
    % format string, so '%' and '\' in the input survive unchanged.
    tline = fgets(fp);
    while ischar(tline)
        fprintf(ofp, '%s', tline);
        tline = fgets(fp);
    end

    fclose(fp);
    fclose(ofp);
end %---- of MACROMAT
rmeddis@38 332
rmeddis@38 333 %% **********************************************************
rmeddis@38 334 % spmodel_genMat (matlab port of spmodel_gen C code)
rmeddis@38 335 % ported by NC - Nov 2011
rmeddis@38 336 %************************************************************
function spmodel_genMat(infile, outfile)
    % SPMODEL_GENMAT Append a short-pause ("sp") model to a models file.
    %   spmodel_genMat(infile, outfile) locates the silence model
    %   (~h "sil") in infile, takes the lines of its <STATE> 3 (everything
    %   up to, but excluding, the following <STATE> 4 line) and appends to
    %   outfile a 3-state "sp" model whose <STATE> 2 consists of those
    %   lines, followed by a fixed 3x3 transition matrix.
    %
    %   Background (voxforge HTK tutorial, monophones step 7): the sp model
    %   needs its emitting state tied to the centre state of the silence
    %   model; copying the state here is the first half, the tying itself
    %   is done afterwards with HHEd.
    %
    %   The input is scanned in ONE pass: first to the silence model
    %   header, then on to its state 3. (Re-opening the file between the
    %   two searches would restart at the top and pick up state 3 of the
    %   first model in the file, and would leak a file handle.)
    %
    %   Raises cHMM:spmodel_genMat:cannotOpenInput / cannotOpenOutput when
    %   a file cannot be opened and cHMM:spmodel_genMat:stateNotFound when
    %   the expected markers are missing (instead of looping forever).

    fp = fopen(infile, 'r');
    if fp < 0
        error('cHMM:spmodel_genMat:cannotOpenInput', 'Cannot open model file "%s".', infile);
    end

    % Advance to the silence model, then to its third state.
    markers = {'~h "sil"', '<STATE> 3'};
    found = true;
    for m = 1:numel(markers)
        lNow = fgetl(fp);
        while ischar(lNow) && ~strcmpi(lNow, markers{m})
            lNow = fgetl(fp);
        end
        if ~ischar(lNow)    % fgetl returns -1 at end of file
            found = false;
            break
        end
    end

    % Collect the body of state 3 (up to the start of state 4).
    stateLines = {};
    if found
        tline = fgetl(fp);
        while ischar(tline) && ~strcmpi(tline, '<STATE> 4')
            stateLines{end+1} = tline; %#ok<AGROW>
            tline = fgetl(fp);
        end
        found = ischar(tline);
    end
    fclose(fp);

    if ~found
        error('cHMM:spmodel_genMat:stateNotFound', ...
            'Could not find state 3 of the "sil" model (terminated by <STATE> 4) in "%s".', infile);
    end

    ofp = fopen(outfile, 'a+'); % we append this time
    if ofp < 0
        error('cHMM:spmodel_genMat:cannotOpenOutput', 'Cannot open "%s" for appending.', outfile);
    end

    fprintf(ofp, '~h "sp"\n');
    fprintf(ofp, '<BEGINHMM>\n<NUMSTATES> 3\n<STATE> 2\n');
    for n = 1:numel(stateLines)
        fprintf(ofp, '%s\n', stateLines{n});
    end
    fprintf(ofp, '<TRANSP> 3\n0.000000e+00 1.000000e+00 0.000000e+00\n');
    fprintf(ofp, '0.000000e+00 5.000000e-01 5.000000e-01\n');
    fprintf(ofp, '0.000000e+00 0.000000e+00 0.000000e+00\n<ENDHMM>\n');

    fclose(ofp);
end %---- of spmodel_genMat
rmeddis@38 398
rmeddis@38 399 %% **********************************************************
rmeddis@38 400 % models_1mixsilMat (matlab port of models_1mixsil C code)
rmeddis@38 401 % ported by NC - Nov 2011
rmeddis@38 402 %************************************************************
function models_1mixsilMat(infile, outfile)
    % MODELS_1MIXSILMAT Turn a single hmmdef file into a multi-model file.
    %   models_1mixsilMat(infile, outfile) writes to outfile:
    %     * eleven word models ("one" ... "nine", "oh", "zero"), each being
    %       a ~h header followed by everything that comes after the
    %       ~h "hmmdef" line of infile, copied verbatim;
    %     * a five-state silence model ("sil") whose states 2..4 are the
    %       bodies of <STATE> 2..4 found in infile (searching from the top
    %       of the file), each prefixed with '<NUMMIXES> 1', followed by a
    %       fixed 5x5 transition matrix.
    %
    %   The input is read once. Copied text is always written as data
    %   ('%s'), never used as an fprintf format string.
    %
    %   Raises cHMM:models_1mixsilMat:cannotOpenInput / cannotOpenOutput
    %   when a file cannot be opened and
    %   cHMM:models_1mixsilMat:markerNotFound when an expected line is
    %   missing (instead of looping forever).

    digitNames = {'one', 'two', 'three', 'four', 'five', 'six', ...
                  'seven', 'eight', 'nine', 'oh', 'zero'};

    % --- Read the whole definition, keeping the line terminators -----
    fp = fopen(infile, 'r');
    if fp < 0
        error('cHMM:models_1mixsilMat:cannotOpenInput', 'Cannot open hmmdef file "%s".', infile);
    end
    rawLines = {};
    tline = fgets(fp);
    while ischar(tline)
        rawLines{end+1} = tline; %#ok<AGROW>
        tline = fgets(fp);
    end
    fclose(fp);
    bareLines = regexprep(rawLines, '[\r\n]+$', '');   % terminators removed, used for matching

    % --- Locate the pieces before touching the output file -----------
    headerIdx = find(strcmpi(bareLines, '~h "hmmdef"'), 1);
    if isempty(headerIdx)
        error('cHMM:models_1mixsilMat:markerNotFound', ...
            'No ~h "hmmdef" line found in "%s".', infile);
    end
    protoBody = [rawLines{headerIdx+1:end}];

    silStates = 2:4;
    silBodies = cell(size(silStates));
    for s = 1:numel(silStates)
        kk = silStates(s);
        startIdx = find(strcmpi(bareLines, ['<STATE> ' num2str(kk)]), 1);
        stopOffset = [];
        if ~isempty(startIdx)
            stopOffset = find(strcmpi(bareLines(startIdx+1:end), ['<STATE> ' num2str(kk+1)]), 1);
        end
        if isempty(stopOffset)
            error('cHMM:models_1mixsilMat:markerNotFound', ...
                'Could not find <STATE> %d followed by <STATE> %d in "%s".', kk, kk+1, infile);
        end
        silBodies{s} = bareLines(startIdx+1 : startIdx+stopOffset-1);
    end

    % --- Write the models file ----------------------------------------
    ofp = fopen(outfile, 'w');
    if ofp < 0
        error('cHMM:models_1mixsilMat:cannotOpenOutput', 'Cannot create models file "%s".', outfile);
    end

    for ii = 1:numel(digitNames)
        fprintf(ofp, '~h "%s"\n', digitNames{ii});
        fprintf(ofp, '%s', protoBody);
    end

    % THIS IS THE SILENCE MODEL @ THE END
    fprintf(ofp, '~h "sil"\n');
    fprintf(ofp, '<BEGINHMM>\n<NUMSTATES> 5\n');
    for s = 1:numel(silStates)
        fprintf(ofp, '<STATE> %d\n<NUMMIXES> 1\n', silStates(s));
        body = silBodies{s};
        for n = 1:numel(body)
            fprintf(ofp, '%s\n', body{n});
        end
    end

    fprintf(ofp, '<TRANSP> 5\n0.000000e+00 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00\n');
    fprintf(ofp, '0.000000e+00 6.000000e-01 4.000000e-01 0.000000e+00 0.000000e+00\n');
    fprintf(ofp, '0.000000e+00 0.000000e+00 6.000000e-01 4.000000e-01 0.000000e+00\n');
    fprintf(ofp, '0.000000e+00 0.000000e+00 0.000000e+00 7.000000e-01 3.000000e-01\n');
    fprintf(ofp, '0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00\n<ENDHMM>\n');

    fclose(ofp);
end %---- of models_1mixsilMat
rmeddis@38 475
rmeddis@38 476 %% **********************************************************
rmeddis@38 477 % makeProtoHmm (Make a prototype HMM)
rmeddis@38 478 %************************************************************
function makeProtoHmm(filename,featureType,numFeatures,numStates)
    % MAKEPROTOHMM Write a left-to-right prototype HMM definition.
    %   filename     path of the prototype file to create (overwritten)
    %   featureType  parameter kind written into the header, usually USER_D_A
    %   numFeatures  number of features (including differences if used)
    %   numStates    total number of states, usually 18
    %
    %   Each of the states 2..numStates-1 gets one mixture with zero mean
    %   and unit variance. The transition matrix is numStates x numStates:
    %   state 1 moves to state 2 with probability 1, states
    %   2..numStates-2 stay with 0.6 / advance with 0.4, and state
    %   numStates-1 stays with 0.9 / leaves with 0.1.
    %
    %   Raises cHMM:makeProtoHmm:cannotOpenOutput when the file cannot be
    %   created.

    ofp = fopen(filename,'w');
    if ofp < 0
        error('cHMM:makeProtoHmm:cannotOpenOutput', 'Cannot create prototype file "%s".', filename);
    end

    fprintf(ofp,'<BeginHMM>\n');
    fprintf(ofp,' <NumStates> %d <VecSize> %d <%s> <nullD> <diagC>\n',numStates,numFeatures,featureType);
    fprintf(ofp,' <StreamInfo> 1 %d\n',numFeatures);
    for state = 2:numStates-1
        fprintf(ofp,' <State> %d <NumMixes> 1\n',state);
        fprintf(ofp,' <Stream> 1\n');
        fprintf(ofp,' <Mixture> 1 1.0\n');
        fprintf(ofp,' <Mean> %d\n',numFeatures);
        fprintf(ofp,' ');
        fprintf(ofp,'%1.1f ',zeros(1,numFeatures));
        fprintf(ofp,'\n');
        fprintf(ofp,' <Variance> %d\n',numFeatures);
        fprintf(ofp,' ');
        fprintf(ofp,'%1.1f ',ones(1,numFeatures));
        fprintf(ofp,'\n');
    end

    fprintf(ofp,'<TransP> %d\n',numStates);
    % The matrix is sized by the number of STATES. Sizing it by the
    % number of features only worked when numFeatures >= numStates and
    % failed with an index error on the last row otherwise.
    transp = zeros(numStates);
    transp(1,2) = 1;
    for state = 2:numStates-2
        transp(state,state)   = 0.6;
        transp(state,state+1) = 0.4;
    end
    transp(numStates-1,numStates-1) = 0.9;
    transp(numStates-1,numStates)   = 0.1;
    for state = 1:numStates
        fprintf(ofp,'%1.3e ',transp(state,1:numStates));
        fprintf(ofp,'\n');
    end
    fprintf(ofp,'<EndHMM>\n');
    fclose(ofp);
end %---- of MAKEPROTOHMM
rmeddis@38 519
rmeddis@38 520 %% **********************************************************
rmeddis@38 521 % createMLF - master label file - belongs with hmm class
rmeddis@38 522 %************************************************************
function createMLF(mapFileFolder)
    % CREATEMLF Write the master label file 'labels.mlf' for a folder.
    %   For every '*.map' file in mapFileFolder one entry is written:
    %       "*/<name>.lab"
    %       sil
    %       <one word per label character>
    %       sil
    %       .
    %   The label characters are taken from the file name, positions 5 to
    %   end-5 (e.g. 'XXX_123A.map' -> '123'); 'O' maps to "oh", 'Z' to
    %   "zero" and '1'..'9' to "one".."nine".
    %
    %   File names are written as data, never used as an fprintf format
    %   string, so names containing '%' or '\' are reproduced correctly.
    %
    %   Raises cHMM:createMLF:cannotOpenOutput when labels.mlf cannot be
    %   created and cHMM:createMLF:unknownLabel when a label character has
    %   no word (previously the word of the preceding character was
    %   silently repeated).

    labelChars = 'O123456789Z';
    labelWords = {'oh', 'one', 'two', 'three', 'four', 'five', ...
                  'six', 'seven', 'eight', 'nine', 'zero'};

    mlfFileName = 'labels';
    d = dir(fullfile(mapFileFolder, '*.map'));
    mlfPath = [fullfile(mapFileFolder, mlfFileName) '.mlf'];
    fid = fopen(mlfPath, 'w');
    if fid < 0
        error('cHMM:createMLF:cannotOpenOutput', 'Cannot create "%s".', mlfPath);
    end
    fprintf(fid, '#!MLF!#\n');

    for I = 1:numel(d)
        fprintf(fid, '"*/%slab"\n', d(I).name(1:end-3));
        fprintf(fid, 'sil\n');
        labels = d(I).name(5:end-5);
        for J = 1:length(labels)
            wordIdx = find(labelChars == labels(J), 1);
            if isempty(wordIdx)
                fclose(fid);
                error('cHMM:createMLF:unknownLabel', ...
                    'File "%s": label character "%s" has no word.', d(I).name, labels(J));
            end
            fprintf(fid, '%s\n', labelWords{wordIdx});
        end
        fprintf(fid, 'sil\n.\n');
    end
    fclose(fid);
end % ------ OF CREATEMLF
rmeddis@38 565
rmeddis@38 566 %% **********************************************************
rmeddis@38 567 % createSCP - Just a file list
rmeddis@38 568 %************************************************************
function createSCP(mapFileFolder)
    % CREATESCP Write 'list.scp', a plain list of the '*.map' files.
    %   One line per file found in mapFileFolder; the last three
    %   characters of each name are replaced by 'map'.
    %
    %   Names are written as data, never used as an fprintf format string,
    %   so names containing '%' or '\' are reproduced correctly.
    %
    %   Raises cHMM:createSCP:cannotOpenOutput when list.scp cannot be
    %   created.

    scpFileName = 'list';
    d = dir(fullfile(mapFileFolder, '*.map'));
    scpPath = [fullfile(mapFileFolder, scpFileName) '.scp'];
    fid = fopen(scpPath, 'w');
    if fid < 0
        error('cHMM:createSCP:cannotOpenOutput', 'Cannot create "%s".', scpPath);
    end

    for I = 1:numel(d)
        fprintf(fid, '%smap\n', d(I).name(1:end-3));
    end

    fclose(fid);
end% ------ OF createSCP
rmeddis@38 580
rmeddis@38 581 %% **********************************************************
rmeddis@38 582 % score - Simple non-dynamic scoring
rmeddis@38 583 %************************************************************
function score(testMLFpath)
    % SCORE Simple non-dynamic (position by position) scoring.
    %   score(testMLFpath) processes every '*.mlf' file in testMLFpath.
    %   For each '.rec' entry the three input digits are read from the
    %   entry name (characters end-8 to end-6) and compared, position by
    %   position, with the first three recognised words. A table of
    %   sentence and word percentages is displayed and an empty marker
    %   file 'score__<pcWord of first file>__.txt' is created in
    %   testMLFpath.
    %
    %   Differences from the earlier implementation:
    %     * per-file working lists are rebuilt for every result file, so a
    %       shorter file no longer inherits lines from the previous one;
    %     * a folder without .mlf files produces a warning and returns
    %       instead of failing on an undefined variable;
    %     * an utterance with fewer than three recognised words is scored
    %       with the missing positions counted as wrong instead of raising
    %       an index error.

    % First get the test result files and store the file locations
    listing = dir(testMLFpath);
    allNames = {listing.name};
    isMlf = ~cellfun('isempty', regexp(allNames, '\.mlf$', 'once'));
    testMLFfiles = allNames(isMlf);

    if isempty(testMLFfiles)
        warning('cHMM:score:noResultFiles', 'No .mlf result files found in "%s".', testMLFpath);
        return
    end

    numFiles = numel(testMLFfiles);
    pcSent = zeros(1, numFiles);
    pcWord = zeros(1, numFiles);
    replaceDownPattern = '[0-9\-\.\s]';

    for fileIdx = 1:numFiles
        % Extract file name and recognised words as strings -> store in recResults.
        % (Parsing approach is Matt Robertson's.)
        mlfPath = fullfile(testMLFpath, testMLFfiles{fileIdx});
        fmlf = fopen(mlfPath, 'r');
        if fmlf < 0
            error('cHMM:score:cannotOpenResult', 'Cannot open "%s".', mlfPath);
        end
        MLF = textscan(fmlf, '%s', 'delimiter', '\n');
        fclose(fmlf);
        rawLines = MLF{1};

        % Keep '.rec' names and the MLF header untouched; strip digits,
        % '-', '.' and white space from every other line, leaving just the
        % word. Silence / short pause labels and empty lines are dropped.
        kept = {};
        for I = 1:numel(rawLines)
            entry = rawLines{I};
            if isempty(strfind(entry, '.rec')) && isempty(strfind(entry, 'MLF'))
                entry = regexprep(entry, replaceDownPattern, '');
            end
            if ~isempty(entry) && ~strcmpi(entry, 'sp') && ~strcmpi(entry, 'sil')
                kept{end+1} = entry; %#ok<AGROW>
            end
        end

        % Group the words under the '.rec' name that precedes them.
        recResults = struct('fileName', {}, 'utterance', {});
        X = 0;
        for I = 1:numel(kept)
            if ~isempty(strfind(kept{I}, 'MLF'))
                continue                       % header line
            end
            if ~isempty(strfind(kept{I}, '.rec'))
                X = X + 1;
                recResults(X).fileName = kept{I};
                recResults(X).utterance = {};
            else
                recResults(X).utterance = [recResults(X).utterance kept{I}];
            end
        end

        % Make separate 2D arrays of input and output digits
        numUtterances = numel(recResults);
        inputDigits  = zeros(numUtterances, 3);
        outputDigits = NaN(numUtterances, 3);    % NaN never equals a digit -> counted as wrong
        for nn = 1:numUtterances
            ipStr = recResults(nn).fileName(end-8:end-6);
            words = recResults(nn).utterance;
            for kk = 1:3
                inputDigits(nn,kk) = cHMM.htk_str2num(ipStr(kk));
                if kk <= numel(words)
                    outputDigits(nn,kk) = cHMM.htk_str2num(words{kk});
                end
            end
        end

        % Now do the scoring (simple when the data is in the right format)
        scoreArray = (inputDigits == outputDigits);
        pcSent(fileIdx) = 100*sum(all(scoreArray,2)) / numUtterances;
        pcWord(fileIdx) = 100*sum(scoreArray(:)) / numel(scoreArray);
    end

    % will output table to console if used on *nix
    xlsdataFull = [{'-- File Name --', '%Sent', '%Word'}; testMLFfiles' num2cell(pcSent') num2cell(pcWord')];
    disp(xlsdataFull)

    % Empty marker file whose name carries the word score of the first file.
    fid = fopen(fullfile(testMLFpath,['score__' num2str(pcWord(1), '%0.1f') '__.txt']),'w');
    if fid >= 0
        fclose(fid);
    end
end% ------ OF SCORE
rmeddis@38 670
rmeddis@38 671 %% **********************************************************
rmeddis@38 672 % scoreWhole folder - make my life easier @ command line
rmeddis@38 673 %************************************************************
function scoreWholeFolder(folderToScore, searchString)
    % SCOREWHOLEFOLDER Run cHMM.score on every matching sub-folder.
    %   scoreWholeFolder(folderToScore) scores all entries of
    %   folderToScore that match '*featR*'.
    %   scoreWholeFolder(folderToScore, searchString) uses searchString as
    %   the dir() pattern instead.
    if nargin < 2
        searchString = '*featR*';
    end

    matches = dir(fullfile(folderToScore, searchString));
    for idx = 1:numel(matches)
        target = fullfile(folderToScore, matches(idx).name);
        disp('');
        disp(target);
        cHMM.score(target);
    end
end
rmeddis@38 687
rmeddis@38 688 %**************************************************************************
rmeddis@38 689 % htk_str2num - Convert strings to integers
rmeddis@38 690 %**************************************************************************
function opNum = htk_str2num(ipString)
    % HTK_STR2NUM Convert a digit word or digit character to its number.
    %   Accepts the word ('oh', 'one', ...) or the matching single
    %   character ('O', '1', ...). Anything else fails an assertion.
    %   NOTE(review): 'seven'/'7' and 'zero'/'Z' are not in the table and
    %   therefore raise; this looks tied to the "NoSevenZero" test grammar
    %   -- confirm before extending.
    wordForms = {'oh', 'one', 'two', 'three', 'four', 'five', 'six', 'eight', 'nine'};
    charForms = {'O',  '1',   '2',   '3',     '4',    '5',    '6',   '8',     '9'};
    numbers   = [ 0,    1,     2,     3,       4,      5,      6,     8,       9 ];

    hit = strcmp(ipString, wordForms) | strcmp(ipString, charForms);
    assert(any(hit)) % throw error
    opNum = numbers(hit);
end % ------ OF htk_str2num
rmeddis@38 715
rmeddis@38 716 end % ------ OF STATIC METHODS
rmeddis@38 717
rmeddis@38 718 end % ------ OF CLASS