To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.
The primary repository for this project is hosted at git://github.com/rmeddis/MAP.git .
This repository is a read-only copy which is updated automatically every hour.
root / userProgramsASRforDummies / cHMM.m @ 38:c2204b18f4a2
History | View | Annotate | Download (32.7 KB)
| 1 | 38:c2204b18f4a2 | rmeddis | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
|---|---|---|---|
| 2 | % This program is free software; you can redistribute it and/or modify |
||
| 3 | % it under the terms of the GNU General Public License as published by |
||
| 4 | % the Free Software Foundation; either version 2 of the License, or |
||
| 5 | % (at your option) any later version. |
||
| 6 | % |
||
| 7 | % This program is distributed in the hope that it will be useful, |
||
| 8 | % but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
| 9 | % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||
| 10 | % GNU General Public License for more details. |
||
| 11 | % |
||
| 12 | % You can obtain a copy of the GNU General Public License from |
||
| 13 | % http://www.gnu.org/copyleft/gpl.html or by writing to |
||
| 14 | % Free Software Foundation, Inc.,675 Mass Ave, Cambridge, MA 02139, USA. |
||
| 15 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
||
| 16 | |||
| 17 | classdef cHMM |
||
| 18 | %HMMCLASS Handles all of the HTK related gubbins |
||
| 19 | % Please see the documentation located in a separate file for further |
||
| 20 | % information. |
||
| 21 | |||
| 22 | %% ********************************************************* |
||
| 23 | % properties _ _ |
||
| 24 | % | | (_) |
||
| 25 | % _ __ _ __ ___ _ __ ___ _ __| |_ _ ___ ___ |
||
| 26 | % | '_ \| '__/ _ \| '_ \ / _ \ '__| __| |/ _ \/ __| |
||
| 27 | % | |_) | | | (_) | |_) | __/ | | |_| | __/\__ \ |
||
| 28 | % | .__/|_| \___/| .__/ \___|_| \__|_|\___||___/ |
||
| 29 | % | | | | |
||
| 30 | % |_| |_| |
||
| 31 | %************************************************************ |
||
| 32 | |||
| 33 | %% ********************************************************** |
||
| 34 | % Public properties - can be set by user |
||
| 35 | %************************************************************ |
||
| 36 | properties(Access = public) |
||
| 37 | hmmFolder |
||
| 38 | paramType = 'USER_D_A'; %DELTAS and ACCELERATIONS |
||
| 39 | numCoeff = 27; %9*3 THIS IS FOR PROBABILITY MODEL (not high spont+low spont which would be 18*3=54) |
||
| 40 | |||
| 41 | HERestDataPath = fullfile(pwd, 'def', 'HERest_digit'); |
||
| 42 | binPath = fullfile(pwd, 'def', 'bin'); |
||
| 43 | configFile = fullfile(pwd, 'def', 'config_STANDARD'); |
||
| 44 | trainWordListFile = fullfile(pwd, 'def', 'Grammar_digit', 'words3'); |
||
| 45 | testWordListFile = fullfile(pwd, 'def', 'Grammar_digit', 'wordsNoSevenZero'); |
||
| 46 | wordNetFile = fullfile(pwd, 'def', 'Grammar_digit', 'wdnetNoSP.slf'); |
||
| 47 | dictFile = fullfile(pwd, 'def', 'Grammar_digit', 'noSevenZeroDict'); |
||
| 48 | end |
||
| 49 | |||
| 50 | %% ********************************************************** |
||
| 51 | % Dependent - never set by user. Only calculated when needed |
||
| 52 | %************************************************************ |
||
| 53 | properties(Dependent = true) |
||
| 54 | protoFile % = fullfile(pwd, 'def', 'proto_RobANonly_9'); %probability only |
||
| 55 | end |
||
| 56 | |||
| 57 | %% ********************************************************* |
||
| 58 | % methods _ _ _ |
||
| 59 | % | | | | | | |
||
| 60 | % _ __ ___ ___| |_| |__ ___ __| |___ |
||
| 61 | %| '_ ` _ \ / _ \ __| '_ \ / _ \ / _` / __| |
||
| 62 | %| | | | | | __/ |_| | | | (_) | (_| \__ \ |
||
| 63 | %|_| |_| |_|\___|\__|_| |_|\___/ \__,_|___/ |
||
| 64 | %************************************************************ |
||
| 65 | |||
| 66 | methods |
||
| 67 | %% ********************************************************** |
||
| 68 | % Constructor |
||
| 69 | %************************************************************ |
||
function obj = cHMM(hmmFolder)
    % cHMM  Construct an HMM handler object.
    %   obj = cHMM()           leaves the hmmFolder property at its default.
    %   obj = cHMM(hmmFolder)  stores hmmFolder in obj.hmmFolder; the other
    %                          methods of this class build their hmmN paths
    %                          from that property.
    folderSupplied = (nargin >= 1);
    if folderSupplied
        obj.hmmFolder = hmmFolder;
    end
end % ------ OF CONSTRUCTOR
||
| 75 | |||
| 76 | %% ********************************************************** |
||
| 77 | % genProto - generate task specific prototype |
||
| 78 | %************************************************************ |
||
function genProto(obj)
    % genProto  Build the initial multi-model file from the seed definition.
    %   Reads <hmmFolder>/hmm0/hmmdef and writes <hmmFolder>/hmm0/models by
    %   delegating to models_1mixsilMat (the MATLAB port of
    %   models_1mixsil.exe), which replicates the definition once per word
    %   model and appends a silence model.
    seedFolder     = fullfile(obj.hmmFolder, 'hmm0');
    seedDefinition = fullfile(seedFolder, 'hmmdef');
    seedModels     = fullfile(seedFolder, 'models');
    obj.models_1mixsilMat(seedDefinition, seedModels);
end % ------ OF GENPROTO
||
| 84 | |||
| 85 | %% ********************************************************** |
||
| 86 | % istrained |
||
| 87 | %************************************************************ |
||
function boolans = istrained(obj)
    % istrained  Report whether the final model file exists.
    %   Returns the number of directory entries matching
    %   <hmmFolder>/hmm36/models (0 when nothing matches), so the result is
    %   a numeric count that callers can use as a truth value.
    finalModelFile = fullfile(obj.hmmFolder, 'hmm36', 'models');
    matches = dir(finalModelFile);
    boolans = numel(matches);
end
||
| 91 | |||
| 92 | %% ********************************************************** |
||
| 93 | % Train |
||
| 94 | %************************************************************ |
||
function train(obj, trainFeatureFolder)
    % train  Run the complete HTK training recipe (hmm0 .. hmm36).
    %
    %   train(obj, trainFeatureFolder)
    %
    %   trainFeatureFolder must contain 'list.scp' (one feature file name per
    %   line, relative to the folder) and 'labels.mlf'.
    %
    %   Steps performed, in order:
    %     1. create <hmmFolder>/hmm0 .. hmm36 and write the prototype HMM
    %     2. write <hmmFolder>/tmp.list with full paths of the feature files
    %     3. HCompV -> hmm0, then build hmm0/macros and hmm0/models
    %     4. HERest x3 (hmm1..hmm3), copy hmm3 to hmm4, append the sp model
    %     5. five stages of "HHEd edit + HERest re-estimation" using
    %        sil1.hed, mix2_16.hed, mix3_16.hed, mix5_16.hed, mix7_16.hed
    %
    %   Every path placed on an HERest/HHEd command line is double-quoted so
    %   that folders containing spaces work (the HCompV line already did
    %   this). A non-zero exit status from an HTK tool raises a warning;
    %   training continues, as it did before.

    quoted = @(p) ['"' p '"'];   % shell-quote a path

    % HHEd edit scripts, one per stage of step 5.
    hedFiles = { ...
        fullfile(obj.HERestDataPath, 'sil1.hed'), ...
        fullfile(obj.HERestDataPath, 'mix2_16.hed'), ...
        fullfile(obj.HERestDataPath, 'mix3_16.hed'), ...
        fullfile(obj.HERestDataPath, 'mix5_16.hed'), ...
        fullfile(obj.HERestDataPath, 'mix7_16.hed')};

    NUM_COEF   = obj.numCoeff;
    PAR_TYPE   = obj.paramType;
    LIST_FILE  = fullfile(obj.hmmFolder, 'tmp.list');
    word_list  = obj.trainWordListFile;  % also used after the sp step (sp abandoned)
    proto      = obj.protoFile;          % written by makeProtoHmm below
    config     = obj.configFile;
    train_list = fullfile(trainFeatureFolder, 'list.scp');
    labels     = fullfile(trainFeatureFolder, 'labels.mlf');
    hmm_dir    = obj.hmmFolder;

    % Path builders for the numbered model folders.
    hmmAt  = @(n) [hmm_dir filesep 'hmm' num2str(n)];
    hmmSub = @(n, leaf) [hmm_dir filesep 'hmm' num2str(n) filesep leaf];

    % Command builders. preOpts/pruneOpts carry the extra flags that only
    % the first three HERest passes use.
    herestCmd = @(preOpts, pruneOpts, n) ['HERest ' preOpts '-C ' quoted(config) ...
        ' -I ' quoted(labels) pruneOpts ' -S ' quoted(LIST_FILE) ...
        ' -H ' quoted(hmmSub(n-1, 'macros')) ' -H ' quoted(hmmSub(n-1, 'models')) ...
        ' -M ' quoted(hmmAt(n)) ' ' quoted(word_list)];
    hhedCmd = @(n, hedFile) ['HHEd -T 2 -H ' quoted(hmmSub(n, 'macros')) ...
        ' -H ' quoted(hmmSub(n, 'models')) ' -M ' quoted(hmmAt(n+1)) ...
        ' ' quoted(hedFile) ' ' quoted(word_list)];

    % ---- 1. folder skeleton and prototype -------------------------------
    mkdir(hmm_dir)
    for I = 0:36
        mkdir(hmmAt(I));
    end
    obj.makeProtoHmm(proto, obj.paramType, obj.numCoeff, 18);

    % ---- 2. expand list.scp into full paths ------------------------------
    fid = fopen(train_list, 'r');
    if fid < 0
        error('cHMM:train:cannotOpenList', 'Cannot open training list "%s".', train_list);
    end
    disp(train_list)
    S = textscan(fid, '%s', 'Delimiter', '\n');
    fclose(fid);

    fid = fopen(LIST_FILE, 'w');
    if fid < 0
        error('cHMM:train:cannotWriteList', 'Cannot create "%s".', LIST_FILE);
    end
    for I = 1:size(S{1}, 1)
        fprintf(fid, '%s\n', fullfile(trainFeatureFolder, S{1}{I}));
    end
    fclose(fid);

    % ---- 3. seed model ---------------------------------------------------
    % HCompV just gets the vfloor stuff out so we can begin approximating
    cmd = ['"HCompV" -T 2 -D -C "' config '" -o hmmdef -f 0.01 -m -S "' LIST_FILE '" -M "' hmm_dir filesep 'hmm0" "' proto '"'];
    status = system(cmd);
    if status ~= 0
        warning('cHMM:train:toolFailed', 'HCompV returned exit status %d.', status);
    end

    obj.macroMat(NUM_COEF, PAR_TYPE, fullfile(hmm_dir, 'hmm0', 'vFloors'), fullfile(hmm_dir, 'hmm0', 'macros'));

    % Make the initial model prototype
    genProto(obj);

    disp('Seed HMM successfully Produced.....');

    % ---- 4. first three re-estimation passes + sp model -------------------
    for I = 1:3
        disp(I)
        status = system(herestCmd('-D ', ' -t 250.0 150.0 1000.0', I));
        if status ~= 0
            warning('cHMM:train:toolFailed', 'HERest (hmm%d) returned exit status %d.', I, status);
        end
    end

    disp('3 iterations complete');

    % hmm4 starts as a copy of hmm3; the empty hmm4 created above is removed first.
    rmdir(hmmAt(4), 's')
    copyfile(hmmAt(3), hmmAt(4))

    % Append an "sp" model (built from the silence model) to hmm4/models.
    % The word list is NOT switched to an sp-aware list afterwards: the sp
    % model is currently ignored by the rest of the recipe.
    obj.spmodel_genMat(fullfile(hmm_dir, 'hmm3', 'models'), fullfile(hmm_dir, 'hmm4', 'models'));

    % ---- 5. edit / re-estimate stages -------------------------------------
    % Each row: HERest pass numbers, message after HHEd, message after HERest.
    % HHEd always edits hmm(first-2) into hmm(first-1).
    stagePasses  = {6:8, 10:12, 14:20, 22:28, 30:36};
    stageEditMsg = {'SP model fixed', '2 gaussians per mixture', ...
                    '3 gaussians per mixture', '5 gaussians per mixture', ...
                    '7 gaussians per mixture'};
    stageDoneMsg = {'6 iterations complete', '9 iterations completed', ...
                    '16 iterations completed', '23 iterations completed', ...
                    '30 iterations completed'};

    for stage = 1:numel(stagePasses)
        passes = stagePasses{stage};

        status = system(hhedCmd(passes(1) - 2, hedFiles{stage}));
        if status ~= 0
            warning('cHMM:train:toolFailed', 'HHEd (stage %d) returned exit status %d.', stage, status);
        end
        disp(stageEditMsg{stage})

        for I = passes
            disp(I)
            status = system(herestCmd('', '', I));
            if status ~= 0
                warning('cHMM:train:toolFailed', 'HERest (hmm%d) returned exit status %d.', I, status);
            end
        end

        disp(stageDoneMsg{stage})
    end

end % ------ OF TRAIN
||
| 250 | |||
| 251 | %% ********************************************************** |
||
| 252 | % Test |
||
| 253 | %************************************************************ |
||
function test(obj, testFeatureFolder)
    % test  Recognise a folder of feature files with the final (hmm36) models.
    %
    %   test(obj, testFeatureFolder)
    %
    %   testFeatureFolder must contain 'list.scp' (one feature file name per
    %   line, relative to the folder). The method writes 'tmp.list' (full
    %   paths) and HVite's output 'result.mlf' into the same folder.
    %
    %   All paths on the HVite command line are double-quoted so folders
    %   containing spaces work. A non-zero HVite exit status raises a warning.

    quoted = @(p) ['"' p '"'];   % shell-quote a path

    flags          = '-p 0.0 -s 0.0';
    test_word_list = obj.testWordListFile;
    net            = obj.wordNetFile;
    dict           = obj.dictFile;
    config         = obj.configFile;
    LIST_FILE      = fullfile(testFeatureFolder, 'tmp.list');

    disp ('Now testing with 7 mixture HMMs')
    mod_file = fullfile(obj.hmmFolder, 'hmm36', 'models');
    mac_file = fullfile(obj.hmmFolder, 'hmm36', 'macros');

    RESULTS_DIR = testFeatureFolder;
    N1list      = fullfile(testFeatureFolder, 'list.scp');

    % Read the relative file list ...
    ftest = fopen(N1list, 'r');
    if ftest < 0
        error('cHMM:test:cannotOpenList', 'Cannot open test list "%s".', N1list);
    end
    S = textscan(ftest, '%s', 'Delimiter', '\n');
    fclose(ftest);

    % ... and rewrite it with full paths for HVite's -S option.
    flist = fopen(LIST_FILE, 'w');
    if flist < 0
        error('cHMM:test:cannotWriteList', 'Cannot create "%s".', LIST_FILE);
    end
    for I = 1:size(S{1}, 1)
        fprintf(flist, '%s\n', fullfile(testFeatureFolder, S{1}{I}));
    end
    fclose(flist);

    cmd = ['HVite -D -H ' quoted(mac_file) ' -H ' quoted(mod_file) ...
           ' -S ' quoted(LIST_FILE) ' -C ' quoted(config) ' -w ' quoted(net) ...
           ' -l ''*'' -i ' quoted([RESULTS_DIR filesep 'result.mlf']) ...
           ' ' flags ' ' quoted(dict) ' ' quoted(test_word_list)];
    status = system(cmd);
    if status ~= 0
        warning('cHMM:test:toolFailed', 'HVite returned exit status %d.', status);
    end
end % ------ OF TEST
||
| 288 | |||
| 289 | %% ********************************************************** |
||
| 290 | % Get methods determining feature vector related gubbins |
||
| 291 | %************************************************************ |
||
function value = get.protoFile(obj)
    % get.protoFile  Getter for the dependent protoFile property.
    %   The prototype HMM file always lives directly inside hmmFolder under
    %   the fixed name 'proto_AutoGen'.
    protoName = 'proto_AutoGen';
    value = fullfile(obj.hmmFolder, protoName);
end
||
| 295 | |||
| 296 | end % ------ OF METHODS |
||
| 297 | |||
| 298 | %% ********************************************************* |
||
| 299 | % _ _ _ _ _ _ |
||
| 300 | % | | | | (_) | | | | | | |
||
| 301 | % ___| |_ __ _| |_ _ ___ _ __ ___ ___| |_| |__ ___ __| |___ |
||
| 302 | % / __| __/ _` | __| |/ __| | '_ ` _ \ / _ \ __| '_ \ / _ \ / _` / __| |
||
| 303 | % \__ \ || (_| | |_| | (__ | | | | | | __/ |_| | | | (_) | (_| \__ \ |
||
| 304 | % |___/\__\__,_|\__|_|\___| |_| |_| |_|\___|\__|_| |_|\___/ \__,_|___/ |
||
| 305 | %************************************************************ |
||
| 306 | |||
| 307 | methods(Static) |
||
| 308 | %% ********************************************************** |
||
| 309 | % macroMat (matlab port of macro C code) |
||
| 310 | % ported by NC - Nov 2011 |
||
| 311 | %************************************************************ |
||
function macroMat(VECSIZE, PARAMETER_TYPE, infile, outfile)
    % macroMat  Build an HTK 'macros' file from a vFloors file.
    %
    %   macroMat(VECSIZE, PARAMETER_TYPE, infile, outfile)
    %
    %   VECSIZE         feature vector length written to <STREAMINFO>/<VECSIZE>
    %   PARAMETER_TYPE  parameter kind string, e.g. 'USER_D_A'
    %   infile          vFloors file produced by HCompV
    %   outfile         macros file to create (overwritten)
    %
    %   The output is a global-options header followed by a verbatim copy
    %   of infile.

    fp = fopen(infile, 'r');
    if fp < 0
        error('cHMM:macroMat:cannotOpenInput', 'Cannot open vFloors file "%s".', infile);
    end

    ofp = fopen(outfile, 'w');
    if ofp < 0
        fclose(fp);
        error('cHMM:macroMat:cannotOpenOutput', 'Cannot create macros file "%s".', outfile);
    end

    fprintf(ofp, '~o\n<STREAMINFO> 1 %d\n', VECSIZE);
    fprintf(ofp, '<VECSIZE> %d\n', VECSIZE);
    fprintf(ofp, '<NULLD>\n<%s>\n', PARAMETER_TYPE);

    % Copy line by line. fgets keeps the line terminator; '%s' makes sure the
    % file content is never interpreted as a format string.
    tline = fgets(fp);
    while ischar(tline)
        fprintf(ofp, '%s', tline);
        tline = fgets(fp);
    end

    fclose(fp);
    fclose(ofp);
end %---- of MACROMAT
||
| 332 | |||
| 333 | %% ********************************************************** |
||
| 334 | % spmodel_genMat (matlab port of spmodel_gen C code) |
||
| 335 | % ported by NC - Nov 2011 |
||
| 336 | %************************************************************ |
||
function spmodel_genMat(infile, outfile)
    % spmodel_genMat  Append a short-pause ("sp") model to a models file.
    %
    %   spmodel_genMat(infile, outfile)
    %
    %   infile   models file containing a '~h "sil"' definition
    %   outfile  models file to which the new "sp" model is APPENDED
    %
    %   The sp model has 3 states; its single emitting state (state 2) is a
    %   copy of the centre state (state 3) of the silence model, i.e. every
    %   line between '<STATE> 3' and '<STATE> 4' inside the sil definition.
    %
    %   Background (VoxForge HTK tutorial, monophones step 7): the HTK book
    %   requires the sp emitting state to be tied to the centre state of
    %   sil. Copying the state here is the first half; the tie itself is
    %   done afterwards with HHEd.
    %
    %   The input is scanned with ONE file handle so that the '<STATE> 3'
    %   search continues from the sil header. (Re-opening the file, as an
    %   earlier version did, restarts at the top and picks up state 3 of
    %   whichever model comes first.) A missing marker raises an error
    %   instead of looping forever at end-of-file.

    fp = fopen(infile, 'r');
    if fp < 0
        error('cHMM:spmodel_genMat:cannotOpenInput', 'Cannot open models file "%s".', infile);
    end
    closeInput = onCleanup(@() fclose(fp)); %#ok<NASGU> closes fp on any exit path

    % -- advance to the silence model header
    lNow = fgetl(fp);
    while ischar(lNow) && ~strcmpi(lNow, '~h "sil"')
        lNow = fgetl(fp);
    end
    if ~ischar(lNow)
        error('cHMM:spmodel_genMat:noSilModel', 'No ~h "sil" definition found in "%s".', infile);
    end

    % -- advance (within sil) to the start of state 3
    lNow = fgetl(fp);
    while ischar(lNow) && ~strcmpi(lNow, ['<STATE> ' num2str(3)])
        lNow = fgetl(fp);
    end
    if ~ischar(lNow)
        error('cHMM:spmodel_genMat:noState', 'No <STATE> 3 found after ~h "sil" in "%s".', infile);
    end

    % -- collect the body of state 3 (everything up to <STATE> 4)
    stateLines = {};
    tline = fgetl(fp);
    while ischar(tline) && ~strcmpi(tline, ['<STATE> ' num2str(4)])
        stateLines{end+1} = tline; %#ok<AGROW>
        tline = fgetl(fp);
    end
    if ~ischar(tline)
        error('cHMM:spmodel_genMat:noState', 'No <STATE> 4 found after <STATE> 3 of sil in "%s".', infile);
    end

    % -- write the sp model; nothing is appended unless parsing succeeded
    ofp = fopen(outfile, 'a+'); % we append this time
    if ofp < 0
        error('cHMM:spmodel_genMat:cannotOpenOutput', 'Cannot open "%s" for appending.', outfile);
    end

    fprintf(ofp, '~h "sp"\n');
    fprintf(ofp, '<BEGINHMM>\n<NUMSTATES> 3\n<STATE> 2\n');
    for k = 1:numel(stateLines)
        fprintf(ofp, '%s\n', stateLines{k});
    end
    fprintf(ofp, '<TRANSP> 3\n0.000000e+00 1.000000e+00 0.000000e+00\n');
    fprintf(ofp, '0.000000e+00 5.000000e-01 5.000000e-01\n');
    fprintf(ofp, '0.000000e+00 0.000000e+00 0.000000e+00\n<ENDHMM>\n');

    fclose(ofp);
end %---- of spmodel_genMat
||
| 398 | |||
| 399 | %% ********************************************************** |
||
| 400 | % models_1mixsilMat (matlab port of models_1mixsil C code) |
||
| 401 | % ported by NC - Nov 2011 |
||
| 402 | %************************************************************ |
||
function models_1mixsilMat(infile, outfile)
    % models_1mixsilMat  Expand a single hmmdef into a full models file.
    %
    %   models_1mixsilMat(infile, outfile)
    %
    %   infile   'hmmdef' file containing a '~h "hmmdef"' definition
    %   outfile  models file to create (overwritten)
    %
    %   Output layout:
    %     * eleven word models ("one" .. "nine", "oh", "zero"); each is a new
    %       '~h "<name>"' header followed by a verbatim copy of everything
    %       after the '~h "hmmdef"' line of infile
    %     * a 5-state silence model "sil" whose states 2..4 are copied from
    %       states 2..4 of infile (lines between '<STATE> k' and
    %       '<STATE> k+1'), each prefixed with '<NUMMIXES> 1', plus a fixed
    %       transition matrix
    %
    %   The input is read once. Copied text is written with '%s' so it is
    %   never interpreted as a format string, and a missing marker raises an
    %   error instead of looping forever at end-of-file.

    wordNames = {'one', 'two', 'three', 'four', 'five', 'six', ...
                 'seven', 'eight', 'nine', 'oh', 'zero'};

    % ---- read the whole definition file ----------------------------------
    fp = fopen(infile, 'r');
    if fp < 0
        error('cHMM:models_1mixsilMat:cannotOpenInput', 'Cannot open HMM definition "%s".', infile);
    end
    rawLines = {};                 % lines WITH their terminators
    tline = fgets(fp);
    while ischar(tline)
        rawLines{end+1} = tline; %#ok<AGROW>
        tline = fgets(fp);
    end
    fclose(fp);
    bareLines = regexprep(rawLines, '[\r\n]+$', '');   % terminators stripped

    % ---- locate the pieces to copy ----------------------------------------
    headerIdx = find(strcmpi(bareLines, '~h "hmmdef"'), 1);
    if isempty(headerIdx)
        error('cHMM:models_1mixsilMat:noHeader', 'No ~h "hmmdef" line found in "%s".', infile);
    end
    protoBody = [rawLines{headerIdx+1:end}];

    silStates = 2:4;
    silBlocks = cell(size(silStates));
    for s = 1:numel(silStates)
        kk = silStates(s);
        startIdx = find(strcmpi(bareLines, ['<STATE> ' num2str(kk)]), 1);
        if isempty(startIdx)
            error('cHMM:models_1mixsilMat:noState', 'No <STATE> %d found in "%s".', kk, infile);
        end
        stopOffset = find(strcmpi(bareLines(startIdx+1:end), ['<STATE> ' num2str(kk+1)]), 1);
        if isempty(stopOffset)
            error('cHMM:models_1mixsilMat:noState', 'No <STATE> %d found after <STATE> %d in "%s".', kk+1, kk, infile);
        end
        silBlocks{s} = bareLines(startIdx+1 : startIdx+stopOffset-1);
    end

    % ---- write the models file --------------------------------------------
    ofp = fopen(outfile, 'w');
    if ofp < 0
        error('cHMM:models_1mixsilMat:cannotOpenOutput', 'Cannot create models file "%s".', outfile);
    end

    for ii = 1:numel(wordNames)
        fprintf(ofp, '~h "%s"\n', wordNames{ii});
        fprintf(ofp, '%s', protoBody);
    end

    % The silence model goes at the end.
    fprintf(ofp, '~h "sil"\n');
    fprintf(ofp, '<BEGINHMM>\n<NUMSTATES> 5\n');
    for s = 1:numel(silStates)
        fprintf(ofp, '<STATE> %d\n<NUMMIXES> 1\n', silStates(s));
        block = silBlocks{s};
        for k = 1:numel(block)
            fprintf(ofp, '%s\n', block{k});
        end
    end

    fprintf(ofp, '<TRANSP> 5\n0.000000e+00 1.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00\n');
    fprintf(ofp, '0.000000e+00 6.000000e-01 4.000000e-01 0.000000e+00 0.000000e+00\n');
    fprintf(ofp, '0.000000e+00 0.000000e+00 6.000000e-01 4.000000e-01 0.000000e+00\n');
    fprintf(ofp, '0.000000e+00 0.000000e+00 0.000000e+00 7.000000e-01 3.000000e-01\n');
    fprintf(ofp, '0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00\n<ENDHMM>\n');

    fclose(ofp);
end %---- of models_1mixsilMat
||
| 475 | |||
| 476 | %% ********************************************************** |
||
| 477 | % makeProtoHmm (Make a prototype HMM) |
||
| 478 | %************************************************************ |
||
function makeProtoHmm(filename,featureType,numFeatures,numStates)
    % makeProtoHmm  Write an HTK prototype HMM definition file.
    %
    %   makeProtoHmm(filename, featureType, numFeatures, numStates)
    %
    %   filename     file to create (overwritten)
    %   featureType  parameter kind string, usually 'USER_D_A'
    %   numFeatures  feature vector length (including deltas if used)
    %   numStates    total number of states including the two non-emitting
    %                end states (usually 18)
    %
    %   Every emitting state gets one mixture with zero mean and unit
    %   variance. The transition matrix is left-to-right: entry -> state 2
    %   with probability 1, each emitting state 0.6 self / 0.4 next, and the
    %   last emitting state 0.9 self / 0.1 exit.

    ofp = fopen(filename,'w');
    if ofp < 0
        error('cHMM:makeProtoHmm:cannotOpenOutput', 'Cannot create prototype file "%s".', filename);
    end

    fprintf(ofp,'<BeginHMM>\n');
    fprintf(ofp,' <NumStates> %d <VecSize> %d <%s> <nullD> <diagC>\n',numStates,numFeatures,featureType);
    fprintf(ofp,' <StreamInfo> 1 %d\n',numFeatures);
    for state = 2:numStates-1
        fprintf(ofp,' <State> %d <NumMixes> 1\n',state);
        fprintf(ofp,' <Stream> 1\n');
        fprintf(ofp,' <Mixture> 1 1.0\n');
        fprintf(ofp,' <Mean> %d\n',numFeatures);
        fprintf(ofp,' ');
        fprintf(ofp,'%1.1f ',zeros(1,numFeatures));
        fprintf(ofp,'\n');
        fprintf(ofp,' <Variance> %d\n',numFeatures);
        fprintf(ofp,' ');
        fprintf(ofp,'%1.1f ',ones(1,numFeatures));
        fprintf(ofp,'\n');
    end

    fprintf(ofp,'<TransP> %d\n',numStates);
    % The transition matrix is numStates x numStates. (It used to be sized by
    % numFeatures, which only worked while numFeatures >= numStates.)
    transp = zeros(numStates);
    transp(1,2) = 1;
    for state = 2:numStates-2
        transp(state,state)   = 0.6;
        transp(state,state+1) = 0.4;
    end
    transp(numStates-1,numStates-1) = 0.9;
    transp(numStates-1,numStates)   = 0.1;
    for state = 1:numStates
        fprintf(ofp,'%1.3e ',transp(state,1:numStates));
        fprintf(ofp,'\n');
    end
    fprintf(ofp,'<EndHMM>\n');
    fclose(ofp);
end %---- of MAKEPROTOHMM
||
| 519 | |||
| 520 | %% ********************************************************** |
||
| 521 | % createMLF - master label file - belongs with hmm class |
||
| 522 | %************************************************************ |
||
function createMLF(mapFileFolder)
    % createMLF  Write the HTK master label file 'labels.mlf' for a folder.
    %
    %   createMLF(mapFileFolder)
    %
    %   For every '*.map' file in mapFileFolder an entry is written:
    %       "*/<basename>.lab"
    %       sil
    %       <one word per label character>
    %       sil
    %       .
    %   The label characters are taken from the file name, positions
    %   5 .. end-5 (i.e. skipping a 4-character prefix and the last
    %   character before '.map'). 'O' -> oh, 'Z' -> zero, '1'..'9' -> one ..
    %   nine.
    %
    %   File names and words are written through '%s' so that a '%' or '\' in
    %   a name cannot corrupt the output. An unrecognised label character now
    %   raises an error; previously it silently repeated the previous word
    %   (or failed on an undefined variable if it was the first character).

    mlfFileName = 'labels';
    d = dir(fullfile(mapFileFolder, '*.map'));

    mlfPath = [fullfile(mapFileFolder, mlfFileName) '.mlf'];
    fid = fopen(mlfPath, 'w');
    if fid < 0
        error('cHMM:createMLF:cannotOpenOutput', 'Cannot create "%s".', mlfPath);
    end
    closeOutput = onCleanup(@() fclose(fid)); %#ok<NASGU> closes fid on any exit path

    fprintf(fid, '#!MLF!#\n');

    for I = 1:size(d,1)
        % name(1:end-3) keeps the trailing '.', so this yields "<base>.lab"
        fprintf(fid, '"*/%slab"\n', d(I).name(1:end-3));
        fprintf(fid, 'sil\n');
        labels = d(I).name(5:end-5);
        for J = 1:length(labels)
            switch labels(J)
                case 'O'
                    S = 'oh';
                case '1'
                    S = 'one';
                case '2'
                    S = 'two';
                case '3'
                    S = 'three';
                case '4'
                    S = 'four';
                case '5'
                    S = 'five';
                case '6'
                    S = 'six';
                case '7'
                    S = 'seven';
                case '8'
                    S = 'eight';
                case '9'
                    S = 'nine';
                case 'Z'
                    S = 'zero';
                otherwise
                    error('cHMM:createMLF:unknownLabel', ...
                          'Unrecognised label character "%s" in file name "%s".', ...
                          labels(J), d(I).name);
            end
            fprintf(fid, '%s\n', S);
        end
        fprintf(fid, 'sil\n.\n');
    end
end % ------ OF CREATEMLF
||
| 565 | |||
| 566 | %% ********************************************************** |
||
| 567 | % createSCP - Just a file list |
||
| 568 | %************************************************************ |
||
function createSCP(mapFileFolder)
    % createSCP  Write 'list.scp', a plain list of the '*.map' files in a folder.
    %
    %   createSCP(mapFileFolder)
    %
    %   One file name (without folder) is written per line. Names are written
    %   through '%s' so that a '%' or '\' in a file name cannot corrupt the
    %   list.

    scpFileName = 'list';
    d = dir(fullfile(mapFileFolder, '*.map'));

    scpPath = [fullfile(mapFileFolder, scpFileName) '.scp'];
    fid = fopen(scpPath, 'w');
    if fid < 0
        error('cHMM:createSCP:cannotOpenOutput', 'Cannot create "%s".', scpPath);
    end

    for I = 1:size(d,1)
        % name(1:end-3) keeps the trailing '.'; 'map' restores the extension
        fprintf(fid, '%smap\n', d(I).name(1:end-3));
    end

    fclose(fid);
end% ------ OF createSCP
||
| 580 | |||
| 581 | %% ********************************************************** |
||
| 582 | % score - Simple non-dynamic scoring |
||
| 583 | %************************************************************ |
||
function score(testMLFpath)
    % score  Simple non-dynamic scoring of HVite result files.
    %
    %   score(testMLFpath)
    %
    %   Every '*.mlf' file in testMLFpath is parsed. For each recognised
    %   utterance the three reference digits are read from the file name
    %   (characters end-8 .. end-6 of the quoted '.rec' line) and compared
    %   position by position with the first three recognised words.
    %   A table of percent-correct sentences and words is displayed, and an
    %   empty marker file 'score__<pcWord of first file>__.txt' is created in
    %   testMLFpath.
    %
    %   Robustness notes:
    %     * token lists are rebuilt for every MLF file, so results of one
    %       file can no longer leak into the next
    %     * if fewer than three words were recognised, the missing positions
    %       are counted as wrong (NaN) instead of raising an index error
    %     * words appearing before the first '.rec' line are ignored
    %     * a folder without any '.mlf' file produces a warning and returns

    % ---- collect the result files ----------------------------------------
    listing = dir(testMLFpath);
    testMLFfiles = {};
    for nn = 1:numel(listing)
        candidate = listing(nn).name;
        if length(candidate) > 3 && strcmp(candidate(end-3:end), '.mlf')
            testMLFfiles{end+1} = candidate; %#ok<AGROW>
        end
    end
    if isempty(testMLFfiles)
        warning('cHMM:score:noMlfFiles', 'No .mlf files found in "%s"; nothing to score.', testMLFpath);
        return
    end

    numFiles = numel(testMLFfiles);
    pcSent = zeros(1, numFiles);
    pcWord = zeros(1, numFiles);
    replaceDownPattern = '[0-9\-\.\s]';   % strips times, scores and the '.' terminator

    for fileIdx = 1:numFiles
        % ---- read the MLF (parsing logic originally by Matt Robertson) ----
        mlfPath = fullfile(testMLFpath, testMLFfiles{fileIdx});
        fmlf = fopen(mlfPath, 'r');
        if fmlf < 0
            error('cHMM:score:cannotOpenMlf', 'Cannot open "%s".', mlfPath);
        end
        MLF = textscan(fmlf, '%s', 'delimiter', '\n');
        fclose(fmlf);
        rawLines = MLF{1};

        % Keep '.rec' and 'MLF' lines as they are; reduce every other line
        % to the bare recognised word. Drop sp / sil / empty tokens.
        tokens = {};
        for I = 1:numel(rawLines)
            thisLine = rawLines{I};
            isRecLine = ~isempty(strfind(thisLine, '.rec'));
            isMlfLine = ~isempty(strfind(thisLine, 'MLF'));
            if ~isRecLine && ~isMlfLine
                thisLine = regexprep(thisLine, replaceDownPattern, '');
            end
            if ~any(strcmpi(thisLine, {'sp', 'sil', ''}))
                tokens{end+1} = thisLine; %#ok<AGROW>
            end
        end

        % ---- group the words under their '.rec' file name ------------------
        recResults = struct('fileName', {}, 'utterance', {});
        X = 0;
        for I = 1:numel(tokens)
            tok = tokens{I};
            if ~isempty(strfind(tok, 'MLF'))
                continue                              % header line
            elseif ~isempty(strfind(tok, '.rec'))
                X = X + 1;                            % found a file name
                recResults(X).fileName  = tok;
                recResults(X).utterance = {};
            elseif X > 0
                recResults(X).utterance{end+1} = tok;
            end
        end

        % ---- reference vs. recognised digits -------------------------------
        numUtt = numel(recResults);
        inputDigits  = zeros(numUtt, 3);
        outputDigits = NaN(numUtt, 3);                % NaN never equals a digit
        for nn = 1:numUtt
            ipStr = recResults(nn).fileName(end-8:end-6);
            recognised = recResults(nn).utterance;
            for kk = 1:3
                inputDigits(nn,kk) = cHMM.htk_str2num(ipStr(kk));
                if kk <= numel(recognised)
                    outputDigits(nn,kk) = cHMM.htk_str2num(recognised{kk});
                end
            end
        end

        % ---- scoring (simple when the data is in the right format) --------
        scoreArray = (inputDigits == outputDigits);
        pcSent(fileIdx) = 100*sum(all(scoreArray,2)) / numel(recResults);
        pcWord(fileIdx) = 100*sum(scoreArray(:)) / numel(scoreArray);
    end

    % will output table to console if used on *nix
    xlsdataFull = [{'-- File Name --', '%Sent', '%Word'}; testMLFfiles' num2cell(pcSent') num2cell(pcWord')];
    disp(xlsdataFull)

    % Empty marker file whose name carries the word score of the first MLF.
    fid = fopen(fullfile(testMLFpath, ['score__' num2str(pcWord(1), '%0.1f') '__.txt']), 'w');
    if fid >= 0
        fclose(fid);
    else
        warning('cHMM:score:cannotWriteMarker', 'Could not create the score marker file in "%s".', testMLFpath);
    end
end% ------ OF SCORE
||
| 670 | |||
| 671 | %% ********************************************************** |
||
| 672 | % scoreWhole folder - make my life easier @ command line |
||
| 673 | %************************************************************ |
||
function scoreWholeFolder(folderToScore, searchString)
    % scoreWholeFolder  Score every matching sub-folder of a folder.
    %   scoreWholeFolder(folderToScore) scores each entry of folderToScore
    %   whose name matches '*featR*'.
    %   scoreWholeFolder(folderToScore, searchString) uses searchString as
    %   the dir() pattern instead.
    %   Each match is displayed and then passed to cHMM.score.
    if nargin < 2
        searchString = '*featR*';
    end

    matches = dir(fullfile(folderToScore, searchString));
    for idx = 1:numel(matches)
        target = fullfile(folderToScore, matches(idx).name);
        disp('');
        disp(target);
        cHMM.score(target);
    end
end
||
| 687 | |||
| 688 | %************************************************************************** |
||
| 689 | % htk_str2num - Convert strings to integers |
||
| 690 | %************************************************************************** |
||
function opNum = htk_str2num(ipString)
    % htk_str2num  Convert a digit word or digit character to its number.
    %
    %   opNum = htk_str2num(ipString)
    %
    %   Accepts either the HTK word ('oh', 'one', ... 'nine') or the
    %   file-name character ('O', '1', ... '9') and returns 0..9.
    %
    %   'seven'/'7' and 'zero'/'Z' have no mapping here and fail the
    %   assertion. NOTE(review): this looks intentional - the default test
    %   grammar files are named '...NoSevenZero' - confirm before adding them.
    %
    %   Any unmapped token fails an assertion (identifier
    %   'MATLAB:assertion:failed') whose message names the offending token.
    switch ipString
        case {'oh' , 'O'}
            opNum = 0;
        case {'one' , '1'}
            opNum = 1;
        case {'two' , '2'}
            opNum = 2;
        case {'three' , '3'}
            opNum = 3;
        case {'four' , '4'}
            opNum = 4;
        case {'five' , '5'}
            opNum = 5;
        case {'six' , '6'}
            opNum = 6;
        case {'eight' , '8'}
            opNum = 8;
        case {'nine' , '9'}
            opNum = 9;
        otherwise
            % Same failure as before, but say which token caused it.
            assert(false, 'MATLAB:assertion:failed', ...
                   'htk_str2num: no digit mapping for token "%s".', ipString);
    end
end % ------ OF htk_str2num
||
| 715 | |||
| 716 | end % ------ OF STATIC METHODS |
||
| 717 | |||
| 718 | end % ------ OF CLASS |