Mercurial > hg > map
comparison userProgramsASRforDummies/cJob.m @ 38:c2204b18f4a2 tip
End nov big change
author | Ray Meddis <rmeddis@essex.ac.uk> |
---|---|
date | Mon, 28 Nov 2011 13:34:28 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
37:771a643d5c29 | 38:c2204b18f4a2 |
---|---|
1 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | |
2 % This program is free software; you can redistribute it and/or modify | |
3 % it under the terms of the GNU General Public License as published by | |
4 % the Free Software Foundation; either version 2 of the License, or | |
5 % (at your option) any later version. | |
6 % | |
7 % This program is distributed in the hope that it will be useful, | |
8 % but WITHOUT ANY WARRANTY; without even the implied warranty of | |
9 % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
10 % GNU General Public License for more details. | |
11 % | |
12 % You can obtain a copy of the GNU General Public License from | |
13 % http://www.gnu.org/copyleft/gpl.html or by writing to | |
14 % Free Software Foundation, Inc.,675 Mass Ave, Cambridge, MA 02139, USA. | |
15 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | |
16 | |
17 classdef cJob | |
18 %CJOB Responsible for conversion of wav files into recogniser features | |
19 % Please see the documentation located in a separate file for further | |
20 % information. | |
21 | |
22 %% ********************************************************* | |
23 % properties _ _ | |
24 % | | (_) | |
25 % _ __ _ __ ___ _ __ ___ _ __| |_ _ ___ ___ | |
26 % | '_ \| '__/ _ \| '_ \ / _ \ '__| __| |/ _ \/ __| | |
27 % | |_) | | | (_) | |_) | __/ | | |_| | __/\__ \ | |
28 % | .__/|_| \___/| .__/ \___|_| \__|_|\___||___/ | |
29 % | | | | | |
30 % |_| |_| | |
31 %************************************************************ | |
32 | |
33 %% ********************************************************** | |
34 % Public properties - can be set by user | |
35 %************************************************************ | |
36 properties(Access = public) | |
37 wavFolder | |
38 opFolder | |
39 noiseFolder | |
40 | |
41 wavList | |
42 todoStatus | |
43 | |
44 | |
45 participant = 'Normal';%'DEMO2_multiSpont'; | |
46 noiseName = '8TalkerBabble'; | |
47 numWavs = 5; | |
48 noiseLevToUse = -200; | |
49 speechLevToUse = 50; | |
50 | |
51 speechLevStd = 0; | |
52 noiseLevStd = 0; | |
53 freezeNoise = 0; | |
54 meanSNR = 20; | |
55 speechDist = 'None'; | |
56 noiseDist = 'None'; | |
57 | |
58 noisePreDur = 0.0; | |
59 noisePostDur = 0.0; | |
60 truncateDur = 0.0; %Dr. RF used 0.550 | |
61 | |
62 currentSpeechLevel | |
63 currentNoiseLevel | |
64 | |
65 | |
66 useSpectrogram = false; | |
67 numCoeff = 9; | |
68 | |
69 %************************************************************ | |
70 % plotting handles | |
71 %************************************************************ | |
72 probHaxes = []; | |
73 probHaxesSM = []; | |
74 sacfHaxes = []; | |
75 sacfHaxesSM = []; | |
76 featHaxes = []; | |
77 reconHaxes = []; | |
78 | |
79 %************************************************************ | |
80 % SACF params | |
81 %************************************************************ | |
82 useSACF = false; | |
83 SACFacfTau = 2; % > 1 = Wiegrebe mode | |
84 SACFnBins = 128; | |
85 SACFminLag = 1 / 4000; | |
86 SACFmaxLag = 1 / 50; | |
87 SACFlambda = 10e-3; | |
88 | |
89 %************************************************************ | |
90 % MAP params | |
91 %************************************************************ | |
92 MAProot = fullfile('..'); | |
93 MAPplotGraphs = 0; | |
94 MAPDRNLSave = 0; | |
95 | |
96 MAPopLSR = 0; | |
97 MAPopMSR = 0; | |
98 MAPopHSR = 1; | |
99 | |
100 MAPparamChanges = {}; | |
101 | |
102 %************************************************************ | |
103 % HTK stuff - writing to HTK recogniser format | |
104 %************************************************************ | |
105 frameshift = 10; % shift between frames (ms) | |
106 sampPeriodFromMsFactor = 10000; % appropriate for 10ms frame rate | |
107 paramKind = 9; % HTK USER format for user-defined features (p73 of HTK book) | |
108 | |
109 removeEnergyStatic = false; | |
110 doCMN = false; % Cepstral mean normalisation | |
111 | |
112 %************************************************************ | |
113 % Portable EssexAid params | |
114 %************************************************************ | |
115 bwOct = 1/1; | |
116 filterOrder = 2; | |
117 | |
118 mainGain = [ 1; 1; 1; 1; 1]; % gain in linear units | |
119 TCdBO = [40; 40; 40; 40; 40]; %Compression thresholds (in dB OUTPUT from 2nd filt) | |
120 TMdBO = [10; 10; 10; 10; 10]; %MOC thresholds (in dB OUTPUT from 2nd filt) | |
121 DRNLc = [ 0.2; 0.2; 0.2; 0.2; 0.2;] | |
122 | |
123 ARtau = 0.03; % decay time constant | |
124 ARthresholddB = 85; % dB SPL (input signal level) =>200 to disable | |
125 MOCtau = 0.3; | |
126 MOCfactor = 0.5; %dB per dB OUTPUT | |
127 | |
128 numSamples = 1024; %MAX=6912 | |
129 useAid = 0; | |
130 end | |
131 | |
132 %% ********************************************************* | |
133 % Protected properties - inter/ra class communication | |
134 %************************************************************ | |
135 properties(Access = protected) | |
136 jobLockFid % File identifier for mutex | |
137 | |
138 %Nick C comment on this: | |
139 %OK. The big-endian thing used to work because in the config file | |
140 %'config_tr_zcpa12' there was a flag called NATURALREADORDER that was set to | |
141 %FALSE and thus appeared to override x86 standard: | |
142 %little-endian. Endianess has **!nothing!** to do with win vs *nix | |
143 byteOrder = 'le'; % byte order is big endian | |
144 end | |
145 properties(Dependent = true) | |
146 jobLockTxtFile | |
147 end | |
148 | |
149 %% ********************************************************* | |
150 % methods _ _ _ | |
151 % | | | | | | | |
152 % _ __ ___ ___| |_| |__ ___ __| |___ | |
153 %| '_ ` _ \ / _ \ __| '_ \ / _ \ / _` / __| | |
154 %| | | | | | __/ |_| | | | (_) | (_| \__ \ | |
155 %|_| |_| |_|\___|\__|_| |_|\___/ \__,_|___/ | |
156 %************************************************************ | |
157 | |
158 methods | |
159 %% ********************************************************** | |
160 % Constructor | |
161 %************************************************************ | |
162 function obj = cJob(LearnOrRecWavs, jobFolder) | |
163 if nargin < 1 | |
164 warning('job:ambiguouscorpus',... | |
165 'We need to know whether the (L)earning or (R)ecognition corpus is being used, assuming ''L'''); | |
166 LearnOrRecWavs = 'L'; | |
167 end | |
168 if nargin > 1 | |
169 obj.opFolder = jobFolder; | |
170 else | |
171 if isunix | |
172 if ismac | |
173 obj.opFolder = '~/ASR/exps/_foo'; | |
174 else | |
175 obj.opFolder = '/scratch/nrclark/exps/_foo'; | |
176 end | |
177 else | |
178 obj.opFolder = 'D:\exps\_foo'; | |
179 end | |
180 end | |
181 | |
182 obj = obj.assignWavPaths(LearnOrRecWavs); | |
183 obj = obj.initMAP; | |
184 | |
185 | |
186 end % ------ OF CONSTRUCTOR | |
187 | |
188 %% ********************************************************** | |
189 % Set Wav Paths | |
190 %************************************************************ | |
191 function obj = assignWavPaths(obj, LearnOrRecWavs) | |
192 if isunix | |
193 if ismac | |
194 lWAVpath = fullfile('demo_wavs', 'TrainingData-Clean'); | |
195 rWAVpath = fullfile('demo_wavs', 'TripletTestData'); | |
196 obj.noiseFolder = fullfile('demo_wavs', 'noises'); | |
197 else | |
198 lWAVpath = fullfile('demo_wavs', 'TrainingData-Clean'); | |
199 rWAVpath = fullfile('demo_wavs', 'TripletTestData'); | |
200 obj.noiseFolder = fullfile('demo_wavs', 'noises'); | |
201 end | |
202 else | |
203 lWAVpath = 'D:\AURORA digits (wav)\TrainingData-Clean'; | |
204 rWAVpath = 'D:\AURORA digits (wav)\TripletTestData'; | |
205 obj.noiseFolder = 'D:\AURORA digits (wav)\noises'; | |
206 end | |
207 | |
208 if strcmpi(LearnOrRecWavs, 'l') | |
209 obj.wavFolder = lWAVpath; | |
210 elseif strcmpi(LearnOrRecWavs, 'r') | |
211 obj.wavFolder = rWAVpath; | |
212 else | |
213 error('First argument to constructor must be ''L'' or ''R'''); | |
214 end | |
215 end | |
216 | |
217 %% ********************************************************** | |
218 % mutex related _ | |
219 % | | | |
220 % _ __ ___ _ _| |_ _____ __ | |
221 % | '_ ` _ \| | | | __/ _ \ \/ / | |
222 % | | | | | | |_| | || __/> < | |
223 % |_| |_| |_|\__,_|\__\___/_/\_\ | |
224 %************************************************************ | |
225 | |
226 %% ********************************************************** | |
227 % lockJobList - File mutex protecting from race conditions | |
228 %************************************************************ | |
229 function obj = lockJobList(obj) | |
230 lockON = false; | |
231 while (~lockON) | |
232 if numel(dir(obj.jobLockTxtFile)) %Check to see if lock already in place | |
233 wTime = randi(750)+250; %3,000-10,000 ms | |
234 disp(['File mutex in place. Retrying in ' num2str(wTime) ' ms']) | |
235 pause(wTime/1000); | |
236 else | |
237 obj.jobLockFid = fopen(obj.jobLockTxtFile,'w'); | |
238 disp('Job locked'); | |
239 pause(50/1000); | |
240 lockON = true; | |
241 end | |
242 end | |
243 fclose(obj.jobLockFid); | |
244 end% ------ OF LOCKJOBLIST | |
245 | |
246 %% ********************************************************** | |
247 % unlockJobList - unlocks for other workers | |
248 %************************************************************ | |
249 function obj = unlockJobList(obj) | |
250 lockON = logical(numel(dir(obj.jobLockTxtFile))); | |
251 while(lockON) | |
252 try | |
253 delete(obj.jobLockTxtFile); | |
254 disp('Job unlocked'); | |
255 pause(50/1000); | |
256 lockON = false; | |
257 catch %#ok<CTCH> | |
258 disp('Unjamming lock - retrying immediately') | |
259 pause(200/1000) | |
260 end | |
261 end | |
262 end% ------ OF UNLOCKJOBLIST | |
263 | |
264 %% ********************************************************** | |
265 % storeSelf - save a copy of this object in opFolder | |
266 %************************************************************ | |
267 function storeSelf(obj) | |
268 doneSaving = false; | |
269 while(~doneSaving) | |
270 try | |
271 save(fullfile(obj.opFolder, 'jobObject'), 'obj'); | |
272 doneSaving = true; | |
273 catch %#ok<CTCH> | |
274 wTime = randi(3800)+200; %200-4000 ms | |
275 disp(['Save collision (THIS IS VERY VERY BAD - have you not used the mutex?). Retrying in ' num2str(wTime) ' ms']); | |
276 pause(wTime/1000); | |
277 end | |
278 end | |
279 end % ------ OF STORESELF | |
280 | |
281 %% ********************************************************** | |
282 % loadSelf - load a copy of this object from opFolder | |
283 %************************************************************ | |
284 function obj = loadSelf(obj) | |
285 doneLoading = false; | |
286 while(~doneLoading) | |
287 try | |
288 load(fullfile(obj.opFolder,'jobObject')); | |
289 doneLoading = true; | |
290 catch %#ok<CTCH> | |
291 wTime = randi(3800)+200; %200-4000 ms | |
292 disp(['Load collision (THIS IS VERY VERY BAD - have you not used the mutex?). Retrying in ' num2str(wTime) ' ms']) | |
293 pause(wTime/1000); | |
294 end | |
295 end | |
296 end% ------ OF LOADSELF | |
297 | |
298 | |
299 %% ********************************************************* | |
300 % _ _ _ | |
301 % | | | | (_) | |
302 % | |__ ___ _ _ ___ ___| | _____ ___ _ __ _ _ __ __ _ | |
303 % | '_ \ / _ \| | | / __|/ _ \ |/ / _ \/ _ \ '_ \| | '_ \ / _` | | |
304 % | | | | (_) | |_| \__ \ __/ < __/ __/ |_) | | | | | (_| | | |
305 % |_| |_|\___/ \__,_|___/\___|_|\_\___|\___| .__/|_|_| |_|\__, | | |
306 % | | __/ | | |
307 % |_| |___/ | |
308 %************************************************************ | |
309 | |
310 %% ********************************************************** | |
311 % get method for dependtent var jobLockTxtFile | |
312 %************************************************************ | |
313 function value = get.jobLockTxtFile(obj) | |
314 %Name the MUTEX file here | |
315 value = [fullfile(obj.opFolder, 'jobLock') '.txt']; | |
316 end | |
317 | |
318 %% ********************************************************** | |
319 % checkStatus - see how much of the current job is complete | |
320 %************************************************************ | |
321 function checkStatus(obj) | |
322 NOWopen = sum(obj.todoStatus==0); %Starting from R2010b, Matlab supports enumerations. For now, we resort to integers for compatibility. | |
323 NOWpend = sum(obj.todoStatus==1); | |
324 NOWdone = sum(obj.todoStatus==2); | |
325 | |
326 zz = clock; | |
327 disp([num2str(zz(3)) '-' num2str(zz(2)) '-' num2str(zz(1)) ' ' num2str(zz(4)) ':' num2str(zz(5))]) | |
328 disp(['CURRENT JOB:' obj.opFolder]); | |
329 disp(' ') | |
330 disp(['open - ' num2str(NOWopen) ' || pending - ' num2str(NOWpend) ' || complete - ' num2str(NOWdone)]) | |
331 | |
332 % ---- PROGRESS BAR ---- | |
333 pcDone = 100*NOWdone/obj.numWavs; | |
334 progBarLength = 40; | |
335 charBars = repmat('=',1,floor(pcDone/100 * progBarLength)); | |
336 charWhiteSpace = repmat(' ',1, progBarLength - numel(charBars)); | |
337 disp(' ') | |
338 disp([' -[' charBars charWhiteSpace ']- ' num2str(pcDone, '%0.1f') '%']) | |
339 disp(' ') | |
340 disp(' ') | |
341 % -- END PROGRESS BAR --- | |
342 end% ------ OF CHECKSTATUS | |
343 | |
344 %% ********************************************************** | |
345 % initMAP - add MAP stuff to path | |
346 %************************************************************ | |
347 function obj = initMAP(obj) | |
348 addpath(...fullfile(obj.MAProot, 'modules'),... | |
349 fullfile(obj.MAProot, 'utilities'),... | |
350 fullfile(obj.MAProot, 'MAP'),... | |
351 fullfile(obj.MAProot, 'parameterStore')); | |
352 end % ------ OF INIT MAP | |
353 | |
354 %% ********************************************************** | |
355 % assign files to testing and training sets | |
356 %************************************************************ | |
357 function obj = assignFiles(obj) | |
358 speechWavs = dir(fullfile(obj.wavFolder, '*.wav')); | |
359 assert(obj.numWavs <= size(speechWavs, 1) ,... | |
360 'not enough files available in the corpus'); % make sure we have enough wavs | |
361 | |
362 randomWavs = rand(1, size(speechWavs, 1)); | |
363 [~,b] = sort(randomWavs); | |
364 trainFileIdx = b(1:obj.numWavs); | |
365 | |
366 obj.wavList = speechWavs(trainFileIdx); %This is a record of all of the wavs that should be done | |
367 | |
368 %Starting from R2010b, Matlab should support enumerated types. For now we | |
369 %use integers for compatibility. | |
370 %0=open, 1=processing, 2=done | |
371 obj.todoStatus = zeros(numel(obj.wavList), 1); | |
372 | |
373 end % ------ OF ASSIGN FILES | |
374 | |
375 %% ********************************************************** | |
376 % generate feature | |
377 %************************************************************ | |
378 function obj = genFeat(obj, currentWav) | |
379 fprintf(1,'Processing: %s \n', currentWav); | |
380 if strcmpi(obj.speechDist,'Gaussian') | |
381 tempSpeechLev = obj.speechLevToUse + obj.speechLevStd*randn; | |
382 elseif strcmpi(obj.speechDist,'Uniform') | |
383 % for a uniform distribution, the standard deviation is | |
384 % range/sqrt(12) | |
385 % http://mathforum.org/library/drmath/view/52066.html | |
386 tempSpeechLev = obj.speechLevToUse - obj.speechLevStd*sqrt(12)/2 + obj.speechLevStd*sqrt(12)*rand; | |
387 elseif strcmpi(obj.speechDist,'None') | |
388 tempSpeechLev = obj.speechLevToUse; | |
389 end | |
390 | |
391 if strcmpi(obj.noiseDist,'Gaussian') | |
392 tempNoiseLev = speechLev - obj.meanSNR + obj.noiseLevStd*randn; | |
393 elseif strcmpi(obj.noiseDist,'Uniform') | |
394 tempNoiseLev = tempSpeechLev - obj.meanSNR - obj.noiseLevStd*sqrt(12)/2 + obj.noiseLevStd*sqrt(12)*rand; | |
395 elseif strcmpi(obj.noiseDist,'None') | |
396 tempNoiseLev = obj.noiseLevToUse; | |
397 end | |
398 | |
399 disp(['Current speech level = ' num2str(tempSpeechLev)]); | |
400 disp(['Current noise level = ' num2str(tempNoiseLev)]); | |
401 | |
402 obj.currentSpeechLevel = tempSpeechLev; | |
403 obj.currentNoiseLevel = tempNoiseLev; | |
404 [finalFeatures, ~] = processWavs(obj, currentWav); %discard the output from ANprobabilityResponse and method using ~ | |
405 opForHTK(obj, currentWav, finalFeatures); | |
406 end % ------ OF GENFEAT | |
407 | |
408 %% ********************************************************** | |
409 % write o/p in HTK friendly format | |
410 %************************************************************ | |
411 function obj = opForHTK(obj, currentWav, featureData) | |
412 | |
413 featureName = strrep(currentWav, '.wav','.map'); | |
414 targetFilename = fullfile(obj.opFolder, featureName); | |
415 | |
416 % write in a format HTK compliant for the recogniser to use | |
417 obj.writeHTK(... | |
418 targetFilename,... | |
419 featureData,... | |
420 size(featureData,2),... | |
421 obj.frameshift*obj.sampPeriodFromMsFactor,... | |
422 size(featureData,1)*4,... | |
423 obj.paramKind,... | |
424 obj.byteOrder); | |
425 end % ------ OF opForHTK | |
426 | |
427 | |
428 %% ********************************************************** | |
429 % _ _ _ | |
430 % (_) | | (_) | |
431 % ___ _ __ _ _ __ __ _| | _ __ _ __ ___ ___ ___ ___ ___ _ _ __ __ _ | |
432 % / __| |/ _` | '_ \ / _` | | | '_ \| '__/ _ \ / __/ _ \/ __/ __| | '_ \ / _` | | |
433 % \__ \ | (_| | | | | (_| | | | |_) | | | (_) | (_| __/\__ \__ \ | | | | (_| | | |
434 % |___/_|\__, |_| |_|\__,_|_| | .__/|_| \___/ \___\___||___/___/_|_| |_|\__, | | |
435 % __/ | | | __/ | | |
436 % |___/ |_| |___/ | |
437 %************************************************************ | |
438 | |
439 %% ********************************************************** | |
440 % getStimulus - what it says on the tin | |
441 %************************************************************ | |
442 function [stimulus, sampleRate] = getStimulus(obj, currentWav) | |
443 | |
444 % getStimulus.m - NC Aug 2010 | |
445 % Modified version of Rob's script to include: | |
446 % | |
447 % 1)Signal and noise samples with different sample rate. The component with | |
448 % lower sample rate is upsampled to the rate of that with the | |
449 % higher rate. | |
450 % 2) Clearer level setting | |
451 % 3) Parameter to change noise intro duration | |
452 % 4) Noise padding at end of stimulus | |
453 % | |
454 % ORIGINAL HEADER: | |
455 % getStimulus.m | |
456 % | |
457 % Robert T. Ferry | |
458 % 13th May 2009 | |
459 | |
460 % Set levels | |
461 [speech speechSampleRate] = wavread(fullfile(obj.wavFolder, currentWav )); | |
462 speech = speech./sqrt(mean(speech.^2)); %Normalize RMS to 1 | |
463 speech = speech * 20e-6 * 10^(obj.currentSpeechLevel/20); %Convert RMS to pascals at desired level | |
464 %disp(20*log10(sqrt(mean(speech.^2))/20e-6)) | |
465 | |
466 [noise noiseSampleRate] = wavread(fullfile(obj.noiseFolder, obj.noiseName )); | |
467 noise = noise./sqrt(mean(noise.^2)); %Normalize RMS to 1 | |
468 noise = noise * 20e-6*10^(obj.currentNoiseLevel/20); %Convert RMS to pascals at desired level | |
469 %disp(20*log10(sqrt(mean(noise.^2))/20e-6)) | |
470 | |
471 % Do sample rate conversion if needed | |
472 % Will always convert stimulus component with lower rate up to that with | |
473 % higher rate. | |
474 if speechSampleRate > noiseSampleRate | |
475 % disp('S>N') | |
476 [p,q] = rat(speechSampleRate/noiseSampleRate,0.0001); | |
477 noise = resample(noise, p, q); | |
478 noiseSampleRate = speechSampleRate; | |
479 elseif noiseSampleRate > speechSampleRate | |
480 % disp('N>S') | |
481 [p,q] = rat(noiseSampleRate/speechSampleRate,0.0001); | |
482 speech = resample(speech, p, q); | |
483 speechSampleRate = noiseSampleRate; %#ok<NASGU> | |
484 else | |
485 %DO NOTHING BUT ASSERT | |
486 assert(speechSampleRate == noiseSampleRate); | |
487 end | |
488 sampleRate = noiseSampleRate; | |
489 dt = 1/sampleRate; | |
490 | |
491 % mix stimuli | |
492 % Everything from here down (mostly) is RTF's original | |
493 silenceStart = floor(obj.noisePreDur*sampleRate); | |
494 silenceEnd = floor(obj.noisePostDur*sampleRate); | |
495 | |
496 silencePointsStart = zeros(silenceStart,1); | |
497 silencePointsEnd = zeros(silenceEnd,1); | |
498 | |
499 speech = [silencePointsStart; speech; silencePointsEnd]; | |
500 | |
501 stimLength = length(speech); | |
502 noiseLength = length(noise); | |
503 if obj.freezeNoise | |
504 idx = 1; | |
505 else | |
506 idx = ceil(rand*(noiseLength-stimLength)); | |
507 end | |
508 noise = noise(idx:idx+stimLength-1); | |
509 | |
510 stimulus = speech+noise; | |
511 | |
512 % add ramps to noise | |
513 stimInNoiseTime = dt:dt:dt*length(stimulus); | |
514 rampDuration = 0.100; | |
515 rampTime = dt : dt : rampDuration; | |
516 ramp = [0.5*(1+cos(2*pi*rampTime/(2*rampDuration)+pi)) ones(1,length(stimInNoiseTime)-length(rampTime))]; | |
517 stimulus = stimulus'.*ramp; | |
518 ramp = fliplr(ramp); | |
519 stimulus = stimulus.*ramp; | |
520 stimulus = stimulus'; | |
521 %disp(20*log10(sqrt(mean(stimulus.^2))/20e-6)) | |
522 | |
523 % add additional silent period to start of stimulus for model to 'settle down' | |
524 additionalSilenceLength = round(0.050*sampleRate); | |
525 additionalSilencePoints = zeros(additionalSilenceLength,1); | |
526 stimulus = [additionalSilencePoints; stimulus]'; %now rotated. | |
527 %disp(20*log10(sqrt(mean(stimulus.^2))/20e-6)) | |
528 end% ------ OF GETSTIMULUS | |
529 | |
530 | |
531 %% ********************************************************** | |
532 % processWavs - do all of the MAP signal processing | |
533 %************************************************************ | |
534 function [finalFeatures, ANprobabilityResponse] = processWavs(obj, currentWav) | |
535 | |
536 %********************************************************** | |
537 % FIRST GET THE STIMULUS | |
538 %********************************************************** | |
539 [stimulus, sampleRate] = obj.getStimulus(currentWav); | |
540 | |
541 %********************************************************** | |
542 % NOW TO LOAD IN THE HEARING AID | |
543 %********************************************************** | |
544 if obj.useAid | |
545 stimulus = [stimulus; stimulus]'; %EsxAid requires stereo stim | |
546 stimulus = EssexAid_guiEmulatorWrapper(stimulus, sampleRate, obj); | |
547 stimulus = stimulus(1,:); %convert back to mono | |
548 end | |
549 | |
550 AN_spikesOrProbability = 'probability'; | |
551 | |
552 if obj.useSpectrogram | |
553 lowestBF=100; highestBF= 4500; numChannels=30; | |
554 F=round(logspace(log10(lowestBF),log10(highestBF),numChannels)); | |
555 | |
556 nfft = 1024; | |
557 hopSamples = 64; | |
558 noverlap = nfft - hopSamples; | |
559 dt = hopSamples/sampleRate; | |
560 [~,~,~,P] = spectrogram(stimulus,nfft,noverlap,F,sampleRate); | |
561 | |
562 ANprobabilityResponse = 10*log10( abs(P) / ((20e-6)^2) ); %now correct [(a^2)/(b^2) = (a/b)^2] | |
563 | |
564 else | |
565 [ANprobabilityResponse, dt, myBFlist] = MAPwrap(stimulus, sampleRate, -1, obj.participant, AN_spikesOrProbability, obj.MAPparamChanges); | |
566 end | |
567 nChannels = numel(myBFlist); | |
568 | |
569 | |
570 time_ANresponse = dt:dt:dt*size(ANprobabilityResponse,2); | |
571 idx = time_ANresponse > obj.truncateDur; %RTF had this @ 0.550 | |
572 ANprobabilityResponse = ANprobabilityResponse(:, idx); | |
573 | |
574 | |
575 if ~obj.useSpectrogram | |
576 if obj.MAPopLSR && ~obj.MAPopHSR | |
577 ANprobabilityResponse = ANprobabilityResponse(1:nChannels, :); %use LSR | |
578 end | |
579 if ~obj.MAPopLSR && obj.MAPopHSR | |
580 ANprobabilityResponse = ANprobabilityResponse(end-nChannels+1:end, :); %or use HSR | |
581 end | |
582 if obj.MAPopMSR | |
583 assert(0,'Not working with MSR at the mo') | |
584 end | |
585 end | |
586 | |
587 % OPTIONAL PLOTTING | |
588 YTickIdx = 1:floor(numel(myBFlist)/6):numel(myBFlist); | |
589 YTickIdxRev = numel(myBFlist)+1-YTickIdx; | |
590 if ~isempty(obj.probHaxes) | |
591 axes(obj.probHaxes); %#ok<MAXES> | |
592 imagesc(ANprobabilityResponse); colorbar('peer', obj.probHaxes) | |
593 set(obj.probHaxes, 'YTick', YTickIdx); | |
594 set(obj.probHaxes, 'YTickLabel', num2str( myBFlist(YTickIdxRev)' )); | |
595 ylabel('cf in Hz') | |
596 end | |
597 | |
598 % OPTIONAL PLOTTING SMOOTHED | |
599 if ~isempty(obj.probHaxesSM) | |
600 axes(obj.probHaxesSM); %#ok<MAXES> | |
601 anSM=flipud(obj.makeANsmooth(ANprobabilityResponse, 1/dt)); | |
602 imagesc((1:size(anSM,2))./100,1:size(ANprobabilityResponse,1),anSM); | |
603 set(obj.probHaxesSM, 'YTick', YTickIdx); | |
604 set(obj.probHaxesSM, 'YTickLabel', num2str( myBFlist(YTickIdxRev)' )); | |
605 shading(obj.probHaxesSM, 'flat'); colorbar('peer', obj.probHaxesSM) | |
606 ylabel('cf (Hz)') | |
607 xlabel('Time (s)') | |
608 end | |
609 | |
610 | |
611 %********************************************************** | |
612 % optional SACF stage | |
613 %********************************************************** | |
614 if obj.useSACF | |
615 | |
616 % A slightly ugly copying is needed | |
617 SACFparams.lambda = obj.SACFlambda; | |
618 SACFparams.acfTau = obj.SACFacfTau; | |
619 SACFparams.lags = logspace(log10(obj.SACFminLag),log10(obj.SACFmaxLag),obj.SACFnBins); | |
620 SACFparams.lags = linspace(obj.SACFminLag, obj.SACFmaxLag,obj.SACFnBins ); | |
621 | |
622 SACFmethod.dt = dt; | |
623 SACFmethod.nonlinCF = myBFlist; | |
624 | |
625 %This is slightly misleading as the ANprob is now a SACF | |
626 [ANprobabilityResponse, ~, ~, ~] = filteredSACF(ANprobabilityResponse, SACFmethod, SACFparams); | |
627 | |
628 % OPTIONAL PLOTTING | |
629 YTickIdx = 1:floor(obj.SACFnBins/6):obj.SACFnBins; | |
630 %YTickIdxRev = obj.SACFnBins+1-YTickIdx; | |
631 if ~isempty(obj.sacfHaxes) | |
632 axes(obj.sacfHaxes); %#ok<MAXES> | |
633 imagesc(flipud(ANprobabilityResponse)); shading(obj.sacfHaxes, 'flat'); colorbar('peer', obj.sacfHaxes) | |
634 set(obj.sacfHaxes, 'YTick', YTickIdx); | |
635 set(obj.sacfHaxes, 'YTickLabel', num2str( 1./SACFparams.lags(YTickIdx)' ,'%0.1f' )); | |
636 ylabel('Pitch in Hz') | |
637 end | |
638 | |
639 % OPTIONAL PLOTTING SMOOTHED | |
640 if ~isempty(obj.sacfHaxesSM) | |
641 axes(obj.sacfHaxesSM); %#ok<MAXES> | |
642 imagesc(flipud(obj.makeANsmooth(ANprobabilityResponse, 1/dt))); shading(obj.sacfHaxesSM, 'flat'); colorbar('peer', obj.sacfHaxesSM) | |
643 set(obj.sacfHaxesSM, 'YTick', YTickIdx); | |
644 set(obj.sacfHaxesSM, 'YTickLabel', num2str( 1./SACFparams.lags(YTickIdx)' ,'%0.1f' )); | |
645 ylabel('Pitch in Hz') | |
646 end | |
647 | |
648 end | |
649 | |
650 | |
651 finalFeatures = obj.makeANfeatures( ... | |
652 obj.makeANsmooth(ANprobabilityResponse, 1/dt), obj.numCoeff ); | |
653 | |
654 if obj.removeEnergyStatic | |
655 finalFeatures = finalFeatures(2:end,:); | |
656 % disp(size(finalFeatures)) | |
657 end | |
658 | |
659 if obj.doCMN | |
660 m = mean(finalFeatures,2); | |
661 finalFeatures = finalFeatures - repmat(m,1,size(finalFeatures,2)); | |
662 end | |
663 | |
664 % OPTIONAL PLOTTING | |
665 if ~isempty(obj.featHaxes) | |
666 pcolor(obj.featHaxes, finalFeatures); shading(obj.featHaxes, 'flat'); colorbar('peer', obj.featHaxes) | |
667 end | |
668 if ~isempty(obj.reconHaxes) | |
669 reconsData = idct(finalFeatures,obj.SACFnBins); | |
670 axes(obj.reconHaxes); %#ok<MAXES> | |
671 imagesc(flipud( reconsData )); | |
672 end | |
673 | |
674 opForHTK(obj, currentWav, finalFeatures); | |
675 end % ------ OF PROCESSWAVS | |
676 | |
677 end % ------ OF METHODS | |
678 | |
679 %% ********************************************************* | |
680 % _ _ _ _ _ _ | |
681 % | | | | (_) | | | | | | | |
682 % ___| |_ __ _| |_ _ ___ _ __ ___ ___| |_| |__ ___ __| |___ | |
683 % / __| __/ _` | __| |/ __| | '_ ` _ \ / _ \ __| '_ \ / _ \ / _` / __| | |
684 % \__ \ || (_| | |_| | (__ | | | | | | __/ |_| | | | (_) | (_| \__ \ | |
685 % |___/\__\__,_|\__|_|\___| |_| |_| |_|\___|\__|_| |_|\___/ \__,_|___/ | |
686 %************************************************************ | |
687 | |
688 methods(Static) | |
689 %% ******************************************************** | |
690 % makeANsmooth - smooth the AN response into hanning windowed chunks | |
691 %********************************************************** | |
692 function ANsmooth = makeANsmooth(ANresponse, sampleRate, winSize, hopSize) | |
693 if nargin < 3 | |
694 winSize = 25; %default 25 ms window | |
695 end | |
696 if nargin < 4 | |
697 hopSize = 10; %default 10 ms jump between windows | |
698 end | |
699 | |
700 winSizeSamples = round(winSize*sampleRate/1000); | |
701 hopSizeSamples = round(hopSize*sampleRate/1000); | |
702 | |
703 % smooth | |
704 hann = cJob.NRC_hanning(winSizeSamples); | |
705 | |
706 ANsmooth = [];%Cannot pre-allocate a size as it is unknown until the enframing | |
707 for chan = 1:size(ANresponse,1) | |
708 f = cJob.enframe(ANresponse(chan,:), hann, hopSizeSamples); | |
709 ANsmooth(chan,:) = mean(f,2)'; %#ok<AGROW> see above comment | |
710 end | |
711 end% ------ OF makeANsmooth | |
712 | |
713 %% ******************************************************** | |
714 % makeANfeatures - dct wizardry | |
715 %********************************************************** | |
716 function ANfeatures = makeANfeatures(ANrate, numCoeff) | |
717 % make feature vectors | |
718 features = cJob.GJB_dct(ANrate); | |
719 ANfeatures = features(1:numCoeff,:); | |
720 end % ------ OF makeANfeatures | |
721 | |
722 %% ************************************************************************ | |
723 % enframe - AVOID SIGNAL PROCESSING TOOLBOX buffer function | |
724 %************************************************************************** | |
725 function f=enframe(x,win,inc) | |
726 %ENFRAME split signal up into (overlapping) frames: one per row. F=(X,WIN,INC) | |
727 % | |
728 % F = ENFRAME(X,LEN) splits the vector X(:) up into | |
729 % frames. Each frame is of length LEN and occupies | |
730 % one row of the output matrix. The last few frames of X | |
731 % will be ignored if its length is not divisible by LEN. | |
732 % It is an error if X is shorter than LEN. | |
733 % | |
734 % F = ENFRAME(X,LEN,INC) has frames beginning at increments of INC | |
735 % The centre of frame I is X((I-1)*INC+(LEN+1)/2) for I=1,2,... | |
736 % The number of frames is fix((length(X)-LEN+INC)/INC) | |
737 % | |
738 % F = ENFRAME(X,WINDOW) or ENFRAME(X,WINDOW,INC) multiplies | |
739 % each frame by WINDOW(:) | |
740 | |
741 % Copyright (C) Mike Brookes 1997 | |
742 % Version: $Id: enframe.m,v 1.4 2006/06/22 19:07:50 dmb Exp $ | |
743 % | |
744 % VOICEBOX is a MATLAB toolbox for speech processing. | |
745 % Home page: http://www.ee.ic.ac.uk/hp/staff/dmb/voicebox/voicebox.html | |
746 % | |
747 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | |
748 % This program is free software; you can redistribute it and/or modify | |
749 % it under the terms of the GNU General Public License as published by | |
750 % the Free Software Foundation; either version 2 of the License, or | |
751 % (at your option) any later version. | |
752 % | |
753 % This program is distributed in the hope that it will be useful, | |
754 % but WITHOUT ANY WARRANTY; without even the implied warranty of | |
755 % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
756 % GNU General Public License for more details. | |
757 % | |
758 % You can obtain a copy of the GNU General Public License from | |
759 % http://www.gnu.org/copyleft/gpl.html or by writing to | |
760 % Free Software Foundation, Inc.,675 Mass Ave, Cambridge, MA 02139, USA. | |
761 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | |
762 | |
763 nx=length(x(:)); | |
764 nwin=length(win); | |
765 if (nwin == 1) | |
766 len = win; | |
767 else | |
768 len = nwin; | |
769 end | |
770 if (nargin < 3) | |
771 inc = len; | |
772 end | |
773 nf = fix((nx-len+inc)/inc); | |
774 f=zeros(nf,len); | |
775 indf= inc*(0:(nf-1)).'; | |
776 inds = (1:len); | |
777 f(:) = x(indf(:,ones(1,len))+inds(ones(nf,1),:)); | |
778 if (nwin > 1) | |
779 w = win(:)'; | |
780 f = f .* w(ones(nf,1),:); | |
781 end | |
782 end % ------ OF ENFRAME | |
783 | |
784 %% ************************************************************************ | |
785 % GJB_dct - AVOID SIGNAL PROCESSING TOOLBOX | |
786 %************************************************************************** | |
787 function b=GJB_dct(a,n) | |
788 | |
789 if nargin == 0, | |
790 error('Not enough input arguments.'); | |
791 end | |
792 | |
793 if isempty(a) | |
794 b = []; | |
795 return | |
796 end | |
797 | |
798 % If input is a vector, make it a column: | |
799 do_trans = (size(a,1) == 1); | |
800 if do_trans, a = a(:); end | |
801 | |
802 if nargin==1, | |
803 n = size(a,1); | |
804 end | |
805 m = size(a,2); | |
806 | |
807 % Pad or truncate input if necessary | |
808 if size(a,1)<n, | |
809 aa = zeros(n,m); | |
810 aa(1:size(a,1),:) = a; | |
811 else | |
812 aa = a(1:n,:); | |
813 end | |
814 | |
815 % Compute weights to multiply DFT coefficients | |
816 ww = (exp(-1i*(0:n-1)*pi/(2*n))/sqrt(2*n)).'; | |
817 ww(1) = ww(1) / sqrt(2); | |
818 | |
819 if rem(n,2)==1 || ~isreal(a), % odd case | |
820 % Form intermediate even-symmetric matrix | |
821 y = zeros(2*n,m); | |
822 y(1:n,:) = aa; | |
823 y(n+1:2*n,:) = flipud(aa); | |
824 | |
825 % Compute the FFT and keep the appropriate portion: | |
826 yy = fft(y); | |
827 yy = yy(1:n,:); | |
828 | |
829 else % even case | |
830 % Re-order the elements of the columns of x | |
831 y = [ aa(1:2:n,:); aa(n:-2:2,:) ]; | |
832 yy = fft(y); | |
833 ww = 2*ww; % Double the weights for even-length case | |
834 end | |
835 | |
836 % Multiply FFT by weights: | |
837 b = ww(:,ones(1,m)) .* yy; | |
838 | |
839 if isreal(a), b = real(b); end | |
840 if do_trans, b = b.'; end | |
841 end % ----- of GJB_DCT | |
842 | |
843 %% ************************************************************************ | |
844 % NRC_hanning - AVOID SIGNAL PROCESSING TOOLBOX | |
845 %************************************************************************** | |
846 function w=NRC_hanning(n) | |
847 calc_hanning = @(m,n)0.5*(1 - cos(2*pi*(1:m)'/(n+1))); %cheeky anonymous function - I <3 these | |
848 if ~rem(n,2) | |
849 % Even length window | |
850 half = n/2; | |
851 w = calc_hanning(half,n); | |
852 w = [w; w(end:-1:1)]; | |
853 else | |
854 % Odd length window | |
855 half = (n+1)/2; | |
856 w = calc_hanning(half,n); | |
857 w = [w; w(end-1:-1:1)]; | |
858 end | |
859 end % ------ of NRC_HANNING | |
860 | |
861 %% ************************************************************************ | |
862 % writeHTK - convert data to htk format -> by anonymous Sue (2001) | |
863 %************************************************************************** | |
864 function retcode = writeHTK(filename, htkdata, nFrames, sampPeriod, SampSize, ParamKind, byte_order) | |
865 % Write an HTK format file. | |
866 % | |
867 % Input parameters: | |
868 % filename HTK data file | |
869 % htkdata HTK data read: an m x n matrix with | |
870 % m = no. of channels | |
871 % n = no. of frames | |
872 % The following are from the HTK header (see HTK manual): | |
873 % nFrames no. of frames (samples) | |
874 % sampPeriod sample period (in 100 ns units?) | |
875 % SampSize sample size | |
876 % ParamKind parameter kind code | |
877 % | |
878 % byteorder 'be' for big-endian (typical for Unix) (default) | |
879 % 'le' for little-endian (typical for MSWindows) | |
880 % | |
881 % Output parameters: | |
882 % retcode 0 if successful | |
883 | |
884 % Written by Sue 17/12/01 | |
885 | |
886 retcode=-1; % initialise in case of error | |
887 if nargin < 6 | |
888 fprintf('Usage: %s(filename, htkdata, nFrames, sampPeriod, SampSize, ParamKind [, byte_order])', mfilename); | |
889 end; | |
890 | |
891 % Default to big-endian (HTK format) | |
892 if nargin < 7 | |
893 byte_order = 'be'; | |
894 end; | |
895 | |
896 fid = fopen (filename, 'w', sprintf('ieee-%s', byte_order)); | |
897 if fid < 1 | |
898 fprintf('%s: can''t open output file %s\n', mfilename, filename); | |
899 return | |
900 end | |
901 | |
902 % Write header | |
903 fwrite (fid, nFrames, 'int32'); %nSamples in HTK | |
904 fwrite (fid, sampPeriod, 'int32'); | |
905 fwrite (fid, SampSize, 'int16'); | |
906 fwrite (fid, ParamKind, 'int16'); | |
907 | |
908 % Write actual data | |
909 fwrite(fid, htkdata, 'float32'); | |
910 | |
911 fclose(fid); | |
912 | |
913 retcode=0; | |
914 end% ------ OF WRITEHTK | |
915 | |
916 %% ************************************************************************ | |
917 % readHTK - just incase you ever want to go backwards | |
918 %************************************************************************** | |
919 function [htkdata,nframes,sampPeriod,sampSize,paramKind] = readHTK(filename,byte_order) | |
920 | |
921 if nargin<2 | |
922 byte_order = 'be'; | |
923 end | |
924 | |
925 fid = fopen(filename,'r',sprintf('ieee-%s',byte_order)); | |
926 | |
927 nframes = fread(fid,1,'int32'); | |
928 sampPeriod = fread(fid,1,'int32'); | |
929 sampSize = fread(fid,1,'int16'); | |
930 paramKind = fread(fid,1,'int16'); | |
931 | |
932 % read the data | |
933 | |
934 htkdata = fread(fid,nframes*(sampSize/4),'float32'); | |
935 htkdata = reshape(htkdata,sampSize/4,nframes); | |
936 fclose(fid); | |
937 end % ------ OF READHTK | |
938 | |
939 end % ------ OF STATIC METHODS | |
940 | |
941 end % ------ OF CLASS |