comparison userProgramsASRforDummies/cJob.m @ 38:c2204b18f4a2 tip

End nov big change
author Ray Meddis <rmeddis@essex.ac.uk>
date Mon, 28 Nov 2011 13:34:28 +0000
parents
children
comparison
equal deleted inserted replaced
37:771a643d5c29 38:c2204b18f4a2
1 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2 % This program is free software; you can redistribute it and/or modify
3 % it under the terms of the GNU General Public License as published by
4 % the Free Software Foundation; either version 2 of the License, or
5 % (at your option) any later version.
6 %
7 % This program is distributed in the hope that it will be useful,
8 % but WITHOUT ANY WARRANTY; without even the implied warranty of
9 % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 % GNU General Public License for more details.
11 %
12 % You can obtain a copy of the GNU General Public License from
13 % http://www.gnu.org/copyleft/gpl.html or by writing to
14 % Free Software Foundation, Inc.,675 Mass Ave, Cambridge, MA 02139, USA.
15 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
16
17 classdef cJob
18 %CJOB Responsible for conversion of wav files into recogniser features
19 % Please see the documentation located in a separate file for further
20 % information.
21
22 %% *********************************************************
23 % properties _ _
24 % | | (_)
25 % _ __ _ __ ___ _ __ ___ _ __| |_ _ ___ ___
26 % | '_ \| '__/ _ \| '_ \ / _ \ '__| __| |/ _ \/ __|
27 % | |_) | | | (_) | |_) | __/ | | |_| | __/\__ \
28 % | .__/|_| \___/| .__/ \___|_| \__|_|\___||___/
29 % | | | |
30 % |_| |_|
31 %************************************************************
32
33 %% **********************************************************
34 % Public properties - can be set by user
35 %************************************************************
36 properties(Access = public)
37 wavFolder
38 opFolder
39 noiseFolder
40
41 wavList
42 todoStatus
43
44
45 participant = 'Normal';%'DEMO2_multiSpont';
46 noiseName = '8TalkerBabble';
47 numWavs = 5;
48 noiseLevToUse = -200;
49 speechLevToUse = 50;
50
51 speechLevStd = 0;
52 noiseLevStd = 0;
53 freezeNoise = 0;
54 meanSNR = 20;
55 speechDist = 'None';
56 noiseDist = 'None';
57
58 noisePreDur = 0.0;
59 noisePostDur = 0.0;
60 truncateDur = 0.0; %Dr. RF used 0.550
61
62 currentSpeechLevel
63 currentNoiseLevel
64
65
66 useSpectrogram = false;
67 numCoeff = 9;
68
69 %************************************************************
70 % plotting handles
71 %************************************************************
72 probHaxes = [];
73 probHaxesSM = [];
74 sacfHaxes = [];
75 sacfHaxesSM = [];
76 featHaxes = [];
77 reconHaxes = [];
78
79 %************************************************************
80 % SACF params
81 %************************************************************
82 useSACF = false;
83 SACFacfTau = 2; % > 1 = Wiegrebe mode
84 SACFnBins = 128;
85 SACFminLag = 1 / 4000;
86 SACFmaxLag = 1 / 50;
87 SACFlambda = 10e-3;
88
89 %************************************************************
90 % MAP params
91 %************************************************************
92 MAProot = fullfile('..');
93 MAPplotGraphs = 0;
94 MAPDRNLSave = 0;
95
96 MAPopLSR = 0;
97 MAPopMSR = 0;
98 MAPopHSR = 1;
99
100 MAPparamChanges = {};
101
102 %************************************************************
103 % HTK stuff - writing to HTK recogniser format
104 %************************************************************
105 frameshift = 10; % shift between frames (ms)
106 sampPeriodFromMsFactor = 10000; % appropriate for 10ms frame rate
107 paramKind = 9; % HTK USER format for user-defined features (p73 of HTK book)
108
109 removeEnergyStatic = false;
110 doCMN = false; % Cepstral mean normalisation
111
112 %************************************************************
113 % Portable EssexAid params
114 %************************************************************
115 bwOct = 1/1;
116 filterOrder = 2;
117
118 mainGain = [ 1; 1; 1; 1; 1]; % gain in linear units
119 TCdBO = [40; 40; 40; 40; 40]; %Compression thresholds (in dB OUTPUT from 2nd filt)
120 TMdBO = [10; 10; 10; 10; 10]; %MOC thresholds (in dB OUTPUT from 2nd filt)
121 DRNLc = [ 0.2; 0.2; 0.2; 0.2; 0.2;]
122
123 ARtau = 0.03; % decay time constant
124 ARthresholddB = 85; % dB SPL (input signal level) =>200 to disable
125 MOCtau = 0.3;
126 MOCfactor = 0.5; %dB per dB OUTPUT
127
128 numSamples = 1024; %MAX=6912
129 useAid = 0;
130 end
131
132 %% *********************************************************
133 % Protected properties - inter/ra class communication
134 %************************************************************
135 properties(Access = protected)
136 jobLockFid % File identifier for mutex
137
138 %Nick C comment on this:
139 %OK. The big-endian thing used to work because in the config file
140 %'config_tr_zcpa12' there was a flag called NATURALREADORDER that was set to
141 %FALSE and thus appeared to override x86 standard:
142 %little-endian. Endianess has **!nothing!** to do with win vs *nix
143 byteOrder = 'le'; % byte order is big endian
144 end
145 properties(Dependent = true)
146 jobLockTxtFile
147 end
148
149 %% *********************************************************
150 % methods _ _ _
151 % | | | | | |
152 % _ __ ___ ___| |_| |__ ___ __| |___
153 %| '_ ` _ \ / _ \ __| '_ \ / _ \ / _` / __|
154 %| | | | | | __/ |_| | | | (_) | (_| \__ \
155 %|_| |_| |_|\___|\__|_| |_|\___/ \__,_|___/
156 %************************************************************
157
158 methods
159 %% **********************************************************
160 % Constructor
161 %************************************************************
162 function obj = cJob(LearnOrRecWavs, jobFolder)
163 if nargin < 1
164 warning('job:ambiguouscorpus',...
165 'We need to know whether the (L)earning or (R)ecognition corpus is being used, assuming ''L''');
166 LearnOrRecWavs = 'L';
167 end
168 if nargin > 1
169 obj.opFolder = jobFolder;
170 else
171 if isunix
172 if ismac
173 obj.opFolder = '~/ASR/exps/_foo';
174 else
175 obj.opFolder = '/scratch/nrclark/exps/_foo';
176 end
177 else
178 obj.opFolder = 'D:\exps\_foo';
179 end
180 end
181
182 obj = obj.assignWavPaths(LearnOrRecWavs);
183 obj = obj.initMAP;
184
185
186 end % ------ OF CONSTRUCTOR
187
188 %% **********************************************************
189 % Set Wav Paths
190 %************************************************************
191 function obj = assignWavPaths(obj, LearnOrRecWavs)
192 if isunix
193 if ismac
194 lWAVpath = fullfile('demo_wavs', 'TrainingData-Clean');
195 rWAVpath = fullfile('demo_wavs', 'TripletTestData');
196 obj.noiseFolder = fullfile('demo_wavs', 'noises');
197 else
198 lWAVpath = fullfile('demo_wavs', 'TrainingData-Clean');
199 rWAVpath = fullfile('demo_wavs', 'TripletTestData');
200 obj.noiseFolder = fullfile('demo_wavs', 'noises');
201 end
202 else
203 lWAVpath = 'D:\AURORA digits (wav)\TrainingData-Clean';
204 rWAVpath = 'D:\AURORA digits (wav)\TripletTestData';
205 obj.noiseFolder = 'D:\AURORA digits (wav)\noises';
206 end
207
208 if strcmpi(LearnOrRecWavs, 'l')
209 obj.wavFolder = lWAVpath;
210 elseif strcmpi(LearnOrRecWavs, 'r')
211 obj.wavFolder = rWAVpath;
212 else
213 error('First argument to constructor must be ''L'' or ''R''');
214 end
215 end
216
217 %% **********************************************************
218 % mutex related _
219 % | |
220 % _ __ ___ _ _| |_ _____ __
221 % | '_ ` _ \| | | | __/ _ \ \/ /
222 % | | | | | | |_| | || __/> <
223 % |_| |_| |_|\__,_|\__\___/_/\_\
224 %************************************************************
225
226 %% **********************************************************
227 % lockJobList - File mutex protecting from race conditions
228 %************************************************************
229 function obj = lockJobList(obj)
230 lockON = false;
231 while (~lockON)
232 if numel(dir(obj.jobLockTxtFile)) %Check to see if lock already in place
233 wTime = randi(750)+250; %3,000-10,000 ms
234 disp(['File mutex in place. Retrying in ' num2str(wTime) ' ms'])
235 pause(wTime/1000);
236 else
237 obj.jobLockFid = fopen(obj.jobLockTxtFile,'w');
238 disp('Job locked');
239 pause(50/1000);
240 lockON = true;
241 end
242 end
243 fclose(obj.jobLockFid);
244 end% ------ OF LOCKJOBLIST
245
246 %% **********************************************************
247 % unlockJobList - unlocks for other workers
248 %************************************************************
249 function obj = unlockJobList(obj)
250 lockON = logical(numel(dir(obj.jobLockTxtFile)));
251 while(lockON)
252 try
253 delete(obj.jobLockTxtFile);
254 disp('Job unlocked');
255 pause(50/1000);
256 lockON = false;
257 catch %#ok<CTCH>
258 disp('Unjamming lock - retrying immediately')
259 pause(200/1000)
260 end
261 end
262 end% ------ OF UNLOCKJOBLIST
263
264 %% **********************************************************
265 % storeSelf - save a copy of this object in opFolder
266 %************************************************************
267 function storeSelf(obj)
268 doneSaving = false;
269 while(~doneSaving)
270 try
271 save(fullfile(obj.opFolder, 'jobObject'), 'obj');
272 doneSaving = true;
273 catch %#ok<CTCH>
274 wTime = randi(3800)+200; %200-4000 ms
275 disp(['Save collision (THIS IS VERY VERY BAD - have you not used the mutex?). Retrying in ' num2str(wTime) ' ms']);
276 pause(wTime/1000);
277 end
278 end
279 end % ------ OF STORESELF
280
281 %% **********************************************************
282 % loadSelf - load a copy of this object from opFolder
283 %************************************************************
284 function obj = loadSelf(obj)
285 doneLoading = false;
286 while(~doneLoading)
287 try
288 load(fullfile(obj.opFolder,'jobObject'));
289 doneLoading = true;
290 catch %#ok<CTCH>
291 wTime = randi(3800)+200; %200-4000 ms
292 disp(['Load collision (THIS IS VERY VERY BAD - have you not used the mutex?). Retrying in ' num2str(wTime) ' ms'])
293 pause(wTime/1000);
294 end
295 end
296 end% ------ OF LOADSELF
297
298
299 %% *********************************************************
300 % _ _ _
301 % | | | | (_)
302 % | |__ ___ _ _ ___ ___| | _____ ___ _ __ _ _ __ __ _
303 % | '_ \ / _ \| | | / __|/ _ \ |/ / _ \/ _ \ '_ \| | '_ \ / _` |
304 % | | | | (_) | |_| \__ \ __/ < __/ __/ |_) | | | | | (_| |
305 % |_| |_|\___/ \__,_|___/\___|_|\_\___|\___| .__/|_|_| |_|\__, |
306 % | | __/ |
307 % |_| |___/
308 %************************************************************
309
310 %% **********************************************************
311 % get method for dependtent var jobLockTxtFile
312 %************************************************************
313 function value = get.jobLockTxtFile(obj)
314 %Name the MUTEX file here
315 value = [fullfile(obj.opFolder, 'jobLock') '.txt'];
316 end
317
318 %% **********************************************************
319 % checkStatus - see how much of the current job is complete
320 %************************************************************
321 function checkStatus(obj)
322 NOWopen = sum(obj.todoStatus==0); %Starting from R2010b, Matlab supports enumerations. For now, we resort to integers for compatibility.
323 NOWpend = sum(obj.todoStatus==1);
324 NOWdone = sum(obj.todoStatus==2);
325
326 zz = clock;
327 disp([num2str(zz(3)) '-' num2str(zz(2)) '-' num2str(zz(1)) ' ' num2str(zz(4)) ':' num2str(zz(5))])
328 disp(['CURRENT JOB:' obj.opFolder]);
329 disp(' ')
330 disp(['open - ' num2str(NOWopen) ' || pending - ' num2str(NOWpend) ' || complete - ' num2str(NOWdone)])
331
332 % ---- PROGRESS BAR ----
333 pcDone = 100*NOWdone/obj.numWavs;
334 progBarLength = 40;
335 charBars = repmat('=',1,floor(pcDone/100 * progBarLength));
336 charWhiteSpace = repmat(' ',1, progBarLength - numel(charBars));
337 disp(' ')
338 disp([' -[' charBars charWhiteSpace ']- ' num2str(pcDone, '%0.1f') '%'])
339 disp(' ')
340 disp(' ')
341 % -- END PROGRESS BAR ---
342 end% ------ OF CHECKSTATUS
343
344 %% **********************************************************
345 % initMAP - add MAP stuff to path
346 %************************************************************
347 function obj = initMAP(obj)
348 addpath(...fullfile(obj.MAProot, 'modules'),...
349 fullfile(obj.MAProot, 'utilities'),...
350 fullfile(obj.MAProot, 'MAP'),...
351 fullfile(obj.MAProot, 'parameterStore'));
352 end % ------ OF INIT MAP
353
354 %% **********************************************************
355 % assign files to testing and training sets
356 %************************************************************
357 function obj = assignFiles(obj)
358 speechWavs = dir(fullfile(obj.wavFolder, '*.wav'));
359 assert(obj.numWavs <= size(speechWavs, 1) ,...
360 'not enough files available in the corpus'); % make sure we have enough wavs
361
362 randomWavs = rand(1, size(speechWavs, 1));
363 [~,b] = sort(randomWavs);
364 trainFileIdx = b(1:obj.numWavs);
365
366 obj.wavList = speechWavs(trainFileIdx); %This is a record of all of the wavs that should be done
367
368 %Starting from R2010b, Matlab should support enumerated types. For now we
369 %use integers for compatibility.
370 %0=open, 1=processing, 2=done
371 obj.todoStatus = zeros(numel(obj.wavList), 1);
372
373 end % ------ OF ASSIGN FILES
374
375 %% **********************************************************
376 % generate feature
377 %************************************************************
378 function obj = genFeat(obj, currentWav)
379 fprintf(1,'Processing: %s \n', currentWav);
380 if strcmpi(obj.speechDist,'Gaussian')
381 tempSpeechLev = obj.speechLevToUse + obj.speechLevStd*randn;
382 elseif strcmpi(obj.speechDist,'Uniform')
383 % for a uniform distribution, the standard deviation is
384 % range/sqrt(12)
385 % http://mathforum.org/library/drmath/view/52066.html
386 tempSpeechLev = obj.speechLevToUse - obj.speechLevStd*sqrt(12)/2 + obj.speechLevStd*sqrt(12)*rand;
387 elseif strcmpi(obj.speechDist,'None')
388 tempSpeechLev = obj.speechLevToUse;
389 end
390
391 if strcmpi(obj.noiseDist,'Gaussian')
392 tempNoiseLev = speechLev - obj.meanSNR + obj.noiseLevStd*randn;
393 elseif strcmpi(obj.noiseDist,'Uniform')
394 tempNoiseLev = tempSpeechLev - obj.meanSNR - obj.noiseLevStd*sqrt(12)/2 + obj.noiseLevStd*sqrt(12)*rand;
395 elseif strcmpi(obj.noiseDist,'None')
396 tempNoiseLev = obj.noiseLevToUse;
397 end
398
399 disp(['Current speech level = ' num2str(tempSpeechLev)]);
400 disp(['Current noise level = ' num2str(tempNoiseLev)]);
401
402 obj.currentSpeechLevel = tempSpeechLev;
403 obj.currentNoiseLevel = tempNoiseLev;
404 [finalFeatures, ~] = processWavs(obj, currentWav); %discard the output from ANprobabilityResponse and method using ~
405 opForHTK(obj, currentWav, finalFeatures);
406 end % ------ OF GENFEAT
407
408 %% **********************************************************
409 % write o/p in HTK friendly format
410 %************************************************************
411 function obj = opForHTK(obj, currentWav, featureData)
412
413 featureName = strrep(currentWav, '.wav','.map');
414 targetFilename = fullfile(obj.opFolder, featureName);
415
416 % write in a format HTK compliant for the recogniser to use
417 obj.writeHTK(...
418 targetFilename,...
419 featureData,...
420 size(featureData,2),...
421 obj.frameshift*obj.sampPeriodFromMsFactor,...
422 size(featureData,1)*4,...
423 obj.paramKind,...
424 obj.byteOrder);
425 end % ------ OF opForHTK
426
427
428 %% **********************************************************
429 % _ _ _
430 % (_) | | (_)
431 % ___ _ __ _ _ __ __ _| | _ __ _ __ ___ ___ ___ ___ ___ _ _ __ __ _
432 % / __| |/ _` | '_ \ / _` | | | '_ \| '__/ _ \ / __/ _ \/ __/ __| | '_ \ / _` |
433 % \__ \ | (_| | | | | (_| | | | |_) | | | (_) | (_| __/\__ \__ \ | | | | (_| |
434 % |___/_|\__, |_| |_|\__,_|_| | .__/|_| \___/ \___\___||___/___/_|_| |_|\__, |
435 % __/ | | | __/ |
436 % |___/ |_| |___/
437 %************************************************************
438
439 %% **********************************************************
440 % getStimulus - what it says on the tin
441 %************************************************************
442 function [stimulus, sampleRate] = getStimulus(obj, currentWav)
443
444 % getStimulus.m - NC Aug 2010
445 % Modified version of Rob's script to include:
446 %
447 % 1)Signal and noise samples with different sample rate. The component with
448 % lower sample rate is upsampled to the rate of that with the
449 % higher rate.
450 % 2) Clearer level setting
451 % 3) Parameter to change noise intro duration
452 % 4) Noise padding at end of stimulus
453 %
454 % ORIGINAL HEADER:
455 % getStimulus.m
456 %
457 % Robert T. Ferry
458 % 13th May 2009
459
460 % Set levels
461 [speech speechSampleRate] = wavread(fullfile(obj.wavFolder, currentWav ));
462 speech = speech./sqrt(mean(speech.^2)); %Normalize RMS to 1
463 speech = speech * 20e-6 * 10^(obj.currentSpeechLevel/20); %Convert RMS to pascals at desired level
464 %disp(20*log10(sqrt(mean(speech.^2))/20e-6))
465
466 [noise noiseSampleRate] = wavread(fullfile(obj.noiseFolder, obj.noiseName ));
467 noise = noise./sqrt(mean(noise.^2)); %Normalize RMS to 1
468 noise = noise * 20e-6*10^(obj.currentNoiseLevel/20); %Convert RMS to pascals at desired level
469 %disp(20*log10(sqrt(mean(noise.^2))/20e-6))
470
471 % Do sample rate conversion if needed
472 % Will always convert stimulus component with lower rate up to that with
473 % higher rate.
474 if speechSampleRate > noiseSampleRate
475 % disp('S>N')
476 [p,q] = rat(speechSampleRate/noiseSampleRate,0.0001);
477 noise = resample(noise, p, q);
478 noiseSampleRate = speechSampleRate;
479 elseif noiseSampleRate > speechSampleRate
480 % disp('N>S')
481 [p,q] = rat(noiseSampleRate/speechSampleRate,0.0001);
482 speech = resample(speech, p, q);
483 speechSampleRate = noiseSampleRate; %#ok<NASGU>
484 else
485 %DO NOTHING BUT ASSERT
486 assert(speechSampleRate == noiseSampleRate);
487 end
488 sampleRate = noiseSampleRate;
489 dt = 1/sampleRate;
490
491 % mix stimuli
492 % Everything from here down (mostly) is RTF's original
493 silenceStart = floor(obj.noisePreDur*sampleRate);
494 silenceEnd = floor(obj.noisePostDur*sampleRate);
495
496 silencePointsStart = zeros(silenceStart,1);
497 silencePointsEnd = zeros(silenceEnd,1);
498
499 speech = [silencePointsStart; speech; silencePointsEnd];
500
501 stimLength = length(speech);
502 noiseLength = length(noise);
503 if obj.freezeNoise
504 idx = 1;
505 else
506 idx = ceil(rand*(noiseLength-stimLength));
507 end
508 noise = noise(idx:idx+stimLength-1);
509
510 stimulus = speech+noise;
511
512 % add ramps to noise
513 stimInNoiseTime = dt:dt:dt*length(stimulus);
514 rampDuration = 0.100;
515 rampTime = dt : dt : rampDuration;
516 ramp = [0.5*(1+cos(2*pi*rampTime/(2*rampDuration)+pi)) ones(1,length(stimInNoiseTime)-length(rampTime))];
517 stimulus = stimulus'.*ramp;
518 ramp = fliplr(ramp);
519 stimulus = stimulus.*ramp;
520 stimulus = stimulus';
521 %disp(20*log10(sqrt(mean(stimulus.^2))/20e-6))
522
523 % add additional silent period to start of stimulus for model to 'settle down'
524 additionalSilenceLength = round(0.050*sampleRate);
525 additionalSilencePoints = zeros(additionalSilenceLength,1);
526 stimulus = [additionalSilencePoints; stimulus]'; %now rotated.
527 %disp(20*log10(sqrt(mean(stimulus.^2))/20e-6))
528 end% ------ OF GETSTIMULUS
529
530
531 %% **********************************************************
532 % processWavs - do all of the MAP signal processing
533 %************************************************************
534 function [finalFeatures, ANprobabilityResponse] = processWavs(obj, currentWav)
535
536 %**********************************************************
537 % FIRST GET THE STIMULUS
538 %**********************************************************
539 [stimulus, sampleRate] = obj.getStimulus(currentWav);
540
541 %**********************************************************
542 % NOW TO LOAD IN THE HEARING AID
543 %**********************************************************
544 if obj.useAid
545 stimulus = [stimulus; stimulus]'; %EsxAid requires stereo stim
546 stimulus = EssexAid_guiEmulatorWrapper(stimulus, sampleRate, obj);
547 stimulus = stimulus(1,:); %convert back to mono
548 end
549
550 AN_spikesOrProbability = 'probability';
551
552 if obj.useSpectrogram
553 lowestBF=100; highestBF= 4500; numChannels=30;
554 F=round(logspace(log10(lowestBF),log10(highestBF),numChannels));
555
556 nfft = 1024;
557 hopSamples = 64;
558 noverlap = nfft - hopSamples;
559 dt = hopSamples/sampleRate;
560 [~,~,~,P] = spectrogram(stimulus,nfft,noverlap,F,sampleRate);
561
562 ANprobabilityResponse = 10*log10( abs(P) / ((20e-6)^2) ); %now correct [(a^2)/(b^2) = (a/b)^2]
563
564 else
565 [ANprobabilityResponse, dt, myBFlist] = MAPwrap(stimulus, sampleRate, -1, obj.participant, AN_spikesOrProbability, obj.MAPparamChanges);
566 end
567 nChannels = numel(myBFlist);
568
569
570 time_ANresponse = dt:dt:dt*size(ANprobabilityResponse,2);
571 idx = time_ANresponse > obj.truncateDur; %RTF had this @ 0.550
572 ANprobabilityResponse = ANprobabilityResponse(:, idx);
573
574
575 if ~obj.useSpectrogram
576 if obj.MAPopLSR && ~obj.MAPopHSR
577 ANprobabilityResponse = ANprobabilityResponse(1:nChannels, :); %use LSR
578 end
579 if ~obj.MAPopLSR && obj.MAPopHSR
580 ANprobabilityResponse = ANprobabilityResponse(end-nChannels+1:end, :); %or use HSR
581 end
582 if obj.MAPopMSR
583 assert(0,'Not working with MSR at the mo')
584 end
585 end
586
587 % OPTIONAL PLOTTING
588 YTickIdx = 1:floor(numel(myBFlist)/6):numel(myBFlist);
589 YTickIdxRev = numel(myBFlist)+1-YTickIdx;
590 if ~isempty(obj.probHaxes)
591 axes(obj.probHaxes); %#ok<MAXES>
592 imagesc(ANprobabilityResponse); colorbar('peer', obj.probHaxes)
593 set(obj.probHaxes, 'YTick', YTickIdx);
594 set(obj.probHaxes, 'YTickLabel', num2str( myBFlist(YTickIdxRev)' ));
595 ylabel('cf in Hz')
596 end
597
598 % OPTIONAL PLOTTING SMOOTHED
599 if ~isempty(obj.probHaxesSM)
600 axes(obj.probHaxesSM); %#ok<MAXES>
601 anSM=flipud(obj.makeANsmooth(ANprobabilityResponse, 1/dt));
602 imagesc((1:size(anSM,2))./100,1:size(ANprobabilityResponse,1),anSM);
603 set(obj.probHaxesSM, 'YTick', YTickIdx);
604 set(obj.probHaxesSM, 'YTickLabel', num2str( myBFlist(YTickIdxRev)' ));
605 shading(obj.probHaxesSM, 'flat'); colorbar('peer', obj.probHaxesSM)
606 ylabel('cf (Hz)')
607 xlabel('Time (s)')
608 end
609
610
611 %**********************************************************
612 % optional SACF stage
613 %**********************************************************
614 if obj.useSACF
615
616 % A slightly ugly copying is needed
617 SACFparams.lambda = obj.SACFlambda;
618 SACFparams.acfTau = obj.SACFacfTau;
619 SACFparams.lags = logspace(log10(obj.SACFminLag),log10(obj.SACFmaxLag),obj.SACFnBins);
620 SACFparams.lags = linspace(obj.SACFminLag, obj.SACFmaxLag,obj.SACFnBins );
621
622 SACFmethod.dt = dt;
623 SACFmethod.nonlinCF = myBFlist;
624
625 %This is slightly misleading as the ANprob is now a SACF
626 [ANprobabilityResponse, ~, ~, ~] = filteredSACF(ANprobabilityResponse, SACFmethod, SACFparams);
627
628 % OPTIONAL PLOTTING
629 YTickIdx = 1:floor(obj.SACFnBins/6):obj.SACFnBins;
630 %YTickIdxRev = obj.SACFnBins+1-YTickIdx;
631 if ~isempty(obj.sacfHaxes)
632 axes(obj.sacfHaxes); %#ok<MAXES>
633 imagesc(flipud(ANprobabilityResponse)); shading(obj.sacfHaxes, 'flat'); colorbar('peer', obj.sacfHaxes)
634 set(obj.sacfHaxes, 'YTick', YTickIdx);
635 set(obj.sacfHaxes, 'YTickLabel', num2str( 1./SACFparams.lags(YTickIdx)' ,'%0.1f' ));
636 ylabel('Pitch in Hz')
637 end
638
639 % OPTIONAL PLOTTING SMOOTHED
640 if ~isempty(obj.sacfHaxesSM)
641 axes(obj.sacfHaxesSM); %#ok<MAXES>
642 imagesc(flipud(obj.makeANsmooth(ANprobabilityResponse, 1/dt))); shading(obj.sacfHaxesSM, 'flat'); colorbar('peer', obj.sacfHaxesSM)
643 set(obj.sacfHaxesSM, 'YTick', YTickIdx);
644 set(obj.sacfHaxesSM, 'YTickLabel', num2str( 1./SACFparams.lags(YTickIdx)' ,'%0.1f' ));
645 ylabel('Pitch in Hz')
646 end
647
648 end
649
650
651 finalFeatures = obj.makeANfeatures( ...
652 obj.makeANsmooth(ANprobabilityResponse, 1/dt), obj.numCoeff );
653
654 if obj.removeEnergyStatic
655 finalFeatures = finalFeatures(2:end,:);
656 % disp(size(finalFeatures))
657 end
658
659 if obj.doCMN
660 m = mean(finalFeatures,2);
661 finalFeatures = finalFeatures - repmat(m,1,size(finalFeatures,2));
662 end
663
664 % OPTIONAL PLOTTING
665 if ~isempty(obj.featHaxes)
666 pcolor(obj.featHaxes, finalFeatures); shading(obj.featHaxes, 'flat'); colorbar('peer', obj.featHaxes)
667 end
668 if ~isempty(obj.reconHaxes)
669 reconsData = idct(finalFeatures,obj.SACFnBins);
670 axes(obj.reconHaxes); %#ok<MAXES>
671 imagesc(flipud( reconsData ));
672 end
673
674 opForHTK(obj, currentWav, finalFeatures);
675 end % ------ OF PROCESSWAVS
676
677 end % ------ OF METHODS
678
679 %% *********************************************************
680 % _ _ _ _ _ _
681 % | | | | (_) | | | | | |
682 % ___| |_ __ _| |_ _ ___ _ __ ___ ___| |_| |__ ___ __| |___
683 % / __| __/ _` | __| |/ __| | '_ ` _ \ / _ \ __| '_ \ / _ \ / _` / __|
684 % \__ \ || (_| | |_| | (__ | | | | | | __/ |_| | | | (_) | (_| \__ \
685 % |___/\__\__,_|\__|_|\___| |_| |_| |_|\___|\__|_| |_|\___/ \__,_|___/
686 %************************************************************
687
688 methods(Static)
689 %% ********************************************************
690 % makeANsmooth - smooth the AN response into hanning windowed chunks
691 %**********************************************************
692 function ANsmooth = makeANsmooth(ANresponse, sampleRate, winSize, hopSize)
693 if nargin < 3
694 winSize = 25; %default 25 ms window
695 end
696 if nargin < 4
697 hopSize = 10; %default 10 ms jump between windows
698 end
699
700 winSizeSamples = round(winSize*sampleRate/1000);
701 hopSizeSamples = round(hopSize*sampleRate/1000);
702
703 % smooth
704 hann = cJob.NRC_hanning(winSizeSamples);
705
706 ANsmooth = [];%Cannot pre-allocate a size as it is unknown until the enframing
707 for chan = 1:size(ANresponse,1)
708 f = cJob.enframe(ANresponse(chan,:), hann, hopSizeSamples);
709 ANsmooth(chan,:) = mean(f,2)'; %#ok<AGROW> see above comment
710 end
711 end% ------ OF makeANsmooth
712
713 %% ********************************************************
714 % makeANfeatures - dct wizardry
715 %**********************************************************
716 function ANfeatures = makeANfeatures(ANrate, numCoeff)
717 % make feature vectors
718 features = cJob.GJB_dct(ANrate);
719 ANfeatures = features(1:numCoeff,:);
720 end % ------ OF makeANfeatures
721
722 %% ************************************************************************
723 % enframe - AVOID SIGNAL PROCESSING TOOLBOX buffer function
724 %**************************************************************************
725 function f=enframe(x,win,inc)
726 %ENFRAME split signal up into (overlapping) frames: one per row. F=(X,WIN,INC)
727 %
728 % F = ENFRAME(X,LEN) splits the vector X(:) up into
729 % frames. Each frame is of length LEN and occupies
730 % one row of the output matrix. The last few frames of X
731 % will be ignored if its length is not divisible by LEN.
732 % It is an error if X is shorter than LEN.
733 %
734 % F = ENFRAME(X,LEN,INC) has frames beginning at increments of INC
735 % The centre of frame I is X((I-1)*INC+(LEN+1)/2) for I=1,2,...
736 % The number of frames is fix((length(X)-LEN+INC)/INC)
737 %
738 % F = ENFRAME(X,WINDOW) or ENFRAME(X,WINDOW,INC) multiplies
739 % each frame by WINDOW(:)
740
741 % Copyright (C) Mike Brookes 1997
742 % Version: $Id: enframe.m,v 1.4 2006/06/22 19:07:50 dmb Exp $
743 %
744 % VOICEBOX is a MATLAB toolbox for speech processing.
745 % Home page: http://www.ee.ic.ac.uk/hp/staff/dmb/voicebox/voicebox.html
746 %
747 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
748 % This program is free software; you can redistribute it and/or modify
749 % it under the terms of the GNU General Public License as published by
750 % the Free Software Foundation; either version 2 of the License, or
751 % (at your option) any later version.
752 %
753 % This program is distributed in the hope that it will be useful,
754 % but WITHOUT ANY WARRANTY; without even the implied warranty of
755 % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
756 % GNU General Public License for more details.
757 %
758 % You can obtain a copy of the GNU General Public License from
759 % http://www.gnu.org/copyleft/gpl.html or by writing to
760 % Free Software Foundation, Inc.,675 Mass Ave, Cambridge, MA 02139, USA.
761 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
762
763 nx=length(x(:));
764 nwin=length(win);
765 if (nwin == 1)
766 len = win;
767 else
768 len = nwin;
769 end
770 if (nargin < 3)
771 inc = len;
772 end
773 nf = fix((nx-len+inc)/inc);
774 f=zeros(nf,len);
775 indf= inc*(0:(nf-1)).';
776 inds = (1:len);
777 f(:) = x(indf(:,ones(1,len))+inds(ones(nf,1),:));
778 if (nwin > 1)
779 w = win(:)';
780 f = f .* w(ones(nf,1),:);
781 end
782 end % ------ OF ENFRAME
783
784 %% ************************************************************************
785 % GJB_dct - AVOID SIGNAL PROCESSING TOOLBOX
786 %**************************************************************************
787 function b=GJB_dct(a,n)
788
789 if nargin == 0,
790 error('Not enough input arguments.');
791 end
792
793 if isempty(a)
794 b = [];
795 return
796 end
797
798 % If input is a vector, make it a column:
799 do_trans = (size(a,1) == 1);
800 if do_trans, a = a(:); end
801
802 if nargin==1,
803 n = size(a,1);
804 end
805 m = size(a,2);
806
807 % Pad or truncate input if necessary
808 if size(a,1)<n,
809 aa = zeros(n,m);
810 aa(1:size(a,1),:) = a;
811 else
812 aa = a(1:n,:);
813 end
814
815 % Compute weights to multiply DFT coefficients
816 ww = (exp(-1i*(0:n-1)*pi/(2*n))/sqrt(2*n)).';
817 ww(1) = ww(1) / sqrt(2);
818
819 if rem(n,2)==1 || ~isreal(a), % odd case
820 % Form intermediate even-symmetric matrix
821 y = zeros(2*n,m);
822 y(1:n,:) = aa;
823 y(n+1:2*n,:) = flipud(aa);
824
825 % Compute the FFT and keep the appropriate portion:
826 yy = fft(y);
827 yy = yy(1:n,:);
828
829 else % even case
830 % Re-order the elements of the columns of x
831 y = [ aa(1:2:n,:); aa(n:-2:2,:) ];
832 yy = fft(y);
833 ww = 2*ww; % Double the weights for even-length case
834 end
835
836 % Multiply FFT by weights:
837 b = ww(:,ones(1,m)) .* yy;
838
839 if isreal(a), b = real(b); end
840 if do_trans, b = b.'; end
841 end % ----- of GJB_DCT
842
843 %% ************************************************************************
844 % NRC_hanning - AVOID SIGNAL PROCESSING TOOLBOX
845 %**************************************************************************
846 function w=NRC_hanning(n)
847 calc_hanning = @(m,n)0.5*(1 - cos(2*pi*(1:m)'/(n+1))); %cheeky anonymous function - I <3 these
848 if ~rem(n,2)
849 % Even length window
850 half = n/2;
851 w = calc_hanning(half,n);
852 w = [w; w(end:-1:1)];
853 else
854 % Odd length window
855 half = (n+1)/2;
856 w = calc_hanning(half,n);
857 w = [w; w(end-1:-1:1)];
858 end
859 end % ------ of NRC_HANNING
860
861 %% ************************************************************************
862 % writeHTK - convert data to htk format -> by anonymous Sue (2001)
863 %**************************************************************************
864 function retcode = writeHTK(filename, htkdata, nFrames, sampPeriod, SampSize, ParamKind, byte_order)
865 % Write an HTK format file.
866 %
867 % Input parameters:
868 % filename HTK data file
869 % htkdata HTK data read: an m x n matrix with
870 % m = no. of channels
871 % n = no. of frames
872 % The following are from the HTK header (see HTK manual):
873 % nFrames no. of frames (samples)
874 % sampPeriod sample period (in 100 ns units?)
875 % SampSize sample size
876 % ParamKind parameter kind code
877 %
878 % byteorder 'be' for big-endian (typical for Unix) (default)
879 % 'le' for little-endian (typical for MSWindows)
880 %
881 % Output parameters:
882 % retcode 0 if successful
883
884 % Written by Sue 17/12/01
885
886 retcode=-1; % initialise in case of error
887 if nargin < 6
888 fprintf('Usage: %s(filename, htkdata, nFrames, sampPeriod, SampSize, ParamKind [, byte_order])', mfilename);
889 end;
890
891 % Default to big-endian (HTK format)
892 if nargin < 7
893 byte_order = 'be';
894 end;
895
896 fid = fopen (filename, 'w', sprintf('ieee-%s', byte_order));
897 if fid < 1
898 fprintf('%s: can''t open output file %s\n', mfilename, filename);
899 return
900 end
901
902 % Write header
903 fwrite (fid, nFrames, 'int32'); %nSamples in HTK
904 fwrite (fid, sampPeriod, 'int32');
905 fwrite (fid, SampSize, 'int16');
906 fwrite (fid, ParamKind, 'int16');
907
908 % Write actual data
909 fwrite(fid, htkdata, 'float32');
910
911 fclose(fid);
912
913 retcode=0;
914 end% ------ OF WRITEHTK
915
916 %% ************************************************************************
917 % readHTK - just incase you ever want to go backwards
918 %**************************************************************************
919 function [htkdata,nframes,sampPeriod,sampSize,paramKind] = readHTK(filename,byte_order)
920
921 if nargin<2
922 byte_order = 'be';
923 end
924
925 fid = fopen(filename,'r',sprintf('ieee-%s',byte_order));
926
927 nframes = fread(fid,1,'int32');
928 sampPeriod = fread(fid,1,'int32');
929 sampSize = fread(fid,1,'int16');
930 paramKind = fread(fid,1,'int16');
931
932 % read the data
933
934 htkdata = fread(fid,nframes*(sampSize/4),'float32');
935 htkdata = reshape(htkdata,sampSize/4,nframes);
936 fclose(fid);
937 end % ------ OF READHTK
938
939 end % ------ OF STATIC METHODS
940
941 end % ------ OF CLASS