diff Problems/generateAudioDenoiseProblem.m @ 161:f42aa8bcb82f ivand_dev

debug and clean the SMALLbox Problems code
author Ivan Damnjanovic lnx <ivan.damnjanovic@eecs.qmul.ac.uk>
date Wed, 31 Aug 2011 12:02:19 +0100
parents 8e660fd14774
children 9c418bea7f6a
line wrap: on
line diff
--- a/Problems/generateAudioDenoiseProblem.m	Mon Aug 22 11:46:35 2011 +0100
+++ b/Problems/generateAudioDenoiseProblem.m	Wed Aug 31 12:02:19 2011 +0100
@@ -1,20 +1,40 @@
-function data=generateAudioDenoiseProblem(au, trainnum, blocksize, dictsize, overlap, sigma, gain, maxval, initdict);
-%%  Audio Denoising Problem - needs revision, not yet finalised
+function data = generateAudioDenoiseProblem(soundfile, sigma, windowSize,...
+    overlap, wa, ws, trainnum, redundancyFactor, initdict)
+%%  Audio Denoising Problem
 %
 %   generateAudioDenoiseProblem is part of the SMALLbox and generate a
 %   problem for comaprison of Dictionary Learning/Sparse Representation
-%   techniques in audio denoising scenario. It is based on KSVD image
-%   denoise demo by Ron Rubinstein (see bellow).
-%   The fuction takes as an optional input 
-%       au - audio samples to be denoised
-%       trainnum - number of frames for training 
-%       blocksize - 1D frame size (eg 512)
-%       dictsize - number of atoms to be trained
-%       overlap - ammount of overlaping frames between 0 and 1
+%   techniques in audio denoising scenario.
+%
+%   The function takes the following optional inputs:
+%       soundfile   - name of the file
+%       sigma       - noise standard deviation, linear scale (default 0.2)
+%       windowSize  - 1D frame size (eg 512)
+%       overlap     - fraction of overlap between consecutive frames (0 to 1)
+%       wa,ws       - analysis and synthesis window functions
+%       
+%       trainnum    - number of frames for training 
+%       redundancyFactor - overcompleteness of dictionary (default 2)
+%       initdict    - initial dictionary
 %   
+%   The function outputs a structure with the following fields:
+%       Original    - original signal
+%       Noisy       - signal with added noise
+%       fs          - sample rate of the original signal in Hertz
+%       nbits       - the number of bits per sample
+%       sigma       - added noise level
+%       b           - matrix of training samples for dictionary learning
+%       b1          - matrix containing all frames for reconstruction step
+%       m           - size of a dictionary atom
+%       n           - number of frames for training
+%       p           - number of atoms in dictionary
+%       windowSize  - 1D frame size (eg 512)
+%       overlap     - fraction of overlap between consecutive frames (0 to 1)
+%       wa,ws       - analysis and synthesis window functions
+%       initdict	- initial dictionary
 
 %   Centre for Digital Music, Queen Mary, University of London.
-%   This file copyright 2010 Ivan Damnjanovic.
+%   This file copyright 2011 Ivan Damnjanovic.
 %
 %   This program is free software; you can redistribute it and/or
 %   modify it under the terms of the GNU General Public License as
@@ -30,67 +50,69 @@
 disp(' ');
 
 FS=filesep;
-if ~ exist( 'sigma', 'var' ) || isempty(sigma), sigma = 26.74; end
-if ~ exist( 'gain', 'var' ) || isempty(gain), gain = 1.15; end
 
-if ~ exist( 'initdict', 'var' ) || isempty(initdict), initdict = 'odct'; end
-if ~ exist( 'overlap', 'var' ) || isempty(overlap), overlap = 15/16; end
 %% prompt user for wav file %%
 %ask for file name
 
 TMPpath=pwd;
-if ~ exist( 'au', 'var' ) || isempty(au)
+if ~ exist( 'soundfile', 'var' ) || isempty(soundfile)
+    %ask for file name 
     [pathstr1, name, ext, versn] = fileparts(which('SMALLboxSetup.m'));
-    cd([pathstr1,FS,'data',FS,'audio',FS,'wav']);
-    [filename,pathname] = uigetfile({'*.wav;'},'Select a wav file');
+    cd([pathstr1,FS,'data',FS,'audio']);
+    [filename,pathname] = uigetfile({'*.mat; *.mid; *.wav'},'Select a file to transcribe');
     [pathstr, name, ext, versn] = fileparts(filename);
     data.name=name;
-    
-    au = wavread(filename);
-    au = mean(au,2); % turn it into mono.
-end;
-if ~ exist( 'maxval', 'var' ) || isempty(maxval), maxval = max(au); end
 
-%% generate noisy audio %%
-
-disp(' ');
-disp('Generating noisy audio...');
-sigma = max(au)/10^(sigma/20); 
-n = randn(size(au)) .* sigma;
-aunoise = au + n;%  here we can load noise audio if available 
-                 %  for example: wavread('icassp06_x.wav');%
-
-
+    if strcmp(ext,'.mid')
+        midi=readmidi(filename);
+%         data.notesOriginal=midiInfo(midi);
+        y=midi2audio(midi);
+        wavwrite(y, 44100, 16, 'temp.wav');
+        [x.signal, x.fs, x.nbits]=wavread('temp.wav');
+        delete('temp.wav');
+    elseif strcmp(ext,'.wav')
+%         cd([pathstr1,FS, 'data', FS, 'audio', FS, 'midi']);
+%         filename1=[name, '.mid'];
+%         if exist(filename1, 'file')
+%             midi=readmidi(filename1);
+%             data.notesOriginal=midiInfo(midi);
+%         end
+        cd([pathstr1,FS, 'data', FS, 'audio', FS, 'wav']);
+        [x.signal, x.fs, x.nbits]=wavread(filename);
+    else
+%         cd([pathstr1,FS, 'data', FS, 'audio', FS, 'midi']);
+%         filename1=[name, '.mid'];
+%         if exist(filename1, 'file')
+%             midi=readmidi(filename1);
+%             data.notesOriginal=midiInfo(midi);
+%         end
+        cd([pathstr1,FS, 'data', FS, 'audio', FS, 'mat']);
+        x=load([pathname,filename]);
+    end
+else
+    [x.signal, x.fs, x.nbits]=wavread(soundfile);
+    [pathstr, name, ext, versn] = fileparts(soundfile);
+    data.name=name;
+end
 
 %% set parameters %%
+if ~ exist( 'sigma', 'var' ) || isempty(sigma), sigma = 0.2; end
 
-x = aunoise;
-if ~ exist( 'blocksize', 'var' ) || isempty(blocksize),blocksize = 512;end
-if ~ exist( 'dictsize', 'var' ) || isempty(dictsize), dictsize = 2048;end
+if ~ exist( 'windowSize', 'var' ) || isempty(windowSize), windowSize = 256;end
+if ~ exist( 'overlap', 'var' ) || isempty(overlap), overlap = 0.5; end
+if ~ exist( 'wa', 'var' ) || isempty(wa), wa = @wSine; end % Analysis window
+if ~ exist( 'ws', 'var' ) || isempty(ws), ws = @wSine; end % Synthesis window
 
-if ~ exist( 'trainnum', 'var' ) || isempty(trainnum),trainnum = (size(x,1)-blocksize+1);end
 
+if ~ exist( 'redundancyFactor', 'var' ) || isempty(redundancyFactor),...
+        redundancyFactor = 2;end % default dictionary overcompleteness
+if ~ exist( 'initdict', 'var' ) || isempty(initdict),...
+        initdict = 'odct'; end
+if ~ exist( 'trainnum', 'var' ) || isempty(trainnum), ...
+        trainnum = 16*redundancyFactor*windowSize;end
 
-
-
-
-p=1;
-
-
-% 
-% msgdelta = 5;
-% 
-% verbose = 't';
-% if (msgdelta <= 0)
-%   verbose='';
-%   msgdelta = -1;
-% end
-% 
-% 
-% % initial dictionary %
-% 
 if (strcmpi(initdict,'odct'))
-    initdict = odctndict(blocksize,dictsize,p);
+    initdict = odctndict(windowSize, redundancyFactor*windowSize, 1);
 elseif (strcmpi(initdict,'data'))
     clear initdict;    % causes initialization using random examples
 else
@@ -98,45 +120,31 @@
 end
 
 if exist( 'initdict', 'var' ) 
-  initdict = initdict(:,1:dictsize);
+  initdict = initdict(:,1:redundancyFactor*windowSize);
 end
 
 
-% noise mode %
-% if (isfield(params,'noisemode'))
-%   switch lower(params.noisemode)
-%     case 'psnr'
-%       sigma = maxval / 10^(params.psnr/20);
-%     case 'sigma'
-%       sigma = params.sigma;
-%     otherwise
-%       error('Invalid noise mode specified');
-%   end
-% elseif (isfield(params,'sigma'))
-%   sigma = params.sigma;
-% elseif (isfield(params,'psnr'))
-%   sigma = maxval / 10^(params.psnr/20);
-% else
-%   error('Noise strength not specified');
-% end
-
-% params.Edata = sqrt(prod(blocksize)) * sigma * gain;   % target error for omp
-% params.codemode = 'error';
-% 
-% params.sigma = sigma;
-% params.noisemode = 'sigma';
-% 
-% 
-% % make sure test data is not present in params
-% if (isfield(params,'testdata'))
-%   params = rmfield(params,'testdata');
-% end
-
-
 %%%% create training data %%%
 
+%% generate noisy audio %%
 
-X = buffer( x(1:trainnum),blocksize, overlap*blocksize);
+disp(' ');
+disp('Generating noisy audio...');
+x.signal = x.signal/max(abs(x.signal(:)))*0.99999;
+n = randn(size(x.signal)) .* sigma;
+
+xnoise = x.signal + n;%  here we can load noise audio if available 
+                 %  for example: wavread('icassp06_x.wav');%
+
+                 
+
+
+X = im2colstep(xnoise,[windowSize 1],[overlap*windowSize 1]);
+X = diag(wa(windowSize)) * X;
+
+
+
+
 
 % remove dc in blocks to conserve memory %
 % bsize = 2000;
@@ -144,17 +152,32 @@
 %   blockids = i : min(i+bsize-1,size(X,2));
 %   X(:,blockids) = remove_dc(X(:,blockids),'columns');
 % end
-data.Original = au;
-data.Noisy = aunoise;
-data.b = X;
-data.m = size(X,1);
-data.n = size(X,2);
-data.p = dictsize;
-data.blocksize=blocksize;
+data.Original = x.signal;
+data.Noisy = xnoise;
+data.fs = x.fs;
+data.nbits = x.nbits;
+
 data.sigma = sigma;
-data.gain = gain;
-data.maxval = maxval;
+
+
+if (trainnum<size(X,2))
+    p = randperm(size(X,2));
+    p=sort(p(1:trainnum));
+    data.b = X(:,p);
+else
+    data.b = X;
+end
+
+data.b1 = X;
+[data.m, data.n] = size(data.b);
+data.p = redundancyFactor*windowSize;
+
+data.windowSize = windowSize;
+data.overlap = overlap;
+data.ws = ws;
+data.wa = wa;
+
 data.initdict= initdict;
-data.signalDim=1;
+
 cd(TMPpath);