Mercurial > hg > smallbox
comparison Problems/generateAudioDenoiseProblem.m @ 161:f42aa8bcb82f ivand_dev
debug and clean the SMALLbox Problems code
author | Ivan Damnjanovic lnx <ivan.damnjanovic@eecs.qmul.ac.uk> |
---|---|
date | Wed, 31 Aug 2011 12:02:19 +0100 |
parents | 8e660fd14774 |
children | 9c418bea7f6a |
comparison
equal
deleted
inserted
replaced
155:b14209313ba4 | 161:f42aa8bcb82f |
---|---|
1 function data=generateAudioDenoiseProblem(au, trainnum, blocksize, dictsize, overlap, sigma, gain, maxval, initdict); | 1 function data = generateAudioDenoiseProblem(soundfile, sigma, windowSize,... |
2 %% Audio Denoising Problem - needs revision, not yet finalised | 2 overlap, wa, ws, trainnum, redundancyFactor, initdict) |
3 %% Audio Denoising Problem | |
3 % | 4 % |
4 % generateAudioDenoiseProblem is part of the SMALLbox and generates a | 5 % generateAudioDenoiseProblem is part of the SMALLbox and generates a |
5 % problem for comparison of Dictionary Learning/Sparse Representation | 6 % problem for comparison of Dictionary Learning/Sparse Representation |
6 % techniques in audio denoising scenario. It is based on KSVD image | 7 % techniques in audio denoising scenario. |
7 % denoise demo by Ron Rubinstein (see below). | 8 % |
8 % The fuction takes as an optional input | 9 % The function takes as an optional input |
9 % au - audio samples to be denoised | 10 % soundfile - name of the file |
10 % trainnum - number of frames for training | 11 % sigma - noise level (dB) |
11 % blocksize - 1D frame size (eg 512) | 12 % windowSize - 1D frame size (eg 512) |
12 % dictsize - number of atoms to be trained | 13 % overlap - amount of overlapping frames between 0 and 1 |
13 % overlap - amount of overlapping frames between 0 and 1 | 14 % wa,ws - analysis and synthesis window functions |
15 % | |
16 % trainnum - number of frames for training | |
17 % redundancyFactor - overcompleteness of dictionary (default 2) | |
18 % initdict - initial dictionary | |
14 % | 19 % |
20 % The function outputs the structure with following fields: | |
21 % Original - original signal | |
22 % Noisy - signal with added noise | |
23 % fs - sample rate of the original signal in Hertz | |
24 % nbits - the number of bits per sample | |
25 % sigma - added noise level | |
26 % b - matrix of training samples for dictionary learning | |
27 % b1 - matrix containing all frames for reconstruction step | |
28 % m - size of dictionary atom | |
29 % n - number of frames for training | |
30 % p - number of atoms in dictionary | |
31 % windowSize - 1D frame size (eg 512) | |
32 % overlap - amount of overlapping frames between 0 and 1 | |
33 % wa,ws - analysis and synthesis window functions | |
34 % initdict - initial dictionary | |
15 | 35 |
16 % Centre for Digital Music, Queen Mary, University of London. | 36 % Centre for Digital Music, Queen Mary, University of London. |
17 % This file copyright 2010 Ivan Damnjanovic. | 37 % This file copyright 2011 Ivan Damnjanovic. |
18 % | 38 % |
19 % This program is free software; you can redistribute it and/or | 39 % This program is free software; you can redistribute it and/or |
20 % modify it under the terms of the GNU General Public License as | 40 % modify it under the terms of the GNU General Public License as |
21 % published by the Free Software Foundation; either version 2 of the | 41 % published by the Free Software Foundation; either version 2 of the |
22 % License, or (at your option) any later version. See the file | 42 % License, or (at your option) any later version. See the file |
28 disp(' This function reads an audio, adds random Gaussian noise,'); | 48 disp(' This function reads an audio, adds random Gaussian noise,'); |
29 disp(' that can be later denoised by using dictionary learning techniques.'); | 49 disp(' that can be later denoised by using dictionary learning techniques.'); |
30 disp(' '); | 50 disp(' '); |
31 | 51 |
32 FS=filesep; | 52 FS=filesep; |
33 if ~ exist( 'sigma', 'var' ) || isempty(sigma), sigma = 26.74; end | |
34 if ~ exist( 'gain', 'var' ) || isempty(gain), gain = 1.15; end | |
35 | 53 |
36 if ~ exist( 'initdict', 'var' ) || isempty(initdict), initdict = 'odct'; end | |
37 if ~ exist( 'overlap', 'var' ) || isempty(overlap), overlap = 15/16; end | |
38 %% prompt user for wav file %% | 54 %% prompt user for wav file %% |
39 %ask for file name | 55 %ask for file name |
40 | 56 |
41 TMPpath=pwd; | 57 TMPpath=pwd; |
42 if ~ exist( 'au', 'var' ) || isempty(au) | 58 if ~ exist( 'soundfile', 'var' ) || isempty(soundfile) |
59 %ask for file name | |
43 [pathstr1, name, ext, versn] = fileparts(which('SMALLboxSetup.m')); | 60 [pathstr1, name, ext, versn] = fileparts(which('SMALLboxSetup.m')); |
44 cd([pathstr1,FS,'data',FS,'audio',FS,'wav']); | 61 cd([pathstr1,FS,'data',FS,'audio']); |
45 [filename,pathname] = uigetfile({'*.wav;'},'Select a wav file'); | 62 [filename,pathname] = uigetfile({'*.mat; *.mid; *.wav'},'Select a file to transcribe'); |
46 [pathstr, name, ext, versn] = fileparts(filename); | 63 [pathstr, name, ext, versn] = fileparts(filename); |
47 data.name=name; | 64 data.name=name; |
48 | |
49 au = wavread(filename); | |
50 au = mean(au,2); % turn it into mono. | |
51 end; | |
52 if ~ exist( 'maxval', 'var' ) || isempty(maxval), maxval = max(au); end | |
53 | 65 |
54 %% generate noisy audio %% | 66 if strcmp(ext,'.mid') |
67 midi=readmidi(filename); | |
68 % data.notesOriginal=midiInfo(midi); | |
69 y=midi2audio(midi); | |
70 wavwrite(y, 44100, 16, 'temp.wav'); | |
71 [x.signal, x.fs, x.nbits]=wavread('temp.wav'); | |
72 delete('temp.wav'); | |
73 elseif strcmp(ext,'.wav') | |
74 % cd([pathstr1,FS, 'data', FS, 'audio', FS, 'midi']); | |
75 % filename1=[name, '.mid']; | |
76 % if exist(filename1, 'file') | |
77 % midi=readmidi(filename1); | |
78 % data.notesOriginal=midiInfo(midi); | |
79 % end | |
80 cd([pathstr1,FS, 'data', FS, 'audio', FS, 'wav']); | |
81 [x.signal, x.fs, x.nbits]=wavread(filename); | |
82 else | |
83 % cd([pathstr1,FS, 'data', FS, 'audio', FS, 'midi']); | |
84 % filename1=[name, '.mid']; | |
85 % if exist(filename1, 'file') | |
86 % midi=readmidi(filename1); | |
87 % data.notesOriginal=midiInfo(midi); | |
88 % end | |
89 cd([pathstr1,FS, 'data', FS, 'audio', FS, 'mat']); | |
90 x=load([pathname,filename]); | |
91 end | |
92 else | |
93 [x.signal, x.fs, x.nbits]=wavread(soundfile); | |
94 [pathstr, name, ext, versn] = fileparts(soundfile); | |
95 data.name=name; | |
96 end | |
55 | 97 |
56 disp(' '); | 98 %% set parameters %% |
57 disp('Generating noisy audio...'); | 99 if ~ exist( 'sigma', 'var' ) || isempty(sigma), sigma = 0.2; end |
58 sigma = max(au)/10^(sigma/20); | 100 |
59 n = randn(size(au)) .* sigma; | 101 if ~ exist( 'windowSize', 'var' ) || isempty(windowSize), windowSize = 256;end |
60 aunoise = au + n;% here we can load noise audio if available | 102 if ~ exist( 'overlap', 'var' ) || isempty(overlap), overlap = 0.5; end |
61 % for example: wavread('icassp06_x.wav');% | 103 if ~ exist( 'wa', 'var' ) || isempty(wa), wa = @wSine; end % Analysis window |
104 if ~ exist( 'ws', 'var' ) || isempty(ws), ws = @wSine; end % Synthesis window | |
62 | 105 |
63 | 106 |
107 if ~ exist( 'redundancyFactor', 'var' ) || isempty(windowSize),... | |
108 redundancyFactor = 2;end | |
109 if ~ exist( 'initdict', 'var' ) || isempty(initdict),... | |
110 initdict = 'odct'; end | |
111 if ~ exist( 'trainnum', 'var' ) || isempty(trainnum), ... | |
112 trainnum = 16*redundancyFactor*windowSize;end | |
64 | 113 |
65 %% set parameters %% | |
66 | |
67 x = aunoise; | |
68 if ~ exist( 'blocksize', 'var' ) || isempty(blocksize),blocksize = 512;end | |
69 if ~ exist( 'dictsize', 'var' ) || isempty(dictsize), dictsize = 2048;end | |
70 | |
71 if ~ exist( 'trainnum', 'var' ) || isempty(trainnum),trainnum = (size(x,1)-blocksize+1);end | |
72 | |
73 | |
74 | |
75 | |
76 | |
77 p=1; | |
78 | |
79 | |
80 % | |
81 % msgdelta = 5; | |
82 % | |
83 % verbose = 't'; | |
84 % if (msgdelta <= 0) | |
85 % verbose=''; | |
86 % msgdelta = -1; | |
87 % end | |
88 % | |
89 % | |
90 % % initial dictionary % | |
91 % | |
92 if (strcmpi(initdict,'odct')) | 114 if (strcmpi(initdict,'odct')) |
93 initdict = odctndict(blocksize,dictsize,p); | 115 initdict = odctndict(windowSize, redundancyFactor*windowSize, 1); |
94 elseif (strcmpi(initdict,'data')) | 116 elseif (strcmpi(initdict,'data')) |
95 clear initdict; % causes initialization using random examples | 117 clear initdict; % causes initialization using random examples |
96 else | 118 else |
97 error('Invalid initial dictionary specified.'); | 119 error('Invalid initial dictionary specified.'); |
98 end | 120 end |
99 | 121 |
100 if exist( 'initdict', 'var' ) | 122 if exist( 'initdict', 'var' ) |
101 initdict = initdict(:,1:dictsize); | 123 initdict = initdict(:,1:redundancyFactor*windowSize); |
102 end | 124 end |
103 | |
104 | |
105 % noise mode % | |
106 % if (isfield(params,'noisemode')) | |
107 % switch lower(params.noisemode) | |
108 % case 'psnr' | |
109 % sigma = maxval / 10^(params.psnr/20); | |
110 % case 'sigma' | |
111 % sigma = params.sigma; | |
112 % otherwise | |
113 % error('Invalid noise mode specified'); | |
114 % end | |
115 % elseif (isfield(params,'sigma')) | |
116 % sigma = params.sigma; | |
117 % elseif (isfield(params,'psnr')) | |
118 % sigma = maxval / 10^(params.psnr/20); | |
119 % else | |
120 % error('Noise strength not specified'); | |
121 % end | |
122 | |
123 % params.Edata = sqrt(prod(blocksize)) * sigma * gain; % target error for omp | |
124 % params.codemode = 'error'; | |
125 % | |
126 % params.sigma = sigma; | |
127 % params.noisemode = 'sigma'; | |
128 % | |
129 % | |
130 % % make sure test data is not present in params | |
131 % if (isfield(params,'testdata')) | |
132 % params = rmfield(params,'testdata'); | |
133 % end | |
134 | 125 |
135 | 126 |
136 %%%% create training data %%% | 127 %%%% create training data %%% |
137 | 128 |
129 %% generate noisy audio %% | |
138 | 130 |
139 X = buffer( x(1:trainnum),blocksize, overlap*blocksize); | 131 disp(' '); |
132 disp('Generating noisy audio...'); | |
133 x.signal = x.signal/max(abs(x.signal(:)))*0.99999; | |
134 n = randn(size(x.signal)) .* sigma; | |
135 | |
136 xnoise = x.signal + n;% here we can load noise audio if available | |
137 % for example: wavread('icassp06_x.wav');% | |
138 | |
139 | |
140 | |
141 | |
142 X = im2colstep(xnoise,[windowSize 1],[overlap*windowSize 1]); | |
143 X = diag(wa(windowSize)) * X; | |
144 | |
145 | |
146 | |
147 | |
140 | 148 |
141 % remove dc in blocks to conserve memory % | 149 % remove dc in blocks to conserve memory % |
142 % bsize = 2000; | 150 % bsize = 2000; |
143 % for i = 1:bsize:size(X,2) | 151 % for i = 1:bsize:size(X,2) |
144 % blockids = i : min(i+bsize-1,size(X,2)); | 152 % blockids = i : min(i+bsize-1,size(X,2)); |
145 % X(:,blockids) = remove_dc(X(:,blockids),'columns'); | 153 % X(:,blockids) = remove_dc(X(:,blockids),'columns'); |
146 % end | 154 % end |
147 data.Original = au; | 155 data.Original = x.signal; |
148 data.Noisy = aunoise; | 156 data.Noisy = xnoise; |
149 data.b = X; | 157 data.fs = x.fs; |
150 data.m = size(X,1); | 158 data.nbits = x.nbits; |
151 data.n = size(X,2); | 159 |
152 data.p = dictsize; | |
153 data.blocksize=blocksize; | |
154 data.sigma = sigma; | 160 data.sigma = sigma; |
155 data.gain = gain; | 161 |
156 data.maxval = maxval; | 162 |
163 if (trainnum<size(X,2)) | |
164 p = randperm(size(X,2)); | |
165 p=sort(p(1:trainnum)); | |
166 data.b = X(:,p); | |
167 else | |
168 data.b = X; | |
169 end | |
170 | |
171 data.b1 = X; | |
172 [data.m, data.n] = size(data.b); | |
173 data.p = redundancyFactor*windowSize; | |
174 | |
175 data.windowSize = windowSize; | |
176 data.overlap = overlap; | |
177 data.ws = ws; | |
178 data.wa = wa; | |
179 | |
157 data.initdict= initdict; | 180 data.initdict= initdict; |
158 data.signalDim=1; | 181 |
159 cd(TMPpath); | 182 cd(TMPpath); |
160 | 183 |