boblsturm@0
|
1 function y = istft(spec,parameter, varargin)
|
boblsturm@0
|
2 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
boblsturm@0
|
3 % Name: istft
|
boblsturm@0
|
4 % Date: 03-2014
|
boblsturm@0
|
5 % Programmer: Jonathan Driedger
|
boblsturm@0
|
6 % http://www.audiolabs-erlangen.de/resources/MIR/TSMtoolbox/
|
boblsturm@0
|
7 %
|
boblsturm@0
|
8 % Computing the 'inverse' of the stft according to the paper "Signal
|
boblsturm@0
|
9 % Estimation from Modified Short-Time Fourier Transform" by Griffin and
|
boblsturm@0
|
10 % Lim.
|
boblsturm@0
|
11 %
|
boblsturm@0
|
12 % Input: spec a complex spectrogram generated by stft.
|
boblsturm@0
|
13 % parameter.
|
boblsturm@0
|
14 % synHop hop size of the synthesis window.
|
boblsturm@0
|
15 % win the synthesis window.
|
boblsturm@0
|
16 % zeroPad number of zeros that were padded to the
|
boblsturm@0
|
17 % window to increase the fft size and therefore
|
boblsturm@0
|
18 % the frequency resolution.
|
boblsturm@0
|
19 % numOfIter number of iterations the algorithm should
|
boblsturm@0
|
20 % perform to adapt the phase.
|
boblsturm@0
|
21 % origSigLen original length of the audio signal such that
|
boblsturm@0
|
22 % the output can be trimmed accordingly.
|
boblsturm@0
|
23 % restoreEnergy when windowing the synthesis frames, there is a
|
boblsturm@0
|
24 % potential for some energy loss. This option
|
boblsturm@0
|
25 % will rescale every windowed synthesis frame to
|
boblsturm@0
|
26 % compensate for this energy leakage.
|
boblsturm@0
|
27 % fftShift in case the stft was computed with an fftShift,
|
boblsturm@0
|
28 % setting this parameter to 1 will compensate for
|
boblsturm@0
|
29 % that by applying the same shifting operation
|
boblsturm@0
|
30 % again to each frame after the ifft.
|
boblsturm@0
|
31 %
|
boblsturm@0
|
32 % Output: y the time-domain signal.
|
boblsturm@0
|
33 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
boblsturm@0
|
34 % Reference:
|
boblsturm@0
|
35 % If you use the 'TSM toolbox' please refer to:
|
boblsturm@0
|
36 % [DM14] Jonathan Driedger, Meinard Mueller
|
boblsturm@0
|
37 % TSM Toolbox: MATLAB Implementations of Time-Scale Modification
|
boblsturm@0
|
38 % Algorithms
|
boblsturm@0
|
39 % Proceedings of the 17th International Conference on Digital Audio
|
boblsturm@0
|
40 % Effects, Erlangen, Germany, 2014.
|
boblsturm@0
|
41 %
|
boblsturm@0
|
42 % License:
|
boblsturm@0
|
43 % This file is part of 'TSM toolbox'.
|
boblsturm@0
|
44 %
|
boblsturm@0
|
45 % 'TSM toolbox' is free software: you can redistribute it and/or modify it
|
boblsturm@0
|
46 % under the terms of the GNU General Public License as published by the
|
boblsturm@0
|
47 % the Free Software Foundation, either version 3 of the License, or (at
|
boblsturm@0
|
48 % your option) any later version.
|
boblsturm@0
|
49 %
|
boblsturm@0
|
50 % 'TSM toolbox' is distributed in the hope that it will be useful, but
|
boblsturm@0
|
51 % WITHOUT ANY WARRANTY; without even the implied warranty of
|
boblsturm@0
|
52 % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
|
boblsturm@0
|
53 % Public License for more details.
|
boblsturm@0
|
54 %
|
boblsturm@0
|
55 % You should have received a copy of the GNU General Public License along
|
boblsturm@0
|
56 % with 'TSM toolbox'. If not, see http://www.gnu.org/licenses/.
|
boblsturm@0
|
57 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
boblsturm@0
|
58
|
boblsturm@0
|
59 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
boblsturm@0
|
60 % check parameters
|
boblsturm@0
|
61 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
boblsturm@0
|
62 if nargin<2
|
boblsturm@0
|
63 parameter=[];
|
boblsturm@0
|
64 end
|
boblsturm@0
|
65
|
boblsturm@0
|
66 if ~isfield(parameter,'synHop')
|
boblsturm@0
|
67 parameter.synHop = 2048;
|
boblsturm@0
|
68 end
|
boblsturm@0
|
69 if ~isfield(parameter,'win')
|
boblsturm@0
|
70 parameter.win = win(4096,2); % hann window
|
boblsturm@0
|
71 end
|
boblsturm@0
|
72 if ~isfield(parameter,'zeroPad')
|
boblsturm@0
|
73 parameter.zeroPad = 0;
|
boblsturm@0
|
74 end
|
boblsturm@0
|
75 if ~isfield(parameter,'numOfIter')
|
boblsturm@0
|
76 parameter.numOfIter = 1;
|
boblsturm@0
|
77 end
|
boblsturm@0
|
78 if ~isfield(parameter,'origSigLen')
|
boblsturm@0
|
79 parameter.origSigLen = -1; % no trimming
|
boblsturm@0
|
80 end
|
boblsturm@0
|
81 if ~isfield(parameter,'restoreEnergy')
|
boblsturm@0
|
82 parameter.restoreEnergy = 0;
|
boblsturm@0
|
83 end
|
boblsturm@0
|
84 if ~isfield(parameter,'fftShift')
|
boblsturm@0
|
85 parameter.fftShift = 0;
|
boblsturm@0
|
86 end
|
boblsturm@0
|
87
|
boblsturm@0
|
88 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
boblsturm@0
|
89 % some pre calculations
|
boblsturm@0
|
90 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
boblsturm@0
|
91 numOfFrames = size(spec,2);
|
boblsturm@0
|
92 numOfIter = parameter.numOfIter;
|
boblsturm@0
|
93
|
boblsturm@0
|
94 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
boblsturm@0
|
95 % audio calculation
|
boblsturm@0
|
96 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
boblsturm@0
|
97 % first iteration
|
boblsturm@0
|
98 Yi = spec;
|
boblsturm@0
|
99 yi = LSEE_MSTFT(Yi,parameter);
|
boblsturm@0
|
100
|
boblsturm@0
|
101 % remaining iterations
|
boblsturm@0
|
102 parStft.win = parameter.win;
|
boblsturm@0
|
103 parStft.numOfFrames = numOfFrames;
|
boblsturm@0
|
104 parStft.zeroPad = parameter.zeroPad;
|
boblsturm@0
|
105 parStft.anaHop = parameter.synHop;
|
boblsturm@0
|
106 for j = 2 : numOfIter
|
boblsturm@0
|
107 Yi = abs(spec) .* exp(1j*angle(stft(yi,parStft)));
|
boblsturm@0
|
108 yi = LSEE_MSTFT(Yi,parameter);
|
boblsturm@0
|
109 end
|
boblsturm@0
|
110 y = yi';
|
boblsturm@0
|
111 y = y./max(y);
|
boblsturm@0
|
112
|
boblsturm@0
|
113 % if the original Length of the signal is known, also remove the zero
|
boblsturm@0
|
114 % padding at the end
|
boblsturm@0
|
115 if parameter.origSigLen > 0
|
boblsturm@0
|
116 y = y(1:parameter.origSigLen);
|
boblsturm@0
|
117 end
|
boblsturm@0
|
118
|
boblsturm@0
|
119 end
|
boblsturm@0
|
120
|
boblsturm@0
|
121 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
boblsturm@0
|
122 % the Griffin Lim procedure
|
boblsturm@0
|
123 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
boblsturm@0
|
124 function x = LSEE_MSTFT(X,parameter)
|
boblsturm@0
|
125 % some pre calculations
|
boblsturm@0
|
126 w = parameter.win;
|
boblsturm@0
|
127 w = w(:);
|
boblsturm@0
|
128 zp = parameter.zeroPad;
|
boblsturm@0
|
129 w = [zeros(floor(zp/2),1);w;zeros(floor(zp/2),1)];
|
boblsturm@0
|
130 winLen = length(w);
|
boblsturm@0
|
131 winLenHalf = round(winLen/2);
|
boblsturm@0
|
132 synHop = parameter.synHop;
|
boblsturm@0
|
133 numOfFrames = size(X,2);
|
boblsturm@0
|
134 winPos = (0:numOfFrames-1) * synHop + 1;
|
boblsturm@0
|
135 signalLength = winPos(end) + winLen - 1;
|
boblsturm@0
|
136
|
boblsturm@0
|
137 x = zeros(signalLength,1); % resynthesized signal
|
boblsturm@0
|
138 ow = zeros(signalLength,1); % sum of the overlapping windows
|
boblsturm@0
|
139 for i = 1 : numOfFrames
|
boblsturm@0
|
140 currSpec = X(:,i);
|
boblsturm@0
|
141
|
boblsturm@0
|
142 % add the conjugate complex symmetric upper half of the spectrum
|
boblsturm@0
|
143 Xi = [currSpec;conj(currSpec(end-1:-1:2))];
|
boblsturm@0
|
144 xi = real(ifft(Xi));
|
boblsturm@0
|
145 if parameter.fftShift == 1
|
boblsturm@0
|
146 xi = fftshift(xi);
|
boblsturm@0
|
147 end
|
boblsturm@0
|
148 xiw = xi .* w;
|
boblsturm@0
|
149
|
boblsturm@0
|
150 if parameter.restoreEnergy == 1
|
boblsturm@0
|
151 xiEnergy = sum(abs(xi));
|
boblsturm@0
|
152 xiwEnergy = sum(abs(xiw));
|
boblsturm@0
|
153 xiw = xiw * (xiEnergy/(xiwEnergy+eps));
|
boblsturm@0
|
154 end
|
boblsturm@0
|
155
|
boblsturm@0
|
156 x(winPos(i):winPos(i)+winLen-1) = ...
|
boblsturm@0
|
157 x(winPos(i):winPos(i)+winLen-1) + xiw;
|
boblsturm@0
|
158
|
boblsturm@0
|
159 ow(winPos(i):winPos(i)+winLen-1) = ...
|
boblsturm@0
|
160 ow(winPos(i):winPos(i)+winLen-1) + w.^2;
|
boblsturm@0
|
161 end
|
boblsturm@0
|
162 ow(ow<10^-3) = 1; % avoid potential division by zero
|
boblsturm@0
|
163 x = x ./ ow;
|
boblsturm@0
|
164
|
boblsturm@0
|
165 % knowing the zeropads that were added in the stft computation, we can
|
boblsturm@0
|
166 % remove them again now. But since we do not know exactly how many
|
boblsturm@0
|
167 % zeros were padded at the end of the signal, it is only safe to remove
|
boblsturm@0
|
168 % winLenHalf zeros.
|
boblsturm@0
|
169 x = x(winLenHalf+1:end-winLenHalf);
|
boblsturm@0
|
170 end |