# HG changeset patch # User SebastianEwert # Date 1390327708 0 # Node ID 76f45f5c9afdada5dedb2627f47e7de553a52b54 # Parent 5ab87a0152e70ae3ce9ad05faf258e1343110d79 - added units * adaptiveEqualizer * applyMfccMeanAdaption - added corresponding data files for presets - modified applyImpulseReponse to use the estimated average group delay to adjust the output audio and keep the timestamps as is (was vice versa before) - added new units demos, incl one for applyLowpass diff -r 5ab87a0152e7 -r 76f45f5c9afd AudioDegradationToolbox/adthelper_normalizeAudio.m --- a/AudioDegradationToolbox/adthelper_normalizeAudio.m Wed Nov 13 16:51:34 2013 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,77 +0,0 @@ -function [f_audio_out,timepositions_afterDegr] = adthelper_normalizeAudio(f_audio, samplingFreq, timepositions_beforeDegr, parameter) -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% Name: adthelper_normalizeAudio -% Date of Revision: 2013-04 -% Programmer: Sebastian Ewert -% -% Description: -% - normalizes audio -% -% Input: -% f_audio - audio signal \in [-1,1]^{NxC} with C being the number of -% channels -% samplingFreq - sampling frequency of f_audio -% timepositions_beforeDegr - some degradations delay the input signal. If -% some points in time are given via this -% parameter, timepositions_afterDegr will -% return the corresponding positions in the -% output. Set to [] if unavailable. Set f_audio -% and samplingFreq to [] to compute only -% timepositions_afterDegr. -% -% Input (optional): parameter -% .maxAmplitude = 0.999; -% -% Output: -% f_audio_out - audio signal \in [-1,1]^{NxC} with C being the number of -% channels -% timepositions_afterDegr - time positions corresponding to timepositions_beforeDegr -% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% Audio Degradation Toolbox -% -% Centre for Digital Music, Queen Mary University of London. -% This file copyright 2013 Sebastian Ewert, Matthias Mauch and QMUL. -% -% This program is free software; you can redistribute it and/or -% modify it under the terms of the GNU General Public License as -% published by the Free Software Foundation; either version 2 of the -% License, or (at your option) any later version. See the file -% COPYING included with this distribution for more information. -% -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% Check parameters -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -if nargin<4 - parameter=[]; -end -if nargin<3 - timepositions_beforeDegr=[]; -end -if nargin<2 - error('Please specify input data'); -end -if isfield(parameter,'maxAmplitude')==0 - parameter.maxAmplitude = 0.999; -end - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% Main program -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - -f_audio_out = []; -if ~isempty(f_audio) - f_audio_out = parameter.maxAmplitude * f_audio / max(max(max(abs(f_audio))),eps); -end - -% This function does not impose a delay -timepositions_afterDegr = timepositions_beforeDegr; - -end - - - diff -r 5ab87a0152e7 -r 76f45f5c9afd AudioDegradationToolbox/degradationData/SpecEnvelopes/Beatles_NorwegianWood.mat Binary file AudioDegradationToolbox/degradationData/SpecEnvelopes/Beatles_NorwegianWood.mat has changed diff -r 5ab87a0152e7 -r 76f45f5c9afd AudioDegradationToolbox/degradationData/SpecEnvelopes/Beethoven_Appasionata_Rwc.mat Binary file AudioDegradationToolbox/degradationData/SpecEnvelopes/Beethoven_Appasionata_Rwc.mat has changed diff -r 5ab87a0152e7 -r 76f45f5c9afd AudioDegradationToolbox/degradationUnits/degradationUnit_adaptiveEqualizer.m --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/AudioDegradationToolbox/degradationUnits/degradationUnit_adaptiveEqualizer.m Tue Jan 21 18:08:28 2014 +0000 @@ -0,0 +1,275 @@ +function [f_audio_out,timepositions_afterDegr] = degradationUnit_adaptiveEqualizer(f_audio, samplingFreq, timepositions_beforeDegr, parameter) +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Name: degradationUnit_adaptiveEqualizer +% Date of Revision: 2013-01-23 +% Programmer: Sebastian Ewert +% +% Description: +% - designs a filter such that the mean spectrum of f_audio becomes similar +% to a given mean spectrum +% - mean spectra are specified using a decibel scale, i.e. if x is a magnitude +% spectrum, then apply x -> 20*log10(x) +% - there are four ways to specify the destination mean spectrum: 1. by +% loading example files provided with the toolbox, 2. by using specific +% noise "color" profiles, 3. by providing the destination mean spectrum +% using the parameter destMagFreqResp, 4. by providing audio data from +% which the destination mean spectrum is computed. +% - if mean spectra are computed, this is done by computing a magnitude +% spectrogram, deriving the corresponding values in dB, and then averaging +% over all time frames. +% - the same is done for f_audio and the two mean spectral vectors are +% compared +% - Then a filter is designed such that f_audio's mean spectral vector +% becomes similar to destMagFreqResp +% - filtering is done using a linear-phase FIR filter +% - this unit normalizes the output +% +% Notes: +% - note that the mean spectrum is often a good approximation of what is +% sometimes referred to as the mean spectral shape (however, this term is not +% defined for general sound recordings). In this sense, the function +% modifies the mean spectral shape to the desired one. +% +% Input: +% f_audio - audio signal \in [-1,1]^{NxC} with C being the number of +% channels +% samplingFreq - sampling frequency of f_audio +% timepositions_beforeDegr - some degradations delay the input signal. If +% some points in time are given via this +% parameter, timepositions_afterDegr will +% return the corresponding positions in the +% output. Set to [] if unavailable. Set f_audio +% and samplingFreq to [] to compute only +% timepositions_afterDegr. +% +% Input (optional): parameter +% .loadInternalMagFreqResp=1 - loads one of the destMagFreqResp provided +% by the toolbox +% .internalMagFreqResp='Beatles_NorwegianWood' +% .computeMagFreqRespFromAudio - computes destMagFreqResp from given +% audio data +% .computeMagFreqRespFromAudio_audioData +% - audio data for .computeMagFreqRespFromAudio +% .computeMagFreqRespFromAudio_sf - sampl freq for .computeMagFreqRespFromAudio +% .destMagFreqResp = [] - in db. See above. +% .destMagFreqResp_freqs = [] - must have same length as destMagFreqResp. +% In Hertz +% .fftLength = 2 ^ nextpow2(0.02 * samplingFreq); - fft length to +% calculate spectrogram of f_audio. +% +% Output: +% f_audio_out - audio signal \in [-1,1]^{NxC} with C being the number +% of channels +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Audio Degradation Toolbox +% +% Centre for Digital Music, Queen Mary University of London. +% This file copyright 2013 Sebastian Ewert, Matthias Mauch and QMUL. +% +% This program is free software; you can redistribute it and/or +% modify it under the terms of the GNU General Public License as +% published by the Free Software Foundation; either version 2 of the +% License, or (at your option) any later version. See the file +% COPYING included with this distribution for more information. +% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Check parameters +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +if nargin<4 + parameter=[]; +end +if nargin<3 + timepositions_beforeDegr=[]; +end +if nargin<2 + error('Please specify input data'); +end + +if isfield(parameter,'loadInternalMagFreqResp')==0 + parameter.loadInternalMagFreqResp = 1; +end +if isfield(parameter,'loadNoiseColorPreset')==0 + parameter.loadNoiseColorPreset = 0; +end +if isfield(parameter,'computeMagFreqRespFromAudio')==0 + parameter.computeMagFreqRespFromAudio = 0; +end + +if isfield(parameter,'internalMagFreqResp')==0 + parameter.internalMagFreqResp = 'Beethoven_Appasionata_Rwc'; +end +if isfield(parameter,'noiseColorPreset')==0 + parameter.noiseColorPreset = 'pink'; +end +if isfield(parameter,'computeMagFreqRespFromAudio_audioData')==0 + parameter.computeMagFreqRespFromAudio_audioData = []; +end +if isfield(parameter,'computeMagFreqRespFromAudio_sf')==0 + parameter.computeMagFreqRespFromAudio_sf = []; +end +if isfield(parameter,'destMagFreqResp')==0 + parameter.destMagFreqResp = []; +end +if isfield(parameter,'destMagFreqResp_freqs')==0 + parameter.destMagFreqResp_freqs = []; +end +if isfield(parameter,'fftLength')==0 + parameter.fftLength = 2 ^ nextpow2(0.02 * samplingFreq); +end +if isfield(parameter,'filterOrder')==0 + parameter.filterOrder = round(parameter.fftLength/2); +end +if isfield(parameter,'visualizations')==0 + parameter.visualizations = 0; +end + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Main program +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +if isempty(f_audio) + % we design a linear-phase filter. Such filters have the property that + % the group delay for every frequency is always equal to + % parameter.filterOrder/2. Therefore, although we built a signal + % adaptive filter, we can adjust for that delay. That means that the + % timepositions_beforeDegr don't require any adjustment. + timepositions_afterDegr = timepositions_beforeDegr; + return; +end + +% load/compute destMagFreqResp +[destMagFreqResp,destMagFreqResp_freqs] = internal_getDestMagFreqResp(parameter); + +% compute mean spectral vector for f_audio +[meanMagSpec,meanMagSpec_freqs] = internal_computeMeanSpectralVector(f_audio,samplingFreq,parameter.fftLength); +meanMagSpec = internal_standardizeMagFreqResp(meanMagSpec); + +% compute magnitude response for the filter to be designed +destMagFreqResp_org = destMagFreqResp; +destMagFreqResp_freqs_org = destMagFreqResp_freqs; +if ~((length(destMagFreqResp_freqs)==length(meanMagSpec_freqs)) && (all(meanMagSpec_freqs(:) == destMagFreqResp_freqs(:)))) + % in this case we interpolate the frequency response using a + % spline interpolation + destMagFreqResp = spline(destMagFreqResp,destMagFreqResp_freqs,meanMagSpec_freqs); +end +filter_magFreqResp = destMagFreqResp(:) - meanMagSpec(:); + +% design filter (fir2 is linear phase) +filter_magFreqResp_linear = 10 .^ (filter_magFreqResp/20); +b = fir2(parameter.filterOrder,meanMagSpec_freqs/(samplingFreq/2),filter_magFreqResp_linear); + +% apply filter +parameterApplyImpulseResponse.loadInternalIR = 0; +parameterApplyImpulseResponse.impulseResponse = b; +parameterApplyImpulseResponse.impulseResponseSampFreq = samplingFreq; +parameterApplyImpulseResponse.normalizeOutputAudio = 1; +parameterApplyImpulseResponse.averageGroupDelayOfFilter = round(parameter.filterOrder/2); +[f_audio_out,timepositions_afterDegr] = degradationUnit_applyImpulseResponse(f_audio, samplingFreq, timepositions_beforeDegr, parameterApplyImpulseResponse); + +if parameter.visualizations + fvtool(b,1); + + [meanMagSpecOut,meanMagSpecOut_freqs] = internal_computeMeanSpectralVector(f_audio_out,samplingFreq,parameter.fftLength); + meanMagSpecOut = internal_standardizeMagFreqResp(meanMagSpecOut); + + figure; + plot(destMagFreqResp_freqs_org,destMagFreqResp_org,'y'); + hold on; + plot(meanMagSpecOut_freqs,meanMagSpecOut,'k'); + title('Comparison: destMagFreqResp(y) and mean spectral vector of output(k)') +end + + +end + +function [f_meanmagspec_db,freqs] = internal_computeMeanSpectralVector(f_audio,fs,fftLength) + +f_audio = mean(f_audio,2); + +[f_spec,freqs,time] = spectrogram(f_audio,hanning(fftLength),fftLength/2,fftLength,fs); + +f_magspec_db = 20 * log10(abs(f_spec)); + +f_magspec_db(:,isinf(sum(abs(f_magspec_db),1))) = []; % ignore columns with -inf/inf entries +f_magspec_db(:,isnan(sum(abs(f_magspec_db),1))) = []; + +f_meanmagspec_db = mean(f_magspec_db,2); + +end + +function magFreqResp = internal_standardizeMagFreqResp(magFreqResp) + +temp = magFreqResp(~isinf(magFreqResp)); +temp = temp(~isnan(magFreqResp)); +maxRobust = max(temp); + +magFreqResp = magFreqResp - maxRobust; + +magFreqResp(magFreqResp > 0) = 0; % remaining positive inf +magFreqResp(magFreqResp < -80) = -80; % remaining positive inf + +end + +function [destMagFreqResp,destMagFreqResp_freqs,fftLength] = internal_getDestMagFreqResp(parameter) +if parameter.loadInternalMagFreqResp + % load example included in toolbox + + fullFilenameMfile = mfilename('fullpath'); + [pathstr,name,ext] = fileparts(fullFilenameMfile); + dirRootIRs = fullfile(pathstr,'../degradationData'); + + names_internal = {'Beatles_NorwegianWood','Beethoven_Appasionata_Rwc'}; + indexInternal = find(strcmpi(names_internal,parameter.internalMagFreqResp), 1); + if isempty(indexInternal) + error('Please specify a valid internal name') + end + + switch indexInternal + case 1 + file = fullfile(dirRootIRs,'SpecEnvelopes/Beatles_NorwegianWood.mat'); + case 2 + file = fullfile(dirRootIRs,'SpecEnvelopes/Beethoven_Appasionata_Rwc.mat'); + end + load(file, 'destMagFreqResp', 'destMagFreqResp_freqs'); + +elseif parameter.loadNoiseColorPreset + switch(lower( parameter.noiseColorPreset)) + case 'white' + freqExponent = 0; + case 'pink' + freqExponent = 0.5; + case 'brown' + freqExponent = 1; + case 'blue' + freqExponent = -0.5; + case 'violet' + freqExponent = -1; + end + + lengthMagResp = parameter.fftLength/2+1; + destMagFreqResp_freqs = linspace(0,samplingFreq/2,lengthMagResp); + magResp = 1./destMagFreqResp_freqs.^freqExponent; + magResp(1) = 1; + destMagFreqResp = 20 * log10(magResp); + +elseif parameter.computeMagFreqRespFromAudio + % compute destMagFreqResp as mean spectral vector from given audio data + [destMagFreqResp,destMagFreqResp_freqs] = internal_computeMeanSpectralVector(... + parameter.computeMagFreqRespFromAudio_audioData,parameter.computeMagFreqRespFromAudio_sf,parameter.fftLength); +else + destMagFreqResp = parameter.destMagFreqResp; + destMagFreqResp_freqs = parameter.destMagFreqResp_freqs; +end + +% standardize destMagFreqResp +destMagFreqResp = internal_standardizeMagFreqResp(destMagFreqResp); +end + + + + + + diff -r 5ab87a0152e7 -r 76f45f5c9afd AudioDegradationToolbox/degradationUnits/degradationUnit_applyImpulseResponse.m --- a/AudioDegradationToolbox/degradationUnits/degradationUnit_applyImpulseResponse.m Wed Nov 13 16:51:34 2013 +0000 +++ b/AudioDegradationToolbox/degradationUnits/degradationUnit_applyImpulseResponse.m Tue Jan 21 18:08:28 2014 +0000 @@ -84,6 +84,9 @@ if isfield(parameter,'impulseResponseSampFreq')==0 parameter.impulseResponseSampFreq = 0; end +if isfield(parameter,'averageGroupDelayOfFilter')==0 + parameter.averageGroupDelayOfFilter = []; % specify this only if you know what you are doing +end %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Main program @@ -146,23 +149,28 @@ end end -% This degradation does impose a delay -timepositions_afterDegr = []; -if ~isempty(timepositions_beforeDegr) - % approximation via group delay - - [Gd,W] = mygrpdelay(h_org,1,fs_h,2048); +% Applying an FIR filter imposes a delay on the signal. We estimate this +% delay using the average group delay of the filter and adjust the +% output signal accordingly. +if isempty(parameter.averageGroupDelayOfFilter) + [Gd,W] = mygrpdelay(h_org,1,fs_h,2048); %[Gd,W] = grpdelay(h_org,1,1024); % Matlab's own group delay function. Failt for some filters considerably. - - %figure; - %plot(W,Gd/samplingFreqFilter) + if any(Gd) < 0 + error('A group delay was negative. This should not happen!?') + end + averageOfGroupDelays = mean(Gd); +else + averageOfGroupDelays = parameter.averageGroupDelayOfFilter; +end - averageOfGroupDelays = mean(Gd); - timeOffset_sec = averageOfGroupDelays / fs_h; - - timepositions_afterDegr = timepositions_beforeDegr + timeOffset_sec; +if ~isempty(f_audio) + % let's adjust the output audio and eliminate the delay + delay = max(1,ceil(averageOfGroupDelays)); + f_audio_out = f_audio_out(delay:end,:); end +timepositions_afterDegr = timepositions_beforeDegr; + end function [gd,w] = mygrpdelay(b,a,Fs,nfft) diff -r 5ab87a0152e7 -r 76f45f5c9afd AudioDegradationToolbox/degradationUnits/degradationUnit_applyMfccMeanAdaption.m --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/AudioDegradationToolbox/degradationUnits/degradationUnit_applyMfccMeanAdaption.m Tue Jan 21 18:08:28 2014 +0000 @@ -0,0 +1,475 @@ +function [f_audio_out,timepositions_afterDegr] = degradationUnit_applyMfccMeanAdaption(f_audio, samplingFreq, timepositions_beforeDegr, parameter) +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Name: degradationUnit_applyMfccMeanAdaption +% Date of Revision: 2013-01-23 +% Programmer: Sebastian Ewert +% +% Description: +% - designs a filter such that the mean MFCCs of f_audio become similar +% to a given mean MFCC +% - the destination MFCC can either be provided or computed from other +% audio data. +% - since there is not just one way to compute MFCCs, the provided MFCCs +% should be computed exactly as done in the code below +% - filtering is done using a linear-phase FIR filter +% - this unit normalizes the output +% +% Notes: +% - the mean MFCC corresponds to the maximum likelihood estimate of the mean +% vector for a multivariate Gaussian fitted to the set of MFCCs +% - that means that using this function the recording in f_audio is altered +% such that it looks almost identical to the one in audioDataForDestinationMfcc +% (or the one corresponding to the mean MFCC) in terms of the mean of a +% fitted Gaussian. +% - This is intersting in the context of studying the behaviour of methods +% that employ statistics over MFCCs to classify/analyse recordings. +% - The method essentially aims for having an MFCC implementation very +% similar to the one in Malcolm Slaney's auditory toolbox. +% +% Input: +% f_audio - audio signal \in [-1,1]^{NxC} with C being the number of +% channels +% samplingFreq - sampling frequency of f_audio +% timepositions_beforeDegr - some degradations delay the input signal. If +% some points in time are given via this +% parameter, timepositions_afterDegr will +% return the corresponding positions in the +% output. Set to [] if unavailable. Set f_audio +% and samplingFreq to [] to compute only +% timepositions_afterDegr. +% +% Input (optional): parameter +% .loadInternalMagFreqResp=1 - loads one of the destMagFreqResp provided +% by the toolbox +% .internalMagFreqResp='Beatles_NorwegianWood' +% .computeMagFreqRespFromAudio - computes destMagFreqResp from given +% audio data +% .computeMagFreqRespFromAudio_audioData +% - audio data for .computeMagFreqRespFromAudio +% .computeMagFreqRespFromAudio_sf - sampl freq for .computeMagFreqRespFromAudio +% .destMagFreqResp = [] - in db. See above. +% .destMagFreqResp_freqs = [] - must have same length as destMagFreqResp. +% In Hertz +% .fftLength = 2 ^ nextpow2(0.02 * samplingFreq); - fft length to +% calculate spectrogram of f_audio. +% +% Output: +% f_audio_out - audio signal \in [-1,1]^{NxC} with C being the number +% of channels +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Audio Degradation Toolbox +% +% Centre for Digital Music, Queen Mary University of London. +% This file copyright 2013 Sebastian Ewert, Matthias Mauch and QMUL. +% +% This program is free software; you can redistribute it and/or +% modify it under the terms of the GNU General Public License as +% published by the Free Software Foundation; either version 2 of the +% License, or (at your option) any later version. See the file +% COPYING included with this distribution for more information. +% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Check parameters +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +if nargin<4 + parameter=[]; +end +if nargin<3 + timepositions_beforeDegr=[]; +end +if nargin<2 + error('Please specify input data'); +end + +if isfield(parameter,'destinationMeanMfcc')==0 + parameter.destinationMeanMfcc = []; + % make sure to provide the same coefficients as specified in + % parameter.MFCC_cepstralCoeffsToKeep. Also use the same method to + % compute the MFCCs as used here. +end +if isfield(parameter,'audioDataForDestinationMfcc')==0 + parameter.audioDataForDestinationMfcc = []; +end +if isfield(parameter,'audioDataForDestMfcc_sf')==0 + parameter.audioDataForDestMfcc_sf = []; +end +if isfield(parameter,'TikhonovLambdaCandidates')==0 + parameter.TikhonovLambdaCandidates = 10.^-[10:-1:0]; +end +if isfield(parameter,'TikhonovMaxError')==0 + parameter.TikhonovMaxError = 10^-5; +end +if isfield(parameter,'filterOrder')==0 + parameter.filterOrder = 500; +end +if isfield(parameter,'visualizations')==0 + parameter.visualizations = 0; +end + + +% All following parameters were taken from the auditory toolbox. Some of +% these value only make sense for speech but are left as is as they are +% used like this in many other toolboxes +if isfield(parameter,'MFCC_lowestFrequencyHz')==0 + parameter.MFCC_lowestFrequencyHz = 133.3333; + % this is not a good choice for music. Bass is filtered out completely. + % This chould be lowered to 66.66666666 and MFCC_numLinearFilters + % increased by one. +end +if isfield(parameter,'MFCC_numLinearFilters')==0 + parameter.MFCC_numLinearFilters = 13; +end +if isfield(parameter,'MFCC_numLogFilters')==0 + parameter.MFCC_numLogFilters = 27; +end +if isfield(parameter,'MFCC_linearFreqSpacingHz')==0 + parameter.MFCC_linearFreqSpacingHz = 66.66666666; +end +if isfield(parameter,'MFCC_logFreqSpacing')==0 + parameter.MFCC_logFreqSpacing = 1.0711703; +end +if isfield(parameter,'MFCC_windowSizeSec')==0 + parameter.MFCC_windowSizeSec = 0.016; % AT sets 256 samples for a 16kHz signal +end +if isfield(parameter,'MFCC_featuresPerSecond')==0 + parameter.MFCC_featuresPerSecond = 100; +end +if isfield(parameter,'MFCC_applyPreemphasis')==0 + parameter.MFCC_applyPreemphasis = 1; +end +if isfield(parameter,'MFCC_preemphasisFilter')==0 + parameter.MFCC_preemphasisFilter = [1 -0.97]; + % this was never adapted to work for every sampling rate! It is too + % weak for sampling rates higher than 16000. +end +if isfield(parameter,'MFCC_cepstralCoeffsToKeep')==0 + parameter.MFCC_cepstralCoeffsToKeep = [2:13]; + % in contrast to the AT we ignore the first MFCC coefficient (DC) by default +end +if isfield(parameter,'MFCC_additionalFilter')==0 + parameter.MFCC_additionalFilter = []; +end + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Main program +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +if isempty(f_audio) + % we design a linear-phase filter. Such filters have the property that + % the group delay for every frequency is always equal to + % parameter.filterOrder/2. Therefore, although we built a signal + % adaptive filter, we can adjust for that delay. That means that the + % timepositions_beforeDegr don't require any adjustment. + timepositions_afterDegr = timepositions_beforeDegr; + return; +end + +% compute average MFCC which we will try to achieve for f_audio as well. +% [f_spec,freqinfo,timeinfo] = spectrogram(mean(parameter.audioDataForDestMfcc,2),hanning(parameter.fftLength),parameter.fftLength/2,parameter.fftLength,parameter.audioDataForDestMfcc_sf); +[f_mfccDest,sideinfo] = internal_computeMfccs(parameter.audioDataForDestMfcc,parameter.audioDataForDestMfcc_sf,parameter); +f_mfccDestMean = internal_infNanRobustMatrixMean(f_mfccDest,2); + +% compute log-mel-spectra of input +f_mfccMean = internal_infNanRobustMatrixMean(internal_computeMfccs(f_audio, samplingFreq,parameter),2); + +numTotalFilters = parameter.MFCC_numLinearFilters+parameter.MFCC_numLogFilters; + +% Now we find the desired magnitude response for the filter we are going to design +% by minimizing a least squares distance between the mfcc_dest and the mean MFCC +% computed from the filtered audio. This is a two step process. + +% Step1: First we have to reconstruct the required gain/attenuation on mel +% spectrum from the information we have. +% That means we blow up a 12-13 dimensional vector back to 40, and +% hence there are many possible solutions. One way to find a solution is to +% expand the coeefficient vector to 40 dimensions by setting all unknown +% entries to 0, and then apply an inverse DCT, which is the transposed +% matrix (orthogonal). Another solution is to find the smoothest solution +% to the problem DCTcropped * x == f_mfccDestMean-f_mfccMean, and we get +% that by solving the corresponding tikhonov regularised least squares +% problem: ||DCTcropped * x - f_mfccDestMean-f_mfccMean||^2_2 + +% sigma*||x||_2 for some sigma>0. However, as it turns out, the direct +% inversion of the DCT is actually yielding more or less exactly this +% solution. Therefore, we don't have to do much about it here usually. Only +% if one prefers an even smoother solution (at the cost of not htting +% DCTcropped * x == f_mfccDestMean-f_mfccMean exactely), then the tikhonov +% solution might be helpful, so I'll leave the code in for now. +useTikhonovInversionStep1 = 0; +if useTikhonovInversionStep1 + DCTcropped = internal_getCroppedDCTMatrix(numTotalFilters,parameter.MFCC_cepstralCoeffsToKeep); + + %find the smoothest solution to this problem using a binary search on + %candidates for the tikhonov lambda parameter. + b = [(f_mfccDestMean-f_mfccMean);zeros(numTotalFilters,1)]; + lambda_candidates = sort(10.^-[0:0.2:5]); + idxLow = 1; + idxHigh = length(lambda_candidates); + while idxHigh-idxLow>1 + fprintf('Low: %d High: %d\n',idxLow,idxHigh) + lambda = lambda_candidates(round( (idxHigh+idxLow)/2 )); + A = [DCTcropped;lambda*eye(numTotalFilters)]; + logL_smooth = A \ b; + euclDistance = norm(f_mfccMean + DCTcropped*logL_smooth - f_mfccDestMean); + if euclDistance > 10^-3 + idxHigh = round( (idxHigh+idxLow)/2 )-1; + else + idxLow = round( (idxHigh+idxLow)/2 ); + end + end + lambda = lambda_candidates(idxLow); + disp(lambda); +else + b = zeros(numTotalFilters,1); + b(parameter.MFCC_cepstralCoeffsToKeep) = f_mfccDestMean-f_mfccMean; + logL_smooth = dct(eye(numTotalFilters))' * b; +end + +% step 2: Here, just as above, we could transpose the mel filterbank and +% use its analysis functions as synthesis function, and indeed this kind +% of works OK. However, the mel filterbank matrix is not orthogonal, and +% when we would compute MFCCs from the reconstructed spectrum, we would get +% different MFCC. Hence we can't get around solving an actual system of +% equations here. Since this is underdetermined, we apply tikhonov +% regularization to get the smoothest solution. We also have to account for +% the preamphasis filter that was potentially applied +melFilterWeights = internal_getMelFilterbank(sideinfo.fftSize,samplingFreq,parameter); +% melFilterWeightsOrg = melFilterWeights; +if parameter.MFCC_applyPreemphasis + freqRespPreemphasisFilter = abs(fft(parameter.MFCC_preemphasisFilter,sideinfo.fftSize)); + freqRespPreemphasisFilter = freqRespPreemphasisFilter(1:sideinfo.fftSize/2+1); + melFilterWeights = melFilterWeights * diag(freqRespPreemphasisFilter); +end + +numFreqBins = sideinfo.fftSize/2+1; +numTikhonovEntries = sideinfo.fftSize/2+1; +mainA = sparse([],[],[],numTotalFilters*numFreqBins,numFreqBins); +for f=1:numFreqBins + mainA((f-1)*numTotalFilters+1:f*numTotalFilters,f) = melFilterWeights(:,f); +end +b = (diag(10.^logL_smooth) * melFilterWeights); +b = [b(:);zeros(numTikhonovEntries,1)]; % all cols are stacked to form one long upright vector +lambda_candidates = parameter.TikhonovLambdaCandidates; +idxLow = 1; +idxHigh = length(lambda_candidates); +runOnce = 0; +fullL_smooth = []; +while (~runOnce) || (idxHigh-idxLow>1) + runOnce = 1; + candIdx = round( (idxHigh+idxLow)/2 ); + lambda = lambda_candidates(candIdx); + A = [mainA;lambda*eye(numTikhonovEntries)]; + fullL_smoothTemp = A \ b; + if any(fullL_smoothTemp<0) + %Usually, this does not happen. This is just in case. lsqnonneg is much slower + warning('lsqnonneg is used to solve the Tikhonov reg. LS problem. This is just a comment to let you know that the method is slower than usual..') + fullL_smoothTemp = lsqnonneg(A,b); + end + euclDistance = sqrt(sum(sum( (diag(10.^logL_smooth)*melFilterWeights - melFilterWeights*diag(fullL_smoothTemp)).^2))); + if euclDistance > parameter.TikhonovMaxError + idxHigh = candIdx-1; + else + idxLow = candIdx; + fullL_smooth = fullL_smoothTemp; + end +end +if isempty(fullL_smooth) + lambda = min(lambda_candidates); + A = [mainA;lambda*eye(numTikhonovEntries)]; + fullL_smooth = lsqnonneg(A,b); +end + +% now we 'scale' (subtract in log scale) the frequency response globally to +% get the least amount of overall gain. Here, the idea is that global +% scaling is just useful to get the same first MFCC coefficient as in the +% destination recording. However, since we normalize the recoring in the +% end anyway, that would be lost anyway. Since we don't have information +% about what to do with areas that are not covered by MFCC analysis +% function, we just try to keep the gain/attenuation everywhere as low as +% possible to get the remaining MFCCs done and don't do anything in the +% areas without support from the analysis functions. That gives room to +% design a more simple filter afterwards. +fullL_smooth_dB = zeros(size(fullL_smooth)); +maxAtt_db = -20; +meanScaler = mean(20*log10(fullL_smooth(abs(fullL_smooth)>10^(maxAtt_db/20)))); +fullL_smooth_dB(abs(fullL_smooth)>10^(maxAtt_db/20)) = 20*log10(fullL_smooth(abs(fullL_smooth)>10^(maxAtt_db/20))) - meanScaler; + +% design filter (fir2 is linear phase) +filter_magFreqResp_dB_dest = fullL_smooth_dB; +filter_magFreqResp_linear_dest = 10 .^ (filter_magFreqResp_dB_dest/20); +h = fir2(parameter.filterOrder,sideinfo.freqinfoFFT/(samplingFreq/2),filter_magFreqResp_linear_dest); + +% apply filter +parameterApplyImpulseResponse.loadInternalIR = 0; +parameterApplyImpulseResponse.impulseResponse = h; +parameterApplyImpulseResponse.impulseResponseSampFreq = samplingFreq; +parameterApplyImpulseResponse.normalizeOutputAudio = 1; +parameterApplyImpulseResponse.averageGroupDelayOfFilter = round(parameter.filterOrder/2); +[f_audio_out,timepositions_afterDegr] = degradationUnit_applyImpulseResponse(f_audio, samplingFreq, timepositions_beforeDegr, parameterApplyImpulseResponse); + +if parameter.visualizations + filterFreqResp_db = 20*log10(abs(fft(h,sideinfo.fftSize))); + figure; + plot([0:sideinfo.fftSize/2]/sideinfo.fftSize*samplingFreq,fullL_smooth_dB,'k'); + hold on + plot([0:sideinfo.fftSize/2]/sideinfo.fftSize*samplingFreq,filterFreqResp_db(1:sideinfo.fftSize/2+1),'r'); + legend('required magnitude response','actual magnitude response of filter') + + parameter.MFCC_additionalFilter = 10.^(fullL_smooth_dB/20); + f_mfccOutputMeanTemp = internal_infNanRobustMatrixMean(internal_computeMfccs(f_audio, samplingFreq,parameter),2); + parameter.MFCC_additionalFilter = []; + f_mfccOutputMean = internal_infNanRobustMatrixMean(internal_computeMfccs(f_audio_out, samplingFreq,parameter),2); + figure; + plot(f_mfccMean,'b'); + hold on + plot(f_mfccDestMean,'k'); + plot(f_mfccOutputMeanTemp,'g'); + plot(f_mfccOutputMean,'r'); + legend('average MFCC of input audio','average MFCC of destination', 'average MFCC of input audio after filtering in freq domain','average MFCC of input audio after filtering in time domain') +end + +end + + +function [f_logMelSpectra,sideinfo] = internal_computeLogMelSpectra(f_audio,fs,parameter) +% the code here was essentially taken from Malcalm Slaney's auditory +% toolbox and then simplified/rewritten where possible + +% setting some parameters +windowSize = 2^nextpow2(fs*parameter.MFCC_windowSizeSec); +fftSize = 2*windowSize; +featuresPerSecond = parameter.MFCC_featuresPerSecond; +applyPreemphasis = parameter.MFCC_applyPreemphasis; +preemphasisFilter = parameter.MFCC_preemphasisFilter; +additionalFilter = parameter.MFCC_additionalFilter; + +[melFilterWeights,sideinfoMel] = internal_getMelFilterbank(fftSize,fs,parameter); + +% Filter the input with the preemphasis filter. Also figure how +% many columns of data we will end up with. +preEmphasized = mean(f_audio,2); +if applyPreemphasis + preEmphasized = filter(preemphasisFilter, 1, preEmphasized); %oh boy. This is too weak for high sampling rates... +end +nOverlap = windowSize - round(fs/featuresPerSecond); + +% spectrogram +[f_spec,freqinfo,timeinfo] = spectrogram(preEmphasized,hamming(windowSize),nOverlap,fftSize,fs); +if ~isempty(additionalFilter) + f_spec = f_spec.* repmat(additionalFilter(:),1,size(f_spec,2)); +end +f_logMelSpectra = log10(melFilterWeights * abs(f_spec)); + +if nargout > 1 + sideinfo.timeinfo = timeinfo; + sideinfo.freqinfoFFT = freqinfo; + sideinfo.allFreqs = sideinfoMel.freqs; + sideinfo.centreFrequencies = sideinfoMel.center; + sideinfo.lowerFrequencies = sideinfoMel.lower; + sideinfo.upperFrequencies = sideinfoMel.upper; + sideinfo.numTotalFilters = parameter.MFCC_numLinearFilters + parameter.MFCC_numLogFilters; + sideinfo.windowSize = windowSize; + sideinfo.fftSize = fftSize; +end + +end + +function DCTcropped = internal_getCroppedDCTMatrix(sizeDCT,cepstralCoeffsToKeep) +DCTcropped = dct(eye(sizeDCT)); +DCTcropped = DCTcropped(cepstralCoeffsToKeep,:); +end + +function [melFilterWeights,sideinfo] = internal_getMelFilterbank(fftSize,fs,parameter) +lowestFrequency = parameter.MFCC_lowestFrequencyHz; +numLinearFilters = parameter.MFCC_numLinearFilters; +numLogFilters = parameter.MFCC_numLogFilters; +numTotalFilters = numLinearFilters + numLogFilters; +linearFreqSpacingHz = parameter.MFCC_linearFreqSpacingHz; +logFreqSpacing = parameter.MFCC_logFreqSpacing; + +% Now figure the band edges. Interesting frequencies are spaced +% by linearSpacing for a while, then go logarithmic. First figure +% all the interesting frequencies. Lower, center, and upper band +% edges are all consequtive interesting frequencies. +freqs = lowestFrequency + (0:numLinearFilters-1)*linearFreqSpacingHz; +freqs(numLinearFilters+1:numTotalFilters+2) = ... + freqs(numLinearFilters) * logFreqSpacing.^(1:numLogFilters+2); + +lower = freqs(1:numTotalFilters); +center = freqs(2:numTotalFilters+1); +upper = freqs(3:numTotalFilters+2); + +% We now want to combine FFT bins so that each filter has unit +% weight, assuming a triangular weighting function. First figure +% out the height of the triangle, then we can figure out each +% frequencies contribution +melFilterWeights = zeros(numTotalFilters,fftSize/2+1); +triangleHeight = 2./(upper-lower); +fftFreqs = (0:fftSize/2)/fftSize*fs; + +for chan=1:numTotalFilters + melFilterWeights(chan,:) = ... + (fftFreqs > lower(chan) & fftFreqs <= center(chan)).* ... + triangleHeight(chan).*(fftFreqs-lower(chan))/(center(chan)-lower(chan)) + ... + (fftFreqs > center(chan) & fftFreqs < upper(chan)).* ... + triangleHeight(chan).*(upper(chan)-fftFreqs)/(upper(chan)-center(chan)); +end + +sideinfo.freqs = freqs; +sideinfo.lower = lower; +sideinfo.center = center; +sideinfo.upper = upper; + +end + +function [f_mfcc,sideinfo] = internal_computeMfccsFromLogMelSpectra(f_logMelSpectra,parameter) + +cepstralCoeffsToKeep = parameter.MFCC_cepstralCoeffsToKeep; + +% The following is equivalent to the auditory toolbox code, just shorter +% and more versatile. +DCTcropped = internal_getCroppedDCTMatrix(size(f_logMelSpectra,1),cepstralCoeffsToKeep); + +f_mfcc = DCTcropped * f_logMelSpectra; + +if nargout>1 + sideinfo.DCTcropped = DCTcropped; +end + +end + +function [f_mfcc,sideinfo] = internal_computeMfccs(f_audio,fs,parameter) + +if nargout>1 + [f_logMelSpectra,sideinfo] = internal_computeLogMelSpectra(f_audio,fs,parameter); + [f_mfcc,sideinfo2] = internal_computeMfccsFromLogMelSpectra(f_logMelSpectra,parameter); + sideinfo.f_logMelSpectra = f_logMelSpectra; + sideinfo.DCTcropped = sideinfo2.DCTcropped; +else + f_logMelSpectra = internal_computeLogMelSpectra(f_audio,fs,parameter); + f_mfcc = internal_computeMfccsFromLogMelSpectra(f_logMelSpectra,parameter); +end + +end + +function [output,locationWoutProblems] = internal_infNanRobustMatrixMean(matrix,dim) + +summedMatrix = sum(matrix,mod(dim,2)+1); % 'preserves' nan and inf +locationWoutProblems = ~isinf(summedMatrix) & ~isnan(summedMatrix); + +if dim == 1 + output = mean(matrix(locationWoutProblems,:),dim); +elseif dim == 2 + output = mean(matrix(:,locationWoutProblems),dim); +end + +end + + + + + + + diff -r 5ab87a0152e7 -r 76f45f5c9afd AudioDegradationToolbox/support/adthelper_normalizeAudio.m --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/AudioDegradationToolbox/support/adthelper_normalizeAudio.m Tue Jan 21 18:08:28 2014 +0000 @@ -0,0 +1,77 @@ +function [f_audio_out,timepositions_afterDegr] = adthelper_normalizeAudio(f_audio, samplingFreq, timepositions_beforeDegr, parameter) +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Name: adthelper_normalizeAudio +% Date of Revision: 2013-04 +% Programmer: Sebastian Ewert +% +% Description: +% - normalizes audio +% +% Input: +% f_audio - audio signal \in [-1,1]^{NxC} with C being the number of +% channels +% samplingFreq - sampling frequency of f_audio +% timepositions_beforeDegr - some degradations delay the input signal. If +% some points in time are given via this +% parameter, timepositions_afterDegr will +% return the corresponding positions in the +% output. Set to [] if unavailable. Set f_audio +% and samplingFreq to [] to compute only +% timepositions_afterDegr. +% +% Input (optional): parameter +% .maxAmplitude = 0.999; +% +% Output: +% f_audio_out - audio signal \in [-1,1]^{NxC} with C being the number of +% channels +% timepositions_afterDegr - time positions corresponding to timepositions_beforeDegr +% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Audio Degradation Toolbox +% +% Centre for Digital Music, Queen Mary University of London. +% This file copyright 2013 Sebastian Ewert, Matthias Mauch and QMUL. +% +% This program is free software; you can redistribute it and/or +% modify it under the terms of the GNU General Public License as +% published by the Free Software Foundation; either version 2 of the +% License, or (at your option) any later version. See the file +% COPYING included with this distribution for more information. +% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Check parameters +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +if nargin<4 + parameter=[]; +end +if nargin<3 + timepositions_beforeDegr=[]; +end +if nargin<2 + error('Please specify input data'); +end +if isfield(parameter,'maxAmplitude')==0 + parameter.maxAmplitude = 0.999; +end + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Main program +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +f_audio_out = []; +if ~isempty(f_audio) + f_audio_out = parameter.maxAmplitude * f_audio / max(max(max(abs(f_audio))),eps); +end + +% This function does not impose a delay +timepositions_afterDegr = timepositions_beforeDegr; + +end + + + diff -r 5ab87a0152e7 -r 76f45f5c9afd AudioDegradationToolbox/support/computeMeanSpectralEnvelope.m --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/AudioDegradationToolbox/support/computeMeanSpectralEnvelope.m Tue Jan 21 18:08:28 2014 +0000 @@ -0,0 +1,52 @@ +function computeMeanSpectralEnvelope(audiofile, fftLength) +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Audio Degradation Toolbox +% +% Centre for Digital Music, Queen Mary University of London. +% This file copyright 2013 Sebastian Ewert, Matthias Mauch and QMUL. +% +% This program is free software; you can redistribute it and/or +% modify it under the terms of the GNU General Public License as +% published by the Free Software Foundation; either version 2 of the +% License, or (at your option) any later version. See the file +% COPYING included with this distribution for more information. +% +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +[f_audio,samplingFreq]=wavread(audiofile); + +% compute mean spectral vector for f_audio +[destMagFreqResp, destMagFreqResp_freqs] = internal_computeMeanSpectralVector(f_audio,samplingFreq,fftLength); +destMagFreqResp = internal_standardizeMagFreqResp(destMagFreqResp); + +outputFilename = [audiofile(1:end-4),'_specEnv']; +save(outputFilename,'destMagFreqResp','destMagFreqResp_freqs') + +end + + +function [f_meanmagspec_db,freqs] = internal_computeMeanSpectralVector(f_audio,fs,fftLength) + +f_audio = mean(f_audio,2); + +[f_spec,freqs,time] = spectrogram(f_audio,hanning(fftLength),fftLength/2,fftLength,fs); + +f_magspec_db = 20 * log10(abs(f_spec)); + +f_magspec_db(:,isinf(sum(abs(f_magspec_db),1))) = []; % ignore rows with -inf/inf entries + +f_meanmagspec_db = mean(f_magspec_db,2); + +end + +function magFreqResp = internal_standardizeMagFreqResp(magFreqResp) + +maxRobust = max(magFreqResp(~isinf(magFreqResp))); + +magFreqResp = magFreqResp - maxRobust; + +magFreqResp(magFreqResp > 0) = 0; % remaining positive inf +magFreqResp(magFreqResp < -80) = -80; % remaining positive inf + +end + diff -r 5ab87a0152e7 -r 76f45f5c9afd demo_degradationUnits.m --- a/demo_degradationUnits.m Wed Nov 13 16:51:34 2013 +0000 +++ b/demo_degradationUnits.m Tue Jan 21 18:08:28 2014 +0000 @@ -72,137 +72,6 @@ end %% -for k=1:length(filenames) - [f_audio,samplingFreq]=wavread(filenames{k}); - - parameter.snrRatio = 10; % in dB - parameter.loadInternalSound = 1; - parameter.internalSound = 'PubEnvironment1'; - f_audio_out = degradationUnit_addSound(f_audio, samplingFreq, [], parameter); - - wavwrite(f_audio_out,samplingFreq,16,fullfile(pathOutputDemo,sprintf('Unit_02_addSound_file%d.wav',k))); - if createSpectrograms - [s,f,t] = spectrogram(f_audio_out,hamming(round(samplingFreq*0.093)),round(samplingFreq*0.093/2),[],samplingFreq); - figure; imagesc(t,f,log10(abs(s)+1),[0 maxValueRangeVis(k)]); axis xy; colormap(hot); ylim([0,8000]); colorbar; print('-dpng', fullfile(pathOutputDemo,sprintf('Unit_02_addSound_file%d.png',k))) - end -end; - -%% -for k=1:length(filenames) - [f_audio,samplingFreq]=wavread(filenames{k}); - - parameter.dsFrequency = 4000; - f_audio_out = degradationUnit_applyAliasing(f_audio, samplingFreq, [], parameter); - - wavwrite(f_audio_out,samplingFreq,16,fullfile(pathOutputDemo,sprintf('Unit_03_applyAliasing_file%d.wav',k))); - if createSpectrograms - [s,f,t] = spectrogram(f_audio_out,hamming(round(samplingFreq*0.093)),round(samplingFreq*0.093/2),[],samplingFreq); - figure; imagesc(t,f,log10(abs(s)+1),[0 maxValueRangeVis(k)]); axis xy; colormap(hot); ylim([0,8000]); colorbar; print('-dpng', fullfile(pathOutputDemo,sprintf('Unit_03_applyAliasing_file%d.png',k))) - end -end; - -%% -for k=1:length(filenames) - [f_audio,samplingFreq]=wavread(filenames{k}); - - parameter.percentOfSamples = 10; % signal is scaled so that n% of samples clip - f_audio_out = degradationUnit_applyClippingAlternative(f_audio, samplingFreq, [], parameter); - - wavwrite(f_audio_out,samplingFreq,16,fullfile(pathOutputDemo,sprintf('Unit_04_applyClipping_file%d.wav',k))); - if createSpectrograms - [s,f,t] = spectrogram(f_audio_out,hamming(round(samplingFreq*0.093)),round(samplingFreq*0.093/2),[],samplingFreq); - figure; imagesc(t,f,log10(abs(s)+1),[0 maxValueRangeVis(k)]); axis xy; colormap(hot); ylim([0,8000]); colorbar; print('-dpng', fullfile(pathOutputDemo,sprintf('Unit_04_applyClipping_file%d.png',k))) - end -end; -%% -for k=1:length(filenames) - [f_audio,samplingFreq]=wavread(filenames{k}); - - parameter.compressorSlope = 0.9; - parameter.normalizeOutputAudio = 1; - f_audio_out = degradationUnit_applyDynamicRangeCompression(f_audio, samplingFreq, [], parameter); - - wavwrite(f_audio_out,samplingFreq,16,fullfile(pathOutputDemo,sprintf('Unit_05_applyDynamicRangeCompression_file%d.wav',k))); - if createSpectrograms - [s,f,t] = spectrogram(f_audio_out,hamming(round(samplingFreq*0.093)),round(samplingFreq*0.093/2),[],samplingFreq); - figure; imagesc(t,f,log10(abs(s)+1),[0 maxValueRangeVis(k)]); axis xy; colormap(hot); ylim([0,8000]); colorbar; print('-dpng', fullfile(pathOutputDemo,sprintf('Unit_05_applyDynamicRangeCompression_file%d.png',k))) - end -end; - -%% -for k=1:length(filenames) - [f_audio,samplingFreq]=wavread(filenames{k}); - - parameter.nApplications = 5; - f_audio_out = degradationUnit_applyHarmonicDistortion(f_audio, samplingFreq, [], parameter); - - wavwrite(f_audio_out,samplingFreq,16,fullfile(pathOutputDemo,sprintf('Unit_06_applyHarmonicDistortion_file%d.wav',k))); - if createSpectrograms - [s,f,t] = spectrogram(f_audio_out,hamming(round(samplingFreq*0.093)),round(samplingFreq*0.093/2),[],samplingFreq); - figure; imagesc(t,f,log10(abs(s)+1),[0 maxValueRangeVis(k)]); axis xy; colormap(hot); ylim([0,8000]); colorbar; print('-dpng', fullfile(pathOutputDemo,sprintf('Unit_06_applyHarmonicDistortion_file%d.png',k))) - end -end; - -%% -for k=1:length(filenames) - [f_audio,samplingFreq]=wavread(filenames{k}); - - parameter.LameOptions = '--preset cbr 32'; - f_audio_out = degradationUnit_applyMp3Compression(f_audio, samplingFreq, [], parameter); - - wavwrite(f_audio_out,samplingFreq,16,fullfile(pathOutputDemo,sprintf('Unit_07_applyMp3Compression_file%d.wav',k))); - if createSpectrograms - [s,f,t] = spectrogram(f_audio_out,hamming(round(samplingFreq*0.093)),round(samplingFreq*0.093/2),[],samplingFreq); - figure; imagesc(t,f,log10(abs(s)+1),[0 maxValueRangeVis(k)]); axis xy; colormap(hot); ylim([0,8000]); colorbar; print('-dpng', fullfile(pathOutputDemo,sprintf('Unit_07_applyMp3Compression_file%d.png',k))) - end -end; - -%% -for k=1:length(filenames) - [f_audio,samplingFreq]=wavread(filenames{k}); - - parameter.changeInPercent = +5; - f_audio_out = degradationUnit_applySpeedup(f_audio, samplingFreq, [], parameter); - - - wavwrite(f_audio_out,samplingFreq,16,fullfile(pathOutputDemo,sprintf('Unit_08_applySpeedup_file%d.wav',k))); - if createSpectrograms - [s,f,t] = spectrogram(f_audio_out,hamming(round(samplingFreq*0.093)),round(samplingFreq*0.093/2),[],samplingFreq); - figure; imagesc(t,f,log10(abs(s)+1),[0 maxValueRangeVis(k)]); axis xy; colormap(hot); ylim([0,8000]); colorbar; print('-dpng', fullfile(pathOutputDemo,sprintf('Unit_08_applySpeedup_file%d.png',k))) - end -end; - -%% -for k=1:length(filenames) - [f_audio,samplingFreq]=wavread(filenames{k}); - - parameter.intensityOfChange = 3; - parameter.frequencyOfChange = 0.5; - f_audio_out = degradationUnit_applyWowResampling(f_audio, samplingFreq, [], parameter); - - wavwrite(f_audio_out,samplingFreq,16,fullfile(pathOutputDemo,sprintf('Unit_09_applyWowResampling_file%d.wav',k))); - if createSpectrograms - [s,f,t] = spectrogram(f_audio_out,hamming(round(samplingFreq*0.093)),round(samplingFreq*0.093/2),[],samplingFreq); - figure; imagesc(t,f,log10(abs(s)+1),[0 maxValueRangeVis(k)]); axis xy; colormap(hot); ylim([0,8000]); colorbar; print('-dpng', fullfile(pathOutputDemo,sprintf('Unit_09_applyWowResampling_file%d.png',k))) - end -end; - -%% -for k=1:length(filenames) - [f_audio,samplingFreq]=wavread(filenames{k}); - - parameter.stopFrequency = 1000; - f_audio_out = degradationUnit_applyHighpassFilter(f_audio, samplingFreq, [], parameter); - - wavwrite(f_audio_out,samplingFreq,16,fullfile(pathOutputDemo,sprintf('Unit_10_applyHighpassFilter_file%d.wav',k))); - if createSpectrograms - [s,f,t] = spectrogram(f_audio_out,hamming(round(samplingFreq*0.093)),round(samplingFreq*0.093/2),[],samplingFreq); - figure; imagesc(t,f,log10(abs(s)+1),[0 maxValueRangeVis(k)]); axis xy; colormap(hot); ylim([0,8000]); colorbar; print('-dpng', fullfile(pathOutputDemo,sprintf('Unit_10_applyHighpassFilter_file%d.png',k))) - end -end; - - -%% % Some degradations delay the input signal. If some timepositions are given % via timepositions_beforeDegr, the corresponding positions will be returned % in timepositions_afterDegr. @@ -223,10 +92,197 @@ fprintf('degradation_applyFirFilter: adjusting time positions\n'); for m=1:length(timepositions_afterDegr) fprintf('%g -> %g\n',timepositions_beforeDegr(m),timepositions_afterDegr(m)); end - wavwrite(f_audio_out,samplingFreq,16,fullfile(pathOutputDemo,sprintf('Unit_11_applyImpulseResponse_file%d.wav',k))); + wavwrite(f_audio_out,samplingFreq,16,fullfile(pathOutputDemo,sprintf('Unit_02_applyImpulseResponse_file%d.wav',k))); if createSpectrograms [s,f,t] = spectrogram(f_audio_out,hamming(round(samplingFreq*0.093)),round(samplingFreq*0.093/2),[],samplingFreq); - figure; imagesc(t,f,log10(abs(s)+1),[0 maxValueRangeVis(k)]); axis xy; colormap(hot); ylim([0,8000]); colorbar; print('-dpng', fullfile(pathOutputDemo,sprintf('Unit_11_applyImpulseResponse_file%d.png',k))) + figure; imagesc(t,f,log10(abs(s)+1),[0 maxValueRangeVis(k)]); axis xy; colormap(hot); ylim([0,8000]); colorbar; print('-dpng', fullfile(pathOutputDemo,sprintf('Unit_02_applyImpulseResponse_file%d.png',k))) end end; +%% +for k=1:length(filenames) + [f_audio,samplingFreq]=wavread(filenames{k}); + + parameter.snrRatio = 10; % in dB + parameter.loadInternalSound = 1; + parameter.internalSound = 'PubEnvironment1'; + f_audio_out = degradationUnit_addSound(f_audio, samplingFreq, [], parameter); + + wavwrite(f_audio_out,samplingFreq,16,fullfile(pathOutputDemo,sprintf('Unit_03_addSound_file%d.wav',k))); + if createSpectrograms + [s,f,t] = spectrogram(f_audio_out,hamming(round(samplingFreq*0.093)),round(samplingFreq*0.093/2),[],samplingFreq); + figure; imagesc(t,f,log10(abs(s)+1),[0 maxValueRangeVis(k)]); axis xy; colormap(hot); ylim([0,8000]); colorbar; print('-dpng', fullfile(pathOutputDemo,sprintf('Unit_03_addSound_file%d.png',k))) + end +end; + +%% +for k=1:length(filenames) + [f_audio,samplingFreq]=wavread(filenames{k}); + + parameter.dsFrequency = 4000; + f_audio_out = degradationUnit_applyAliasing(f_audio, samplingFreq, [], parameter); + + wavwrite(f_audio_out,samplingFreq,16,fullfile(pathOutputDemo,sprintf('Unit_04_applyAliasing_file%d.wav',k))); + if createSpectrograms + [s,f,t] = spectrogram(f_audio_out,hamming(round(samplingFreq*0.093)),round(samplingFreq*0.093/2),[],samplingFreq); + figure; imagesc(t,f,log10(abs(s)+1),[0 maxValueRangeVis(k)]); axis xy; colormap(hot); ylim([0,8000]); colorbar; print('-dpng', fullfile(pathOutputDemo,sprintf('Unit_04_applyAliasing_file%d.png',k))) + end +end; + +%% +for k=1:length(filenames) + [f_audio,samplingFreq]=wavread(filenames{k}); + + parameter.percentOfSamples = 10; % signal is scaled so that n% of samples clip + f_audio_out = degradationUnit_applyClippingAlternative(f_audio, samplingFreq, [], parameter); + + wavwrite(f_audio_out,samplingFreq,16,fullfile(pathOutputDemo,sprintf('Unit_05_applyClipping_file%d.wav',k))); + if createSpectrograms + [s,f,t] = spectrogram(f_audio_out,hamming(round(samplingFreq*0.093)),round(samplingFreq*0.093/2),[],samplingFreq); + figure; imagesc(t,f,log10(abs(s)+1),[0 maxValueRangeVis(k)]); axis xy; colormap(hot); ylim([0,8000]); colorbar; print('-dpng', fullfile(pathOutputDemo,sprintf('Unit_05_applyClipping_file%d.png',k))) + end +end; +%% +for k=1:length(filenames) + [f_audio,samplingFreq]=wavread(filenames{k}); + + parameter.compressorSlope = 0.9; + parameter.normalizeOutputAudio = 1; + f_audio_out = degradationUnit_applyDynamicRangeCompression(f_audio, samplingFreq, [], parameter); + + wavwrite(f_audio_out,samplingFreq,16,fullfile(pathOutputDemo,sprintf('Unit_06_applyDynamicRangeCompression_file%d.wav',k))); + if createSpectrograms + [s,f,t] = spectrogram(f_audio_out,hamming(round(samplingFreq*0.093)),round(samplingFreq*0.093/2),[],samplingFreq); + figure; imagesc(t,f,log10(abs(s)+1),[0 maxValueRangeVis(k)]); axis xy; colormap(hot); ylim([0,8000]); colorbar; print('-dpng', fullfile(pathOutputDemo,sprintf('Unit_06_applyDynamicRangeCompression_file%d.png',k))) + end +end; + +%% +for k=1:length(filenames) + [f_audio,samplingFreq]=wavread(filenames{k}); + + parameter.nApplications = 5; + f_audio_out = degradationUnit_applyHarmonicDistortion(f_audio, samplingFreq, [], parameter); + + wavwrite(f_audio_out,samplingFreq,16,fullfile(pathOutputDemo,sprintf('Unit_07_applyHarmonicDistortion_file%d.wav',k))); + if createSpectrograms + [s,f,t] = spectrogram(f_audio_out,hamming(round(samplingFreq*0.093)),round(samplingFreq*0.093/2),[],samplingFreq); + figure; imagesc(t,f,log10(abs(s)+1),[0 maxValueRangeVis(k)]); axis xy; colormap(hot); ylim([0,8000]); colorbar; print('-dpng', fullfile(pathOutputDemo,sprintf('Unit_07_applyHarmonicDistortion_file%d.png',k))) + end +end; + +%% +for k=1:length(filenames) + [f_audio,samplingFreq]=wavread(filenames{k}); + + parameter.LameOptions = '--preset cbr 32'; + f_audio_out = degradationUnit_applyMp3Compression(f_audio, samplingFreq, [], parameter); + + wavwrite(f_audio_out,samplingFreq,16,fullfile(pathOutputDemo,sprintf('Unit_08_applyMp3Compression_file%d.wav',k))); + if createSpectrograms + [s,f,t] = spectrogram(f_audio_out,hamming(round(samplingFreq*0.093)),round(samplingFreq*0.093/2),[],samplingFreq); + figure; imagesc(t,f,log10(abs(s)+1),[0 maxValueRangeVis(k)]); axis xy; colormap(hot); ylim([0,8000]); colorbar; print('-dpng', fullfile(pathOutputDemo,sprintf('Unit_08_applyMp3Compression_file%d.png',k))) + end +end; + +%% +for k=1:length(filenames) + [f_audio,samplingFreq]=wavread(filenames{k}); + + parameter.changeInPercent = +5; + f_audio_out = degradationUnit_applySpeedup(f_audio, samplingFreq, [], parameter); + + + wavwrite(f_audio_out,samplingFreq,16,fullfile(pathOutputDemo,sprintf('Unit_09_applySpeedup_file%d.wav',k))); + if createSpectrograms + [s,f,t] = spectrogram(f_audio_out,hamming(round(samplingFreq*0.093)),round(samplingFreq*0.093/2),[],samplingFreq); + figure; imagesc(t,f,log10(abs(s)+1),[0 maxValueRangeVis(k)]); axis xy; colormap(hot); ylim([0,8000]); colorbar; print('-dpng', fullfile(pathOutputDemo,sprintf('Unit_09_applySpeedup_file%d.png',k))) + end +end; + +%% +for k=1:length(filenames) + [f_audio,samplingFreq]=wavread(filenames{k}); + + parameter.intensityOfChange = 3; + parameter.frequencyOfChange = 0.5; + f_audio_out = degradationUnit_applyWowResampling(f_audio, samplingFreq, [], parameter); + + wavwrite(f_audio_out,samplingFreq,16,fullfile(pathOutputDemo,sprintf('Unit_10_applyWowResampling_file%d.wav',k))); + if createSpectrograms + [s,f,t] = spectrogram(f_audio_out,hamming(round(samplingFreq*0.093)),round(samplingFreq*0.093/2),[],samplingFreq); + figure; imagesc(t,f,log10(abs(s)+1),[0 maxValueRangeVis(k)]); axis xy; colormap(hot); ylim([0,8000]); colorbar; print('-dpng', fullfile(pathOutputDemo,sprintf('Unit_10_applyWowResampling_file%d.png',k))) + end +end; + +%% +for k=1:length(filenames) + [f_audio,samplingFreq]=wavread(filenames{k}); + + parameter.stopFrequency = 1000; + f_audio_out = degradationUnit_applyHighpassFilter(f_audio, samplingFreq, [], parameter); + + wavwrite(f_audio_out,samplingFreq,16,fullfile(pathOutputDemo,sprintf('Unit_11_applyHighpassFilter_file%d.wav',k))); + if createSpectrograms + [s,f,t] = spectrogram(f_audio_out,hamming(round(samplingFreq*0.093)),round(samplingFreq*0.093/2),[],samplingFreq); + figure; imagesc(t,f,log10(abs(s)+1),[0 maxValueRangeVis(k)]); axis xy; colormap(hot); ylim([0,8000]); colorbar; print('-dpng', fullfile(pathOutputDemo,sprintf('Unit_11_applyHighpassFilter_file%d.png',k))) + end +end; + +%% +for k=1:length(filenames) + [f_audio,samplingFreq]=wavread(filenames{k}); + + parameter.stopFrequency = 1000; + f_audio_out = degradationUnit_applyLowpassFilter(f_audio, samplingFreq, [], parameter); + + wavwrite(f_audio_out,samplingFreq,16,fullfile(pathOutputDemo,sprintf('Unit_12_applyLowpassFilter_file%d.wav',k))); + if createSpectrograms + [s,f,t] = spectrogram(f_audio_out,hamming(round(samplingFreq*0.093)),round(samplingFreq*0.093/2),[],samplingFreq); + figure; imagesc(t,f,log10(abs(s)+1),[0 maxValueRangeVis(k)]); axis xy; colormap(hot); ylim([0,8000]); colorbar; print('-dpng', fullfile(pathOutputDemo,sprintf('Unit_12_applyLowpassFilter_file%d.png',k))) + end +end; + + +%% +for k=1:length(filenames) + [f_audio,samplingFreq]=wavread(filenames{k}); + + % There are four ways to specify the destination mean spectrum: 1. by + % loading example files provided with the toolbox, 2. by using specific + % noise "color" profiles, 3. by providing the destination mean spectrum + % using the parameter destMagFreqResp, 4. by providing audio data from + % which the destination mean spectrum is computed. + parameter.loadInternalMagFreqResp = 1; + parameter.internalMagFreqResp = 'Beethoven_Appasionata_Rwc'; + f_audio_out = degradationUnit_adaptiveEqualizer(f_audio, samplingFreq, [], parameter); + + wavwrite(f_audio_out,samplingFreq,16,fullfile(pathOutputDemo,sprintf('Unit_13_adaptiveEqualizer_file%d.wav',k))); + if createSpectrograms + [s,f,t] = spectrogram(f_audio_out,hamming(round(samplingFreq*0.093)),round(samplingFreq*0.093/2),[],samplingFreq); + figure; imagesc(t,f,log10(abs(s)+1),[0 maxValueRangeVis(k)]); axis xy; colormap(hot); ylim([0,8000]); colorbar; print('-dpng', fullfile(pathOutputDemo,sprintf('Unit_13_adaptiveEqualizer_file%d.png',k))) + end +end; + +%% +if 1 + for k=1:length(filenames) + [f_audio,samplingFreq]=wavread(filenames{k}); + + [parameter.audioDataForDestMfcc,parameter.audioDataForDestMfcc_sf]=wavread('testdata/RWC_P009m_drum.wav'); + + % After this unit, the mean MFCC vector of f_audio_out is almost identical to + % the one of RWC_P009m_drum.wav + parameter.visualizations = createSpectrograms; + f_audio_out = degradationUnit_applyMfccMeanAdaption(f_audio, samplingFreq, [], parameter); + + wavwrite(f_audio_out,samplingFreq,16,fullfile(pathOutputDemo,sprintf('Unit_14_applyMfccMeanAdaption_file%d.wav',k))); + if createSpectrograms + [s,f,t] = spectrogram(f_audio_out,hamming(round(samplingFreq*0.093)),round(samplingFreq*0.093/2),[],samplingFreq); + figure; imagesc(t,f,log10(abs(s)+1),[0 maxValueRangeVis(k)]); axis xy; colormap(hot); ylim([0,8000]); colorbar; print('-dpng', fullfile(pathOutputDemo,sprintf('Unit_14_applyMfccMeanAdaption_file%d.png',k))) + end + end +end + + +