Mercurial > hg > mauch-mirex-2010
view _misc/featureextraction/chromagenerator.m @ 9:4ea6619cb3f5 tip
removed log files
author | matthiasm |
---|---|
date | Fri, 11 Apr 2014 15:55:11 +0100 |
parents | b5b38998ef3b |
children |
line wrap: on
line source
function [chroma, t, salience, tuning, URIstring] = chromagenerator(filename, kammerton, nBin) %% make URI string URIstring = strcat('chromagenerator-', num2str(kammerton), '-',num2str(nBin)); %% parameters % set the bins per semitone (i.e. binsperoctave/12) % kammerton = 440; % nBin = 7; midbin = ceil(nBin/2); % minimum and maximum MIDI bin, should be n octaves apart. midimin = 21; % 69 has 440 Hz % midimin = 45; % 69 has 440 Hz midimax = 92; % midimax = 68; noterange = midimax - midimin + 1; % midi note values including the fractions "between" actual MIDI notes midibins = midimin-(midbin-1)/nBin:(1/nBin):midimax+(midbin-1)/nBin; % the same in frequencies fbins = kammerton * 2.^((midibins-69)/12); nFbin = length(fbins); % the sampling frequency everything is (down)sampled to used_fs = 11025; % intuitive setting of how long the frame is (quite short here!) fracofsecond = 4; hopspersecond = 20; % calculate a convenient FFT length nFFT = 2^nextpow2(used_fs/fracofsecond); % hopsize in samples hopsize = used_fs/hopspersecond/nFFT; % "instrument" means a particular spectral shape used_instruments = 1:2; nInstrument = length(used_instruments); %% generate dictionary % (complex) tones for all fundamental frequencies are generated. then their % amplitude spectrum is calculated and normalised according to the L2 norm. % preliminary t for dictionary generation t = (1:nFFT)/used_fs; % initialise note dictionary A = zeros(nFbin,nFFT/2,nInstrument); % loop over all instruments for iInstrument = used_instruments % loop over all fundamental frequencies for iFbin = 1:nFbin; f0 = fbins(iFbin); switch iInstrument case 1 wave = sum([sin(t*2*pi*f0*1); ... 0.9^1 * sin(t*2*pi*f0*2); ... 0.9^2 * sin(t*2*pi*f0*3); ... 0.9^3 * sin(t*2*pi*f0*4); ... ...0.9^4 * sin(t*2*pi*f0*5); ... ]); case 2 wave = sin(t*2*pi*f0*1); end % window wave and fft wave = hamming(nFFT)' .* wave; fullfft = fft(wave); % calculate amplitude spectrum from fft A(iFbin,:,iInstrument) = abs(fullfft(1:round(nFFT/2))); end % normalise (L2 norm) A(:,:,iInstrument) = qnormalise(A(:,:,iInstrument),2,2); end %% get the fft frames from a wave file %this is probably ending up not being used in a vamp plugin % fprintf(1,'%s\n',filename) % [audiosize,fs] = wavread(filename,'size'); % chunk_sec = 20; % nChunk = ceil(audiosize(1)/fs/chunk_sec); % songframes = []; % start = 1; % % extract fft from chunks (for less memory consumption) % for iChunk = 1:nChunk % samplemin = (iChunk-1) * chunk_sec * fs + 1; % samplemax = min(audiosize(1), (iChunk * chunk_sec + 1) * fs); % audiodata0 = wavread(filename,[samplemin samplemax]); % audiodata = resample(mean(audiodata0,2),used_fs,fs,20); % songframes0 = myframefft(audiodata,nFFT,hopsize,'hamming'); % if size(songframes0,2)>=start % songframes = [songframes(:,1:end-start) abs(songframes0(1:round(nFFT/2),start:end))]; % end % start = round(hopspersecond/2)+1; % end % clear songframes0 % clear audiodata % clear audiodata0 % t = (0:T-1)./hopspersecond; %% [songframes,f,t]=chunkspectrogram(filename,4096,4096*7/8,4096,inf); hopspersecond = 1/(t(2)-t(1)); songframes = abs(songframes); %% T = size(songframes,2); fprintf(1, '%d different notes, %d frequency bins, %d time frames\n', noterange,nFFT/2, T) notespectrum0 = zeros(nFbin, T, nInstrument); songframes = qnormalise(songframes,2,1); songframes(isnan(songframes)) = 0; %% calculate cosine distance for iInstrument = 1:nInstrument for iTone = 1:nFbin notespectrum0(iTone,:,iInstrument) = A(iTone,:,iInstrument) * songframes; end end %% get only the bins with "more than 1 st. deviation" notespectrum = zeros(size(notespectrum0)); wind_size = 6 * nBin; wind = hamming(wind_size)/sum(hamming(wind_size)); stdthresh = 1; for iInstrument = used_instruments % calculate running mean m = conv2(notespectrum0(:,:,iInstrument),wind(:),'same'); % calculate running standard deviation stdev = sqrt(conv2((notespectrum0(:,:,iInstrument)-m).^2,wind(:),'same')); % take only those tones that are above 1 standard deviation from the % mean notespectrum(:,:,iInstrument) = (notespectrum0(:,:,iInstrument)-m)./stdev; temp = notespectrum(:,:,iInstrument); temp(temp<stdthresh) = 0; notespectrum(:,:,iInstrument) = temp; end % notes that are one were not "good enough" (not greater than 1 st. % deviation reater than the mean) and are hence set to 0. notespectrum(notespectrum<=stdthresh) = 0; % we require that the notes be on in *both* "instruments". hence the final % note spectrum note_s is their product, which works a bit like a minimum note_s = notespectrum(:,:,1) .* notespectrum(:,:,2); note_s(1:nBin*6,:) = 0; note_s(isnan(note_s)) = 0; %% tuning % I wrap the note spectrum to one semitone, kind of generating the note % spectrum modulo one semitone. one gets a matrix of size nBin x T. from % that matrix, the tuning can be calculated by extracting the angle of the % sum of all columns, the sum of all columns being a nBin-dimensional % vector. th = tuner(note_s(36*nBin+(1:12*nBin),:), nBin) % th = tuner(notespectrum0(36*nBin+(1:12*nBin),:,2), nBin) tuning = kammerton*2^((th/2/pi)/12) % tune the spectrum by interpolating the bins such that now the middle bin % of every semitone corresponds to the right frequency in equal % temperament tuned_s = zeros(size(note_s)); for iFrame = 1:T tuned_s(:,iFrame) = interp1(1:nFbin, note_s(:,iFrame), (1:nFbin) + th / (2 * pi) * nBin,[],0); end %% reduce to 1 bin per semitone % just sum the nBin bins that correspond to one semitone. wrapped_tuned_s = reshape(tuned_s,[nBin,noterange*T]); % if you want to emphasize the middle bin, use a fancy window, otherwise % just use the rectangular window wei = rectwin(nBin)'; salience.reduced = reshape(wei * wrapped_tuned_s,noterange,T); % median filter in time direction, if you like salience.reduced = medfilt1(salience.reduced,floor(hopspersecond/5)-1,[],2); %% make chromagram % actually the edge of the note spectrum is a bit noisy (due to the % convolution three parts above. that's why I start some bins from the % lower edge (i.e. midimin_used). maybe one could do this properly % sometime. midimin_used = midimin+6; % uebergang is where bass and treble chroma overlap equally uebergang = 50; % some simple linear functions to generate the treble and bass profiles leftfunc = zeros(2,noterange); rightfunc = zeros(2,noterange); leftfunc(1,:) = 2/12*((midimin:midimax)-midimin_used); leftfunc(2,:) = 1/12*((midimin:midimax)-uebergang) + 0.5; rightfunc(1,:) = -1/12*((midimin:midimax)-uebergang) + 0.5; rightfunc(2,:) = -(.5)/12*((midimin:midimax)-midimax); % initialise stuff salience.bass = zeros(size(salience.reduced)); salience.treble = zeros(size(salience.reduced)); profiles = zeros(noterange,3); % bass and treble profiles for i = 1:2 profiles(:,i) = max(0,min(1,min(leftfunc(i,:),rightfunc(i,:))))'; end % wide profile (i.e. both bass and treble) profiles(:,3) = min(1,sum(profiles(:,1:2),2)); % calculate specific saliences (windowed in f0 direction) salience.bass = salience.reduced .* profiles(:,ones(1,T)); salience.treble = salience.reduced .* profiles(:,2 * ones(1,T)); salience.wide = salience.reduced .* profiles(:,3 * ones(1,T)); % calculate chromagrams chroma.bass = squeeze(max(reshape(salience.bass,[12,6,T]),[],2)); chroma.treble = squeeze(max(reshape(salience.treble,[12,6,T]),[],2)); chroma.wide = squeeze(max(reshape(salience.wide,[12,6,T]),[],2));