Mercurial > hg > mauch-mirex-2010
view _misc/featureextraction/.svn/text-base/mychroma.m.svn-base @ 9:4ea6619cb3f5 tip
removed log files
author | matthiasm |
---|---|
date | Fri, 11 Apr 2014 15:55:11 +0100 |
parents | b5b38998ef3b |
children |
line wrap: on
line source
function [chromagram,t,salience] = ...
    mychroma(filename, nBins, fmin, used_fs, fracofsecond, calcu)
%MYCHROMA  Treble, bass and wide chromagrams plus note salience of a WAV file.
%
%   [chromagram,t,salience] = mychroma(filename, nBins, fmin, used_fs, ...
%                                      fracofsecond, calcu)
%
%   Inputs
%     filename      path of the WAV file to analyse (read with wavread)
%     nBins         bins per semitone; [] selects 3
%     fmin          frequency in Hz of the first dictionary bin; [] selects
%                   55 * 2^(-7/12-(midbin-1)/12/nBins)
%     used_fs       sample rate the audio is resampled to; [] selects
%                   round(44100/4)
%     fracofsecond  the FFT length is 2^nextpow2(used_fs/fracofsecond)
%     calcu         true forces the note dictionary to be regenerated even
%                   when data/mydict-<nBins>.mat exists; omitted or [] is
%                   treated as false
%
%   Outputs
%     chromagram    struct with 12-by-T fields .treble, .bass and .wide
%     t             1-by-T frame times in seconds (20 frames per second)
%     salience      struct with .reduced (noterange-by-T, one row per
%                   semitone, taken before median filtering) and .full
%                   (always empty; kept so the output shape is unchanged)
%
%   Side effects
%     Prints the file name and the problem size to stdout, and writes
%     data/mydict-<nBins>.mat whenever the dictionary is (re)generated.
%
%   Notes
%     * The dictionary cache is keyed by nBins only. When a cached file is
%       used, its A, nFFT and hopsize replace the values derived from the
%       arguments, whatever fmin / used_fs / fracofsecond were passed.
%       Pass calcu = true after changing those.
%     * Calls myframefft, which is not defined in this file.
%     * The chroma folding matrix and the pitch profiles are written for
%       noterange = 61.
%     * NOTE(review): wavread is kept because the rest of the file targets
%       a MATLAB release that provides it.

%% defaults and derived parameters
if isempty(nBins)
    nBins = 3;
end
% Index of the bin treated as the in-tune centre of each semitone. It is
% used both for the default fmin and for the tuning estimate below, so it
% must not depend on whether the dictionary was loaded or generated.
midbin = ceil(nBins/2);
if isempty(fmin)
    fmin = 55 * 2^(-7/12-(midbin-1)/12/nBins);
end
if isempty(used_fs)
    used_fs = round(44100/4);
end
if nargin < 6 || isempty(calcu)
    calcu = false;
end

hopspersecond = 20;
nFFT = 2^nextpow2(used_fs/fracofsecond);
hopsize = used_fs/hopspersecond/nFFT;
noterange = 61;
used_instruments = 1:2;
nInstrument = length(used_instruments);

%% load or generate dictionary
dictDir = 'data';
dicfilename = ['mydict-' num2str(nBins) '.mat'];
dictPath = fullfile(dictDir, dicfilename);
if ~calcu && exist(dictPath, 'file') == 2
    % Load into a struct so that only the expected variables enter this
    % workspace; nothing else stored in the MAT file can shadow a local.
    cached = load(dictPath);
    A = cached.A;
    if isfield(cached, 'nFFT')
        nFFT = cached.nFFT;
    end
    if isfield(cached, 'hopsize')
        hopsize = cached.hopsize;
    end
else
    % dictionary parameters
    secs = .8;
    tDict = linspace(0,secs,secs*used_fs)';

    % make note dictionary: one row per (note, bin), one page per instrument
    A = zeros(noterange*nBins,nFFT/2,nInstrument);
    for iInstrument = used_instruments
        for iMidiNote = 1:noterange
            for iBin = 1:nBins
                f0 = fmin * 2^((iMidiNote-1)/12 + (iBin-1)/12/nBins);
                switch iInstrument
                    case 1
                        % four harmonics, amplitudes decaying by 0.6
                        wave = sum([sin(tDict*2*pi*f0*1) ...
                            0.6^1 * sin(tDict*2*pi*f0*2) ...
                            0.6^2 * sin(tDict*2*pi*f0*3) ...
                            0.6^3 * sin(tDict*2*pi*f0*4)],2);
                    case 2
                        % pure sinusoid at f0
                        wave = sin(tDict*2*pi*f0*1);
                    case 3
                        % sub-harmonics f0/2 ... f0/9 (not selected by
                        % used_instruments = 1:2)
                        wave = sum([...
                            0.6^1 * sin(tDict*2*pi*f0*1/2) ...
                            0.6^2 * sin(tDict*2*pi*f0*1/3) ...
                            0.6^3 * sin(tDict*2*pi*f0*1/4) ...
                            0.6^4 * sin(tDict*2*pi*f0*1/5) ...
                            0.6^5 * sin(tDict*2*pi*f0*1/6) ...
                            0.6^6 * sin(tDict*2*pi*f0*1/7) ...
                            0.6^7 * sin(tDict*2*pi*f0*1/8) ...
                            0.6^8 * sin(tDict*2*pi*f0*1/9)],2);
                end
                fftframes0 = myframefft(wave,nFFT,hopsize,'hamming');
                % dictionary entry = mean magnitude spectrum over all frames
                A(nBins*(iMidiNote-1)+iBin,:,iInstrument) = ...
                    mean(abs(fftframes0(1:round(nFFT/2),:)),2);
            end
        end
    end
    % save would fail if the cache folder did not exist yet
    if ~exist(dictDir, 'dir')
        mkdir(dictDir);
    end
    save(dictPath, 'A', 'nFFT', 'hopsize');
end

%% get the fft frames from a wave file
fprintf(1,'%s\n',filename)
[audiosize,fs] = wavread(filename,'size');
chunk_sec = 20;
nChunk = ceil(audiosize(1)/fs/chunk_sec);
songframes = [];
s = [];
start = 1;
for iChunk = 1:nChunk
    % each chunk reads one extra second so consecutive chunks overlap
    samplemin = (iChunk-1) * chunk_sec * fs + 1;
    samplemax = min(audiosize(1), (iChunk * chunk_sec + 1) * fs);
    audiodata0 = wavread(filename,[samplemin samplemax]);
    audiodata = resample(mean(audiodata0,2),used_fs,fs,20);
    songframes0 = myframefft(audiodata,nFFT,hopsize,'hamming');
    if size(songframes0,2)>=start
        % stitch: drop the trailing frames of what has been collected so
        % far and the leading frames of the new chunk
        songframes = [songframes(:,1:end-start) ...
            abs(songframes0(1:round(nFFT/2),start:end))];
    end
    start = round(hopspersecond/2)+1;
end
clear songframes0
clear audiodata
clear audiodata0

T = size(songframes,2);
t = (0:T-1)./hopspersecond;
fprintf(1, '%d different notes, %d frequency bins, %d time frames\n', ...
    noterange,nFFT/2, T)

%% project every frame onto the dictionary of each instrument
simpleS = zeros(noterange*nBins,T,nInstrument);
songframes(isnan(songframes)) = 0;
for iInstrument = 1:nInstrument
    At = A(:,:,iInstrument);
    for kFrame = 1:T
        simpleS(:,kFrame,iInstrument) = At * songframes(:,kFrame);
    end
end
% Column kernel, so the filter runs along the bin axis of the second
% instrument's salience; negative results are clipped to zero.
simpleS(:,:,2) = max(0,conv2(simpleS(:,:,2),[-1 -1 4 -1 -1]','same'));

%% combine the instruments (element-wise product of the two saliences)
note_s = (simpleS(:,:,1) .* simpleS(:,:,2));
clear simpleS

%% tuning
% total salience per within-semitone bin position
wrapped_s = zeros(nBins,T);
for iBin = 1:nBins
    wrapped_s(iBin,:) = sum(note_s(iBin:nBins:end,:));
end
% 1i instead of i: immune to a variable named i shadowing the imaginary unit
on_circle = exp(1i * ((1:nBins)-midbin)/nBins * 2 * pi);
complex_tuning = on_circle * wrapped_s;
smooth_complex_tuning = conv2(complex_tuning,hamming(20*hopspersecond)','same');
% one global offset, in bins, applied to every frame
tooflat = angle(mean(smooth_complex_tuning)) / (2 * pi) * nBins;
binIndex = 1:nBins*noterange;
queryIndex = binIndex + tooflat;
tuned_s = note_s;
for iFrame = 1:T
    % linear interpolation; positions outside the bin range become 0
    tuned_s(:,iFrame) = interp1(binIndex, note_s(:,iFrame), queryIndex, 'linear', 0);
end

%% reduce to 1 bin per semitone
wrapped_tuned_s = reshape(tuned_s,[nBins,noterange*T]);
wei = rectwin(nBins)';
reduced = reshape(wei * wrapped_tuned_s,noterange,T);
salience.full = s;
salience.reduced = reduced;

%% make chromagram
% 9-point median filter applied along the time axis
reduced = medfilt1(reduced',9)';
% folds note rows 2..61 onto 12 rows; the first note row is ignored
chromagen = [zeros(12,1) repmat(eye(12),1,5)];
treble_profile = ([0 zeros(1,24) linspace(0,1,6) linspace(1,1,6) ...
    ones(1,12) linspace(1,0,6) zeros(1,6)]);
bass_profile = [0 ones(1,18) linspace(1,0,12) ,zeros(1,noterange-31)];
wide_profile = max(treble_profile, bass_profile);
treble_salience = reduced .* repmat(treble_profile(:),1,T);
bass_salience = reduced .* repmat(bass_profile(:),1,T);
wide_salience = reduced .* repmat(wide_profile(:),1,T);
bass_chromag = circshift(chromagen * bass_salience,6);
wide_chromag = circshift(chromagen * wide_salience,6);

% Cap the strongest treble note of every frame at the level of the second
% strongest. Distinct output names are used so the result does not depend
% on how MATLAB resolves a repeated output variable.
[sortedSalience,peakindex] = sort(treble_salience,'descend');
for iFrame = 1:T
    treble_salience(peakindex(1,iFrame),iFrame) = sortedSalience(2,iFrame);
end
treble_chromag = circshift(chromagen * treble_salience,6);

chromagram.treble = treble_chromag;
chromagram.bass = bass_chromag;
chromagram.wide = wide_chromag;

% csvwrite('chr.csv',[ones(25,1) [maxn(circshift(bass_chromag,4));ones(1,size(treble_chromag,2)); maxn(circshift(treble_chromag,4))]]');
% csvwrite('tuned_s.csv',[maxn(reduced)]');