mauch-mirex-2010: _misc/featureextraction/chromagenerator.m annotate

annotate _misc/featureextraction/chromagenerator.m @ 9:4ea6619cb3f5 tip

removed log files

author	matthiasm
date	Fri, 11 Apr 2014 15:55:11 +0100
parents	b5b38998ef3b
children

rev	line source
matthiasm@8	1 function [chroma, t, salience, tuning, URIstring] = chromagenerator(filename, kammerton, nBin)
matthiasm@8	2
matthiasm@8	3 %% make URI string
matthiasm@8	4 URIstring = strcat('chromagenerator-', num2str(kammerton), '-',num2str(nBin));
matthiasm@8	5
matthiasm@8	6 %% parameters
matthiasm@8	7 % set the bins per semitone (i.e. binsperoctave/12)
matthiasm@8	8 % kammerton = 440;
matthiasm@8	9 % nBin = 7;
matthiasm@8	10 midbin = ceil(nBin/2);
matthiasm@8	11
matthiasm@8	12 % minimum and maximum MIDI bin, should be n octaves apart.
matthiasm@8	13 midimin = 21; % 69 has 440 Hz
matthiasm@8	14 % midimin = 45; % 69 has 440 Hz
matthiasm@8	15 midimax = 92;
matthiasm@8	16 % midimax = 68;
matthiasm@8	17 noterange = midimax - midimin + 1;
matthiasm@8	18
matthiasm@8	19 % midi note values including the fractions "between" actual MIDI notes
matthiasm@8	20 midibins = midimin-(midbin-1)/nBin:(1/nBin):midimax+(midbin-1)/nBin;
matthiasm@8	21 % the same in frequencies
matthiasm@8	22 fbins = kammerton * 2.^((midibins-69)/12);
matthiasm@8	23 nFbin = length(fbins);
matthiasm@8	24
matthiasm@8	25 % the sampling frequency everything is (down)sampled to
matthiasm@8	26 used_fs = 11025;
matthiasm@8	27
matthiasm@8	28 % intuitive setting of how long the frame is (quite short here!)
matthiasm@8	29 fracofsecond = 4;
matthiasm@8	30
matthiasm@8	31 hopspersecond = 20;
matthiasm@8	32
matthiasm@8	33 % calculate a convenient FFT length
matthiasm@8	34 nFFT = 2^nextpow2(used_fs/fracofsecond);
matthiasm@8	35 % hopsize in samples
matthiasm@8	36 hopsize = used_fs/hopspersecond/nFFT;
matthiasm@8	37
matthiasm@8	38 % "instrument" means a particular spectral shape
matthiasm@8	39 used_instruments = 1:2;
matthiasm@8	40 nInstrument = length(used_instruments);
matthiasm@8	41
matthiasm@8	42 %% generate dictionary
matthiasm@8	43 % (complex) tones for all fundamental frequencies are generated. then their
matthiasm@8	44 % amplitude spectrum is calculated and normalised according to the L2 norm.
matthiasm@8	45
matthiasm@8	46 % preliminary t for dictionary generation
matthiasm@8	47 t = (1:nFFT)/used_fs;
matthiasm@8	48 % initialise note dictionary
matthiasm@8	49 A = zeros(nFbin,nFFT/2,nInstrument);
matthiasm@8	50
matthiasm@8	51 % loop over all instruments
matthiasm@8	52 for iInstrument = used_instruments
matthiasm@8	53 % loop over all fundamental frequencies
matthiasm@8	54 for iFbin = 1:nFbin;
matthiasm@8	55 f0 = fbins(iFbin);
matthiasm@8	56 switch iInstrument
matthiasm@8	57 case 1
matthiasm@8	58 wave = sum([sin(t2pif01); ...
matthiasm@8	59 0.9^1 * sin(t2pif02); ...
matthiasm@8	60 0.9^2 * sin(t2pif03); ...
matthiasm@8	61 0.9^3 * sin(t2pif04); ...
matthiasm@8	62 ...0.9^4 * sin(t2pif05); ...
matthiasm@8	63 ]);
matthiasm@8	64 case 2
matthiasm@8	65 wave = sin(t2pif01);
matthiasm@8	66 end
matthiasm@8	67 % window wave and fft
matthiasm@8	68 wave = hamming(nFFT)' .* wave;
matthiasm@8	69 fullfft = fft(wave);
matthiasm@8	70 % calculate amplitude spectrum from fft
matthiasm@8	71 A(iFbin,:,iInstrument) = abs(fullfft(1:round(nFFT/2)));
matthiasm@8	72 end
matthiasm@8	73 % normalise (L2 norm)
matthiasm@8	74 A(:,:,iInstrument) = qnormalise(A(:,:,iInstrument),2,2);
matthiasm@8	75 end
matthiasm@8	76
matthiasm@8	77 %% get the fft frames from a wave file
matthiasm@8	78 %this is probably ending up not being used in a vamp plugin
matthiasm@8	79 % fprintf(1,'%s\n',filename)
matthiasm@8	80 % [audiosize,fs] = wavread(filename,'size');
matthiasm@8	81 % chunk_sec = 20;
matthiasm@8	82 % nChunk = ceil(audiosize(1)/fs/chunk_sec);
matthiasm@8	83 % songframes = [];
matthiasm@8	84 % start = 1;
matthiasm@8	85 % % extract fft from chunks (for less memory consumption)
matthiasm@8	86 % for iChunk = 1:nChunk
matthiasm@8	87 % samplemin = (iChunk-1) * chunk_sec * fs + 1;
matthiasm@8	88 % samplemax = min(audiosize(1), (iChunk * chunk_sec + 1) * fs);
matthiasm@8	89 % audiodata0 = wavread(filename,[samplemin samplemax]);
matthiasm@8	90 % audiodata = resample(mean(audiodata0,2),used_fs,fs,20);
matthiasm@8	91 % songframes0 = myframefft(audiodata,nFFT,hopsize,'hamming');
matthiasm@8	92 % if size(songframes0,2)>=start
matthiasm@8	93 % songframes = [songframes(:,1:end-start) abs(songframes0(1:round(nFFT/2),start:end))];
matthiasm@8	94 % end
matthiasm@8	95 % start = round(hopspersecond/2)+1;
matthiasm@8	96 % end
matthiasm@8	97 % clear songframes0
matthiasm@8	98 % clear audiodata
matthiasm@8	99 % clear audiodata0
matthiasm@8	100 % t = (0:T-1)./hopspersecond;
matthiasm@8	101
matthiasm@8	102 %%
matthiasm@8	103 [songframes,f,t]=chunkspectrogram(filename,4096,4096*7/8,4096,inf);
matthiasm@8	104 hopspersecond = 1/(t(2)-t(1));
matthiasm@8	105 songframes = abs(songframes);
matthiasm@8	106 %%
matthiasm@8	107 T = size(songframes,2);
matthiasm@8	108
matthiasm@8	109 fprintf(1, '%d different notes, %d frequency bins, %d time frames\n', noterange,nFFT/2, T)
matthiasm@8	110
matthiasm@8	111 notespectrum0 = zeros(nFbin, T, nInstrument);
matthiasm@8	112 songframes = qnormalise(songframes,2,1);
matthiasm@8	113 songframes(isnan(songframes)) = 0;
matthiasm@8	114
matthiasm@8	115 %% calculate cosine distance
matthiasm@8	116 for iInstrument = 1:nInstrument
matthiasm@8	117 for iTone = 1:nFbin
matthiasm@8	118 notespectrum0(iTone,:,iInstrument) = A(iTone,:,iInstrument) * songframes;
matthiasm@8	119 end
matthiasm@8	120 end
matthiasm@8	121 %% get only the bins with "more than 1 st. deviation"
matthiasm@8	122 notespectrum = zeros(size(notespectrum0));
matthiasm@8	123 wind_size = 6 * nBin;
matthiasm@8	124 wind = hamming(wind_size)/sum(hamming(wind_size));
matthiasm@8	125 stdthresh = 1;
matthiasm@8	126 for iInstrument = used_instruments
matthiasm@8	127 % calculate running mean
matthiasm@8	128 m = conv2(notespectrum0(:,:,iInstrument),wind(:),'same');
matthiasm@8	129 % calculate running standard deviation
matthiasm@8	130 stdev = sqrt(conv2((notespectrum0(:,:,iInstrument)-m).^2,wind(:),'same'));
matthiasm@8	131 % take only those tones that are above 1 standard deviation from the
matthiasm@8	132 % mean
matthiasm@8	133 notespectrum(:,:,iInstrument) = (notespectrum0(:,:,iInstrument)-m)./stdev;
matthiasm@8	134 temp = notespectrum(:,:,iInstrument);
matthiasm@8	135 temp(temp<stdthresh) = 0;
matthiasm@8	136 notespectrum(:,:,iInstrument) = temp;
matthiasm@8	137 end
matthiasm@8	138 % notes that are one were not "good enough" (not greater than 1 st.
matthiasm@8	139 % deviation reater than the mean) and are hence set to 0.
matthiasm@8	140 notespectrum(notespectrum<=stdthresh) = 0;
matthiasm@8	141 % we require that the notes be on in both "instruments". hence the final
matthiasm@8	142 % note spectrum note_s is their product, which works a bit like a minimum
matthiasm@8	143 note_s = notespectrum(:,:,1) .* notespectrum(:,:,2);
matthiasm@8	144
matthiasm@8	145 note_s(1:nBin*6,:) = 0;
matthiasm@8	146 note_s(isnan(note_s)) = 0;
matthiasm@8	147 %% tuning
matthiasm@8	148 % I wrap the note spectrum to one semitone, kind of generating the note
matthiasm@8	149 % spectrum modulo one semitone. one gets a matrix of size nBin x T. from
matthiasm@8	150 % that matrix, the tuning can be calculated by extracting the angle of the
matthiasm@8	151 % sum of all columns, the sum of all columns being a nBin-dimensional
matthiasm@8	152 % vector.
matthiasm@8	153 th = tuner(note_s(36nBin+(1:12nBin),:), nBin)
matthiasm@8	154 % th = tuner(notespectrum0(36nBin+(1:12nBin),:,2), nBin)
matthiasm@8	155 tuning = kammerton*2^((th/2/pi)/12)
matthiasm@8	156 % tune the spectrum by interpolating the bins such that now the middle bin
matthiasm@8	157 % of every semitone corresponds to the right frequency in equal
matthiasm@8	158 % temperament
matthiasm@8	159 tuned_s = zeros(size(note_s));
matthiasm@8	160 for iFrame = 1:T
matthiasm@8	161 tuned_s(:,iFrame) = interp1(1:nFbin, note_s(:,iFrame), (1:nFbin) + th / (2 * pi) * nBin,[],0);
matthiasm@8	162 end
matthiasm@8	163
matthiasm@8	164 %% reduce to 1 bin per semitone
matthiasm@8	165 % just sum the nBin bins that correspond to one semitone.
matthiasm@8	166 wrapped_tuned_s = reshape(tuned_s,[nBin,noterange*T]);
matthiasm@8	167 % if you want to emphasize the middle bin, use a fancy window, otherwise
matthiasm@8	168 % just use the rectangular window
matthiasm@8	169 wei = rectwin(nBin)';
matthiasm@8	170 salience.reduced = reshape(wei * wrapped_tuned_s,noterange,T);
matthiasm@8	171 % median filter in time direction, if you like
matthiasm@8	172 salience.reduced = medfilt1(salience.reduced,floor(hopspersecond/5)-1,[],2);
matthiasm@8	173
matthiasm@8	174 %% make chromagram
matthiasm@8	175 % actually the edge of the note spectrum is a bit noisy (due to the
matthiasm@8	176 % convolution three parts above. that's why I start some bins from the
matthiasm@8	177 % lower edge (i.e. midimin_used). maybe one could do this properly
matthiasm@8	178 % sometime.
matthiasm@8	179 midimin_used = midimin+6;
matthiasm@8	180 % uebergang is where bass and treble chroma overlap equally
matthiasm@8	181 uebergang = 50;
matthiasm@8	182
matthiasm@8	183 % some simple linear functions to generate the treble and bass profiles
matthiasm@8	184 leftfunc = zeros(2,noterange);
matthiasm@8	185 rightfunc = zeros(2,noterange);
matthiasm@8	186 leftfunc(1,:) = 2/12*((midimin:midimax)-midimin_used);
matthiasm@8	187 leftfunc(2,:) = 1/12*((midimin:midimax)-uebergang) + 0.5;
matthiasm@8	188 rightfunc(1,:) = -1/12*((midimin:midimax)-uebergang) + 0.5;
matthiasm@8	189 rightfunc(2,:) = -(.5)/12*((midimin:midimax)-midimax);
matthiasm@8	190
matthiasm@8	191 % initialise stuff
matthiasm@8	192 salience.bass = zeros(size(salience.reduced));
matthiasm@8	193 salience.treble = zeros(size(salience.reduced));
matthiasm@8	194 profiles = zeros(noterange,3);
matthiasm@8	195
matthiasm@8	196 % bass and treble profiles
matthiasm@8	197 for i = 1:2
matthiasm@8	198 profiles(:,i) = max(0,min(1,min(leftfunc(i,:),rightfunc(i,:))))';
matthiasm@8	199 end
matthiasm@8	200 % wide profile (i.e. both bass and treble)
matthiasm@8	201 profiles(:,3) = min(1,sum(profiles(:,1:2),2));
matthiasm@8	202
matthiasm@8	203 % calculate specific saliences (windowed in f0 direction)
matthiasm@8	204 salience.bass = salience.reduced .* profiles(:,ones(1,T));
matthiasm@8	205 salience.treble = salience.reduced .* profiles(:,2 * ones(1,T));
matthiasm@8	206 salience.wide = salience.reduced .* profiles(:,3 * ones(1,T));
matthiasm@8	207
matthiasm@8	208 % calculate chromagrams
matthiasm@8	209 chroma.bass = squeeze(max(reshape(salience.bass,[12,6,T]),[],2));
matthiasm@8	210 chroma.treble = squeeze(max(reshape(salience.treble,[12,6,T]),[],2));
matthiasm@8	211 chroma.wide = squeeze(max(reshape(salience.wide,[12,6,T]),[],2));
matthiasm@8	212

Mercurial > hg > mauch-mirex-2010

annotate _misc/featureextraction/chromagenerator.m @ 9:4ea6619cb3f5 tip