camir-aes2014: core/magnatagatune/MTTAudioFeatureHMM.m annotate

annotate core/magnatagatune/MTTAudioFeatureHMM.m @ 0:e9a9cd732c1e tip

first hg version after svn

author	wolffd
date	Tue, 10 Feb 2015 15:05:51 +0000
parents
children

rev	line source
classdef MTTAudioFeatureHMM < MTTAudioFeature & handle
    % ---
    % The MTTAudioFeatureHMM class is meant to describe a clip by a
    % hidden Markov model (HMM) fitted to its chroma sequence.
    %
    % STATUS: work in progress. The HMM training step in extract() is
    % still a TODO, so data.mu and data.transmat1 are empty placeholders.
    % finalise(), visualise() and the hidden helpers read parameters
    % (ntimbres, nchromas, nrhythms, nints, energy_sr, ...) and data
    % fields (chroma, timbre, rhythm) that are NOT part of this class'
    % default my_params / extract() output.
    % NOTE(review): these methods look carried over from
    % MTTAudioFeatureBasicSm - confirm before relying on them.
    %
    % The usual workflow for these features consists of three steps
    % 1. extract: extracts the basic single-file dependent features
    % 2. define_global_transform: calculates the global feature
    %       transformation parameters
    % 3. finalise: applies the common transformations to a specific feature
    % ---

    properties(Constant = true)

        % svn hook: numeric revision parsed from the svn keyword string
        my_revision = str2double(substr('$Rev: 2332 $', 5, -1));
    end

    properties
        % ---
        % Set default parameters
        % ---
        my_params = struct(...
            'nstates', 4 ... % predefined number of states
            );
    end

    % ---
    % member functions
    % ---
    methods

        % ---
        % constructor: pointer to feature in database
        % all arguments are forwarded to the MTTAudioFeature constructor
        % ---
        function feature = MTTAudioFeatureHMM(varargin)

            feature = feature@MTTAudioFeature(varargin{:});

        end

        % ---
        % extract feature data from raw audio features
        % ---
        function data = extract(feature, clip)
            % data = extract(feature, clip)
            %
            % clip: CASIMIRClip (its child clip is used), MTTClip or
            %       MSDClip
            %
            % data: struct with the fields
            %   mu, transmat1 - HMM parameters (empty until the training
            %                   step is implemented)
            %   final         - vector / vector_info / dim, filled by
            %                   finalise()
            %   info          - type, owner, owner_id, creatorrev, params

            % ---
            % get casimir child clip if available
            % ---
            if isa(clip, 'CASIMIRClip')
                baseclip = clip.child_clip();
            else
                baseclip = clip;
            end

            if isa(baseclip, 'MTTClip')
                rawf = baseclip.audio_features_raw();
            elseif isa(baseclip, 'MSDClip')
                rawf = baseclip.features('MSDAudioFeatureRAW');
            else
                % fail with a clear message instead of an
                % "undefined variable rawf" error further down
                error('MTTAudioFeatureHMM:extract:unsupportedClip', ...
                    'Cannot get raw audio features for clip of class %s', ...
                    class(baseclip));
            end

            % ---
            % now extract the features
            % first step: chroma clustering
            % ---

            % segment durations, normalised by the clip duration
            weights = [rawf.data.segments_duration];
            weights = weights / rawf.data.duration; %#ok<NASGU> input for the HMM training TODO

            % get the chroma features (transposed concatenation of the
            % per-segment pitch vectors)
            chroma = [rawf.data.segments_pitches]'; %#ok<NASGU> input for the HMM training TODO

            % ---
            % TODO: train hmm
            % until then the model parameters are empty placeholders
            % ---
            mu1 = [];
            transmat1 = [];

            % save hmm into data variable
            data.mu = mu1;
            data.transmat1 = transmat1;

            % prepare field for final features
            data.final.vector = [];
            data.final.vector_info = struct();
            data.final.dim = 0;

            % save info data
            data.info.type = 'MTTAudioFeatureHMM';
            data.info.owner = clip;
            data.info.owner_id = clip.id;
            data.info.creatorrev = feature.my_revision;

            % save parameters
            data.info.params = feature.my_params;
        end

        function define_global_transform(features)
            % calculate and set normalization factors from the group of
            % input features. These features will be set for the full database
            %
            % currently a no-op for this feature type

        end


        function finalise(feature)
            % applies a final transformation and
            % collects the information of this feature within a single vector
            % see info for types in specific dimensions
            %
            % feature: array of features; each element gets
            %   data.final.vector, data.final.dim and
            %   data.final.vector_info.labels set
            %
            % NOTE(review): reads my_params.ntimbres / nchromas / nrhythms
            % etc., which are not in this class' default my_params.

            for i = 1:numel(feature)

                % check for necessary parameters
                if isempty(feature(i).my_db.commondb)

                    error('Define the global transformation first')
                end

                if feature(i).my_params.ntimbres > 0
                    % ---
                    % normalise features
                    % ---
                    % norm timbre features if necessary
                    timbren = [];
                    if feature(i).my_params.norm_timbres
                        for j = 1:numel(feature(i).data.timbre)

                            % NOTE(review): uses the BasicSm class' static
                            % method although this class defines its own
                            % norm_timbre - confirm which one is intended
                            timbren = cat(1, timbren, ...
                                MTTAudioFeatureBasicSm.norm_timbre( ...
                                feature(i).data.timbre(j).means, ...
                                feature(i).my_db.commondb.post_normf.timbre));
                        end
                    else

                        timbren = cat(1, timbren, feature(i).data.timbre(:).means);
                    end
                end

                % ---
                % construct resulting feature vector out of features
                % info{k} labels the block that starts at vec(k)
                % ---
                vec = [];
                info = {};
                if feature(i).my_params.nchromas > 0

                    info{numel(vec)+ 1} = 'chroma';
                    vec = cat(1, vec, feature(i).data.chroma(:).means);

                    info{numel(vec)+ 1} = 'chroma weights';
                    vec = cat(1, vec, [feature(i).data.chroma(:).means_weight]');

                    % ---
                    % NORMALISE Chroma variance
                    % ---
                    if feature(i).my_params.chroma_var >= 1

                        info{numel(vec)+ 1} = 'chroma variance';

                        % normalise this pack of variance vectors
                        % NOTE(review): reads feature(i).common here but
                        % feature(i).my_db.commondb above - confirm both
                        % refer to the same settings
                        tmp_var = mapminmax('apply', [feature(i).data.chroma(:).vars],...
                            feature(i).common.post_normf.chroma_var);

                        % concatenate normalised data to vector
                        for vari = 1:size(tmp_var,2)

                            vec = cat(1, vec, tmp_var(:, vari));
                        end
                    end
                end


                if feature(i).my_params.ntimbres > 0

                    info{numel(vec)+ 1} = 'timbre';
                    vec = cat(1, vec, timbren);

                    info{numel(vec)+ 1} = 'timbre weights';
                    vec = cat(1, vec, [feature(i).data.timbre(:).means_weight]');

                    % ---
                    % NORMALISE timbre variance
                    % ---
                    if feature(i).my_params.timbre_var >= 1

                        info{numel(vec)+ 1} = 'timbre variance';

                        % normalise this pack of variance vectors
                        tmp_var = mapminmax('apply', [feature(i).data.timbre(:).vars],...
                            feature(i).common.post_normf.timbre_var);

                        % concatenate normalised data to vector
                        for vari = 1:size(tmp_var,2)

                            vec = cat(1, vec, tmp_var(:, vari));
                        end
                    end
                end

                if feature(i).my_params.nrhythms > 0

                    info{numel(vec)+ 1} = 'rhythm 8';
                    vec = cat(1, vec, feature(i).data.rhythm.acorr8);

                    info{numel(vec)+ 1} = 'int 8';
                    vec = cat(1, vec, feature(i).data.rhythm.interval8);

                    if feature(i).my_params.nrhythms >= 2

                        info{numel(vec)+ 1} = 'rhythm 16';
                        vec = cat(1, vec, feature(i).data.rhythm.acorr16);

                        info{numel(vec)+ 1} = 'int 16';
                        vec = cat(1, vec, feature(i).data.rhythm.interval16);
                    end
                end

                feature(i).data.final.vector = vec;
                feature(i).data.final.dim = numel(feature(i).data.final.vector);

                % fill up info struct and append to feature:
                % pad the label list with empty cells up to the vector length
                info(end+1: feature(i).data.final.dim) = ...
                    cell(feature(i).data.final.dim - numel(info),1);

                feature(i).data.final.vector_info.labels = info;
            end

            % ---
            % TODO: Maybe delete more basic features again at this point?
            % ---
        end

        % ---
        % destructor: do we really want to remove this
        % from the database? No, but
        % TODO: create marker for unused objects in db, and a cleanup
        %  function
        % ---
        function delete(feature)

        end


        function visualise(feature)
            % ---
            % plots the different data types collected in this feature
            % on top of the raw feature plots of the owning clip
            % ---
            for i = 1:numel(feature)
                clip = feature(i).data.info.owner;

                % display raw features
                if isa(clip, 'CASIMIRClip')
                    baseclip = clip.child_clip();
                else
                    baseclip = clip;
                end

                if isa(baseclip, 'MTTClip')
                    rawf = baseclip.audio_features_raw();
                elseif isa(baseclip, 'MSDClip')
                    rawf = baseclip.features('MSDAudioFeatureRAW');
                else
                    % fail with a clear message instead of an
                    % "undefined variable rawf" error further down
                    error('MTTAudioFeatureHMM:visualise:unsupportedClip', ...
                        'Cannot get raw audio features for clip of class %s', ...
                        class(baseclip));
                end

                % ---
                % @todo: implement MSD feature visualisation
                % ---
                % a1, a2, a3 are used below as the chroma, timbre and
                % rhythm axes respectively
                [a1, a2, a3] = rawf.visualise();

                % ---
                % Display chroma features
                % ---
                if isfield(feature(i).data, 'chroma')

                    chroma_labels = {'c', 'c#', 'd','d#', 'e', 'f','f#', 'g','g#', 'a', 'a#', 'h'};
                    mode_labels = {'minor', 'major'};

                    % change labels to reflect detected mode
                    % (key and mode are zero-based indices into the labels)
                    chroma_labels{rawf.data.key + 1} = ...
                        sprintf('(%s) %s',mode_labels{rawf.data.mode + 1}, chroma_labels{rawf.data.key + 1});

                    % transpose labels and data
                    chroma_labels = circshift(chroma_labels, [0, feature(i).data.chroma(1).shift]);
                    chromar = circshift([rawf.data.segments_pitches], [feature(i).data.chroma(1).shift, 0]);

                    % image transposed chromas again
                    segments = [rawf.data.segments_start];
                    segments(end) = rawf.data.duration;

                    hold(a1);
                    uimagesc(segments, 0:11, chromar, 'Parent', a1);
                    set(a1,'YTick',0:11, 'YTickLabel', chroma_labels);

                    % enlarge plot and plot new data after the old ones
                    ax = axis(a1);
                    ax(2) = ax(2) + 2*feature(i).my_params.nchromas + 0.5;
                    axis(a1, 'xy');
                    axis(a1, ax);

                    imagesc(rawf.data.duration + (1:feature(i).my_params.nchromas), (-1:11), ...
                        [ feature(i).data.chroma(:).means_weight; feature(i).data.chroma(:).means],...
                        'Parent', a1);

                    % variance calculated?
                    if isfield(feature(i).data.chroma, 'vars')

                        imagesc(rawf.data.duration + feature(i).my_params.nchromas + (1:feature(i).my_params.nchromas), (-1:11), ...
                            [feature(i).data.chroma(:).vars],...
                            'Parent', a1);
                    end
                end

                % ---
                % Display timbre features
                % ---
                if isfield(feature(i).data, 'timbre')

                    % enlarge plot and plot new data after the old ones
                    hold(a2);
                    ax = axis(a2);
                    ax(2) = ax(2) + 2*feature(i).my_params.ntimbres + 0.5;

                    axis(a2, ax);
                    imagesc(rawf.data.duration + (1:feature(i).my_params.ntimbres), (-1:11), ...
                        [ feature(i).data.timbre(:).means_weight; feature(i).data.timbre(:).means],...
                        'Parent', a2);

                    if isfield(feature(i).data.timbre, 'vars')

                        % timbre variances belong to the timbre axes (a2),
                        % next to the timbre means drawn above
                        imagesc(rawf.data.duration + feature(i).my_params.ntimbres + (1:feature(i).my_params.ntimbres), (-1:11), ...
                            [feature(i).data.timbre(:).vars],...
                            'Parent', a2);
                    end
                end

                % ---
                % Display rhythm features
                % ---
                if isfield(feature(i).data, 'rhythm')
                    % data.rhythm.interval
                    % get timecode
                    eightt = feature(i).data.rhythm.energy8_time;
                    sixt = feature(i).data.rhythm.energy16_time;

                    hold(a3);
                    % plot sixteens acorr and energy
                    % NOTE(review): these plot calls carry no 'Parent'
                    % and therefore draw into the current axes - confirm
                    % that this is a3 at this point
                    plot(sixt, feature(i).data.rhythm.energy16, 'bx')

                    plot(sixt, feature(i).data.rhythm.acorr16, 'b')

                    % plot eights acorr and energy
                    plot(eightt, feature(i).data.rhythm.energy8, 'rx')

                    plot(eightt, feature(i).data.rhythm.acorr8, 'r')

                    % broaden view by fixed 4 seconds
                    ax = axis(a3);
                    axis(a3, [max(0, eightt(1)-( eightt(end) - eightt(1) + 4 )) ...
                        min(rawf.data.duration, eightt(end) +4) ...
                        ax(3:4)]);
                end
            end
        end
    end


    methods (Hidden = true)

        function [env, time] = energy_envelope(feature, clip)
            % [env, time] = energy_envelope(feature, clip)
            %
            % extracts the envelope of energy for the given clip
            %
            % env:  envelope data as returned by mirgetdata
            % time: time axis taken from the envelope's 'Time' property

            % ---
            % TODO: externalise envelope etc in external audio features
            % ---

            % evalc captures the console output of the MIR calls; the
            % expression strings are evaluated in this workspace, so the
            % names clip, src and feature must stay as they are
            [null, src] = evalc('miraudio(clip.mp3file_full())'); %#ok<ASGLU>
            [null, env] = evalc('mirenvelope(src, ''Sampling'', feature.my_params.energy_sr)'); %#ok<ASGLU>

            time = get(env,'Time');
            time = time{1}{1};
            env = mirgetdata(env);
        end

        function [acorr, base_sig, base_t] = beat_histogram(feature, startt, interval, signal, signal_t)
            % acorr = beat_histogram(feature, startt, interval, signal, time)
            %
            % compute correlation for beats of specified length in energy curve
            %
            % startt:   time of the first sampling point
            % interval: spacing of the sampling points
            % signal:   energy curve
            % signal_t: time stamps of signal; the spacing of the first
            %           two stamps is taken as the sampling step
            %
            % acorr:    squared autocorrelation, first
            %           my_params.nints lags
            % base_sig: signal values picked at base_t
            % base_t:   sampling times

            % get corresponding energy values
            dt = signal_t(2) - signal_t(1);

            % 2*nints sampling points, so that the first half can be
            % correlated against the whole sequence below
            % NOTE(review): the archived listing of this line had lost
            % both '*' operators ("nints2-1) interval") - confirm against
            % MTTAudioFeatureBasicSm
            base_t = startt:interval:(startt + (feature.my_params.nints*2-1) * interval);

            % nearest sample index for every sampling time, clamped to
            % the valid index range of signal
            base_sig = signal( min( numel(signal), max(1,round((base_t - signal_t(1))/dt))));

            % normalise energy
            acbase_sig = base_sig./max(base_sig);

            % calculate their cyclic autocorrelation
            acorr = circshift(xcorr(acbase_sig,acbase_sig(1:end/2)),...
                [numel(acbase_sig) 0]);

            % cut acorr to relevant points, normalise and square
            acorr = (acorr(1:feature.my_params.nints)./feature.my_params.nints).^2;

            % ---
            % NOTE: we normalise the autocorrelation locally, to compare the
            % (rhythmic) shape
            % ---
            if feature.my_params.norm_acorr

                acorr = acorr - min(acorr);
                acorr = acorr/max(acorr);
            end
        end
    end

    methods(Static)

        function timbre = norm_timbre(in, normfs)
            % timbre = norm_timbre(in, normfs)
            %
            % returns normed timbre data
            %
            % in:     timbre data, one vector per column
            % normfs: scaling factors applied element-wise to every
            %         column of in
            %
            % timbre: scaled data mapped by (1 + x)/2 and clipped to [0,1]

            % ---
            % individually scale the data using
            % the dimensions factors
            % ---
            timbre = zeros(size(in));
            for i = 1:size(in,2)

                timbre(:,i) = normfs .* in(:,i);
            end

            % shift to positive values
            timbre = (1 + timbre) /2;

            % clip features to [0,1]
            timbre = min(1, max(timbre, 0));
        end

        % ---
        % returns parameter md5 hash for comparison
        % NOTE(review): no such method is defined in this class
        % ---
    end

end

Mercurial > hg > camir-aes2014

annotate core/magnatagatune/MTTAudioFeatureHMM.m @ 0:e9a9cd732c1e tip