camir-aes2014: core/magnatagatune/MTTAudioFeatureBasicSm.m annotate

annotate core/magnatagatune/MTTAudioFeatureBasicSm.m @ 0:e9a9cd732c1e tip

first hg version after svn

author	wolffd
date	Tue, 10 Feb 2015 15:05:51 +0000
parents
children

rev	line source
wolffd@0	1 classdef MTTAudioFeatureBasicSm < MTTAudioFeature & handle
wolffd@0	2 % ---
wolffd@0	3 % the MTTAudioFeatureBasicSm Class contains
wolffd@0	4 % a basic summary of chroma, mfcc and tempo features
wolffd@0	5 % a few common chroma and mfcc vectors are concatenated
wolffd@0	6 % along with some clip-wide variance
wolffd@0	7 % a metric / rhythm fingerprint is added
wolffd@0	8 %
wolffd@0	9 % The usual workflow for these features consists of three steps
wolffd@0	10 % 1. extract: extracts the basic single-file dependent features
wolffd@0	11 % 2. define_global_transform: calculates the global feature
wolffd@0	12 % transformation parameters
wolffd@0	13 % 3. finalise: applies the common transformations to a specific feature
wolffd@0	14 % ---
wolffd@0	15
properties(Constant = true)

    % Revision marker of this feature class, parsed from the
    % version-control keyword string '$Rev$' with the project helper
    % substr. extract() stores it as data.info.creatorrev.
    % NOTE(review): the keyword is unexpanded in this revision, so the
    % parsed value is presumably NaN -- confirm what substr returns here.
    my_revision = str2double(substr('$Rev$', 5, -1));
end
wolffd@0	21
properties
    % ---
    % Default parameters of the feature. A copy is stored with every
    % extracted data set (data.info.params).
    % ---
    my_params = struct(...
        'nchromas', 4, ... % number of chroma cluster centres to extract
        'chroma_var', 0, ... % >= 1: also compute the weighted variance per chroma cluster
        'norm_chromas', 0, ... % not implemented, chromas already rel.
        'min_kshift_chromas', 0.1, ... % key-confidence threshold for the key shift. set to 1 for no shift (0-1)
        ...
        'ntimbres', 4, ... % number of timbre (mfcc) cluster centres to extract
        'timbre_var', 0, ... % >= 1: also compute the weighted variance per timbre cluster
        'norm_timbres', 1, ... % apply the global timbre normalisation in finalise()
        'clip_timbres', 0.85, ... % percentile of data which has to be inside 0-1 bounds
        ...
        'norm_weights',0, ... % globally norm weights for chroma times? (not read in this file)
        'norm_interval',1, ... % NOTE(review): read in extract(), but both branches are identical there
        'max_iter',100, ... % max iterations handed to weighted_kmeans for chroma and timbre
        ...
        'nrhythms', 0, ... % 0: no rhythm features, >= 1: eighths, >= 2: eighths and sixteenths
        'nints', 11, ... % number of intervals kept from the autocorrelation
        'energy_sr', 1000, ... % sample rate for energy curve
        'norm_acorr', 1 ... % normalise the autocorrelation locally -> shape matters; energy is normalised anyway
        );
end
wolffd@0	47
wolffd@0	48 % ---
wolffd@0	49 % member functions
wolffd@0	50 % ---
wolffd@0	51 methods
wolffd@0	52
wolffd@0	53 % ---
wolffd@0	54 % constructor: pointer to feature in database
wolffd@0	55 % ---
function feature = MTTAudioFeatureBasicSm(varargin)
    % Constructor. All arguments are forwarded unchanged to the
    % MTTAudioFeature superclass constructor; this class adds no
    % construction logic of its own.

    feature = feature@MTTAudioFeature(varargin{:});

end
wolffd@0	61 % ---
wolffd@0	62 % extract feature data from raw audio features
wolffd@0	63 % ---
function data = extract(feature, clip)
    % ---
    % DATA = EXTRACT(FEATURE, CLIP)
    % get Basic Summary audio features. this includes possible
    % local normalisations
    %
    % feature: the feature object; only feature.my_params and
    %          feature.my_revision are read here
    % clip:    a CASIMIRClip (its child clip is used), MTTClip or
    %          MSDClip providing the raw audio features
    %
    % data:    struct with the fields
    %          .chroma(i) / .timbre(i): means, means_weight, vars
    %                                   (chroma additionally: shift)
    %          .rhythm:   only written when my_params.nrhythms >= 1
    %          .final:    empty placeholder, filled by finalise()
    %          .info:     type, owner, owner_id, creatorrev, params
    %
    % Raises an error if the clip class is not supported or if the
    % clustering does not return the requested number of centres.
    % ---

    % declared as in the original; not referenced in this method
    global globalvars;

    % ---
    % get casimir child clip if available
    % ---
    if isa(clip, 'CASIMIRClip')
        baseclip = clip.child_clip();
    else
        baseclip = clip;
    end
    if isa(baseclip, 'MTTClip')
        rawf = baseclip.audio_features_raw();
    elseif isa(baseclip, 'MSDClip')
        rawf = baseclip.features('MSDAudioFeatureRAW');
    else
        % previously this fell through and failed later with an
        % "undefined variable rawf" error
        error('MTTAudioFeatureBasicSm:extract:unsupportedClip', ...
            'cannot get raw audio features for a clip of class %s', ...
            class(baseclip));
    end

    % ---
    % now extract the features
    % first step: chroma clustering
    % ---
    weights = [rawf.data.segments_duration];

    % normalise the segment durations by the clip duration
    weights = weights / rawf.data.duration;

    % one row per segment
    chroma = [rawf.data.segments_pitches]';

    % ---
    % get most present chroma vectors.
    % the weighted k-means should return the most prominent
    % chroma vectors and their weight
    % ---
    % options vector handed to weighted_kmeans
    % NOTE(review): op(1) / op(14) are presumably the display level
    % and the iteration limit of the foptions convention -- confirm
    op = foptions();
    op(1) = 0;
    op(14) = feature.my_params.max_iter;

    % check for trivial case
    if feature.my_params.nchromas == 0

        chromas = [];
        cwght = [];

    elseif feature.my_params.nchromas == 1

        % a single centre: plain mean and variance over all segments
        chromas = mean(chroma, 1);
        chroma_var = var(chroma, 0, 1);
        cwght = 1;

    elseif numel(weights) > feature.my_params.nchromas

        % ---
        % there may be few chromas, try kmeans several (20) times
        % ---
        cont = 0;
        cwght = [];
        while (numel(cwght) ~= feature.my_params.nchromas) && (cont < 20)

            [chromas, cwght, post] = ...
                weighted_kmeans(feature.my_params.nchromas, chroma, weights, op);

            cont = cont + 1;
        end

        if (numel(cwght) ~= feature.my_params.nchromas)

            error('cannot find enough chroma centres');
        end

        % ---
        % Calculate the weighted variance of the chroma clusters
        % ---
        if feature.my_params.chroma_var >= 1

            chroma_var = zeros(size(chromas));
            for i = 1:size(chroma_var,1)

                % squared distance of the cluster members from the centroid;
                % post(:,i) is used as the membership index of cluster i
                tmp_var = (chroma(post(:,i),:) - repmat(chromas(i,:), sum(post(:,i)),1)).^2;

                % add up the weighted differences and normalise by sum
                % of weights
                chroma_var(i,:) = (weights(post(:,i)) * tmp_var) ./...
                    (sum(weights(post(:,i))));
            end
        end
    else
        % ---
        % odd case: not more than nchromas data points.
        % every vector is its own centre; we repeat the mean vector
        % at the end with weight zero
        % ---
        chromas = [chroma; repmat(mean(chroma, 1),...
            feature.my_params.nchromas - numel(weights), 1 )];

        cwght = weights;
        cwght( end + 1:feature.my_params.nchromas ) = 0;

        % ---
        % variance for the odd case:
        % every vector is a cluster => zero variance (set below)
        % ---
    end

    % trivial case: no variance requested or computed
    if ~exist('chroma_var','var')
        chroma_var = zeros(size(chromas));
    end

    % sort by associated time
    [cwght, idx] = sort(cwght, 'descend');
    chromas = chromas(idx,:);
    chroma_var = chroma_var(idx,:);

    % ---
    % shift according to detected key, but only if
    % the confidence is high enough
    % ---
    shift = 0;
    if rawf.data.keyConfidence > feature.my_params.min_kshift_chromas

        shift = -rawf.data.key;
        chromas = circshift(chromas, [0 shift]);
        chroma_var = circshift(chroma_var, [0 shift]);
    end

    % ---
    % get mfcc centres:
    % the same for mfccs
    % ---
    mfcc = [rawf.data.segments_timbre]';
    if feature.my_params.ntimbres == 0

        mfccs = [];
        mwght = [];

    elseif feature.my_params.ntimbres == 1

        mfccs = mean(mfcc, 1);
        % variance over the raw segment data, as in the chroma case.
        % (the variance of the single mean row would always be zero)
        timbre_var = var(mfcc, 0, 1);
        mwght = 1;

    elseif numel(weights) > feature.my_params.ntimbres

        % ---
        % there may be few mfccs, try kmeans several times
        % ---
        cont = 0;
        mwght = [];
        while (numel(mwght) ~= feature.my_params.ntimbres) && (cont < 20)

            [mfccs, mwght, post] = ...
                weighted_kmeans(feature.my_params.ntimbres, mfcc, weights, op);
            cont = cont + 1;
        end

        if (numel(mwght) ~= feature.my_params.ntimbres)

            error('cannot find enough mfcc centres');
        end

        % ---
        % Calculate the weighted variance of the timbre clusters
        % ---
        if feature.my_params.timbre_var >= 1

            timbre_var = zeros(size(mfccs));
            for i = 1:size(timbre_var,1)

                % squared distance of the cluster members from the centroid
                tmp_var = (mfcc(post(:,i),:) - repmat(mfccs(i,:), sum(post(:,i)),1)).^2;

                % add up the weighted differences and normalise by sum
                % of weights
                timbre_var(i,:) = (weights(post(:,i)) * tmp_var) ./...
                    (sum(weights(post(:,i))));
            end
        end

    else
        % ---
        % odd case: not more than ntimbres data points.
        % we repeat the mean vector at the end
        % ---
        mfccs = [mfcc; repmat(mean(mfcc, 1),...
            feature.my_params.ntimbres - numel(weights), 1)];
        mwght = weights;
        mwght( end + 1:feature.my_params.ntimbres) = 0;
    end

    % trivial case: no variance requested or computed
    if ~exist('timbre_var','var')
        timbre_var = zeros(size(mfccs));
    end

    % sort by associated time
    [mwght, idx] = sort(mwght, 'descend');
    mfccs = mfccs(idx,:);
    timbre_var = timbre_var(idx,:);

    % ---
    % get beat features:
    % the autocorrelation curve over n quarters of length
    %
    % alternative: how about using the n=8 quarters relative
    % volumes from the start of a sure measure?
    % ---
    if feature.my_params.nrhythms >= 1
        bars = rawf.data.bars;
        beats = rawf.data.beats;
        tatums = rawf.data.tatums;
        % ---
        % NOTE: the beat and tatum markers seem to have an offset :(
        % ---
        offset = 0.118; %seconds

        [envelope, time] = energy_envelope(feature, clip);

        % we offset the energy curve
        time = time + offset;

        % ---
        % we try to start at the best beat confidence more
        % than nints eighths from the end
        % ---

        if rawf.data.tempo > 0

            % length of an eighth note in seconds
            eightl = 30 / rawf.data.tempo;
        else
            % ---
            % odd case: no rhythm data. assume 100 bpm
            % ---

            eightl = 0.3;
        end

        if isempty(beats)
            % ---
            % odd case: no beats detected. -> use best tatum
            % ---
            if ~isempty(tatums)

                beats = tatums;
            else

                % ok, just take the beginning
                beats = [0; 1];
            end
        end

        % row 1 of beats is compared against times, row 2 is maximised
        % NOTE(review): presumably row 1 = time, row 2 = confidence.
        % last_valid is empty when no beat leaves enough room before
        % the end of the clip; that case is not handled here.
        last_valid = find(beats(1,:) < ...
            (rawf.data.duration - feature.my_params.nints * eightl),1, 'last');

        % find the best valid beat position
        [~, max_measure] = max( beats(2, 1:last_valid));
        max_mtime = beats(1,max_measure);

        % ---
        % the correlation is calculated for the estimated eighths length
        % and for the 16th intervals, respectively.
        % ---

        % calculate the EIGHTHS correlation for the following segment
        [acorr8, eight_en, eightt] = ...
            beat_histogram(feature, max_mtime, eightl, envelope, time);

        % calculate the SIXTEENTHS correlation for the following segment
        [acorr16, six_en, sixt] = ...
            beat_histogram(feature, max_mtime, eightl / 2, envelope, time);

        % ---
        % save the various features
        % ---
        % save rhythm feature data

        data.rhythm.acorr8 = acorr8;
        data.rhythm.acorr8_lag = eightt(1:end/2)-eightt(1);

        data.rhythm.energy8 = eight_en(1:end/2);
        data.rhythm.energy8_time = eightt(1:end/2);

        % ---
        % NOTE(review): the original branched on my_params.norm_interval
        % here ("1 second max value"), but both branches stored the
        % same value, so the flag has no effect. The stored value is
        % kept unchanged.
        % ---
        data.rhythm.interval8 = eightl / 2;

        if feature.my_params.nrhythms >= 2

            data.rhythm.acorr16 = acorr16;
            data.rhythm.acorr16_lag = data.rhythm.acorr8_lag / 2;

            data.rhythm.energy16 = six_en(1:end/2);
            data.rhythm.energy16_time = sixt(1:end/2);

            % save beat interval / tempo
            % (same remark on norm_interval as for interval8)
            data.rhythm.interval16 = eightl / 2;

        end
    else

        % % save empty rhythm struct
        % data.rhythm = struct([]);
    end

    % chroma feature data
    for i = 1:size(chromas,1)
        data.chroma(i).means = chromas(i,:)';
        data.chroma(i).means_weight = cwght(i);
        data.chroma(i).vars = chroma_var(i,:)';
        data.chroma(i).shift = shift;
    end

    % mfcc feature data
    for i = 1:size(mfccs,1)
        data.timbre(i).means = mfccs(i,:)';
        data.timbre(i).means_weight = mwght(i);
        data.timbre(i).vars = timbre_var(i,:)';
    end

    % prepare field for final features
    data.final.vector = [];
    data.final.vector_info = struct();
    data.final.dim = 0;

    % save info data
    data.info.type = 'MTTAudioFeatureBasicSm';
    data.info.owner = clip;
    data.info.owner_id = clip.id;
    data.info.creatorrev = feature.my_revision;

    % save parameters
    data.info.params = feature.my_params;
end
wolffd@0	419
function define_global_transform(features)
    % calculate and set normalization factors from the group of
    % input features. These factors are stored for the full database
    % through features(1).my_db.set_common.
    %
    % features: array of features (more than one element); the
    %           parameters of features(1) decide what is computed
    %
    % Stored struct (field post_normf):
    %   .chroma_var  mapminmax settings, if my_params.chroma_var >= 1
    %   .timbre_var  mapminmax settings, if my_params.timbre_var >= 1
    %   .timbre      per-row scaling factors, if my_params.ntimbres > 0
    % If none of these is computed, the placeholder 1 is stored.

    if numel(features) == 1
        error ('Insert feature array for this method');
    end

    % ---
    % here, we only need to define the post-normalisation
    % ---

    % ---
    % get chroma variance data NORMALISATION Factors
    % ---
    if features(1).my_params.chroma_var >= 1
        allfeat = abs(cat(2, features(1).data.chroma(:).vars));
        for i = 2:numel(features)

            allfeat = cat(2, allfeat, abs(cat(2, features(i).data.chroma(:).vars)));
        end
        [~, common.post_normf.chroma_var] = mapminmax(allfeat,0,1);
    end

    % ---
    % get timbre variance data NORMALISATION Factors
    % ---
    if features(1).my_params.timbre_var >= 1
        allfeat = abs(cat(2, features(1).data.timbre(:).vars));
        for i = 2:numel(features)

            allfeat = cat(2, allfeat, abs(cat(2, features(i).data.timbre(:).vars)));
        end
        [~, common.post_normf.timbre_var] = mapminmax(allfeat,0,1);
    end

    % ---
    % derive normalisation for timbre features:
    % MFCC's are actually special filter outputs
    % (see developer.echonest.com/docs/v4/_static/AnalyzeDocumentation_2.2.pdf
    % they are unbounded, so just the relative information will be
    % used here.
    % We normalise each bin (row) independently
    % ---
    if features(1).my_params.ntimbres > 0

        allfeat = abs(cat(2, features(1).data.timbre(:).means));
        for i = 2:numel(features)

            allfeat = cat(2, allfeat, abs(cat(2, features(i).data.timbre(:).means)));
        end

        % ---
        % get normalisation factors
        % NOTE: the values will later be clipped to [0,1]
        % anyways
        % ---
        clip_q = features(1).my_params.clip_timbres;
        if (clip_q ~= 0) && (clip_q ~= 1)

            % scale so that the requested percentile maps to 1.
            % (the original test used "or" and was always true)
            common.post_normf.timbre = 1 ./ prctile(allfeat, clip_q * 100, 2);

        else
            % just use the row-wise maximum.
            % (max(allfeat, 2) would compare element-wise against 2)
            common.post_normf.timbre = 1 ./ max(allfeat, [], 2);
        end
    end

    % ---
    % set common feature values. Everything that was computed is
    % stored, also when no timbre centres are used; the placeholder
    % is only written when there is nothing to store.
    % ---
    if exist('common', 'var')

        features(1).my_db.set_common(common);
    else

        features(1).my_db.set_common([1]);
    end
end
wolffd@0	497
wolffd@0	498
function finalise(feature)
    % applies a final transformation and
    % collects the information of this feature within a single vector
    % see info for types in specific dimensions
    %
    % feature: one feature or an array of features; each element is
    %          processed with its own parameters
    %
    % For every element this writes
    %   data.final.vector              concatenated column vector
    %   data.final.dim                 number of entries of that vector
    %   data.final.vector_info.labels  cell array with a label at the
    %                                  first index of each section
    %
    % Raises an error if no global transformation has been defined
    % (my_db.commondb is empty).

    for i = 1:numel(feature)

        % check for necessary parameters
        if isempty(feature(i).my_db.commondb)

            error('Define the global transformation first')
        end

        % use the parameters of the element being processed
        % (the original tested feature(1) here)
        if feature(i).my_params.ntimbres > 0
            % ---
            % normalise features
            % ---
            % norm timbre features if necessary
            timbren = [];
            if feature(i).my_params.norm_timbres
                for j = 1:numel(feature(i).data.timbre)

                    timbren = cat(1, timbren, ...
                        MTTAudioFeatureBasicSm.norm_timbre( ...
                        feature(i).data.timbre(j).means, ...
                        feature(i).my_db.commondb.post_normf.timbre));
                end
            else

                timbren = cat(1, timbren, feature(i).data.timbre(:).means);
            end
        end

        % ---
        % construct resulting feature vector out of features.
        % info gets a label at the index where each section starts
        % ---
        vec = [];
        info = {};
        if feature(i).my_params.nchromas > 0

            info{numel(vec)+ 1} = 'chroma';
            vec = cat(1, vec, feature(i).data.chroma(:).means);

            info{numel(vec)+ 1} = 'chroma weights';
            vec = cat(1, vec, [feature(i).data.chroma(:).means_weight]');

            % ---
            % NORMALISE Chroma variance
            % ---
            if feature(i).my_params.chroma_var >= 1

                info{numel(vec)+ 1} = 'chroma variance';

                % normalise this pack of variance vectors
                % NOTE(review): the settings are read from feature.common,
                % while the timbre factors above come from
                % my_db.commondb -- confirm both refer to the same data
                tmp_var = mapminmax('apply', [feature(i).data.chroma(:).vars],...
                    feature(i).common.post_normf.chroma_var);

                % concatenate normalised data column by column
                vec = cat(1, vec, tmp_var(:));
            end
        end


        if feature(i).my_params.ntimbres > 0

            info{numel(vec)+ 1} = 'timbre';
            vec = cat(1, vec, timbren);

            info{numel(vec)+ 1} = 'timbre weights';
            vec = cat(1, vec, [feature(i).data.timbre(:).means_weight]');

            % ---
            % NORMALISE timbre variance
            % ---
            if feature(i).my_params.timbre_var >= 1

                info{numel(vec)+ 1} = 'timbre variance';

                % normalise this pack of variance vectors
                % (see the review note on feature.common above)
                tmp_var = mapminmax('apply', [feature(i).data.timbre(:).vars],...
                    feature(i).common.post_normf.timbre_var);

                % concatenate normalised data column by column
                vec = cat(1, vec, tmp_var(:));
            end
        end

        if feature(i).my_params.nrhythms > 0

            info{numel(vec)+ 1} = 'rhythm 8';
            vec = cat(1, vec, feature(i).data.rhythm.acorr8);

            info{numel(vec)+ 1} = 'int 8';
            vec = cat(1, vec, feature(i).data.rhythm.interval8);

            if feature(i).my_params.nrhythms >= 2

                info{numel(vec)+ 1} = 'rhythm 16';
                vec = cat(1, vec, feature(i).data.rhythm.acorr16);

                info{numel(vec)+ 1} = 'int 16';
                vec = cat(1, vec, feature(i).data.rhythm.interval16);
            end
        end

        feature(i).data.final.vector = vec;
        feature(i).data.final.dim = numel(feature(i).data.final.vector);

        % fill up info struct with empty cells and append to feature

        info(end+1: feature(i).data.final.dim) = ...
            cell(feature(i).data.final.dim - numel(info),1);

        feature(i).data.final.vector_info.labels = info;
    end

    % ---
    % TODO: Maybe delete more basic features again at this point?
    % ---
end
wolffd@0	625
wolffd@0	626 % ---
wolffd@0	627 % destructor: do we really want to remove this
wolffd@0	628 % from the database? No, but
wolffd@0	629 % TODO: create marker for unused objects in db, and a cleanup
wolffd@0	630 % function
wolffd@0	631 % ---
function delete(feature)
    % Destructor. Intentionally empty: nothing is removed or released
    % here when a feature object is deleted.

end
wolffd@0	635
wolffd@0	636
function visualise(feature)
    % ---
    % plots the different data types collected in this feature
    %
    % feature: one feature or an array of features. For every element
    %          the raw features of the owning clip are drawn first
    %          (rawf.visualise), then the chroma, timbre and rhythm
    %          summaries are added to the three returned axes.
    %
    % Raises an error if the owning clip class is not supported.
    % ---
    for i = 1:numel(feature)
        clip = feature(i).data.info.owner;

        % display raw features
        if isa(clip, 'CASIMIRClip')
            baseclip = clip.child_clip();
        else
            baseclip = clip;
        end
        if isa(baseclip, 'MTTClip')
            rawf = baseclip.audio_features_raw();
        elseif isa(baseclip, 'MSDClip')
            rawf = baseclip.features('MSDAudioFeatureRAW');
        else
            % previously rawf stayed undefined (or kept the value of
            % the preceding loop iteration)
            error('MTTAudioFeatureBasicSm:visualise:unsupportedClip', ...
                'cannot get raw audio features for a clip of class %s', ...
                class(baseclip));
        end

        % ---
        % @todo: implement MSD feature visualisation
        % ---
        % a1, a2, a3 are used below as the axes for chroma, timbre
        % and rhythm
        [a1, a2, a3] = rawf.visualise();

        % ---
        % Display chroma features
        % ---
        if isfield(feature(i).data, 'chroma')

            chroma_labels = {'c', 'c#', 'd','d#', 'e', 'f','f#', 'g','g#', 'a', 'a#', 'h'};
            mode_labels = {'minor', 'major'};

            % change labels to reflect detected mode
            chroma_labels{rawf.data.key + 1} = ...
                sprintf('(%s) %s',mode_labels{rawf.data.mode + 1}, chroma_labels{rawf.data.key + 1});

            % transpose labels and data by the shift stored at extraction
            chroma_labels = circshift(chroma_labels, [0, feature(i).data.chroma(1).shift]);
            chromar = circshift([rawf.data.segments_pitches], [feature(i).data.chroma(1).shift, 0]);

            % image transposed chromas again
            segments = [rawf.data.segments_start];
            segments(end) = rawf.data.duration;

            hold(a1);
            uimagesc(segments, 0:11, chromar, 'Parent', a1);
            set(a1,'YTick',[0:11], 'YTickLabel', chroma_labels);

            % enlarge plot and plot new data after the old ones
            ax = axis(a1);
            ax(2) = ax(2) + 2*feature(i).my_params.nchromas + 0.5;
            axis(a1, 'xy');
            axis(a1, ax);

            % cluster weights (first row) above the cluster means
            imagesc(rawf.data.duration + (1:feature(i).my_params.nchromas), (-1:11), ...
                [ feature(i).data.chroma(:).means_weight; feature(i).data.chroma(:).means],...
                'Parent', a1);
            % variance calculated?
            if isfield(feature(i).data.chroma, 'vars')

                imagesc(rawf.data.duration + feature(i).my_params.nchromas + (1:feature(i).my_params.nchromas), (-1:11), ...
                    [feature(i).data.chroma(:).vars],...
                    'Parent', a1);
            end
        end

        % ---
        % Display timbre features
        % ---
        if isfield(feature(i).data, 'timbre')

            % enlarge plot and plot new data after the old ones
            hold(a2);
            ax = axis(a2);
            ax(2) = ax(2) + 2*feature(i).my_params.ntimbres + 0.5;

            axis(a2, ax);
            imagesc(rawf.data.duration + (1:feature(i).my_params.ntimbres), (-1:11), ...
                [ feature(i).data.timbre(:).means_weight; feature(i).data.timbre(:).means],...
                'Parent', a2);
            if isfield(feature(i).data.timbre, 'vars')

                % timbre variance belongs next to the timbre means in a2
                % (the original drew it into the chroma axes a1)
                imagesc(rawf.data.duration + feature(i).my_params.ntimbres + (1:feature(i).my_params.ntimbres), (-1:11), ...
                    [feature(i).data.timbre(:).vars],...
                    'Parent', a2);
            end
        end

        % ---
        % Display rhythm features
        % ---
        if isfield(feature(i).data, 'rhythm')

            rhythm = feature(i).data.rhythm;

            % get timecode
            eightt = rhythm.energy8_time;

            hold(a3);

            % plot sixteenths acorr and energy. These fields are only
            % written by extract() when nrhythms >= 2
            if isfield(rhythm, 'energy16')

                sixt = rhythm.energy16_time;

                plot(sixt, rhythm.energy16, 'bx')

                plot(sixt, rhythm.acorr16, 'b')
            end

            % plot eighths acorr and energy
            plot(eightt, rhythm.energy8, 'rx')

            plot(eightt, rhythm.acorr8, 'r')

            % broaden view by fixed 4 seconds
            ax = axis(a3);
            axis(a3, [max(0, eightt(1)-( eightt(end) - eightt(1) + 4 )) ...
                min(rawf.data.duration, eightt(end) +4) ...
                ax(3:4)]);
        end
    end
end
wolffd@0	753 end
wolffd@0	754
wolffd@0	755
wolffd@0	756 methods (Hidden = true)
wolffd@0	757
function [env, time] = energy_envelope(feature, clip)
    % extracts the envelope of energy for the given clip
    %
    % feature: provides my_params.energy_sr, passed to mirenvelope as
    %          the 'Sampling' option
    % clip:    clip object; clip.mp3file_full() is handed to miraudio
    %
    % env:     envelope data as returned by mirgetdata
    % time:    time data belonging to the envelope

    % ---
    % TODO: externalise envelope etc in external audio features
    % ---

    % The calls are wrapped in evalc so that their console output is
    % captured in "null" instead of being printed. The evaluated strings
    % refer to the local variables clip, src and feature by name, so
    % these names must not be changed.
    [null, src] = evalc('miraudio(clip.mp3file_full())');
    [null, env] = evalc('mirenvelope(src, ''Sampling'', feature.my_params.energy_sr)');

    % unpack the nested cell returned for the 'Time' property
    time = get(env,'Time');
    time = time{1}{1};
    env = mirgetdata(env);
end
wolffd@0	772
function [acorr, base_sig, base_t] = beat_histogram(feature, startt, interval, signal, signal_t)
    % [acorr, base_sig, base_t] = beat_histogram(feature, startt, interval, signal, signal_t)
    %
    % compute correlation for beats of specified length in energy curve
    %
    % feature:  provides my_params.nints and my_params.norm_acorr
    % startt:   start time of the analysed segment
    % interval: distance between two sample points, in the unit of
    %           signal_t
    % signal:   energy curve
    %           NOTE(review): the shift below acts on the first
    %           dimension, so a column vector is presumably expected
    % signal_t: time stamps of signal; the spacing is taken from the
    %           first two entries
    %
    % acorr:    squared correlation values for the first nints lags,
    %           rescaled to [0,1] if my_params.norm_acorr is set
    % base_sig: signal values picked at the 2*nints sample points
    % base_t:   the 2*nints sample points

    nints = feature.my_params.nints;

    % get corresponding energy values:
    % 2*nints sample points, of which the first half serves as the
    % correlation template below
    dt = signal_t(2) - signal_t(1);
    base_t = startt:interval:(startt + (nints*2-1) * interval);

    % nearest sample index for each point, clamped to the valid range
    base_sig = signal( min( numel(signal), max(1,round((base_t - signal_t(1))/dt))));

    % normalise energy
    acbase_sig = base_sig./max(base_sig);

    % correlate with the first half and rotate the result so that
    % lag zero becomes the first element
    acorr = circshift(xcorr(acbase_sig,acbase_sig(1:end/2)),...
        [numel(acbase_sig) 0]);

    % cut acorr to relevant points, normalise and square
    acorr = (acorr(1:nints)./nints).^2;

    % ---
    % NOTE: we normalise the autocorrelation locally, to compare the
    % (rhythmic) shape
    % ---
    if feature.my_params.norm_acorr

        acorr = acorr - min(acorr);
        acorr = acorr/max(acorr);
    end
end
wolffd@0	803 end
wolffd@0	804
wolffd@0	805 methods(Static)
wolffd@0	806
function timbre = norm_timbre(in, normfs)
    % returns normed timbre data
    %
    % in:     timbre data; every column is scaled separately
    % normfs: scaling factors, multiplied element-wise with each
    %         column of in
    %
    % timbre: scaled data, mapped by (1 + x) / 2 and limited to [0,1];
    %         same size as in

    % ---
    % apply the factors column by column
    % ---
    scaled = zeros(size(in));
    for col = 1:size(in,2)

        scaled(:,col) = normfs .* in(:,col);
    end

    % move the range [-1,1] onto [0,1]
    shifted = (1 + scaled) /2;

    % limit from below, then from above
    lower_bounded = max(shifted, 0);
    timbre = min(1, lower_bounded);
end
wolffd@0	826
wolffd@0	827 % ---
wolffd@0	828 % returns parameter md5 hash for comparison
wolffd@0	829 % ---
wolffd@0	830 end
wolffd@0	831
wolffd@0	832 end

Mercurial > hg > camir-aes2014

annotate core/magnatagatune/MTTAudioFeatureBasicSm.m @ 0:e9a9cd732c1e tip