annotate core/magnatagatune/MTTAudioFeatureBasicSm.m @ 0:e9a9cd732c1e tip

first hg version after svn
author wolffd
date Tue, 10 Feb 2015 15:05:51 +0000
parents
children
rev   line source
classdef MTTAudioFeatureBasicSm < MTTAudioFeature & handle
    % ---
    % The MTTAudioFeatureBasicSm class contains
    % a basic summary of chroma, mfcc and tempo features:
    % a few common chroma and mfcc vectors are concatenated
    % along with some clip-wide variance, and
    % a metric / rhythm fingerprint is added.
    %
    % The usual workflow for these features consists of three steps
    % 1. extract: extracts the basic single-file dependent features
    % 2. define_global_transform: calculates the global feature
    %    transformation parameters
    % 3. finalise: applies the common transformations to a specific feature
    % ---

    properties(Constant = true)

        % svn hook
        my_revision = str2double(substr('$Rev$', 5, -1));
    end

    properties
        % ---
        % Set default parameters
        % ---
        my_params = struct(...
            'nchromas', 4, ... % 4 chroma vectors
            'chroma_var', 0, ... % chroma variance
            'norm_chromas', 0, ... % not implemented, chromas already rel.
            'min_kshift_chromas', 0.1, ... % threshold for key shift. set to 1 for no shift (0-1)
            ...
            'ntimbres', 4, ...
            'timbre_var', 0, ... % timbre variance
            'norm_timbres', 1, ...
            'clip_timbres', 0.85, ... % percentile of data which has to be inside 0-1 bounds
            ...
            'norm_weights',0, ... % globally norm weights for chroma times?
            'norm_interval',1, ...
            'max_iter',100, ... % max iterations for chroma and timbre knn
            ...
            'nrhythms', 0, ...
            'nints', 11, ...
            'energy_sr', 1000, ... % sample rate for energy curve
            'norm_acorr', 1 ... % normalise acorr locally -> shape imp... energy is normalised anyways
            );
    end

    % ---
    % member functions
    % ---
    methods

        % ---
        % constructor: pointer to feature in database
        % ---
        function feature = MTTAudioFeatureBasicSm(varargin)
            % all arguments are forwarded to the MTTAudioFeature constructor

            feature = feature@MTTAudioFeature(varargin{:});
        end

        % ---
        % extract feature data from raw audio features
        % ---
        function data = extract(feature, clip)
            % data = extract(feature, clip)
            %
            % Gets the Basic Summary audio features for CLIP. This includes
            % possible local normalisations.
            %
            % Returns a struct with the fields
            %   chroma(i) : means, means_weight, vars, shift
            %   timbre(i) : means, means_weight, vars
            %   rhythm    : only if my_params.nrhythms >= 1
            %   final     : empty placeholders, filled by finalise
            %   info      : type, owner, owner_id, creatorrev, params
            %
            % Raises an error if the clip type is unsupported or if the
            % clustering cannot find the requested number of centres.

            global globalvars; %#ok<NUSED> kept from the original code

            % raw segment features (casimir child clip is resolved inside)
            rawf = raw_features_of(feature, clip);

            % ---
            % now extract the features
            % first step: chroma clustering
            % ---

            % segment durations, normalised by the clip duration
            weights = [rawf.data.segments_duration];
            weights = weights / rawf.data.duration;

            % one row per segment
            chroma = [rawf.data.segments_pitches]';

            % options vector for the weighted k-means:
            % entry 1 = 0 (display setting), entry 14 = max iterations
            op = foptions();
            op(1) = 0;
            op(14) = feature.my_params.max_iter;

            % ---
            % get most present chroma vectors.
            % the weighted k-means should return the most prominent
            % chroma vectors and their weight, sorted by weight
            % ---
            [chromas, cwght, chroma_var] = cluster_summary(feature, ...
                chroma, weights, feature.my_params.nchromas, ...
                feature.my_params.chroma_var, op, 'chroma');

            % ---
            % shift according to detected key, but only if
            % the confidence is high enough
            % ---
            shift = 0;
            if rawf.data.keyConfidence > feature.my_params.min_kshift_chromas

                shift = -rawf.data.key;
                chromas = circshift(chromas, [0 shift]);
                chroma_var = circshift(chroma_var, [0 shift]);
            end

            % ---
            % get mfcc centres:
            % the same for mfccs
            % ---
            mfcc = [rawf.data.segments_timbre]';
            [mfccs, mwght, timbre_var] = cluster_summary(feature, ...
                mfcc, weights, feature.my_params.ntimbres, ...
                feature.my_params.timbre_var, op, 'mfcc');

            % ---
            % get beat features:
            % the autocorrelation curve over n quarters of length
            %
            % alternative: how about using the n=8 quarters relative
            % volumes from the start of a sure measure?
            % ---
            if feature.my_params.nrhythms >= 1
                beats = rawf.data.beats;
                tatums = rawf.data.tatums;

                % ---
                % NOTE: the beat and tatum markers seem to have an offset :(
                % ---
                offset = 0.118; %seconds

                [envelope, env_time] = energy_envelope(feature, clip);

                % we offset the energy curve
                env_time = env_time + offset;

                % ---
                % we try to start at the best beat confidence more
                % than sixteen eighths from the end
                % ---
                if rawf.data.tempo > 0

                    eightl = 30 / rawf.data.tempo;
                else
                    % ---
                    % odd case: no rhythm data. assume 100 bpm
                    % ---
                    eightl = 0.3;
                end

                if isempty(beats)
                    % ---
                    % odd case: no beats detected. -> use best tatum
                    % ---
                    if ~isempty(tatums)

                        beats = tatums;
                    else
                        % ok, just take the beginning
                        % (row 1: time, row 2: confidence)
                        beats = [0; 1];
                    end
                end

                last_valid = find(beats(1,:) < ...
                    (rawf.data.duration - feature.my_params.nints * eightl),1, 'last');

                % ---
                % odd case: clip too short, no beat leaves enough room
                % before the end. Consider all beats instead of passing
                % an empty start time on; beat_histogram clamps its
                % sample indices to the envelope length.
                % ---
                if isempty(last_valid)
                    last_valid = size(beats, 2);
                end

                % find the best valid beat position
                [~, max_measure] = max( beats(2, 1:last_valid));
                max_mtime = beats(1,max_measure);

                % ---
                % the correlation is calculated for the estimated eighths
                % length and for the 16th intervals, respectively.
                % ---

                % calculate the EIGHTHS correlation for the following segment
                [acorr8, eight_en, eightt] = ...
                    beat_histogram(feature, max_mtime, eightl, envelope, env_time);

                % calculate the SIXTEENTHS correlation for the following segment
                [acorr16, six_en, sixt] = ...
                    beat_histogram(feature, max_mtime, eightl / 2, envelope, env_time);

                % ---
                % save the various features
                % ---
                % save rhythm feature data
                data.rhythm.acorr8 = acorr8;
                data.rhythm.acorr8_lag = eightt(1:end/2)-eightt(1);

                data.rhythm.energy8 = eight_en(1:end/2);
                data.rhythm.energy8_time = eightt(1:end/2);

                % ---
                % NOTE(review): the original code branched on
                % my_params.norm_interval here, but both branches stored
                % the same value (eightl / 2), so the parameter currently
                % has no effect. The value is kept as it was - confirm
                % the intended normalisation.
                % ---
                data.rhythm.interval8 = eightl / 2;

                if feature.my_params.nrhythms >= 2

                    data.rhythm.acorr16 = acorr16;
                    data.rhythm.acorr16_lag = data.rhythm.acorr8_lag / 2;

                    data.rhythm.energy16 = six_en(1:end/2);
                    data.rhythm.energy16_time = sixt(1:end/2);

                    % save beat interval / tempo
                    % (same NOTE(review) as for interval8 applies)
                    data.rhythm.interval16 = eightl / 2;
                end
            end

            % chroma feature data
            for i = 1:size(chromas,1)
                data.chroma(i).means = chromas(i,:)';
                data.chroma(i).means_weight = cwght(i);
                data.chroma(i).vars = chroma_var(i,:)';
                data.chroma(i).shift = shift;
            end

            % mfcc feature data
            for i = 1:size(mfccs,1)
                data.timbre(i).means = mfccs(i,:)';
                data.timbre(i).means_weight = mwght(i);
                data.timbre(i).vars = timbre_var(i,:)';
            end

            % prepare field for final features
            data.final.vector = [];
            data.final.vector_info = struct();
            data.final.dim = 0;

            % save info data
            data.info.type = 'MTTAudioFeatureBasicSm';
            data.info.owner = clip;
            data.info.owner_id = clip.id;
            data.info.creatorrev = feature.my_revision;

            % save parameters
            data.info.params = feature.my_params;
        end

        function define_global_transform(features)
            % define_global_transform(features)
            %
            % Calculates normalisation factors from the group of input
            % features and stores them via features(1).my_db.set_common.
            % The parameters of features(1) decide which factors are
            % computed:
            %   post_normf.chroma_var : mapminmax settings (chroma_var >= 1)
            %   post_normf.timbre_var : mapminmax settings (timbre_var >= 1)
            %   post_normf.timbre     : per-bin scale factor (ntimbres > 0)
            % If none of them applies, set_common([1]) is called.
            %
            % Raises an error if only a single feature is passed.

            if numel(features) == 1
                error ('Insert feature array for this method');
            end

            % ---
            % here, we only need to define the post-normalisation
            % ---
            common = struct();

            % ---
            % get chroma variance data NORMALISATION Factors
            % TODO: transport chroma variance to finalise step
            % ---
            if features(1).my_params.chroma_var >= 1
                allfeat = abs(cat(2, features(1).data.chroma(:).vars));
                for i = 2:numel(features)

                    allfeat = cat(2 , allfeat, abs(cat(2, features(i).data.chroma(:).vars)));
                end
                [~, common.post_normf.chroma_var] = mapminmax(allfeat,0,1);
            end

            % ---
            % get timbre variance data NORMALISATION Factors
            % TODO: transport timbre variance to finalise step
            % ---
            if features(1).my_params.timbre_var >= 1
                allfeat = abs(cat(2, features(1).data.timbre(:).vars));
                for i = 2:numel(features)

                    allfeat = cat(2 , allfeat, abs(cat(2, features(i).data.timbre(:).vars)));
                end
                [~, common.post_normf.timbre_var] = mapminmax(allfeat,0,1);
            end

            % ---
            % derive normalisation for timbre features:
            % MFCC's are actually special filter outputs
            % (see developer.echonest.com/docs/v4/_static/AnalyzeDocumentation_2.2.pdf)
            % they are unbounded, so just the relative information will be
            % used here.
            % We normalise each bin independently
            % ---
            if features(1).my_params.ntimbres > 0

                allfeat = abs(cat(2, features(1).data.timbre(:).means));
                for i = 2:numel(features)

                    allfeat = cat(2 , allfeat, abs(cat(2, features(i).data.timbre(:).means)));
                end

                % ---
                % get normalisation factors
                % NOTE: the values will later be clipped to [0,1]
                % anyways
                % ---
                clip_level = features(1).my_params.clip_timbres;
                if (clip_level ~= 0) && (clip_level ~= 1)

                    % scale so that the given percentile maps to 1
                    common.post_normf.timbre = 1 ./ prctile(allfeat, clip_level * 100, 2);
                else
                    % just use the maximum of each bin (row)
                    common.post_normf.timbre = 1 ./ max(allfeat, [], 2);
                end
            end

            % ---
            % set common feature values. Variance factors are stored
            % even if no timbre means were requested.
            % ---
            if isempty(fieldnames(common))

                features(1).my_db.set_common([1]);
            else
                features(1).my_db.set_common(common);
            end
        end


        function finalise(feature)
            % finalise(feature)
            %
            % Applies the final transformation and collects the information
            % of each feature in FEATURE within a single column vector
            % (data.final.vector). data.final.vector_info.labels holds a
            % label at the start index of each part of the vector.
            %
            % Raises an error if no global transformation has been defined.

            for i = 1:numel(feature)

                % check for necessary parameters
                if isempty(feature(i).my_db.commondb)

                    error('Define the global transformation first')
                end

                if feature(i).my_params.ntimbres > 0
                    % ---
                    % normalise features
                    % ---
                    % norm timbre features if necessary
                    timbren = [];
                    if feature(i).my_params.norm_timbres
                        for j = 1:numel(feature(i).data.timbre)

                            timbren = cat(1, timbren, ...
                                MTTAudioFeatureBasicSm.norm_timbre...
                                (feature(i).data.timbre(j).means, feature(i).my_db.commondb.post_normf.timbre));
                        end
                    else

                        timbren = cat(1, timbren, feature(i).data.timbre(:).means);
                    end
                end

                % ---
                % construct resulting feature vector out of features
                % ---
                vec = [];
                info = {};
                if feature(i).my_params.nchromas > 0

                    info{numel(vec)+ 1} = 'chroma';
                    vec = cat(1, vec, feature(i).data.chroma(:).means);

                    info{numel(vec)+ 1} = 'chroma weights';
                    vec = cat(1, vec, [feature(i).data.chroma(:).means_weight]');

                    % ---
                    % NORMALISE Chroma variance
                    % ---
                    if feature(i).my_params.chroma_var >= 1

                        info{numel(vec)+ 1} = 'chroma variance';

                        % normalise this pack of variance vectors
                        % NOTE(review): the factors are read via
                        % feature(i).common here but via my_db.commondb
                        % for the timbre means - presumably both refer to
                        % the same data; confirm against MTTAudioFeature.
                        tmp_var = mapminmax('apply', [feature(i).data.chroma(:).vars],...
                            feature(i).common.post_normf.chroma_var);

                        % concatenate normalised data to vector
                        for vari = 1:size(tmp_var,2)

                            vec = cat(1, vec, tmp_var(:, vari));
                        end
                    end
                end


                if feature(i).my_params.ntimbres > 0

                    info{numel(vec)+ 1} = 'timbre';
                    vec = cat(1, vec, timbren);

                    info{numel(vec)+ 1} = 'timbre weights';
                    vec = cat(1, vec, [feature(i).data.timbre(:).means_weight]');

                    % ---
                    % NORMALISE timbre variance
                    % ---
                    if feature(i).my_params.timbre_var >= 1

                        info{numel(vec)+ 1} = 'timbre variance';

                        % normalise this pack of variance vectors
                        tmp_var = mapminmax('apply', [feature(i).data.timbre(:).vars],...
                            feature(i).common.post_normf.timbre_var);

                        % concatenate normalised data to vector
                        for vari = 1:size(tmp_var,2)

                            vec = cat(1, vec, tmp_var(:, vari));
                        end
                    end
                end

                if feature(i).my_params.nrhythms > 0

                    info{numel(vec)+ 1} = 'rhythm 8';
                    vec = cat(1, vec, feature(i).data.rhythm.acorr8);

                    info{numel(vec)+ 1} = 'int 8';
                    vec = cat(1, vec, feature(i).data.rhythm.interval8);

                    if feature(i).my_params.nrhythms >= 2

                        info{numel(vec)+ 1} = 'rhythm 16';
                        vec = cat(1, vec, feature(i).data.rhythm.acorr16);

                        info{numel(vec)+ 1} = 'int 16';
                        vec = cat(1, vec, feature(i).data.rhythm.interval16);
                    end
                end

                feature(i).data.final.vector = vec;
                feature(i).data.final.dim = numel(feature(i).data.final.vector);

                % fill up info struct and append to feature
                info(end+1: feature(i).data.final.dim) = ...
                    cell(feature(i).data.final.dim - numel(info),1);

                feature(i).data.final.vector_info.labels = info;
            end

            % ---
            % TODO: Maybe delete more basic features again at this point?
            % ---
        end

        % ---
        % destructor: do we really want to remove this
        % from the database? No, but
        % TODO: create marker for unused objects in db, and a cleanup
        % function
        % ---
        function delete(feature)
            % intentionally empty, see TODO above
        end


        function visualise(feature)
            % ---
            % plots the different data types collected in this feature
            % into the three axes returned by the raw feature's
            % visualise(): chroma (a1), timbre (a2) and rhythm (a3)
            % ---
            for i = 1:numel(feature)
                clip = feature(i).data.info.owner;

                % display raw features
                rawf = raw_features_of(feature(i), clip);

                % ---
                % @todo: implement MSD feature visualisation
                % ---
                [a1, a2, a3] = rawf.visualise();

                % ---
                % Display chroma features
                % ---
                if isfield(feature(i).data, 'chroma')

                    chroma_labels = {'c', 'c#', 'd','d#', 'e', 'f','f#', 'g','g#', 'a', 'a#', 'h'};
                    mode_labels = {'minor', 'major'};

                    % change labels to reflect detected mode
                    chroma_labels{rawf.data.key + 1} = ...
                        sprintf('(%s) %s',mode_labels{rawf.data.mode + 1}, chroma_labels{rawf.data.key + 1});

                    % transpose labels and data
                    chroma_labels = circshift(chroma_labels, [0, feature(i).data.chroma(1).shift]);
                    chromar = circshift([rawf.data.segments_pitches], [feature(i).data.chroma(1).shift, 0]);

                    % image transposed chromas again
                    segments = [rawf.data.segments_start];
                    segments(end) = rawf.data.duration;

                    hold(a1);
                    uimagesc(segments, 0:11, chromar, 'Parent', a1);
                    set(a1,'YTick',[0:11], 'YTickLabel', chroma_labels);

                    % enlarge plot and plot new data after the old ones
                    ax = axis(a1);
                    ax(2) = ax(2) + 2*feature(i).my_params.nchromas + 0.5;
                    axis(a1, 'xy');
                    axis(a1, ax);

                    imagesc(rawf.data.duration + (1:feature(i).my_params.nchromas), (-1:11), ...
                        [ feature(i).data.chroma(:).means_weight; feature(i).data.chroma(:).means],...
                        'Parent', a1);

                    % variance calculated?
                    if isfield(feature(i).data.chroma, 'vars')

                        imagesc(rawf.data.duration + feature(i).my_params.nchromas + (1:feature(i).my_params.nchromas), (-1:11), ...
                            [feature(i).data.chroma(:).vars],...
                            'Parent', a1);
                    end
                end

                % ---
                % Display timbre features
                % ---
                if isfield(feature(i).data, 'timbre')

                    % enlarge plot and plot new data after the old ones
                    hold(a2);
                    ax = axis(a2);
                    ax(2) = ax(2) + 2*feature(i).my_params.ntimbres + 0.5;

                    axis(a2, ax);
                    imagesc(rawf.data.duration + (1:feature(i).my_params.ntimbres), (-1:11), ...
                        [ feature(i).data.timbre(:).means_weight; feature(i).data.timbre(:).means],...
                        'Parent', a2);

                    % variance goes next to the timbre means, i.e. into
                    % the timbre axes (the original drew it into a1)
                    if isfield(feature(i).data.timbre, 'vars')

                        imagesc(rawf.data.duration + feature(i).my_params.ntimbres + (1:feature(i).my_params.ntimbres), (-1:11), ...
                            [feature(i).data.timbre(:).vars],...
                            'Parent', a2);
                    end
                end

                % ---
                % Display rhythm features
                % ---
                if isfield(feature(i).data, 'rhythm')

                    rhythm = feature(i).data.rhythm;

                    % get timecode
                    eightt = rhythm.energy8_time;

                    hold(a3);

                    % plot sixteenths acorr and energy; these fields are
                    % only written by extract for nrhythms >= 2
                    if isfield(rhythm, 'energy16')

                        sixt = rhythm.energy16_time;
                        plot(a3, sixt, rhythm.energy16, 'bx')
                        plot(a3, sixt, rhythm.acorr16, 'b')
                    end

                    % plot eighths acorr and energy
                    plot(a3, eightt, rhythm.energy8, 'rx')
                    plot(a3, eightt, rhythm.acorr8, 'r')

                    % broaden view by fixed 4 seconds
                    ax = axis(a3);
                    axis(a3, [max(0, eightt(1)-( eightt(end) - eightt(1) + 4 )) ...
                        min(rawf.data.duration, eightt(end) +4) ...
                        ax(3:4)]);
                end
            end
        end
    end


    methods (Hidden = true)

        function rawf = raw_features_of(feature, clip) %#ok<INUSL>
            % rawf = raw_features_of(feature, clip)
            %
            % returns the raw audio feature object for CLIP. For a
            % CASIMIRClip the child clip is used. Raises an error for
            % clip types other than MTTClip and MSDClip.

            % ---
            % get casimir child clip if available
            % ---
            if isa(clip, 'CASIMIRClip')
                baseclip = clip.child_clip();
            else
                baseclip = clip;
            end

            if isa(baseclip, 'MTTClip')
                rawf = baseclip.audio_features_raw();
            elseif isa(baseclip, 'MSDClip')
                rawf = baseclip.features('MSDAudioFeatureRAW');
            else
                error('unsupported clip type: %s', class(baseclip));
            end
        end

        function [centres, cwght, cvar] = cluster_summary(feature, vectors, weights, ncentres, want_var, op, label) %#ok<INUSL>
            % [centres, cwght, cvar] = cluster_summary(feature, vectors, ...
            %                            weights, ncentres, want_var, op, label)
            %
            % summarises the rows of VECTORS by NCENTRES centre vectors.
            %
            %   vectors  : one data point per row
            %   weights  : row vector, one weight per data point
            %   ncentres : number of centres to return
            %   want_var : weighted cluster variance is computed if >= 1
            %   op       : options vector passed on to weighted_kmeans
            %   label    : name used in the error message
            %
            % Returns the centres (one per row), their weights and their
            % variances, all sorted by descending weight. Raises an error
            % if the k-means does not deliver NCENTRES centres.

            have_var = false;

            if ncentres == 0
                % trivial case: nothing requested
                centres = [];
                cwght = [];

            elseif ncentres == 1
                % single centre: plain mean and variance of the raw data
                centres = mean(vectors, 1);
                cvar = var(vectors, 0, 1);
                have_var = true;
                cwght = 1;

            elseif numel(weights) > ncentres

                % ---
                % there may be few data points, try kmeans several (20) times
                % ---
                attempts = 0;
                cwght = [];
                while (numel(cwght) ~= ncentres) && (attempts < 20)

                    [centres, cwght, post] = ...
                        weighted_kmeans(ncentres, vectors, weights, op);

                    attempts = attempts + 1;
                end

                if (numel(cwght) ~= ncentres)

                    error('cannot find enough %s centres', label);
                end

                % ---
                % Calculate the weighted variance of the clusters
                % ---
                if want_var >= 1

                    cvar = zeros(size(centres));
                    have_var = true;
                    for i = 1:size(cvar,1)

                        % column i of post is used to select the members
                        % of cluster i
                        members = post(:,i);

                        % get squared distance from cluster centroid
                        sq_dist = (vectors(members,:) - repmat(centres(i,:), sum(members),1)).^2;

                        % add up the weighted differences and normalise
                        % by sum of weights
                        cvar(i,:) = (weights(members) * sq_dist) ./...
                            (sum(weights(members)));
                    end
                end
            else
                % ---
                % odd case: not more than ncentres data points.
                % we repeat the mean vector at the end.
                % every vector is a cluster => zero variance
                % ---
                centres = [vectors; repmat(mean(vectors, 1),...
                    ncentres - numel(weights), 1 )];

                cwght = weights;
                cwght( end + 1:ncentres ) = 0;
            end

            % trivial case: no variance requested / available
            if ~have_var
                cvar = zeros(size(centres));
            end

            % sort by associated time
            [cwght, idx] = sort(cwght, 'descend');
            centres = centres(idx,:);
            cvar = cvar(idx,:);
        end

        function [env, time] = energy_envelope(feature, clip)
            % [env, time] = energy_envelope(feature, clip)
            %
            % extracts the envelope of energy for the given clip, sampled
            % at my_params.energy_sr, together with its time axis.
            % evalc is used to capture the console output of the calls.

            % ---
            % TODO: externalise envelope etc in external audio features
            % ---

            % NOTE: the evaluated strings refer to the variables
            % clip, src and feature of this workspace
            [~, src] = evalc('miraudio(clip.mp3file_full())'); %#ok<ASGLU>
            [~, env] = evalc('mirenvelope(src, ''Sampling'', feature.my_params.energy_sr)');

            time = get(env,'Time');
            time = time{1}{1};
            env = mirgetdata(env);
        end

        function [acorr, base_sig, base_t] = beat_histogram(feature, startt, interval, signal, signal_t)
            % [acorr, base_sig, base_t] = beat_histogram(feature, startt, interval, signal, signal_t)
            %
            % compute correlation for beats of specified length in energy
            % curve
            %
            %   startt   : time of the first sampling point
            %   interval : distance between the sampling points
            %   signal   : energy curve
            %   signal_t : time axis of the energy curve
            %
            % Returns the first my_params.nints correlation values, the
            % 2 * nints sampled energy values and their time points.

            % get corresponding energy values; indices are clamped to the
            % valid range of the signal
            dt = signal_t(2) - signal_t(1);
            base_t = startt:interval:(startt + (feature.my_params.nints*2-1) * interval);
            base_sig = signal( min( numel(signal), max(1,round((base_t - signal_t(1))/dt))));

            % normalise energy
            acbase_sig = base_sig./max(base_sig);

            % calculate their cyclic autocorrelation
            acorr = circshift(xcorr(acbase_sig,acbase_sig(1:end/2)),...
                [numel(acbase_sig) 0]);

            % cut acorr to relevant points, normalise and square
            acorr = (acorr(1:feature.my_params.nints)./feature.my_params.nints).^2;

            % ---
            % NOTE: we normalise the autocorrelation locally, to compare the
            % (rhythmic) shape
            % ---
            if feature.my_params.norm_acorr

                acorr = acorr - min(acorr);
                acorr = acorr/max(acorr);
            end
        end
    end

    methods(Static)

        function timbre = norm_timbre(in, normfs)
            % timbre = norm_timbre(in, normfs)
            %
            % returns normed timbre data: each column of IN is multiplied
            % element-wise by NORMFS, the result is mapped by (1 + x) / 2
            % and clipped to [0,1]

            % ---
            % individually scale the data using
            % the dimensions factors
            % ---
            timbre = zeros(size(in));
            for i = 1:size(in,2)

                timbre(:,i) = normfs .* in(:,i);
            end

            % shift to positive values
            timbre = (1 + timbre) /2;

            % clip features to [0,1]
            timbre = min(1, max(timbre, 0));
        end

        % ---
        % TODO: returns parameter md5 hash for comparison
        % (not implemented here)
        % ---
    end

end