qm-dsp: dsp/segmentation/cluster

annotate dsp/segmentation/cluster_segmenter.c @ 321:f1e6be2de9a5

A threshold (delta) is added in the peak picking parameters structure (PPickParams). It is used as an offset when computing the smoothed detection function. A constructor for the structure PPickParams is also added to set the parameters to 0 when a structure instance is created. Hence programmes using the peak picking parameter structure and which do not set the delta parameter (e.g. QM Vamp note onset detector) won't be affected by the modifications. Functions modified: - dsp/onsets/PeakPicking.cpp - dsp/onsets/PeakPicking.h - dsp/signalconditioning/DFProcess.cpp - dsp/signalconditioning/DFProcess.h

author	mathieub <mathieu.barthet@eecs.qmul.ac.uk>
date	Mon, 20 Jun 2011 19:01:48 +0100
parents	d5014ab8b0e5
children	175e51ae78eb

rev	line source
c@243	1 /*
c@243	2 * cluster_segmenter.c
c@243	3 * soundbite
c@243	4 *
c@243	5 * Created by Mark Levy on 06/04/2006.
c@309	6 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London.
c@309	7
c@309	8 This program is free software; you can redistribute it and/or
c@309	9 modify it under the terms of the GNU General Public License as
c@309	10 published by the Free Software Foundation; either version 2 of the
c@309	11 License, or (at your option) any later version. See the file
c@309	12 COPYING included with this distribution for more information.
c@243	13 *
c@243	14 */
c@243	15
c@243	16 #include "cluster_segmenter.h"
c@243	17
c@243	18 extern int readmatarray_size(const char filepath, int n_array, int t, int* d);
c@243	19 extern int readmatarray(const char filepath, int n_array, int t, int d, double* arr);
c@243	20
c@243	21 /* converts constant-Q features to normalised chroma */
c@243	22 void cq2chroma(double cq, int nframes, int ncoeff, int bins, double chroma)
c@243	23 {
c@243	24 int noct = ncoeff / bins; /* number of complete octaves in constant-Q */
c@243	25 int t, b, oct, ix;
c@243	26 //double maxchroma; /* max chroma value at each time, for normalisation */
c@243	27 //double sum; /* for normalisation */
c@243	28
c@243	29 for (t = 0; t < nframes; t++)
c@243	30 {
c@243	31 for (b = 0; b < bins; b++)
c@243	32 chroma[t][b] = 0;
c@243	33 for (oct = 0; oct < noct; oct++)
c@243	34 {
c@243	35 ix = oct * bins;
c@243	36 for (b = 0; b < bins; b++)
c@243	37 chroma[t][b] += fabs(cq[t][ix+b]);
c@243	38 }
c@243	39 /* normalise to unit sum
c@243	40 sum = 0;
c@243	41 for (b = 0; b < bins; b++)
c@243	42 sum += chroma[t][b];
c@243	43 for (b = 0; b < bins; b++)
c@243	44 chroma[t][b] /= sum;
c@245	45 */
c@243	46 /* normalise to unit max - NO this made results much worse!
c@243	47 maxchroma = 0;
c@243	48 for (b = 0; b < bins; b++)
c@243	49 if (chroma[t][b] > maxchroma)
c@243	50 maxchroma = chroma[t][b];
c@243	51 if (maxchroma > 0)
c@243	52 for (b = 0; b < bins; b++)
c@243	53 chroma[t][b] /= maxchroma;
c@243	54 */
c@243	55 }
c@243	56 }
c@243	57
c@243	58 /* applies MPEG-7 normalisation to constant-Q features, storing normalised envelope (norm) in last feature dimension */
c@243	59 void mpeg7_constq(double** features, int nframes, int ncoeff)
c@243	60 {
c@243	61 int i, j;
c@243	62 double ss;
c@243	63 double env;
c@243	64 double maxenv = 0;
c@243	65
c@243	66 /* convert const-Q features to dB scale */
c@243	67 for (i = 0; i < nframes; i++)
c@243	68 for (j = 0; j < ncoeff; j++)
c@243	69 features[i][j] = 10.0 * log10(features[i][j]+DBL_EPSILON);
c@243	70
c@243	71 /* normalise each feature vector and add the norm as an extra feature dimension */
c@243	72 for (i = 0; i < nframes; i++)
c@243	73 {
c@243	74 ss = 0;
c@243	75 for (j = 0; j < ncoeff; j++)
c@243	76 ss += features[i][j] * features[i][j];
c@243	77 env = sqrt(ss);
c@243	78 for (j = 0; j < ncoeff; j++)
c@243	79 features[i][j] /= env;
c@243	80 features[i][ncoeff] = env;
c@243	81 if (env > maxenv)
c@243	82 maxenv = env;
c@243	83 }
c@243	84 /* normalise the envelopes */
c@243	85 for (i = 0; i < nframes; i++)
c@243	86 features[i][ncoeff] /= maxenv;
c@243	87 }
c@243	88
c@243	89 /* return histograms h[nxm] of data x[nx] into m bins using a sliding window of length h_len (MUST BE ODD) /
c@243	90 /* NB h is a vector in row major order, as required by cluster_melt() */
c@243	91 /* for historical reasons we normalise the histograms by their norm (not to sum to one) */
c@243	92 void create_histograms(int* x, int nx, int m, int hlen, double* h)
c@243	93 {
c@243	94 int i, j, t;
c@243	95 double norm;
c@266	96
c@266	97 for (i = 0; i < nx*m; i++)
c@266	98 h[i] = 0;
c@266	99
c@243	100 for (i = hlen/2; i < nx-hlen/2; i++)
c@243	101 {
c@243	102 for (j = 0; j < m; j++)
c@243	103 h[i*m+j] = 0;
c@243	104 for (t = i-hlen/2; t <= i+hlen/2; t++)
c@243	105 ++h[i*m+x[t]];
c@243	106 norm = 0;
c@243	107 for (j = 0; j < m; j++)
c@243	108 norm += h[im+j] h[i*m+j];
c@243	109 for (j = 0; j < m; j++)
c@243	110 h[i*m+j] /= norm;
c@243	111 }
c@243	112
c@243	113 /* duplicate histograms at beginning and end to create one histogram for each data value supplied */
c@243	114 for (i = 0; i < hlen/2; i++)
c@243	115 for (j = 0; j < m; j++)
c@243	116 h[im+j] = h[hlen/2m+j];
c@243	117 for (i = nx-hlen/2; i < nx; i++)
c@243	118 for (j = 0; j < m; j++)
c@243	119 h[im+j] = h[(nx-hlen/2-1)m+j];
c@243	120 }
c@243	121
c@243	122 /* segment using HMM and then histogram clustering */
c@243	123 void cluster_segment(int* q, double** features, int frames_read, int feature_length, int nHMM_states,
c@243	124 int histogram_length, int nclusters, int neighbour_limit)
c@243	125 {
c@243	126 int i, j;
c@243	127
c@243	128 /*****************************/
c@243	129 if (0) {
c@243	130 /* try just using the predominant bin number as a 'decoded state' */
c@243	131 nHMM_states = feature_length + 1; /* allow a 'zero' state */
c@243	132 double chroma_thresh = 0.05;
c@243	133 double maxval;
c@243	134 int maxbin;
c@243	135 for (i = 0; i < frames_read; i++)
c@243	136 {
c@243	137 maxval = 0;
c@243	138 for (j = 0; j < feature_length; j++)
c@243	139 {
c@243	140 if (features[i][j] > maxval)
c@243	141 {
c@243	142 maxval = features[i][j];
c@243	143 maxbin = j;
c@243	144 }
c@243	145 }
c@243	146 if (maxval > chroma_thresh)
c@243	147 q[i] = maxbin;
c@243	148 else
c@243	149 q[i] = feature_length;
c@243	150 }
c@243	151
c@243	152 }
c@243	153 if (1) {
c@243	154 /*****************************/
c@243	155
c@243	156
c@243	157 /* scale all the features to 'balance covariances' during HMM training */
c@243	158 double scale = 10;
c@243	159 for (i = 0; i < frames_read; i++)
c@243	160 for (j = 0; j < feature_length; j++)
c@243	161 features[i][j] *= scale;
c@243	162
c@243	163 /* train an HMM on the features */
c@243	164
c@243	165 /* create a model */
c@243	166 model_t* model = hmm_init(features, frames_read, feature_length, nHMM_states);
c@243	167
c@243	168 /* train the model */
c@243	169 hmm_train(features, frames_read, model);
c@283	170 /*
c@243	171 printf("\n\nafter training:\n");
c@243	172 hmm_print(model);
c@283	173 */
c@243	174 /* decode the hidden state sequence */
c@243	175 viterbi_decode(features, frames_read, model, q);
c@243	176 hmm_close(model);
c@243	177
c@243	178 /*****************************/
c@243	179 }
c@243	180 /*****************************/
c@243	181
c@243	182
c@283	183 /*
c@243	184 fprintf(stderr, "HMM state sequence:\n");
c@243	185 for (i = 0; i < frames_read; i++)
c@243	186 fprintf(stderr, "%d ", q[i]);
c@243	187 fprintf(stderr, "\n\n");
c@283	188 */
c@243	189
c@243	190 /* create histograms of states */
c@243	191 double* h = (double) malloc(frames_readnHMM_statessizeof(double)); / vector in row major order */
c@243	192 create_histograms(q, frames_read, nHMM_states, histogram_length, h);
c@243	193
c@243	194 /* cluster the histograms */
c@243	195 int nbsched = 20; /* length of inverse temperature schedule */
c@243	196 double* bsched = (double) malloc(nbschedsizeof(double)); /* inverse temperature schedule */
c@243	197 double b0 = 100;
c@243	198 double alpha = 0.7;
c@243	199 bsched[0] = b0;
c@243	200 for (i = 1; i < nbsched; i++)
c@243	201 bsched[i] = alpha * bsched[i-1];
c@243	202 cluster_melt(h, nHMM_states, frames_read, bsched, nbsched, nclusters, neighbour_limit, q);
c@243	203
c@243	204 /* now q holds a sequence of cluster assignments */
c@243	205
c@243	206 free(h);
c@243	207 free(bsched);
c@243	208 }
c@243	209
c@243	210 /* segment constant-Q or chroma features */
c@243	211 void constq_segment(int* q, double** features, int frames_read, int bins, int ncoeff, int feature_type,
c@243	212 int nHMM_states, int histogram_length, int nclusters, int neighbour_limit)
c@243	213 {
c@243	214 int feature_length;
c@243	215 double** chroma;
c@243	216 int i;
c@243	217
c@243	218 if (feature_type == FEATURE_TYPE_CONSTQ)
c@243	219 {
c@283	220 /* fprintf(stderr, "Converting to dB and normalising...\n");
c@283	221 */
c@243	222 mpeg7_constq(features, frames_read, ncoeff);
c@283	223 /*
c@243	224 fprintf(stderr, "Running PCA...\n");
c@283	225 */
c@243	226 /* do PCA on the features (but not the envelope) */
c@243	227 int ncomponents = 20;
c@243	228 pca_project(features, frames_read, ncoeff, ncomponents);
c@243	229
c@243	230 /* copy the envelope so that it immediatly follows the chosen components */
c@243	231 for (i = 0; i < frames_read; i++)
c@243	232 features[i][ncomponents] = features[i][ncoeff];
c@243	233
c@243	234 feature_length = ncomponents + 1;
c@243	235
c@243	236 /**************************************
c@243	237 //TEST
c@243	238 // feature file name
c@243	239 char* dir = "/Users/mark/documents/semma/audio/";
c@243	240 char* file_name = (char) malloc((strlen(dir) + strlen(trackname) + strlen("_features_c20r8h0.2f0.6.mat") + 1)sizeof(char));
c@243	241 strcpy(file_name, dir);
c@243	242 strcat(file_name, trackname);
c@243	243 strcat(file_name, "_features_c20r8h0.2f0.6.mat");
c@243	244
c@243	245 // get the features from Matlab from mat-file
c@243	246 int frames_in_file;
c@243	247 readmatarray_size(file_name, 2, &frames_in_file, &feature_length);
c@243	248 readmatarray(file_name, 2, frames_in_file, feature_length, features);
c@243	249 // copy final frame to ensure that we get as many as we expected
c@243	250 int missing_frames = frames_read - frames_in_file;
c@243	251 while (missing_frames > 0)
c@243	252 {
c@243	253 for (i = 0; i < feature_length; i++)
c@243	254 features[frames_read-missing_frames][i] = features[frames_read-missing_frames-1][i];
c@243	255 --missing_frames;
c@243	256 }
c@243	257
c@243	258 free(file_name);
c@243	259 ******************************************/
c@243	260
c@243	261 cluster_segment(q, features, frames_read, feature_length, nHMM_states, histogram_length, nclusters, neighbour_limit);
c@243	262 }
c@243	263
c@243	264 if (feature_type == FEATURE_TYPE_CHROMA)
c@243	265 {
c@283	266 /*
c@243	267 fprintf(stderr, "Converting to chroma features...\n");
c@283	268 */
c@243	269 /* convert constant-Q to normalised chroma features */
c@243	270 chroma = (double*) malloc(frames_readsizeof(double*));
c@243	271 for (i = 0; i < frames_read; i++)
c@243	272 chroma[i] = (double) malloc(binssizeof(double));
c@243	273 cq2chroma(features, frames_read, ncoeff, bins, chroma);
c@243	274 feature_length = bins;
c@243	275
c@243	276 cluster_segment(q, chroma, frames_read, feature_length, nHMM_states, histogram_length, nclusters, neighbour_limit);
c@243	277
c@243	278 for (i = 0; i < frames_read; i++)
c@243	279 free(chroma[i]);
c@243	280 free(chroma);
c@243	281 }
c@243	282 }
c@243	283
c@243	284
c@243	285

Mercurial > hg > qm-dsp

annotate dsp/segmentation/cluster_segmenter.c @ 321:f1e6be2de9a5