qm-dsp: dsp/segmentation/cluster

annotate dsp/segmentation/cluster_segmenter.c @ 298:255e431ae3d4

* Key detector: when returning key strengths, use the peak value of the three underlying chromagram correlations (from 36-bin chromagram) corresponding to each key, instead of the mean. Rationale: This is the same method as used when returning the key value, and it's nice to have the same results in both returned value and plot. The peak performed better than the sum with a simple test set of triads, so it seems reasonable to change the plot to match the key output rather than the other way around. * FFT: kiss_fftr returns only the non-conjugate bins, synthesise the rest rather than leaving them (perhaps dangerously) undefined. Fixes an uninitialised data error in chromagram that could cause garbage results from key detector. * Constant Q: remove precalculated values again, I reckon they're not proving such a good tradeoff.

author	Chris Cannam <c.cannam@qmul.ac.uk>
date	Fri, 05 Jun 2009 15:12:39 +0000
parents	5e125f030287
children	e5907ae6de17

rev	line source
c@243	1 /*
c@243	2 * cluster_segmenter.c
c@243	3 * soundbite
c@243	4 *
c@243	5 * Created by Mark Levy on 06/04/2006.
c@243	6 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. All rights reserved.
c@243	7 *
c@243	8 */
c@243	9
c@243	10 #include "cluster_segmenter.h"
c@243	11
c@243	12 extern int readmatarray_size(const char filepath, int n_array, int t, int* d);
c@243	13 extern int readmatarray(const char filepath, int n_array, int t, int d, double* arr);
c@243	14
c@243	15 /* converts constant-Q features to normalised chroma */
c@243	16 void cq2chroma(double cq, int nframes, int ncoeff, int bins, double chroma)
c@243	17 {
c@243	18 int noct = ncoeff / bins; /* number of complete octaves in constant-Q */
c@243	19 int t, b, oct, ix;
c@243	20 //double maxchroma; /* max chroma value at each time, for normalisation */
c@243	21 //double sum; /* for normalisation */
c@243	22
c@243	23 for (t = 0; t < nframes; t++)
c@243	24 {
c@243	25 for (b = 0; b < bins; b++)
c@243	26 chroma[t][b] = 0;
c@243	27 for (oct = 0; oct < noct; oct++)
c@243	28 {
c@243	29 ix = oct * bins;
c@243	30 for (b = 0; b < bins; b++)
c@243	31 chroma[t][b] += fabs(cq[t][ix+b]);
c@243	32 }
c@243	33 /* normalise to unit sum
c@243	34 sum = 0;
c@243	35 for (b = 0; b < bins; b++)
c@243	36 sum += chroma[t][b];
c@243	37 for (b = 0; b < bins; b++)
c@243	38 chroma[t][b] /= sum;
c@245	39 */
c@243	40 /* normalise to unit max - NO this made results much worse!
c@243	41 maxchroma = 0;
c@243	42 for (b = 0; b < bins; b++)
c@243	43 if (chroma[t][b] > maxchroma)
c@243	44 maxchroma = chroma[t][b];
c@243	45 if (maxchroma > 0)
c@243	46 for (b = 0; b < bins; b++)
c@243	47 chroma[t][b] /= maxchroma;
c@243	48 */
c@243	49 }
c@243	50 }
c@243	51
c@243	52 /* applies MPEG-7 normalisation to constant-Q features, storing normalised envelope (norm) in last feature dimension */
c@243	53 void mpeg7_constq(double** features, int nframes, int ncoeff)
c@243	54 {
c@243	55 int i, j;
c@243	56 double ss;
c@243	57 double env;
c@243	58 double maxenv = 0;
c@243	59
c@243	60 /* convert const-Q features to dB scale */
c@243	61 for (i = 0; i < nframes; i++)
c@243	62 for (j = 0; j < ncoeff; j++)
c@243	63 features[i][j] = 10.0 * log10(features[i][j]+DBL_EPSILON);
c@243	64
c@243	65 /* normalise each feature vector and add the norm as an extra feature dimension */
c@243	66 for (i = 0; i < nframes; i++)
c@243	67 {
c@243	68 ss = 0;
c@243	69 for (j = 0; j < ncoeff; j++)
c@243	70 ss += features[i][j] * features[i][j];
c@243	71 env = sqrt(ss);
c@243	72 for (j = 0; j < ncoeff; j++)
c@243	73 features[i][j] /= env;
c@243	74 features[i][ncoeff] = env;
c@243	75 if (env > maxenv)
c@243	76 maxenv = env;
c@243	77 }
c@243	78 /* normalise the envelopes */
c@243	79 for (i = 0; i < nframes; i++)
c@243	80 features[i][ncoeff] /= maxenv;
c@243	81 }
c@243	82
c@243	83 /* return histograms h[nxm] of data x[nx] into m bins using a sliding window of length h_len (MUST BE ODD) /
c@243	84 /* NB h is a vector in row major order, as required by cluster_melt() */
c@243	85 /* for historical reasons we normalise the histograms by their norm (not to sum to one) */
c@243	86 void create_histograms(int* x, int nx, int m, int hlen, double* h)
c@243	87 {
c@243	88 int i, j, t;
c@243	89 double norm;
c@266	90
c@266	91 for (i = 0; i < nx*m; i++)
c@266	92 h[i] = 0;
c@266	93
c@243	94 for (i = hlen/2; i < nx-hlen/2; i++)
c@243	95 {
c@243	96 for (j = 0; j < m; j++)
c@243	97 h[i*m+j] = 0;
c@243	98 for (t = i-hlen/2; t <= i+hlen/2; t++)
c@243	99 ++h[i*m+x[t]];
c@243	100 norm = 0;
c@243	101 for (j = 0; j < m; j++)
c@243	102 norm += h[im+j] h[i*m+j];
c@243	103 for (j = 0; j < m; j++)
c@243	104 h[i*m+j] /= norm;
c@243	105 }
c@243	106
c@243	107 /* duplicate histograms at beginning and end to create one histogram for each data value supplied */
c@243	108 for (i = 0; i < hlen/2; i++)
c@243	109 for (j = 0; j < m; j++)
c@243	110 h[im+j] = h[hlen/2m+j];
c@243	111 for (i = nx-hlen/2; i < nx; i++)
c@243	112 for (j = 0; j < m; j++)
c@243	113 h[im+j] = h[(nx-hlen/2-1)m+j];
c@243	114 }
c@243	115
c@243	116 /* segment using HMM and then histogram clustering */
c@243	117 void cluster_segment(int* q, double** features, int frames_read, int feature_length, int nHMM_states,
c@243	118 int histogram_length, int nclusters, int neighbour_limit)
c@243	119 {
c@243	120 int i, j;
c@243	121
c@243	122 /*****************************/
c@243	123 if (0) {
c@243	124 /* try just using the predominant bin number as a 'decoded state' */
c@243	125 nHMM_states = feature_length + 1; /* allow a 'zero' state */
c@243	126 double chroma_thresh = 0.05;
c@243	127 double maxval;
c@243	128 int maxbin;
c@243	129 for (i = 0; i < frames_read; i++)
c@243	130 {
c@243	131 maxval = 0;
c@243	132 for (j = 0; j < feature_length; j++)
c@243	133 {
c@243	134 if (features[i][j] > maxval)
c@243	135 {
c@243	136 maxval = features[i][j];
c@243	137 maxbin = j;
c@243	138 }
c@243	139 }
c@243	140 if (maxval > chroma_thresh)
c@243	141 q[i] = maxbin;
c@243	142 else
c@243	143 q[i] = feature_length;
c@243	144 }
c@243	145
c@243	146 }
c@243	147 if (1) {
c@243	148 /*****************************/
c@243	149
c@243	150
c@243	151 /* scale all the features to 'balance covariances' during HMM training */
c@243	152 double scale = 10;
c@243	153 for (i = 0; i < frames_read; i++)
c@243	154 for (j = 0; j < feature_length; j++)
c@243	155 features[i][j] *= scale;
c@243	156
c@243	157 /* train an HMM on the features */
c@243	158
c@243	159 /* create a model */
c@243	160 model_t* model = hmm_init(features, frames_read, feature_length, nHMM_states);
c@243	161
c@243	162 /* train the model */
c@243	163 hmm_train(features, frames_read, model);
c@283	164 /*
c@243	165 printf("\n\nafter training:\n");
c@243	166 hmm_print(model);
c@283	167 */
c@243	168 /* decode the hidden state sequence */
c@243	169 viterbi_decode(features, frames_read, model, q);
c@243	170 hmm_close(model);
c@243	171
c@243	172 /*****************************/
c@243	173 }
c@243	174 /*****************************/
c@243	175
c@243	176
c@283	177 /*
c@243	178 fprintf(stderr, "HMM state sequence:\n");
c@243	179 for (i = 0; i < frames_read; i++)
c@243	180 fprintf(stderr, "%d ", q[i]);
c@243	181 fprintf(stderr, "\n\n");
c@283	182 */
c@243	183
c@243	184 /* create histograms of states */
c@243	185 double* h = (double) malloc(frames_readnHMM_statessizeof(double)); / vector in row major order */
c@243	186 create_histograms(q, frames_read, nHMM_states, histogram_length, h);
c@243	187
c@243	188 /* cluster the histograms */
c@243	189 int nbsched = 20; /* length of inverse temperature schedule */
c@243	190 double* bsched = (double) malloc(nbschedsizeof(double)); /* inverse temperature schedule */
c@243	191 double b0 = 100;
c@243	192 double alpha = 0.7;
c@243	193 bsched[0] = b0;
c@243	194 for (i = 1; i < nbsched; i++)
c@243	195 bsched[i] = alpha * bsched[i-1];
c@243	196 cluster_melt(h, nHMM_states, frames_read, bsched, nbsched, nclusters, neighbour_limit, q);
c@243	197
c@243	198 /* now q holds a sequence of cluster assignments */
c@243	199
c@243	200 free(h);
c@243	201 free(bsched);
c@243	202 }
c@243	203
c@243	204 /* segment constant-Q or chroma features */
c@243	205 void constq_segment(int* q, double** features, int frames_read, int bins, int ncoeff, int feature_type,
c@243	206 int nHMM_states, int histogram_length, int nclusters, int neighbour_limit)
c@243	207 {
c@243	208 int feature_length;
c@243	209 double** chroma;
c@243	210 int i;
c@243	211
c@243	212 if (feature_type == FEATURE_TYPE_CONSTQ)
c@243	213 {
c@283	214 /* fprintf(stderr, "Converting to dB and normalising...\n");
c@283	215 */
c@243	216 mpeg7_constq(features, frames_read, ncoeff);
c@283	217 /*
c@243	218 fprintf(stderr, "Running PCA...\n");
c@283	219 */
c@243	220 /* do PCA on the features (but not the envelope) */
c@243	221 int ncomponents = 20;
c@243	222 pca_project(features, frames_read, ncoeff, ncomponents);
c@243	223
c@243	224 /* copy the envelope so that it immediatly follows the chosen components */
c@243	225 for (i = 0; i < frames_read; i++)
c@243	226 features[i][ncomponents] = features[i][ncoeff];
c@243	227
c@243	228 feature_length = ncomponents + 1;
c@243	229
c@243	230 /**************************************
c@243	231 //TEST
c@243	232 // feature file name
c@243	233 char* dir = "/Users/mark/documents/semma/audio/";
c@243	234 char* file_name = (char) malloc((strlen(dir) + strlen(trackname) + strlen("_features_c20r8h0.2f0.6.mat") + 1)sizeof(char));
c@243	235 strcpy(file_name, dir);
c@243	236 strcat(file_name, trackname);
c@243	237 strcat(file_name, "_features_c20r8h0.2f0.6.mat");
c@243	238
c@243	239 // get the features from Matlab from mat-file
c@243	240 int frames_in_file;
c@243	241 readmatarray_size(file_name, 2, &frames_in_file, &feature_length);
c@243	242 readmatarray(file_name, 2, frames_in_file, feature_length, features);
c@243	243 // copy final frame to ensure that we get as many as we expected
c@243	244 int missing_frames = frames_read - frames_in_file;
c@243	245 while (missing_frames > 0)
c@243	246 {
c@243	247 for (i = 0; i < feature_length; i++)
c@243	248 features[frames_read-missing_frames][i] = features[frames_read-missing_frames-1][i];
c@243	249 --missing_frames;
c@243	250 }
c@243	251
c@243	252 free(file_name);
c@243	253 ******************************************/
c@243	254
c@243	255 cluster_segment(q, features, frames_read, feature_length, nHMM_states, histogram_length, nclusters, neighbour_limit);
c@243	256 }
c@243	257
c@243	258 if (feature_type == FEATURE_TYPE_CHROMA)
c@243	259 {
c@283	260 /*
c@243	261 fprintf(stderr, "Converting to chroma features...\n");
c@283	262 */
c@243	263 /* convert constant-Q to normalised chroma features */
c@243	264 chroma = (double*) malloc(frames_readsizeof(double*));
c@243	265 for (i = 0; i < frames_read; i++)
c@243	266 chroma[i] = (double) malloc(binssizeof(double));
c@243	267 cq2chroma(features, frames_read, ncoeff, bins, chroma);
c@243	268 feature_length = bins;
c@243	269
c@243	270 cluster_segment(q, chroma, frames_read, feature_length, nHMM_states, histogram_length, nclusters, neighbour_limit);
c@243	271
c@243	272 for (i = 0; i < frames_read; i++)
c@243	273 free(chroma[i]);
c@243	274 free(chroma);
c@243	275 }
c@243	276 }
c@243	277
c@243	278
c@243	279

Mercurial > hg > qm-dsp

annotate dsp/segmentation/cluster_segmenter.c @ 298:255e431ae3d4