qm-dsp: dsp/segmentation/cluster

annotate dsp/segmentation/cluster_segmenter.c @ 27:b678e72323df

* Adjust MFCC params in segmenter to match timbral MFCC params from Soundbite

author	cannam
date	Thu, 10 Jan 2008 17:26:15 +0000
parents	8bdbda7fb893
children	6fc20388d29e

rev	line source
cannam@18	1 /*
cannam@18	2 * cluster_segmenter.c
cannam@18	3 * soundbite
cannam@18	4 *
cannam@18	5 * Created by Mark Levy on 06/04/2006.
cannam@18	6 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. All rights reserved.
cannam@18	7 *
cannam@18	8 */
cannam@18	9
cannam@18	10 #include "cluster_segmenter.h"
cannam@18	11
cannam@18	12 extern int readmatarray_size(const char filepath, int n_array, int t, int* d);
cannam@18	13 extern int readmatarray(const char filepath, int n_array, int t, int d, double* arr);
cannam@18	14
cannam@18	15 /* converts constant-Q features to normalised chroma */
cannam@18	16 void cq2chroma(double cq, int nframes, int ncoeff, int bins, double chroma)
cannam@18	17 {
cannam@18	18 int noct = ncoeff / bins; /* number of complete octaves in constant-Q */
cannam@18	19 int t, b, oct, ix;
cannam@18	20 //double maxchroma; /* max chroma value at each time, for normalisation */
cannam@18	21 //double sum; /* for normalisation */
cannam@18	22
cannam@18	23 for (t = 0; t < nframes; t++)
cannam@18	24 {
cannam@18	25 for (b = 0; b < bins; b++)
cannam@18	26 chroma[t][b] = 0;
cannam@18	27 for (oct = 0; oct < noct; oct++)
cannam@18	28 {
cannam@18	29 ix = oct * bins;
cannam@18	30 for (b = 0; b < bins; b++)
cannam@18	31 chroma[t][b] += fabs(cq[t][ix+b]);
cannam@18	32 }
cannam@18	33 /* normalise to unit sum
cannam@18	34 sum = 0;
cannam@18	35 for (b = 0; b < bins; b++)
cannam@18	36 sum += chroma[t][b];
cannam@18	37 for (b = 0; b < bins; b++)
cannam@18	38 chroma[t][b] /= sum;
cannam@20	39 */
cannam@18	40 /* normalise to unit max - NO this made results much worse!
cannam@18	41 maxchroma = 0;
cannam@18	42 for (b = 0; b < bins; b++)
cannam@18	43 if (chroma[t][b] > maxchroma)
cannam@18	44 maxchroma = chroma[t][b];
cannam@18	45 if (maxchroma > 0)
cannam@18	46 for (b = 0; b < bins; b++)
cannam@18	47 chroma[t][b] /= maxchroma;
cannam@18	48 */
cannam@18	49 }
cannam@18	50 }
cannam@18	51
cannam@18	52 /* applies MPEG-7 normalisation to constant-Q features, storing normalised envelope (norm) in last feature dimension */
cannam@18	53 void mpeg7_constq(double** features, int nframes, int ncoeff)
cannam@18	54 {
cannam@18	55 int i, j;
cannam@18	56 double ss;
cannam@18	57 double env;
cannam@18	58 double maxenv = 0;
cannam@18	59
cannam@18	60 /* convert const-Q features to dB scale */
cannam@18	61 for (i = 0; i < nframes; i++)
cannam@18	62 for (j = 0; j < ncoeff; j++)
cannam@18	63 features[i][j] = 10.0 * log10(features[i][j]+DBL_EPSILON);
cannam@18	64
cannam@18	65 /* normalise each feature vector and add the norm as an extra feature dimension */
cannam@18	66 for (i = 0; i < nframes; i++)
cannam@18	67 {
cannam@18	68 ss = 0;
cannam@18	69 for (j = 0; j < ncoeff; j++)
cannam@18	70 ss += features[i][j] * features[i][j];
cannam@18	71 env = sqrt(ss);
cannam@18	72 for (j = 0; j < ncoeff; j++)
cannam@18	73 features[i][j] /= env;
cannam@18	74 features[i][ncoeff] = env;
cannam@18	75 if (env > maxenv)
cannam@18	76 maxenv = env;
cannam@18	77 }
cannam@18	78 /* normalise the envelopes */
cannam@18	79 for (i = 0; i < nframes; i++)
cannam@18	80 features[i][ncoeff] /= maxenv;
cannam@18	81 }
cannam@18	82
cannam@18	83 /* return histograms h[nxm] of data x[nx] into m bins using a sliding window of length h_len (MUST BE ODD) /
cannam@18	84 /* NB h is a vector in row major order, as required by cluster_melt() */
cannam@18	85 /* for historical reasons we normalise the histograms by their norm (not to sum to one) */
cannam@18	86 void create_histograms(int* x, int nx, int m, int hlen, double* h)
cannam@18	87 {
cannam@18	88 int i, j, t;
cannam@18	89 double norm;
cannam@18	90 for (i = hlen/2; i < nx-hlen/2; i++)
cannam@18	91 {
cannam@18	92 for (j = 0; j < m; j++)
cannam@18	93 h[i*m+j] = 0;
cannam@18	94 for (t = i-hlen/2; t <= i+hlen/2; t++)
cannam@18	95 ++h[i*m+x[t]];
cannam@18	96 norm = 0;
cannam@18	97 for (j = 0; j < m; j++)
cannam@18	98 norm += h[im+j] h[i*m+j];
cannam@18	99 for (j = 0; j < m; j++)
cannam@18	100 h[i*m+j] /= norm;
cannam@18	101 }
cannam@18	102
cannam@18	103 /* duplicate histograms at beginning and end to create one histogram for each data value supplied */
cannam@18	104 for (i = 0; i < hlen/2; i++)
cannam@18	105 for (j = 0; j < m; j++)
cannam@18	106 h[im+j] = h[hlen/2m+j];
cannam@18	107 for (i = nx-hlen/2; i < nx; i++)
cannam@18	108 for (j = 0; j < m; j++)
cannam@18	109 h[im+j] = h[(nx-hlen/2-1)m+j];
cannam@18	110 }
cannam@18	111
cannam@18	112 /* segment using HMM and then histogram clustering */
cannam@18	113 void cluster_segment(int* q, double** features, int frames_read, int feature_length, int nHMM_states,
cannam@18	114 int histogram_length, int nclusters, int neighbour_limit)
cannam@18	115 {
cannam@18	116 int i, j;
cannam@18	117
cannam@18	118 /*****************************/
cannam@18	119 if (0) {
cannam@18	120 /* try just using the predominant bin number as a 'decoded state' */
cannam@18	121 nHMM_states = feature_length + 1; /* allow a 'zero' state */
cannam@18	122 double chroma_thresh = 0.05;
cannam@18	123 double maxval;
cannam@18	124 int maxbin;
cannam@18	125 for (i = 0; i < frames_read; i++)
cannam@18	126 {
cannam@18	127 maxval = 0;
cannam@18	128 for (j = 0; j < feature_length; j++)
cannam@18	129 {
cannam@18	130 if (features[i][j] > maxval)
cannam@18	131 {
cannam@18	132 maxval = features[i][j];
cannam@18	133 maxbin = j;
cannam@18	134 }
cannam@18	135 }
cannam@18	136 if (maxval > chroma_thresh)
cannam@18	137 q[i] = maxbin;
cannam@18	138 else
cannam@18	139 q[i] = feature_length;
cannam@18	140 }
cannam@18	141
cannam@18	142 }
cannam@18	143 if (1) {
cannam@18	144 /*****************************/
cannam@18	145
cannam@18	146
cannam@18	147 /* scale all the features to 'balance covariances' during HMM training */
cannam@18	148 double scale = 10;
cannam@18	149 for (i = 0; i < frames_read; i++)
cannam@18	150 for (j = 0; j < feature_length; j++)
cannam@18	151 features[i][j] *= scale;
cannam@18	152
cannam@18	153 /* train an HMM on the features */
cannam@18	154
cannam@18	155 /* create a model */
cannam@18	156 model_t* model = hmm_init(features, frames_read, feature_length, nHMM_states);
cannam@18	157
cannam@18	158 /* train the model */
cannam@18	159 hmm_train(features, frames_read, model);
cannam@18	160
cannam@18	161 printf("\n\nafter training:\n");
cannam@18	162 hmm_print(model);
cannam@18	163
cannam@18	164 /* decode the hidden state sequence */
cannam@18	165 viterbi_decode(features, frames_read, model, q);
cannam@18	166 hmm_close(model);
cannam@18	167
cannam@18	168 /*****************************/
cannam@18	169 }
cannam@18	170 /*****************************/
cannam@18	171
cannam@18	172
cannam@18	173 fprintf(stderr, "HMM state sequence:\n");
cannam@18	174 for (i = 0; i < frames_read; i++)
cannam@18	175 fprintf(stderr, "%d ", q[i]);
cannam@18	176 fprintf(stderr, "\n\n");
cannam@18	177
cannam@18	178 /* create histograms of states */
cannam@18	179 double* h = (double) malloc(frames_readnHMM_statessizeof(double)); / vector in row major order */
cannam@18	180 create_histograms(q, frames_read, nHMM_states, histogram_length, h);
cannam@18	181
cannam@18	182 /* cluster the histograms */
cannam@18	183 int nbsched = 20; /* length of inverse temperature schedule */
cannam@18	184 double* bsched = (double) malloc(nbschedsizeof(double)); /* inverse temperature schedule */
cannam@18	185 double b0 = 100;
cannam@18	186 double alpha = 0.7;
cannam@18	187 bsched[0] = b0;
cannam@18	188 for (i = 1; i < nbsched; i++)
cannam@18	189 bsched[i] = alpha * bsched[i-1];
cannam@18	190 cluster_melt(h, nHMM_states, frames_read, bsched, nbsched, nclusters, neighbour_limit, q);
cannam@18	191
cannam@18	192 /* now q holds a sequence of cluster assignments */
cannam@18	193
cannam@18	194 free(h);
cannam@18	195 free(bsched);
cannam@18	196 }
cannam@18	197
cannam@18	198 /* segment constant-Q or chroma features */
cannam@18	199 void constq_segment(int* q, double** features, int frames_read, int bins, int ncoeff, int feature_type,
cannam@18	200 int nHMM_states, int histogram_length, int nclusters, int neighbour_limit)
cannam@18	201 {
cannam@18	202 int feature_length;
cannam@18	203 double** chroma;
cannam@18	204 int i;
cannam@18	205
cannam@18	206 if (feature_type == FEATURE_TYPE_CONSTQ)
cannam@18	207 {
cannam@18	208 fprintf(stderr, "Converting to dB and normalising...\n");
cannam@18	209
cannam@18	210 mpeg7_constq(features, frames_read, ncoeff);
cannam@18	211
cannam@18	212 fprintf(stderr, "Running PCA...\n");
cannam@18	213
cannam@18	214 /* do PCA on the features (but not the envelope) */
cannam@18	215 int ncomponents = 20;
cannam@18	216 pca_project(features, frames_read, ncoeff, ncomponents);
cannam@18	217
cannam@18	218 /* copy the envelope so that it immediatly follows the chosen components */
cannam@18	219 for (i = 0; i < frames_read; i++)
cannam@18	220 features[i][ncomponents] = features[i][ncoeff];
cannam@18	221
cannam@18	222 feature_length = ncomponents + 1;
cannam@18	223
cannam@18	224 /**************************************
cannam@18	225 //TEST
cannam@18	226 // feature file name
cannam@18	227 char* dir = "/Users/mark/documents/semma/audio/";
cannam@18	228 char* file_name = (char) malloc((strlen(dir) + strlen(trackname) + strlen("_features_c20r8h0.2f0.6.mat") + 1)sizeof(char));
cannam@18	229 strcpy(file_name, dir);
cannam@18	230 strcat(file_name, trackname);
cannam@18	231 strcat(file_name, "_features_c20r8h0.2f0.6.mat");
cannam@18	232
cannam@18	233 // get the features from Matlab from mat-file
cannam@18	234 int frames_in_file;
cannam@18	235 readmatarray_size(file_name, 2, &frames_in_file, &feature_length);
cannam@18	236 readmatarray(file_name, 2, frames_in_file, feature_length, features);
cannam@18	237 // copy final frame to ensure that we get as many as we expected
cannam@18	238 int missing_frames = frames_read - frames_in_file;
cannam@18	239 while (missing_frames > 0)
cannam@18	240 {
cannam@18	241 for (i = 0; i < feature_length; i++)
cannam@18	242 features[frames_read-missing_frames][i] = features[frames_read-missing_frames-1][i];
cannam@18	243 --missing_frames;
cannam@18	244 }
cannam@18	245
cannam@18	246 free(file_name);
cannam@18	247 ******************************************/
cannam@18	248
cannam@18	249 cluster_segment(q, features, frames_read, feature_length, nHMM_states, histogram_length, nclusters, neighbour_limit);
cannam@18	250 }
cannam@18	251
cannam@18	252 if (feature_type == FEATURE_TYPE_CHROMA)
cannam@18	253 {
cannam@18	254 fprintf(stderr, "Converting to chroma features...\n");
cannam@18	255
cannam@18	256 /* convert constant-Q to normalised chroma features */
cannam@18	257 chroma = (double*) malloc(frames_readsizeof(double*));
cannam@18	258 for (i = 0; i < frames_read; i++)
cannam@18	259 chroma[i] = (double) malloc(binssizeof(double));
cannam@18	260 cq2chroma(features, frames_read, ncoeff, bins, chroma);
cannam@18	261 feature_length = bins;
cannam@18	262
cannam@18	263 cluster_segment(q, chroma, frames_read, feature_length, nHMM_states, histogram_length, nclusters, neighbour_limit);
cannam@18	264
cannam@18	265 for (i = 0; i < frames_read; i++)
cannam@18	266 free(chroma[i]);
cannam@18	267 free(chroma);
cannam@18	268 }
cannam@18	269 }
cannam@18	270
cannam@18	271
cannam@18	272

Mercurial > hg > qm-dsp

annotate dsp/segmentation/cluster_segmenter.c @ 27:b678e72323df