annotate dsp/segmentation/cluster_segmenter.c @ 243:dc30e3864ceb

* merge in segmentation code from soundbite plugin/library repository
author Chris Cannam <c.cannam@qmul.ac.uk>
date Wed, 09 Jan 2008 10:46:25 +0000
parents
children 8bdbda7fb893
rev   line source
c@243 1 /*
c@243 2 * cluster_segmenter.c
c@243 3 * soundbite
c@243 4 *
c@243 5 * Created by Mark Levy on 06/04/2006.
c@243 6 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. All rights reserved.
c@243 7 *
c@243 8 */
c@243 9
c@243 10 #include "cluster_segmenter.h"
c@243 11
c@243 12 extern int readmatarray_size(const char *filepath, int n_array, int* t, int* d);
c@243 13 extern int readmatarray(const char *filepath, int n_array, int t, int d, double** arr);
c@243 14
/*
 * Folds constant-Q features into one chroma vector per frame.
 *
 * For every frame t and every bin b in [0, bins), chroma[t][b] receives the
 * sum of |cq[t][oct*bins + b]| over all complete octaves oct.
 *
 *   cq       input, nframes rows; the first (ncoeff/bins)*bins values of each row are read
 *   nframes  number of rows in cq and in chroma
 *   ncoeff   number of constant-Q coefficients per frame
 *   bins     bins per octave; also the number of values written to each chroma row
 *   chroma   output, nframes rows of at least bins values each, overwritten
 *
 * Coefficients beyond the last complete octave (ncoeff % bins of them) are ignored.
 * The result is NOT normalised: unit-sum normalisation is disabled, and the
 * original author noted that unit-max normalisation made results much worse.
 * If bins <= 0 the function returns without touching chroma.
 */
void cq2chroma(double** cq, int nframes, int ncoeff, int bins, double** chroma)
{
    int noct;
    int t, b, oct;

    /* nothing can be written for a non-positive bin count, and a zero
       bin count would otherwise divide by zero just below */
    if (bins <= 0)
        return;

    noct = ncoeff / bins;   /* number of complete octaves in constant-Q */

    for (t = 0; t < nframes; t++)
    {
        const double* in = cq[t];
        double* out = chroma[t];

        for (b = 0; b < bins; b++)
            out[b] = 0;

        for (oct = 0; oct < noct; oct++)
        {
            const double* octave = in + oct * bins;
            for (b = 0; b < bins; b++)
                out[b] += fabs(octave[b]);
        }
    }
}
c@243 50
c@243 51 /* applies MPEG-7 normalisation to constant-Q features, storing normalised envelope (norm) in last feature dimension */
c@243 52 void mpeg7_constq(double** features, int nframes, int ncoeff)
c@243 53 {
c@243 54 int i, j;
c@243 55 double ss;
c@243 56 double env;
c@243 57 double maxenv = 0;
c@243 58
c@243 59 /* convert const-Q features to dB scale */
c@243 60 for (i = 0; i < nframes; i++)
c@243 61 for (j = 0; j < ncoeff; j++)
c@243 62 features[i][j] = 10.0 * log10(features[i][j]+DBL_EPSILON);
c@243 63
c@243 64 /* normalise each feature vector and add the norm as an extra feature dimension */
c@243 65 for (i = 0; i < nframes; i++)
c@243 66 {
c@243 67 ss = 0;
c@243 68 for (j = 0; j < ncoeff; j++)
c@243 69 ss += features[i][j] * features[i][j];
c@243 70 env = sqrt(ss);
c@243 71 for (j = 0; j < ncoeff; j++)
c@243 72 features[i][j] /= env;
c@243 73 features[i][ncoeff] = env;
c@243 74 if (env > maxenv)
c@243 75 maxenv = env;
c@243 76 }
c@243 77 /* normalise the envelopes */
c@243 78 for (i = 0; i < nframes; i++)
c@243 79 features[i][ncoeff] /= maxenv;
c@243 80 }
c@243 81
/*
 * Builds one m-bin histogram per element of x using a sliding window.
 *
 *   x     nx values; every value must lie in [0, m) because it is used
 *         directly as a bin index
 *   nx    number of values in x, and number of histograms produced
 *   m     number of bins per histogram
 *   hlen  window length, intended to be odd; the window actually used is
 *         centred on each position and spans 2*(hlen/2)+1 values
 *   h     output of nx*m values in row major order (histogram i occupies
 *         h[i*m] .. h[i*m+m-1]), as required by cluster_melt()
 *
 * Each histogram is divided by the sum of its squared counts. This is kept
 * for historical reasons; the histograms are neither unit length nor do they
 * sum to one.
 *
 * Positions closer than hlen/2 to either end have no complete window; they
 * receive a copy of the nearest complete histogram. If no complete window
 * fits at all (nx <= 2*(hlen/2)), or hlen/2 is negative, every histogram is
 * set to zero, since the edge copies would otherwise read and write outside h.
 */
void create_histograms(int* x, int nx, int m, int hlen, double* h)
{
    int half = hlen / 2;
    int i, j, t;

    if (nx <= 0 || m <= 0)
        return;

    if (half < 0 || nx - half <= half)
    {
        /* no position has a complete window: produce empty histograms */
        for (i = 0; i < nx; i++)
            for (j = 0; j < m; j++)
                h[i*m+j] = 0;
        return;
    }

    for (i = half; i < nx - half; i++)
    {
        double* row = h + i*m;
        double sumsq = 0;

        for (j = 0; j < m; j++)
            row[j] = 0;

        /* count the states seen in the window centred on i */
        for (t = i - half; t <= i + half; t++)
            row[x[t]] += 1;

        for (j = 0; j < m; j++)
            sumsq += row[j] * row[j];
        for (j = 0; j < m; j++)
            row[j] /= sumsq;
    }

    /* duplicate histograms at beginning and end to create one histogram for each data value supplied */
    for (i = 0; i < half; i++)
        for (j = 0; j < m; j++)
            h[i*m+j] = h[half*m+j];
    for (i = nx - half; i < nx; i++)
        for (j = 0; j < m; j++)
            h[i*m+j] = h[(nx-half-1)*m+j];
}
c@243 110
c@243 111 /* segment using HMM and then histogram clustering */
c@243 112 void cluster_segment(int* q, double** features, int frames_read, int feature_length, int nHMM_states,
c@243 113 int histogram_length, int nclusters, int neighbour_limit)
c@243 114 {
c@243 115 int i, j;
c@243 116
c@243 117 /*****************************/
c@243 118 if (0) {
c@243 119 /* try just using the predominant bin number as a 'decoded state' */
c@243 120 nHMM_states = feature_length + 1; /* allow a 'zero' state */
c@243 121 double chroma_thresh = 0.05;
c@243 122 double maxval;
c@243 123 int maxbin;
c@243 124 for (i = 0; i < frames_read; i++)
c@243 125 {
c@243 126 maxval = 0;
c@243 127 for (j = 0; j < feature_length; j++)
c@243 128 {
c@243 129 if (features[i][j] > maxval)
c@243 130 {
c@243 131 maxval = features[i][j];
c@243 132 maxbin = j;
c@243 133 }
c@243 134 }
c@243 135 if (maxval > chroma_thresh)
c@243 136 q[i] = maxbin;
c@243 137 else
c@243 138 q[i] = feature_length;
c@243 139 }
c@243 140
c@243 141 }
c@243 142 if (1) {
c@243 143 /*****************************/
c@243 144
c@243 145
c@243 146 /* scale all the features to 'balance covariances' during HMM training */
c@243 147 double scale = 10;
c@243 148 for (i = 0; i < frames_read; i++)
c@243 149 for (j = 0; j < feature_length; j++)
c@243 150 features[i][j] *= scale;
c@243 151
c@243 152 /* train an HMM on the features */
c@243 153
c@243 154 /* create a model */
c@243 155 model_t* model = hmm_init(features, frames_read, feature_length, nHMM_states);
c@243 156
c@243 157 /* train the model */
c@243 158 hmm_train(features, frames_read, model);
c@243 159
c@243 160 printf("\n\nafter training:\n");
c@243 161 hmm_print(model);
c@243 162
c@243 163 /* decode the hidden state sequence */
c@243 164 viterbi_decode(features, frames_read, model, q);
c@243 165 hmm_close(model);
c@243 166
c@243 167 /*****************************/
c@243 168 }
c@243 169 /*****************************/
c@243 170
c@243 171
c@243 172 fprintf(stderr, "HMM state sequence:\n");
c@243 173 for (i = 0; i < frames_read; i++)
c@243 174 fprintf(stderr, "%d ", q[i]);
c@243 175 fprintf(stderr, "\n\n");
c@243 176
c@243 177 /* create histograms of states */
c@243 178 double* h = (double*) malloc(frames_read*nHMM_states*sizeof(double)); /* vector in row major order */
c@243 179 create_histograms(q, frames_read, nHMM_states, histogram_length, h);
c@243 180
c@243 181 /* cluster the histograms */
c@243 182 int nbsched = 20; /* length of inverse temperature schedule */
c@243 183 double* bsched = (double*) malloc(nbsched*sizeof(double)); /* inverse temperature schedule */
c@243 184 double b0 = 100;
c@243 185 double alpha = 0.7;
c@243 186 bsched[0] = b0;
c@243 187 for (i = 1; i < nbsched; i++)
c@243 188 bsched[i] = alpha * bsched[i-1];
c@243 189 cluster_melt(h, nHMM_states, frames_read, bsched, nbsched, nclusters, neighbour_limit, q);
c@243 190
c@243 191 /* now q holds a sequence of cluster assignments */
c@243 192
c@243 193 free(h);
c@243 194 free(bsched);
c@243 195 }
c@243 196
c@243 197 /* segment constant-Q or chroma features */
c@243 198 void constq_segment(int* q, double** features, int frames_read, int bins, int ncoeff, int feature_type,
c@243 199 int nHMM_states, int histogram_length, int nclusters, int neighbour_limit)
c@243 200 {
c@243 201 int feature_length;
c@243 202 double** chroma;
c@243 203 int i;
c@243 204
c@243 205 if (feature_type == FEATURE_TYPE_CONSTQ)
c@243 206 {
c@243 207 fprintf(stderr, "Converting to dB and normalising...\n");
c@243 208
c@243 209 mpeg7_constq(features, frames_read, ncoeff);
c@243 210
c@243 211 fprintf(stderr, "Running PCA...\n");
c@243 212
c@243 213 /* do PCA on the features (but not the envelope) */
c@243 214 int ncomponents = 20;
c@243 215 pca_project(features, frames_read, ncoeff, ncomponents);
c@243 216
c@243 217 /* copy the envelope so that it immediatly follows the chosen components */
c@243 218 for (i = 0; i < frames_read; i++)
c@243 219 features[i][ncomponents] = features[i][ncoeff];
c@243 220
c@243 221 feature_length = ncomponents + 1;
c@243 222
c@243 223 /**************************************
c@243 224 //TEST
c@243 225 // feature file name
c@243 226 char* dir = "/Users/mark/documents/semma/audio/";
c@243 227 char* file_name = (char*) malloc((strlen(dir) + strlen(trackname) + strlen("_features_c20r8h0.2f0.6.mat") + 1)*sizeof(char));
c@243 228 strcpy(file_name, dir);
c@243 229 strcat(file_name, trackname);
c@243 230 strcat(file_name, "_features_c20r8h0.2f0.6.mat");
c@243 231
c@243 232 // get the features from Matlab from mat-file
c@243 233 int frames_in_file;
c@243 234 readmatarray_size(file_name, 2, &frames_in_file, &feature_length);
c@243 235 readmatarray(file_name, 2, frames_in_file, feature_length, features);
c@243 236 // copy final frame to ensure that we get as many as we expected
c@243 237 int missing_frames = frames_read - frames_in_file;
c@243 238 while (missing_frames > 0)
c@243 239 {
c@243 240 for (i = 0; i < feature_length; i++)
c@243 241 features[frames_read-missing_frames][i] = features[frames_read-missing_frames-1][i];
c@243 242 --missing_frames;
c@243 243 }
c@243 244
c@243 245 free(file_name);
c@243 246 ******************************************/
c@243 247
c@243 248 cluster_segment(q, features, frames_read, feature_length, nHMM_states, histogram_length, nclusters, neighbour_limit);
c@243 249 }
c@243 250
c@243 251 if (feature_type == FEATURE_TYPE_CHROMA)
c@243 252 {
c@243 253 fprintf(stderr, "Converting to chroma features...\n");
c@243 254
c@243 255 /* convert constant-Q to normalised chroma features */
c@243 256 chroma = (double**) malloc(frames_read*sizeof(double*));
c@243 257 for (i = 0; i < frames_read; i++)
c@243 258 chroma[i] = (double*) malloc(bins*sizeof(double));
c@243 259 cq2chroma(features, frames_read, ncoeff, bins, chroma);
c@243 260 feature_length = bins;
c@243 261
c@243 262 cluster_segment(q, chroma, frames_read, feature_length, nHMM_states, histogram_length, nclusters, neighbour_limit);
c@243 263
c@243 264 for (i = 0; i < frames_read; i++)
c@243 265 free(chroma[i]);
c@243 266 free(chroma);
c@243 267 }
c@243 268 }
c@243 269
c@243 270
c@243 271