annotate dsp/segmentation/cluster_segmenter.c @ 209:ccd2019190bf msvc

Some MSVC fixes, including (temporarily, probably) renaming the FFT source file to avoid getting it mixed up with the Vamp SDK one in our object dir
author Chris Cannam
date Thu, 01 Feb 2018 16:34:08 +0000
parents e5907ae6de17
children 175e51ae78eb
rev   line source
cannam@18 1 /*
cannam@18 2 * cluster_segmenter.c
cannam@18 3 * soundbite
cannam@18 4 *
cannam@18 5 * Created by Mark Levy on 06/04/2006.
Chris@84 6 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London.
Chris@84 7
Chris@84 8 This program is free software; you can redistribute it and/or
Chris@84 9 modify it under the terms of the GNU General Public License as
Chris@84 10 published by the Free Software Foundation; either version 2 of the
Chris@84 11 License, or (at your option) any later version. See the file
Chris@84 12 COPYING included with this distribution for more information.
cannam@18 13 *
cannam@18 14 */
cannam@18 15
cannam@18 16 #include "cluster_segmenter.h"
cannam@18 17
cannam@18 18 extern int readmatarray_size(const char *filepath, int n_array, int* t, int* d);
cannam@18 19 extern int readmatarray(const char *filepath, int n_array, int t, int d, double** arr);
cannam@18 20
cannam@18 21 /* converts constant-Q features to normalised chroma */
cannam@18 22 void cq2chroma(double** cq, int nframes, int ncoeff, int bins, double** chroma)
cannam@18 23 {
cannam@18 24 int noct = ncoeff / bins; /* number of complete octaves in constant-Q */
cannam@18 25 int t, b, oct, ix;
cannam@18 26 //double maxchroma; /* max chroma value at each time, for normalisation */
cannam@18 27 //double sum; /* for normalisation */
cannam@18 28
cannam@18 29 for (t = 0; t < nframes; t++)
cannam@18 30 {
cannam@18 31 for (b = 0; b < bins; b++)
cannam@18 32 chroma[t][b] = 0;
cannam@18 33 for (oct = 0; oct < noct; oct++)
cannam@18 34 {
cannam@18 35 ix = oct * bins;
cannam@18 36 for (b = 0; b < bins; b++)
cannam@18 37 chroma[t][b] += fabs(cq[t][ix+b]);
cannam@18 38 }
cannam@18 39 /* normalise to unit sum
cannam@18 40 sum = 0;
cannam@18 41 for (b = 0; b < bins; b++)
cannam@18 42 sum += chroma[t][b];
cannam@18 43 for (b = 0; b < bins; b++)
cannam@18 44 chroma[t][b] /= sum;
cannam@20 45 */
cannam@18 46 /* normalise to unit max - NO this made results much worse!
cannam@18 47 maxchroma = 0;
cannam@18 48 for (b = 0; b < bins; b++)
cannam@18 49 if (chroma[t][b] > maxchroma)
cannam@18 50 maxchroma = chroma[t][b];
cannam@18 51 if (maxchroma > 0)
cannam@18 52 for (b = 0; b < bins; b++)
cannam@18 53 chroma[t][b] /= maxchroma;
cannam@18 54 */
cannam@18 55 }
cannam@18 56 }
cannam@18 57
cannam@18 58 /* applies MPEG-7 normalisation to constant-Q features, storing normalised envelope (norm) in last feature dimension */
cannam@18 59 void mpeg7_constq(double** features, int nframes, int ncoeff)
cannam@18 60 {
cannam@18 61 int i, j;
cannam@18 62 double ss;
cannam@18 63 double env;
cannam@18 64 double maxenv = 0;
cannam@18 65
cannam@18 66 /* convert const-Q features to dB scale */
cannam@18 67 for (i = 0; i < nframes; i++)
cannam@18 68 for (j = 0; j < ncoeff; j++)
cannam@18 69 features[i][j] = 10.0 * log10(features[i][j]+DBL_EPSILON);
cannam@18 70
cannam@18 71 /* normalise each feature vector and add the norm as an extra feature dimension */
cannam@18 72 for (i = 0; i < nframes; i++)
cannam@18 73 {
cannam@18 74 ss = 0;
cannam@18 75 for (j = 0; j < ncoeff; j++)
cannam@18 76 ss += features[i][j] * features[i][j];
cannam@18 77 env = sqrt(ss);
cannam@18 78 for (j = 0; j < ncoeff; j++)
cannam@18 79 features[i][j] /= env;
cannam@18 80 features[i][ncoeff] = env;
cannam@18 81 if (env > maxenv)
cannam@18 82 maxenv = env;
cannam@18 83 }
cannam@18 84 /* normalise the envelopes */
cannam@18 85 for (i = 0; i < nframes; i++)
cannam@18 86 features[i][ncoeff] /= maxenv;
cannam@18 87 }
cannam@18 88
cannam@18 89 /* return histograms h[nx*m] of data x[nx] into m bins using a sliding window of length h_len (MUST BE ODD) */
cannam@18 90 /* NB h is a vector in row major order, as required by cluster_melt() */
cannam@18 91 /* for historical reasons we normalise the histograms by their norm (not to sum to one) */
cannam@18 92 void create_histograms(int* x, int nx, int m, int hlen, double* h)
cannam@18 93 {
cannam@18 94 int i, j, t;
cannam@18 95 double norm;
cannam@41 96
cannam@41 97 for (i = 0; i < nx*m; i++)
cannam@41 98 h[i] = 0;
cannam@41 99
cannam@18 100 for (i = hlen/2; i < nx-hlen/2; i++)
cannam@18 101 {
cannam@18 102 for (j = 0; j < m; j++)
cannam@18 103 h[i*m+j] = 0;
cannam@18 104 for (t = i-hlen/2; t <= i+hlen/2; t++)
cannam@18 105 ++h[i*m+x[t]];
cannam@18 106 norm = 0;
cannam@18 107 for (j = 0; j < m; j++)
cannam@18 108 norm += h[i*m+j] * h[i*m+j];
cannam@18 109 for (j = 0; j < m; j++)
cannam@18 110 h[i*m+j] /= norm;
cannam@18 111 }
cannam@18 112
cannam@18 113 /* duplicate histograms at beginning and end to create one histogram for each data value supplied */
cannam@18 114 for (i = 0; i < hlen/2; i++)
cannam@18 115 for (j = 0; j < m; j++)
cannam@18 116 h[i*m+j] = h[hlen/2*m+j];
cannam@18 117 for (i = nx-hlen/2; i < nx; i++)
cannam@18 118 for (j = 0; j < m; j++)
cannam@18 119 h[i*m+j] = h[(nx-hlen/2-1)*m+j];
cannam@18 120 }
cannam@18 121
cannam@18 122 /* segment using HMM and then histogram clustering */
cannam@18 123 void cluster_segment(int* q, double** features, int frames_read, int feature_length, int nHMM_states,
cannam@18 124 int histogram_length, int nclusters, int neighbour_limit)
cannam@18 125 {
cannam@18 126 int i, j;
cannam@18 127
cannam@18 128 /*****************************/
cannam@18 129 if (0) {
cannam@18 130 /* try just using the predominant bin number as a 'decoded state' */
cannam@18 131 nHMM_states = feature_length + 1; /* allow a 'zero' state */
cannam@18 132 double chroma_thresh = 0.05;
cannam@18 133 double maxval;
cannam@18 134 int maxbin;
cannam@18 135 for (i = 0; i < frames_read; i++)
cannam@18 136 {
cannam@18 137 maxval = 0;
cannam@18 138 for (j = 0; j < feature_length; j++)
cannam@18 139 {
cannam@18 140 if (features[i][j] > maxval)
cannam@18 141 {
cannam@18 142 maxval = features[i][j];
cannam@18 143 maxbin = j;
cannam@18 144 }
cannam@18 145 }
cannam@18 146 if (maxval > chroma_thresh)
cannam@18 147 q[i] = maxbin;
cannam@18 148 else
cannam@18 149 q[i] = feature_length;
cannam@18 150 }
cannam@18 151
cannam@18 152 }
cannam@18 153 if (1) {
cannam@18 154 /*****************************/
cannam@18 155
cannam@18 156
cannam@18 157 /* scale all the features to 'balance covariances' during HMM training */
cannam@18 158 double scale = 10;
cannam@18 159 for (i = 0; i < frames_read; i++)
cannam@18 160 for (j = 0; j < feature_length; j++)
cannam@18 161 features[i][j] *= scale;
cannam@18 162
cannam@18 163 /* train an HMM on the features */
cannam@18 164
cannam@18 165 /* create a model */
cannam@18 166 model_t* model = hmm_init(features, frames_read, feature_length, nHMM_states);
cannam@18 167
cannam@18 168 /* train the model */
cannam@18 169 hmm_train(features, frames_read, model);
cannam@58 170 /*
cannam@18 171 printf("\n\nafter training:\n");
cannam@18 172 hmm_print(model);
cannam@58 173 */
cannam@18 174 /* decode the hidden state sequence */
cannam@18 175 viterbi_decode(features, frames_read, model, q);
cannam@18 176 hmm_close(model);
cannam@18 177
cannam@18 178 /*****************************/
cannam@18 179 }
cannam@18 180 /*****************************/
cannam@18 181
cannam@18 182
cannam@58 183 /*
cannam@18 184 fprintf(stderr, "HMM state sequence:\n");
cannam@18 185 for (i = 0; i < frames_read; i++)
cannam@18 186 fprintf(stderr, "%d ", q[i]);
cannam@18 187 fprintf(stderr, "\n\n");
cannam@58 188 */
cannam@18 189
cannam@18 190 /* create histograms of states */
cannam@18 191 double* h = (double*) malloc(frames_read*nHMM_states*sizeof(double)); /* vector in row major order */
cannam@18 192 create_histograms(q, frames_read, nHMM_states, histogram_length, h);
cannam@18 193
cannam@18 194 /* cluster the histograms */
cannam@18 195 int nbsched = 20; /* length of inverse temperature schedule */
cannam@18 196 double* bsched = (double*) malloc(nbsched*sizeof(double)); /* inverse temperature schedule */
cannam@18 197 double b0 = 100;
cannam@18 198 double alpha = 0.7;
cannam@18 199 bsched[0] = b0;
cannam@18 200 for (i = 1; i < nbsched; i++)
cannam@18 201 bsched[i] = alpha * bsched[i-1];
cannam@18 202 cluster_melt(h, nHMM_states, frames_read, bsched, nbsched, nclusters, neighbour_limit, q);
cannam@18 203
cannam@18 204 /* now q holds a sequence of cluster assignments */
cannam@18 205
cannam@18 206 free(h);
cannam@18 207 free(bsched);
cannam@18 208 }
cannam@18 209
cannam@18 210 /* segment constant-Q or chroma features */
cannam@18 211 void constq_segment(int* q, double** features, int frames_read, int bins, int ncoeff, int feature_type,
cannam@18 212 int nHMM_states, int histogram_length, int nclusters, int neighbour_limit)
cannam@18 213 {
cannam@18 214 int feature_length;
cannam@18 215 double** chroma;
cannam@18 216 int i;
cannam@18 217
cannam@18 218 if (feature_type == FEATURE_TYPE_CONSTQ)
cannam@18 219 {
cannam@58 220 /* fprintf(stderr, "Converting to dB and normalising...\n");
cannam@58 221 */
cannam@18 222 mpeg7_constq(features, frames_read, ncoeff);
cannam@58 223 /*
cannam@18 224 fprintf(stderr, "Running PCA...\n");
cannam@58 225 */
cannam@18 226 /* do PCA on the features (but not the envelope) */
cannam@18 227 int ncomponents = 20;
cannam@18 228 pca_project(features, frames_read, ncoeff, ncomponents);
cannam@18 229
cannam@18 230 /* copy the envelope so that it immediatly follows the chosen components */
cannam@18 231 for (i = 0; i < frames_read; i++)
cannam@18 232 features[i][ncomponents] = features[i][ncoeff];
cannam@18 233
cannam@18 234 feature_length = ncomponents + 1;
cannam@18 235
cannam@18 236 /**************************************
cannam@18 237 //TEST
cannam@18 238 // feature file name
cannam@18 239 char* dir = "/Users/mark/documents/semma/audio/";
cannam@18 240 char* file_name = (char*) malloc((strlen(dir) + strlen(trackname) + strlen("_features_c20r8h0.2f0.6.mat") + 1)*sizeof(char));
cannam@18 241 strcpy(file_name, dir);
cannam@18 242 strcat(file_name, trackname);
cannam@18 243 strcat(file_name, "_features_c20r8h0.2f0.6.mat");
cannam@18 244
cannam@18 245 // get the features from Matlab from mat-file
cannam@18 246 int frames_in_file;
cannam@18 247 readmatarray_size(file_name, 2, &frames_in_file, &feature_length);
cannam@18 248 readmatarray(file_name, 2, frames_in_file, feature_length, features);
cannam@18 249 // copy final frame to ensure that we get as many as we expected
cannam@18 250 int missing_frames = frames_read - frames_in_file;
cannam@18 251 while (missing_frames > 0)
cannam@18 252 {
cannam@18 253 for (i = 0; i < feature_length; i++)
cannam@18 254 features[frames_read-missing_frames][i] = features[frames_read-missing_frames-1][i];
cannam@18 255 --missing_frames;
cannam@18 256 }
cannam@18 257
cannam@18 258 free(file_name);
cannam@18 259 ******************************************/
cannam@18 260
cannam@18 261 cluster_segment(q, features, frames_read, feature_length, nHMM_states, histogram_length, nclusters, neighbour_limit);
cannam@18 262 }
cannam@18 263
cannam@18 264 if (feature_type == FEATURE_TYPE_CHROMA)
cannam@18 265 {
cannam@58 266 /*
cannam@18 267 fprintf(stderr, "Converting to chroma features...\n");
cannam@58 268 */
cannam@18 269 /* convert constant-Q to normalised chroma features */
cannam@18 270 chroma = (double**) malloc(frames_read*sizeof(double*));
cannam@18 271 for (i = 0; i < frames_read; i++)
cannam@18 272 chroma[i] = (double*) malloc(bins*sizeof(double));
cannam@18 273 cq2chroma(features, frames_read, ncoeff, bins, chroma);
cannam@18 274 feature_length = bins;
cannam@18 275
cannam@18 276 cluster_segment(q, chroma, frames_read, feature_length, nHMM_states, histogram_length, nclusters, neighbour_limit);
cannam@18 277
cannam@18 278 for (i = 0; i < frames_read; i++)
cannam@18 279 free(chroma[i]);
cannam@18 280 free(chroma);
cannam@18 281 }
cannam@18 282 }
cannam@18 283
cannam@18 284
cannam@18 285