/*
 * cluster_segmenter.c
 * soundbite
 *
 * Created by Mark Levy on 06/04/2006.
 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London.

    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version. See the file
    COPYING included with this distribution for more information.
 *
 */

#include "cluster_segmenter.h"

extern int readmatarray_size(const char *filepath, int n_array, int* t, int* d);
extern int readmatarray(const char *filepath, int n_array, int t, int d, double** arr);

/* converts constant-Q features to normalised chroma */
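/* NB any coefficients beyond the last complete octave (ncoeff % bins of them) are ignored */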
void cq2chroma(double** cq, int nframes, int ncoeff, int bins, double** chroma)
{
    int noct = ncoeff / bins;   /* number of complete octaves in constant-Q */
    int t, b, oct, ix;

    for (t = 0; t < nframes; t++) {
        for (b = 0; b < bins; b++) {
            chroma[t][b] = 0;
        }
        for (oct = 0; oct < noct; oct++) {
            ix = oct * bins;
            for (b = 0; b < bins; b++) {
                chroma[t][b] += fabs(cq[t][ix+b]);
            }
        }
    }
}

/* applies MPEG-7 normalisation to constant-Q features, storing normalised envelope (norm) in last feature dimension */
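/* NB each row features[i] must have room for ncoeff+1 values, as the envelope is written to features[i][ncoeff] */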
void mpeg7_constq(double** features, int nframes, int ncoeff)
{
    int i, j;
    double ss;
    double env;
    double maxenv = 0;

    /* convert const-Q features to dB scale */
    for (i = 0; i < nframes; i++) {
        for (j = 0; j < ncoeff; j++) {
            features[i][j] = 10.0 * log10(features[i][j]+DBL_EPSILON);
        }
    }

    /* normalise each feature vector and add the norm as an extra feature dimension */
    for (i = 0; i < nframes; i++) {
        ss = 0;
        for (j = 0; j < ncoeff; j++) {
            ss += features[i][j] * features[i][j];
        }
        env = sqrt(ss);
        for (j = 0; j < ncoeff; j++) {
            features[i][j] /= env;
        }
        features[i][ncoeff] = env;
        if (env > maxenv) {
            maxenv = env;
        }
    }
    /* normalise the envelopes */
    for (i = 0; i < nframes; i++) {
        features[i][ncoeff] /= maxenv;
    }
}

/* return histograms h[nx*m] of data x[nx] into m bins using a sliding window of length hlen (MUST BE ODD) */
/* NB h is a vector in row major order, as required by cluster_melt() */
/* for historical reasons we normalise each histogram by the sum of its squared counts (not to sum to one) */
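/* e.g. with hlen = 3, the histogram for frame i counts the states x[i-1], x[i] and x[i+1] */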
void create_histograms(int* x, int nx, int m, int hlen, double* h)
{
    int i, j, t;
    double norm;

    for (i = 0; i < nx*m; i++) {
        h[i] = 0;
    }

    for (i = hlen/2; i < nx-hlen/2; i++) {
        for (j = 0; j < m; j++) {
            h[i*m+j] = 0;
        }
        for (t = i-hlen/2; t <= i+hlen/2; t++) {
            ++h[i*m+x[t]];
        }
        norm = 0;
        for (j = 0; j < m; j++) {
            norm += h[i*m+j] * h[i*m+j];
        }
        for (j = 0; j < m; j++) {
            h[i*m+j] /= norm;
        }
    }

    /* duplicate histograms at beginning and end to create one histogram for each data value supplied */
    for (i = 0; i < hlen/2; i++) {
        for (j = 0; j < m; j++) {
            h[i*m+j] = h[hlen/2*m+j];
        }
    }
    for (i = nx-hlen/2; i < nx; i++) {
        for (j = 0; j < m; j++) {
            h[i*m+j] = h[(nx-hlen/2-1)*m+j];
        }
    }
}

/* segment using HMM and then histogram clustering */
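/* NB q must have room for frames_read values: it first receives the decoded
   HMM state sequence and is then overwritten with the cluster assignments */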
void cluster_segment(int* q, double** features, int frames_read, int feature_length, int nHMM_states,
                     int histogram_length, int nclusters, int neighbour_limit)
{
    int i, j;

    /*****************************/
    if (0) {
        /* try just using the predominant bin number as a 'decoded state' */
        nHMM_states = feature_length + 1; /* allow a 'zero' state */
        double chroma_thresh = 0.05;
        double maxval;
        int maxbin = 0;
        for (i = 0; i < frames_read; i++) {
            maxval = 0;
            for (j = 0; j < feature_length; j++) {
                if (features[i][j] > maxval) {
                    maxval = features[i][j];
                    maxbin = j;
                }
            }
            if (maxval > chroma_thresh) {
                q[i] = maxbin;
            } else {
                q[i] = feature_length;
            }
        }

    }
    if (1) {
        /*****************************/

        /* scale all the features to 'balance covariances' during HMM training */
        double scale = 10;
        for (i = 0; i < frames_read; i++)
            for (j = 0; j < feature_length; j++)
                features[i][j] *= scale;

        /* train an HMM on the features */

        /* create a model */
        model_t* model = hmm_init(features, frames_read, feature_length, nHMM_states);

        /* train the model */
        hmm_train(features, frames_read, model);
        /*
        printf("\n\nafter training:\n");
        hmm_print(model);
        */
        /* decode the hidden state sequence */
        viterbi_decode(features, frames_read, model, q);
        hmm_close(model);

        /*****************************/
    }
    /*****************************/

    /*
    fprintf(stderr, "HMM state sequence:\n");
    for (i = 0; i < frames_read; i++)
        fprintf(stderr, "%d ", q[i]);
    fprintf(stderr, "\n\n");
    */

    /* create histograms of states */
    double* h = (double*) malloc(frames_read*nHMM_states*sizeof(double)); /* vector in row major order */
    create_histograms(q, frames_read, nHMM_states, histogram_length, h);

    /* cluster the histograms */
    int nbsched = 20;    /* length of inverse temperature schedule */
    double* bsched = (double*) malloc(nbsched*sizeof(double)); /* inverse temperature schedule */
    double b0 = 100;
    double alpha = 0.7;
    bsched[0] = b0;
    for (i = 1; i < nbsched; i++) {
        bsched[i] = alpha * bsched[i-1];
    }
    cluster_melt(h, nHMM_states, frames_read, bsched, nbsched, nclusters, neighbour_limit, q);

    /* now q holds a sequence of cluster assignments */

    free(h);
    free(bsched);
}

/* segment constant-Q or chroma features */
void constq_segment(int* q, double** features, int frames_read, int bins, int ncoeff, int feature_type,
                    int nHMM_states, int histogram_length, int nclusters, int neighbour_limit)
{
    int feature_length;
    double** chroma;
    int i;

    if (feature_type == FEATURE_TYPE_CONSTQ) {

        mpeg7_constq(features, frames_read, ncoeff);

        /* do PCA on the features (but not the envelope) */
        int ncomponents = 20;
        pca_project(features, frames_read, ncoeff, ncomponents);

        /* copy the envelope so that it immediately follows the chosen components */
        for (i = 0; i < frames_read; i++) {
            features[i][ncomponents] = features[i][ncoeff];
        }

        feature_length = ncomponents + 1;

        cluster_segment(q, features, frames_read, feature_length,
                        nHMM_states, histogram_length, nclusters,
                        neighbour_limit);
    }

    if (feature_type == FEATURE_TYPE_CHROMA) {

        /* convert constant-Q to normalised chroma features */
        chroma = (double**) malloc(frames_read*sizeof(double*));
        for (i = 0; i < frames_read; i++) {
            chroma[i] = (double*) malloc(bins*sizeof(double));
        }

        cq2chroma(features, frames_read, ncoeff, bins, chroma);

        feature_length = bins;

        cluster_segment(q, chroma, frames_read, feature_length,
                        nHMM_states, histogram_length, nclusters,
                        neighbour_limit);

        for (i = 0; i < frames_read; i++)
            free(chroma[i]);
        free(chroma);
    }
}
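
/*
   Minimal usage sketch, guarded out of the build. It assumes a constant-Q
   feature matrix has already been computed elsewhere; the function name and
   the parameter values below are illustrative only, not part of this library.
*/
#if 0
void example_segmentation(double** cq_features, int frames_read, int ncoeff)
{
    /* each row of cq_features must hold ncoeff+1 values so that
       mpeg7_constq() can store the envelope in the last slot */
    int bins = 12;               /* chroma bins per octave */
    int nHMM_states = 40;        /* illustrative value */
    int histogram_length = 15;   /* sliding-window length, must be odd */
    int nclusters = 10;          /* number of segment types */
    int neighbour_limit = 4;     /* illustrative value */
    int i;

    int* q = (int*) malloc(frames_read * sizeof(int));

    constq_segment(q, cq_features, frames_read, bins, ncoeff,
                   FEATURE_TYPE_CONSTQ, nHMM_states, histogram_length,
                   nclusters, neighbour_limit);

    /* q[i] now holds the cluster (segment type) assigned to frame i */
    for (i = 0; i < frames_read; i++) {
        /* ... consume q[i] ... */
    }

    free(q);
}
#endif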