qm-dsp: dsp/segmentation/cluster

annotate dsp/segmentation/cluster_segmenter.c @ 495:1bea13b8f951

Style fixes in constant-Q: avoid unsigned, reuse our Window class, fix comments

author	Chris Cannam <cannam@all-day-breakfast.com>
date	Fri, 31 May 2019 18:25:31 +0100
parents	d48276a3ae24
children

rev	line source
cannam@484	1 /* -- c-basic-offset: 4 indent-tabs-mode: nil -- vi:set ts=8 sts=4 sw=4: */
c@243	2 /*
c@243	3 * cluster_segmenter.c
c@243	4 * soundbite
c@243	5 *
c@243	6 * Created by Mark Levy on 06/04/2006.
c@309	7 * Copyright 2006 Centre for Digital Music, Queen Mary, University of London.
c@309	8
cannam@480	9 This program is free software; you can redistribute it and/or
cannam@480	10 modify it under the terms of the GNU General Public License as
cannam@480	11 published by the Free Software Foundation; either version 2 of the
cannam@480	12 License, or (at your option) any later version. See the file
cannam@480	13 COPYING included with this distribution for more information.
c@243	14 *
c@243	15 */
c@243	16
c@243	17 #include "cluster_segmenter.h"
c@243	18
c@243	19 extern int readmatarray_size(const char filepath, int n_array, int t, int* d);
c@243	20 extern int readmatarray(const char filepath, int n_array, int t, int d, double* arr);
c@243	21
c@243	22 /* converts constant-Q features to normalised chroma */
c@243	23 void cq2chroma(double cq, int nframes, int ncoeff, int bins, double chroma)
c@243	24 {
cannam@480	25 int noct = ncoeff / bins; /* number of complete octaves in constant-Q */
cannam@480	26 int t, b, oct, ix;
cannam@480	27
cannam@480	28 for (t = 0; t < nframes; t++) {
cannam@480	29 for (b = 0; b < bins; b++) {
cannam@480	30 chroma[t][b] = 0;
cannam@480	31 }
cannam@480	32 for (oct = 0; oct < noct; oct++) {
cannam@480	33 ix = oct * bins;
cannam@480	34 for (b = 0; b < bins; b++) {
cannam@480	35 chroma[t][b] += fabs(cq[t][ix+b]);
cannam@480	36 }
cannam@480	37 }
cannam@480	38 }
c@243	39 }
c@243	40
c@243	41 /* applies MPEG-7 normalisation to constant-Q features, storing normalised envelope (norm) in last feature dimension */
c@243	42 void mpeg7_constq(double** features, int nframes, int ncoeff)
c@243	43 {
cannam@480	44 int i, j;
cannam@480	45 double ss;
cannam@480	46 double env;
cannam@480	47 double maxenv = 0;
cannam@480	48
cannam@480	49 /* convert const-Q features to dB scale */
cannam@480	50 for (i = 0; i < nframes; i++) {
cannam@480	51 for (j = 0; j < ncoeff; j++) {
cannam@480	52 features[i][j] = 10.0 * log10(features[i][j]+DBL_EPSILON);
cannam@480	53 }
cannam@480	54 }
cannam@480	55
cannam@480	56 /* normalise each feature vector and add the norm as an extra feature dimension */
cannam@480	57 for (i = 0; i < nframes; i++) {
cannam@480	58 ss = 0;
cannam@480	59 for (j = 0; j < ncoeff; j++) {
cannam@480	60 ss += features[i][j] * features[i][j];
cannam@480	61 }
cannam@480	62 env = sqrt(ss);
cannam@480	63 for (j = 0; j < ncoeff; j++) {
cannam@480	64 features[i][j] /= env;
cannam@480	65 }
cannam@480	66 features[i][ncoeff] = env;
cannam@480	67 if (env > maxenv) {
cannam@480	68 maxenv = env;
cannam@480	69 }
cannam@480	70 }
cannam@480	71 /* normalise the envelopes */
cannam@480	72 for (i = 0; i < nframes; i++) {
cannam@480	73 features[i][ncoeff] /= maxenv;
cannam@480	74 }
c@243	75 }
c@243	76
c@243	77 /* return histograms h[nxm] of data x[nx] into m bins using a sliding window of length h_len (MUST BE ODD) /
c@243	78 /* NB h is a vector in row major order, as required by cluster_melt() */
c@243	79 /* for historical reasons we normalise the histograms by their norm (not to sum to one) */
c@243	80 void create_histograms(int* x, int nx, int m, int hlen, double* h)
c@243	81 {
cannam@480	82 int i, j, t;
cannam@480	83 double norm;
c@266	84
cannam@480	85 for (i = 0; i < nx*m; i++) {
cannam@480	86 h[i] = 0;
cannam@480	87 }
c@266	88
cannam@480	89 for (i = hlen/2; i < nx-hlen/2; i++) {
cannam@480	90 for (j = 0; j < m; j++) {
cannam@480	91 h[i*m+j] = 0;
cannam@480	92 }
cannam@480	93 for (t = i-hlen/2; t <= i+hlen/2; t++) {
cannam@480	94 ++h[i*m+x[t]];
cannam@480	95 }
cannam@480	96 norm = 0;
cannam@480	97 for (j = 0; j < m; j++) {
cannam@480	98 norm += h[im+j] h[i*m+j];
cannam@480	99 }
cannam@480	100 for (j = 0; j < m; j++) {
cannam@480	101 h[i*m+j] /= norm;
cannam@480	102 }
cannam@480	103 }
cannam@480	104
cannam@480	105 /* duplicate histograms at beginning and end to create one histogram for each data value supplied */
cannam@480	106 for (i = 0; i < hlen/2; i++) {
cannam@480	107 for (j = 0; j < m; j++) {
cannam@480	108 h[im+j] = h[hlen/2m+j];
cannam@480	109 }
cannam@480	110 }
cannam@480	111 for (i = nx-hlen/2; i < nx; i++) {
cannam@480	112 for (j = 0; j < m; j++) {
cannam@480	113 h[im+j] = h[(nx-hlen/2-1)m+j];
cannam@480	114 }
cannam@480	115 }
c@243	116 }
c@243	117
c@243	118 /* segment using HMM and then histogram clustering */
c@243	119 void cluster_segment(int* q, double** features, int frames_read, int feature_length, int nHMM_states,
cannam@480	120 int histogram_length, int nclusters, int neighbour_limit)
c@243	121 {
cannam@480	122 int i, j;
cannam@480	123
cannam@480	124 /*****************************/
cannam@480	125 if (0) {
cannam@480	126 /* try just using the predominant bin number as a 'decoded state' */
cannam@480	127 nHMM_states = feature_length + 1; /* allow a 'zero' state */
cannam@480	128 double chroma_thresh = 0.05;
cannam@480	129 double maxval;
cannam@480	130 int maxbin;
cannam@480	131 for (i = 0; i < frames_read; i++) {
cannam@480	132 maxval = 0;
cannam@480	133 for (j = 0; j < feature_length; j++) {
cannam@480	134 if (features[i][j] > maxval) {
cannam@480	135 maxval = features[i][j];
cannam@480	136 maxbin = j;
cannam@480	137 }
cannam@480	138 }
cannam@480	139 if (maxval > chroma_thresh) {
cannam@480	140 q[i] = maxbin;
cannam@480	141 } else {
cannam@480	142 q[i] = feature_length;
cannam@480	143 }
cannam@480	144 }
cannam@480	145
cannam@480	146 }
cannam@480	147 if (1) {
cannam@480	148 /*****************************/
cannam@480	149
cannam@480	150 /* scale all the features to 'balance covariances' during HMM training */
cannam@480	151 double scale = 10;
cannam@480	152 for (i = 0; i < frames_read; i++)
cannam@480	153 for (j = 0; j < feature_length; j++)
cannam@480	154 features[i][j] *= scale;
cannam@480	155
cannam@480	156 /* train an HMM on the features */
cannam@480	157
cannam@480	158 /* create a model */
cannam@480	159 model_t* model = hmm_init(features, frames_read, feature_length, nHMM_states);
cannam@480	160
cannam@480	161 /* train the model */
cannam@480	162 hmm_train(features, frames_read, model);
cannam@480	163 /*
cannam@480	164 printf("\n\nafter training:\n");
cannam@480	165 hmm_print(model);
cannam@480	166 */
cannam@480	167 /* decode the hidden state sequence */
cannam@480	168 viterbi_decode(features, frames_read, model, q);
cannam@480	169 hmm_close(model);
cannam@480	170
cannam@480	171 /*****************************/
cannam@480	172 }
cannam@480	173 /*****************************/
c@243	174
c@283	175 /*
cannam@480	176 fprintf(stderr, "HMM state sequence:\n");
cannam@480	177 for (i = 0; i < frames_read; i++)
cannam@480	178 fprintf(stderr, "%d ", q[i]);
cannam@480	179 fprintf(stderr, "\n\n");
c@283	180 */
cannam@480	181
cannam@480	182 /* create histograms of states */
cannam@480	183 double* h = (double) malloc(frames_readnHMM_statessizeof(double)); / vector in row major order */
cannam@480	184 create_histograms(q, frames_read, nHMM_states, histogram_length, h);
cannam@480	185
cannam@480	186 /* cluster the histograms */
cannam@480	187 int nbsched = 20; /* length of inverse temperature schedule */
cannam@480	188 double* bsched = (double) malloc(nbschedsizeof(double)); /* inverse temperature schedule */
cannam@480	189 double b0 = 100;
cannam@480	190 double alpha = 0.7;
cannam@480	191 bsched[0] = b0;
cannam@480	192 for (i = 1; i < nbsched; i++) {
cannam@480	193 bsched[i] = alpha * bsched[i-1];
cannam@480	194 }
cannam@480	195 cluster_melt(h, nHMM_states, frames_read, bsched, nbsched, nclusters, neighbour_limit, q);
cannam@480	196
cannam@480	197 /* now q holds a sequence of cluster assignments */
cannam@480	198
cannam@480	199 free(h);
cannam@480	200 free(bsched);
c@243	201 }
c@243	202
c@243	203 /* segment constant-Q or chroma features */
c@243	204 void constq_segment(int* q, double** features, int frames_read, int bins, int ncoeff, int feature_type,
cannam@480	205 int nHMM_states, int histogram_length, int nclusters, int neighbour_limit)
c@243	206 {
cannam@480	207 int feature_length;
cannam@480	208 double** chroma;
cannam@480	209 int i;
cannam@480	210
cannam@480	211 if (feature_type == FEATURE_TYPE_CONSTQ) {
cannam@480	212
cannam@480	213 mpeg7_constq(features, frames_read, ncoeff);
cannam@480	214
cannam@480	215 /* do PCA on the features (but not the envelope) */
cannam@480	216 int ncomponents = 20;
cannam@480	217 pca_project(features, frames_read, ncoeff, ncomponents);
cannam@480	218
cannam@480	219 /* copy the envelope so that it immediatly follows the chosen components */
cannam@480	220 for (i = 0; i < frames_read; i++) {
cannam@480	221 features[i][ncomponents] = features[i][ncoeff];
cannam@480	222 }
cannam@480	223
cannam@480	224 feature_length = ncomponents + 1;
cannam@480	225
cannam@480	226 cluster_segment(q, features, frames_read, feature_length,
cannam@480	227 nHMM_states, histogram_length, nclusters,
cannam@480	228 neighbour_limit);
cannam@480	229 }
cannam@480	230
cannam@480	231 if (feature_type == FEATURE_TYPE_CHROMA) {
cannam@480	232
cannam@480	233 /* convert constant-Q to normalised chroma features */
cannam@480	234 chroma = (double*) malloc(frames_readsizeof(double*));
cannam@480	235 for (i = 0; i < frames_read; i++) {
cannam@480	236 chroma[i] = (double) malloc(binssizeof(double));
cannam@480	237 }
cannam@480	238
cannam@480	239 cq2chroma(features, frames_read, ncoeff, bins, chroma);
cannam@480	240
cannam@480	241 feature_length = bins;
cannam@480	242
cannam@480	243 cluster_segment(q, chroma, frames_read, feature_length,
cannam@480	244 nHMM_states, histogram_length, nclusters,
cannam@480	245 neighbour_limit);
cannam@480	246
cannam@480	247 for (i = 0; i < frames_read; i++)
cannam@480	248 free(chroma[i]);
cannam@480	249 free(chroma);
cannam@480	250 }
c@243	251 }
c@243	252
c@243	253
c@243	254

Mercurial > hg > qm-dsp

annotate dsp/segmentation/cluster_segmenter.c @ 495:1bea13b8f951