/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
/*
 *  cluster_segmenter.c
 *  soundbite
 *
 *  Created by Mark Levy on 06/04/2006.
 *  Copyright 2006 Centre for Digital Music, Queen Mary, University of London.

    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
 *
 */

#include "cluster_segmenter.h"

extern int readmatarray_size(const char *filepath, int n_array, int* t, int* d);
extern int readmatarray(const char *filepath, int n_array, int t, int d, double** arr);

/* converts constant-Q features to normalised chroma */
void cq2chroma(double** cq, int nframes, int ncoeff, int bins, double** chroma)
{
    int noct = ncoeff / bins;   /* number of complete octaves in constant-Q */
    int t, b, oct, ix;

    for (t = 0; t < nframes; t++) {
        for (b = 0; b < bins; b++) {
            chroma[t][b] = 0;
        }
        for (oct = 0; oct < noct; oct++) {
            ix = oct * bins;
            for (b = 0; b < bins; b++) {
                chroma[t][b] += fabs(cq[t][ix+b]);
            }
        }
    }
}

/* applies MPEG-7 normalisation to constant-Q features, storing normalised envelope (norm) in last feature dimension */
void mpeg7_constq(double** features, int nframes, int ncoeff)
{
    int i, j;
    double ss;
    double env;
    double maxenv = 0;

    /* convert const-Q features to dB scale */
    for (i = 0; i < nframes; i++) {
        for (j = 0; j < ncoeff; j++) {
            features[i][j] = 10.0 * log10(features[i][j]+DBL_EPSILON);
        }
    }

    /* normalise each feature vector and add the norm as an extra feature dimension */
    for (i = 0; i < nframes; i++) {
        ss = 0;
        for (j = 0; j < ncoeff; j++) {
            ss += features[i][j] * features[i][j];
        }
        env = sqrt(ss);
        for (j = 0; j < ncoeff; j++) {
            features[i][j] /= env;
        }
        features[i][ncoeff] = env;
        if (env > maxenv) {
            maxenv = env;
        }
    }
    /* normalise the envelopes */
    for (i = 0; i < nframes; i++) {
        features[i][ncoeff] /= maxenv;
    }
}

/* return histograms h[nx*m] of data x[nx] into m bins using a sliding window of length hlen (MUST BE ODD) */
/* NB h is a vector in row major order, as required by cluster_melt() */
/* for historical reasons we normalise the histograms by their norm (not to sum to one) */
void create_histograms(int* x, int nx, int m, int hlen, double* h)
{
    int i, j, t;
    double norm;

    for (i = 0; i < nx*m; i++) {
        h[i] = 0;
    }

    for (i = hlen/2; i < nx-hlen/2; i++) {
        for (j = 0; j < m; j++) {
            h[i*m+j] = 0;
        }
        for (t = i-hlen/2; t <= i+hlen/2; t++) {
            ++h[i*m+x[t]];
        }
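        /* normalise this histogram: each bin is divided by the sum of squared counts */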
        norm = 0;
        for (j = 0; j < m; j++) {
            norm += h[i*m+j] * h[i*m+j];
        }
        for (j = 0; j < m; j++) {
            h[i*m+j] /= norm;
        }
    }

    /* duplicate histograms at beginning and end to create one histogram for each data value supplied */
    for (i = 0; i < hlen/2; i++) {
        for (j = 0; j < m; j++) {
            h[i*m+j] = h[hlen/2*m+j];
        }
    }
    for (i = nx-hlen/2; i < nx; i++) {
        for (j = 0; j < m; j++) {
            h[i*m+j] = h[(nx-hlen/2-1)*m+j];
        }
    }
}

/* segment using HMM and then histogram clustering */
void cluster_segment(int* q, double** features, int frames_read, int feature_length, int nHMM_states,
                     int histogram_length, int nclusters, int neighbour_limit)
{
    int i, j;

    /*****************************/
    if (0) {
        /* try just using the predominant bin number as a 'decoded state' */
        nHMM_states = feature_length + 1;   /* allow a 'zero' state */
        double chroma_thresh = 0.05;
        double maxval;
        int maxbin = 0;   /* initialised in case no bin exceeds zero */
        for (i = 0; i < frames_read; i++) {
            maxval = 0;
            for (j = 0; j < feature_length; j++) {
                if (features[i][j] > maxval) {
                    maxval = features[i][j];
                    maxbin = j;
                }
            }
            if (maxval > chroma_thresh) {
                q[i] = maxbin;
            } else {
                q[i] = feature_length;
            }
        }

    }
    if (1) {
        /*****************************/

        /* scale all the features to 'balance covariances' during HMM training */
        double scale = 10;
        for (i = 0; i < frames_read; i++)
            for (j = 0; j < feature_length; j++)
                features[i][j] *= scale;

        /* train an HMM on the features */

        /* create a model */
        model_t* model = hmm_init(features, frames_read, feature_length, nHMM_states);

        /* train the model */
        hmm_train(features, frames_read, model);
        /*
        printf("\n\nafter training:\n");
        hmm_print(model);
        */
        /* decode the hidden state sequence */
        viterbi_decode(features, frames_read, model, q);
        hmm_close(model);

        /*****************************/
    }
    /*****************************/

    /*
    fprintf(stderr, "HMM state sequence:\n");
    for (i = 0; i < frames_read; i++)
        fprintf(stderr, "%d ", q[i]);
    fprintf(stderr, "\n\n");
    */

    /* create histograms of states */
    double* h = (double*) malloc(frames_read*nHMM_states*sizeof(double));   /* vector in row major order */
    create_histograms(q, frames_read, nHMM_states, histogram_length, h);

    /* cluster the histograms */
    int nbsched = 20;   /* length of inverse temperature schedule */
    double* bsched = (double*) malloc(nbsched*sizeof(double));   /* inverse temperature schedule */
    double b0 = 100;
    double alpha = 0.7;
    bsched[0] = b0;
    for (i = 1; i < nbsched; i++) {
        bsched[i] = alpha * bsched[i-1];
    }
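
    /* cluster_melt() consumes the row-major histogram matrix (frames_read rows of
       nHMM_states bins) and the inverse temperature schedule, and writes each
       frame's final cluster index back into q */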
    cluster_melt(h, nHMM_states, frames_read, bsched, nbsched, nclusters, neighbour_limit, q);

    /* now q holds a sequence of cluster assignments */

    free(h);
    free(bsched);
}

/* segment constant-Q or chroma features */
void constq_segment(int* q, double** features, int frames_read, int bins, int ncoeff, int feature_type,
                    int nHMM_states, int histogram_length, int nclusters, int neighbour_limit)
{
    int feature_length;
    double** chroma;
    int i;

    if (feature_type == FEATURE_TYPE_CONSTQ) {

        mpeg7_constq(features, frames_read, ncoeff);

        /* do PCA on the features (but not the envelope) */
        int ncomponents = 20;
        pca_project(features, frames_read, ncoeff, ncomponents);

        /* copy the envelope so that it immediately follows the chosen components */
        for (i = 0; i < frames_read; i++) {
            features[i][ncomponents] = features[i][ncoeff];
        }

        feature_length = ncomponents + 1;

        cluster_segment(q, features, frames_read, feature_length,
                        nHMM_states, histogram_length, nclusters,
                        neighbour_limit);
    }

    if (feature_type == FEATURE_TYPE_CHROMA) {

        /* convert constant-Q to normalised chroma features */
        chroma = (double**) malloc(frames_read*sizeof(double*));
        for (i = 0; i < frames_read; i++) {
            chroma[i] = (double*) malloc(bins*sizeof(double));
        }

        cq2chroma(features, frames_read, ncoeff, bins, chroma);

        feature_length = bins;

        cluster_segment(q, chroma, frames_read, feature_length,
                        nHMM_states, histogram_length, nclusters,
                        neighbour_limit);

        for (i = 0; i < frames_read; i++) {
            free(chroma[i]);
        }
        free(chroma);
    }
}
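
/*
   Usage sketch (illustrative only, not part of the library): the parameter
   values below are arbitrary placeholders, not recommended settings.  Note
   that for FEATURE_TYPE_CONSTQ each row of the feature matrix needs at least
   ncoeff + 1 entries, because mpeg7_constq() appends the normalised envelope
   as an extra dimension.

   #include <stdlib.h>
   #include "cluster_segmenter.h"

   int *segment_example(double **features, int frames_read)
   {
       int bins = 12;               // constant-Q bins per octave
       int ncoeff = 60;             // constant-Q coefficients per frame
       int nHMM_states = 40;        // hidden states for the HMM stage
       int histogram_length = 15;   // sliding-window length (must be odd)
       int nclusters = 10;          // number of segment types
       int neighbour_limit = 4;     // passed through to cluster_melt()

       int *q = (int*) malloc(frames_read * sizeof(int));
       constq_segment(q, features, frames_read, bins, ncoeff,
                      FEATURE_TYPE_CONSTQ, nHMM_states, histogram_length,
                      nclusters, neighbour_limit);
       return q;   // q[i] is the cluster (segment-type) index of frame i
   }
*/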