Mercurial > hg > qm-dsp
diff dsp/segmentation/cluster_segmenter.c @ 18:8e90a56b4b5f
* merge in segmentation code from soundbite plugin/library repository
author | cannam |
---|---|
date | Wed, 09 Jan 2008 10:46:25 +0000 |
parents | |
children | 8bdbda7fb893 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dsp/segmentation/cluster_segmenter.c Wed Jan 09 10:46:25 2008 +0000 @@ -0,0 +1,271 @@ +/* + * cluster_segmenter.c + * soundbite + * + * Created by Mark Levy on 06/04/2006. + * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. All rights reserved. + * + */ + +#include "cluster_segmenter.h" + +extern int readmatarray_size(const char *filepath, int n_array, int* t, int* d); +extern int readmatarray(const char *filepath, int n_array, int t, int d, double** arr); + +/* converts constant-Q features to normalised chroma */ +void cq2chroma(double** cq, int nframes, int ncoeff, int bins, double** chroma) +{ + int noct = ncoeff / bins; /* number of complete octaves in constant-Q */ + int t, b, oct, ix; + //double maxchroma; /* max chroma value at each time, for normalisation */ + //double sum; /* for normalisation */ + + for (t = 0; t < nframes; t++) + { + for (b = 0; b < bins; b++) + chroma[t][b] = 0; + for (oct = 0; oct < noct; oct++) + { + ix = oct * bins; + for (b = 0; b < bins; b++) + chroma[t][b] += fabs(cq[t][ix+b]); + } + /* normalise to unit sum + sum = 0; + for (b = 0; b < bins; b++) + sum += chroma[t][b]; + for (b = 0; b < bins; b++) + chroma[t][b] /= sum; + /* normalise to unit max - NO this made results much worse! + maxchroma = 0; + for (b = 0; b < bins; b++) + if (chroma[t][b] > maxchroma) + maxchroma = chroma[t][b]; + if (maxchroma > 0) + for (b = 0; b < bins; b++) + chroma[t][b] /= maxchroma; + */ + } +} + +/* applies MPEG-7 normalisation to constant-Q features, storing normalised envelope (norm) in last feature dimension */ +void mpeg7_constq(double** features, int nframes, int ncoeff) +{ + int i, j; + double ss; + double env; + double maxenv = 0; + + /* convert const-Q features to dB scale */ + for (i = 0; i < nframes; i++) + for (j = 0; j < ncoeff; j++) + features[i][j] = 10.0 * log10(features[i][j]+DBL_EPSILON); + + /* normalise each feature vector and add the norm as an extra feature dimension */ + for (i = 0; i < nframes; i++) + { + ss = 0; + for (j = 0; j < ncoeff; j++) + ss += features[i][j] * features[i][j]; + env = sqrt(ss); + for (j = 0; j < ncoeff; j++) + features[i][j] /= env; + features[i][ncoeff] = env; + if (env > maxenv) + maxenv = env; + } + /* normalise the envelopes */ + for (i = 0; i < nframes; i++) + features[i][ncoeff] /= maxenv; +} + +/* return histograms h[nx*m] of data x[nx] into m bins using a sliding window of length h_len (MUST BE ODD) */ +/* NB h is a vector in row major order, as required by cluster_melt() */ +/* for historical reasons we normalise the histograms by their norm (not to sum to one) */ +void create_histograms(int* x, int nx, int m, int hlen, double* h) +{ + int i, j, t; + double norm; + for (i = hlen/2; i < nx-hlen/2; i++) + { + for (j = 0; j < m; j++) + h[i*m+j] = 0; + for (t = i-hlen/2; t <= i+hlen/2; t++) + ++h[i*m+x[t]]; + norm = 0; + for (j = 0; j < m; j++) + norm += h[i*m+j] * h[i*m+j]; + for (j = 0; j < m; j++) + h[i*m+j] /= norm; + } + + /* duplicate histograms at beginning and end to create one histogram for each data value supplied */ + for (i = 0; i < hlen/2; i++) + for (j = 0; j < m; j++) + h[i*m+j] = h[hlen/2*m+j]; + for (i = nx-hlen/2; i < nx; i++) + for (j = 0; j < m; j++) + h[i*m+j] = h[(nx-hlen/2-1)*m+j]; +} + +/* segment using HMM and then histogram clustering */ +void cluster_segment(int* q, double** features, int frames_read, int feature_length, int nHMM_states, + int histogram_length, int nclusters, int neighbour_limit) +{ + int i, j; + + /*****************************/ + if (0) { + /* try just using the predominant bin number as a 'decoded state' */ + nHMM_states = feature_length + 1; /* allow a 'zero' state */ + double chroma_thresh = 0.05; + double maxval; + int maxbin; + for (i = 0; i < frames_read; i++) + { + maxval = 0; + for (j = 0; j < feature_length; j++) + { + if (features[i][j] > maxval) + { + maxval = features[i][j]; + maxbin = j; + } + } + if (maxval > chroma_thresh) + q[i] = maxbin; + else + q[i] = feature_length; + } + + } + if (1) { + /*****************************/ + + + /* scale all the features to 'balance covariances' during HMM training */ + double scale = 10; + for (i = 0; i < frames_read; i++) + for (j = 0; j < feature_length; j++) + features[i][j] *= scale; + + /* train an HMM on the features */ + + /* create a model */ + model_t* model = hmm_init(features, frames_read, feature_length, nHMM_states); + + /* train the model */ + hmm_train(features, frames_read, model); + + printf("\n\nafter training:\n"); + hmm_print(model); + + /* decode the hidden state sequence */ + viterbi_decode(features, frames_read, model, q); + hmm_close(model); + + /*****************************/ + } + /*****************************/ + + + fprintf(stderr, "HMM state sequence:\n"); + for (i = 0; i < frames_read; i++) + fprintf(stderr, "%d ", q[i]); + fprintf(stderr, "\n\n"); + + /* create histograms of states */ + double* h = (double*) malloc(frames_read*nHMM_states*sizeof(double)); /* vector in row major order */ + create_histograms(q, frames_read, nHMM_states, histogram_length, h); + + /* cluster the histograms */ + int nbsched = 20; /* length of inverse temperature schedule */ + double* bsched = (double*) malloc(nbsched*sizeof(double)); /* inverse temperature schedule */ + double b0 = 100; + double alpha = 0.7; + bsched[0] = b0; + for (i = 1; i < nbsched; i++) + bsched[i] = alpha * bsched[i-1]; + cluster_melt(h, nHMM_states, frames_read, bsched, nbsched, nclusters, neighbour_limit, q); + + /* now q holds a sequence of cluster assignments */ + + free(h); + free(bsched); +} + +/* segment constant-Q or chroma features */ +void constq_segment(int* q, double** features, int frames_read, int bins, int ncoeff, int feature_type, + int nHMM_states, int histogram_length, int nclusters, int neighbour_limit) +{ + int feature_length; + double** chroma; + int i; + + if (feature_type == FEATURE_TYPE_CONSTQ) + { + fprintf(stderr, "Converting to dB and normalising...\n"); + + mpeg7_constq(features, frames_read, ncoeff); + + fprintf(stderr, "Running PCA...\n"); + + /* do PCA on the features (but not the envelope) */ + int ncomponents = 20; + pca_project(features, frames_read, ncoeff, ncomponents); + + /* copy the envelope so that it immediatly follows the chosen components */ + for (i = 0; i < frames_read; i++) + features[i][ncomponents] = features[i][ncoeff]; + + feature_length = ncomponents + 1; + + /************************************** + //TEST + // feature file name + char* dir = "/Users/mark/documents/semma/audio/"; + char* file_name = (char*) malloc((strlen(dir) + strlen(trackname) + strlen("_features_c20r8h0.2f0.6.mat") + 1)*sizeof(char)); + strcpy(file_name, dir); + strcat(file_name, trackname); + strcat(file_name, "_features_c20r8h0.2f0.6.mat"); + + // get the features from Matlab from mat-file + int frames_in_file; + readmatarray_size(file_name, 2, &frames_in_file, &feature_length); + readmatarray(file_name, 2, frames_in_file, feature_length, features); + // copy final frame to ensure that we get as many as we expected + int missing_frames = frames_read - frames_in_file; + while (missing_frames > 0) + { + for (i = 0; i < feature_length; i++) + features[frames_read-missing_frames][i] = features[frames_read-missing_frames-1][i]; + --missing_frames; + } + + free(file_name); + ******************************************/ + + cluster_segment(q, features, frames_read, feature_length, nHMM_states, histogram_length, nclusters, neighbour_limit); + } + + if (feature_type == FEATURE_TYPE_CHROMA) + { + fprintf(stderr, "Converting to chroma features...\n"); + + /* convert constant-Q to normalised chroma features */ + chroma = (double**) malloc(frames_read*sizeof(double*)); + for (i = 0; i < frames_read; i++) + chroma[i] = (double*) malloc(bins*sizeof(double)); + cq2chroma(features, frames_read, ncoeff, bins, chroma); + feature_length = bins; + + cluster_segment(q, chroma, frames_read, feature_length, nHMM_states, histogram_length, nclusters, neighbour_limit); + + for (i = 0; i < frames_read; i++) + free(chroma[i]); + free(chroma); + } +} + + +