/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
/*
 *  cluster_segmenter.c
 *  soundbite
 *
 *  Created by Mark Levy on 06/04/2006.
 *  Copyright 2006 Centre for Digital Music, Queen Mary, University of London.

    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
 *
 */

#include "cluster_segmenter.h"

extern int readmatarray_size(const char *filepath, int n_array, int* t, int* d);
extern int readmatarray(const char *filepath, int n_array, int t, int d, double** arr);

/* converts constant-Q features to normalised chroma */
void cq2chroma(double** cq, int nframes, int ncoeff, int bins, double** chroma)
{
    int noct = ncoeff / bins;   /* number of complete octaves in constant-Q */
    int t, b, oct, ix;

    for (t = 0; t < nframes; t++) {
        for (b = 0; b < bins; b++) {
            chroma[t][b] = 0;
        }
        for (oct = 0; oct < noct; oct++) {
            ix = oct * bins;
            for (b = 0; b < bins; b++) {
                chroma[t][b] += fabs(cq[t][ix+b]);
            }
        }
    }
}

/* applies MPEG-7 normalisation to constant-Q features, storing normalised envelope (norm) in last feature dimension */
void mpeg7_constq(double** features, int nframes, int ncoeff)
{
    int i, j;
    double ss;
    double env;
    double maxenv = 0;

    /* convert const-Q features to dB scale */
    for (i = 0; i < nframes; i++) {
        for (j = 0; j < ncoeff; j++) {
            features[i][j] = 10.0 * log10(features[i][j]+DBL_EPSILON);
        }
    }

    /* normalise each feature vector and add the norm as an extra feature dimension */
    for (i = 0; i < nframes; i++) {
        ss = 0;
        for (j = 0; j < ncoeff; j++) {
            ss += features[i][j] * features[i][j];
        }
        env = sqrt(ss);
        for (j = 0; j < ncoeff; j++) {
            features[i][j] /= env;
        }
        features[i][ncoeff] = env;
        if (env > maxenv) {
            maxenv = env;
        }
    }
    /* normalise the envelopes */
    for (i = 0; i < nframes; i++) {
        features[i][ncoeff] /= maxenv;
    }
}

/* return histograms h[nx*m] of data x[nx] into m bins using a sliding window of length hlen (MUST BE ODD) */
/* NB h is a vector in row major order, as required by cluster_melt() */
/* for historical reasons we normalise the histograms by their norm (not to sum to one) */
void create_histograms(int* x, int nx, int m, int hlen, double* h)
{
    int i, j, t;
    double norm;

    for (i = 0; i < nx*m; i++) {
        h[i] = 0;
    }

    for (i = hlen/2; i < nx-hlen/2; i++) {
        for (j = 0; j < m; j++) {
            h[i*m+j] = 0;
        }
        for (t = i-hlen/2; t <= i+hlen/2; t++) {
            ++h[i*m+x[t]];
        }
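        /* normalise this histogram: each bin is divided by the sum of squared counts */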
        norm = 0;
        for (j = 0; j < m; j++) {
            norm += h[i*m+j] * h[i*m+j];
        }
        for (j = 0; j < m; j++) {
            h[i*m+j] /= norm;
        }
    }

    /* duplicate histograms at beginning and end to create one histogram for each data value supplied */
    for (i = 0; i < hlen/2; i++) {
        for (j = 0; j < m; j++) {
            h[i*m+j] = h[hlen/2*m+j];
        }
    }
    for (i = nx-hlen/2; i < nx; i++) {
        for (j = 0; j < m; j++) {
            h[i*m+j] = h[(nx-hlen/2-1)*m+j];
        }
    }
}

/* segment using HMM and then histogram clustering */
void cluster_segment(int* q, double** features, int frames_read, int feature_length, int nHMM_states,
                     int histogram_length, int nclusters, int neighbour_limit)
{
    int i, j;

    /*****************************/
    if (0) {
        /* try just using the predominant bin number as a 'decoded state' */
        nHMM_states = feature_length + 1;   /* allow a 'zero' state */
        double chroma_thresh = 0.05;
        double maxval;
        int maxbin = 0;   /* initialised in case no bin exceeds zero */
        for (i = 0; i < frames_read; i++) {
            maxval = 0;
            for (j = 0; j < feature_length; j++) {
                if (features[i][j] > maxval) {
                    maxval = features[i][j];
                    maxbin = j;
                }
            }
            if (maxval > chroma_thresh) {
                q[i] = maxbin;
            } else {
                q[i] = feature_length;
            }
        }

    }
    if (1) {
        /*****************************/

        /* scale all the features to 'balance covariances' during HMM training */
        double scale = 10;
        for (i = 0; i < frames_read; i++)
            for (j = 0; j < feature_length; j++)
                features[i][j] *= scale;

        /* train an HMM on the features */

        /* create a model */
        model_t* model = hmm_init(features, frames_read, feature_length, nHMM_states);

        /* train the model */
        hmm_train(features, frames_read, model);
        /*
        printf("\n\nafter training:\n");
        hmm_print(model);
        */
        /* decode the hidden state sequence */
        viterbi_decode(features, frames_read, model, q);
        hmm_close(model);

        /*****************************/
    }
    /*****************************/

    /*
    fprintf(stderr, "HMM state sequence:\n");
    for (i = 0; i < frames_read; i++)
        fprintf(stderr, "%d ", q[i]);
    fprintf(stderr, "\n\n");
    */

    /* create histograms of states */
    double* h = (double*) malloc(frames_read*nHMM_states*sizeof(double));   /* vector in row major order */
    create_histograms(q, frames_read, nHMM_states, histogram_length, h);

    /* cluster the histograms */
    int nbsched = 20;   /* length of inverse temperature schedule */
    double* bsched = (double*) malloc(nbsched*sizeof(double));   /* inverse temperature schedule */
    double b0 = 100;
    double alpha = 0.7;
    bsched[0] = b0;
    for (i = 1; i < nbsched; i++) {
        bsched[i] = alpha * bsched[i-1];
    }
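
    /* cluster_melt() consumes the row-major histogram matrix (frames_read rows of
       nHMM_states bins) and the inverse temperature schedule, and writes each
       frame's final cluster index back into q */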
    cluster_melt(h, nHMM_states, frames_read, bsched, nbsched, nclusters, neighbour_limit, q);

    /* now q holds a sequence of cluster assignments */

    free(h);
    free(bsched);
}

/* segment constant-Q or chroma features */
void constq_segment(int* q, double** features, int frames_read, int bins, int ncoeff, int feature_type,
                    int nHMM_states, int histogram_length, int nclusters, int neighbour_limit)
{
    int feature_length;
    double** chroma;
    int i;

    if (feature_type == FEATURE_TYPE_CONSTQ) {

        mpeg7_constq(features, frames_read, ncoeff);

        /* do PCA on the features (but not the envelope) */
        int ncomponents = 20;
        pca_project(features, frames_read, ncoeff, ncomponents);

        /* copy the envelope so that it immediately follows the chosen components */
        for (i = 0; i < frames_read; i++) {
            features[i][ncomponents] = features[i][ncoeff];
        }

        feature_length = ncomponents + 1;

        cluster_segment(q, features, frames_read, feature_length,
                        nHMM_states, histogram_length, nclusters,
                        neighbour_limit);
    }

    if (feature_type == FEATURE_TYPE_CHROMA) {

        /* convert constant-Q to normalised chroma features */
        chroma = (double**) malloc(frames_read*sizeof(double*));
        for (i = 0; i < frames_read; i++) {
            chroma[i] = (double*) malloc(bins*sizeof(double));
        }

        cq2chroma(features, frames_read, ncoeff, bins, chroma);

        feature_length = bins;

        cluster_segment(q, chroma, frames_read, feature_length,
                        nHMM_states, histogram_length, nclusters,
                        neighbour_limit);

        for (i = 0; i < frames_read; i++) {
            free(chroma[i]);
        }
        free(chroma);
    }
}
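
/*
   Usage sketch (illustrative only, not part of the library): the parameter
   values below are arbitrary placeholders, not recommended settings.  Note
   that for FEATURE_TYPE_CONSTQ each row of the feature matrix needs at least
   ncoeff + 1 entries, because mpeg7_constq() appends the normalised envelope
   as an extra dimension.

   #include <stdlib.h>
   #include "cluster_segmenter.h"

   int *segment_example(double **features, int frames_read)
   {
       int bins = 12;               // constant-Q bins per octave
       int ncoeff = 60;             // constant-Q coefficients per frame
       int nHMM_states = 40;        // hidden states for the HMM stage
       int histogram_length = 15;   // sliding-window length (must be odd)
       int nclusters = 10;          // number of segment types
       int neighbour_limit = 4;     // passed through to cluster_melt()

       int *q = (int*) malloc(frames_read * sizeof(int));
       constq_segment(q, features, frames_read, bins, ncoeff,
                      FEATURE_TYPE_CONSTQ, nHMM_states, histogram_length,
                      nclusters, neighbour_limit);
       return q;   // q[i] is the cluster (segment-type) index of frame i
   }
*/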