# HG changeset patch # User Chris Cannam # Date 1199875585 0 # Node ID dc30e3864ceb160d8e41fff7f1da53660a517a60 # Parent 6060110dc3c68693c2e479f123d4a9c6b1e7c3f0 * merge in segmentation code from soundbite plugin/library repository diff -r 6060110dc3c6 -r dc30e3864ceb dsp/segmentation/ClusterMeltSegmenter.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dsp/segmentation/ClusterMeltSegmenter.cpp Wed Jan 09 10:46:25 2008 +0000 @@ -0,0 +1,187 @@ +/* + * ClusterMeltSegmenter.cpp + * soundbite + * + * Created by Mark Levy on 23/03/2006. + * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. All rights reserved. + * + */ + +#include +#include + +#include "ClusterMeltSegmenter.h" +#include "lib_constQ.h" +#include "cluster_segmenter.h" +#include "segment.h" + +ClusterMeltSegmenter::ClusterMeltSegmenter(ClusterMeltSegmenterParams params) : window(NULL), +constq(NULL), +featureType(params.featureType), +windowSize(params.windowSize), +hopSize(params.hopSize), +fmin(params.fmin), +fmax(params.fmax), +nbins(params.nbins), +ncomponents(params.ncomponents), // NB currently not passed - no. of PCA components is set in cluser_segmenter.c +nHMMStates(params.nHMMStates), +nclusters(params.nclusters), +histogramLength(params.histogramLength), +neighbourhoodLimit(params.neighbourhoodLimit) +{ +} + +void ClusterMeltSegmenter::initialise(int fs) +{ + samplerate = fs; + if (featureType != FEATURE_TYPE_UNKNOWN) + { + ncoeff = static_cast(ceil(nbins * (log(fmax / static_cast(fmin))) / log(2.0))); + constq = init_constQ(fmin, fmax, nbins, samplerate, ncoeff); + } +} + +ClusterMeltSegmenter::~ClusterMeltSegmenter() +{ + delete [] window; + if (constq) + close_constQ(constq); +} + +void ClusterMeltSegmenter::extractFeatures(double* samples, int nsamples) +{ + // create a new window if needed + if (!window || nsamples != windowLength) + { + if (window) + delete [] window; + window = hamming_p(nsamples); + windowLength = nsamples; + } + + // copy the samples before windowing in case we need them for something else + double* frame = new double[nsamples]; + for (int i = 0; i < nsamples; i++) + frame[i] = samples[i] * window[i]; + + // extract const-Q + do_constQ(constq, frame, nsamples); + int ncq = constq->ncoeff; + + delete [] frame; + + if (ncq == ncoeff) // else feature extraction failed + { + vector cq(ncq); + for (int i = 0; i < ncq; i++) + cq[i] = constq->absconstQtransform[i]; + features.push_back(cq); + } +} + +void ClusterMeltSegmenter::segment(int m) +{ + nclusters = m; + segment(); +} + +void ClusterMeltSegmenter::setFeatures(const vector >& f) +{ + features = f; + featureType = FEATURE_TYPE_UNKNOWN; +} + +void ClusterMeltSegmenter::segment() +{ + if (constq) + { + close_constQ(constq); // finished extracting features + constq = NULL; + } + + // for now copy the features to a native array and use the existing C segmenter... + double** arrFeatures = new double*[features.size()]; + for (int i = 0; i < features.size(); i++) + { + if (featureType == FEATURE_TYPE_UNKNOWN) + arrFeatures[i] = new double[features[0].size()]; + else + arrFeatures[i] = new double[ncoeff+1]; // allow space for the normalised envelope + for (int j = 0; j < ncoeff; j++) + arrFeatures[i][j] = features[i][j]; + } + + q = new int[features.size()]; + + if (featureType == FEATURE_TYPE_UNKNOWN) + cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength, + nclusters, neighbourhoodLimit); + else + constq_segment(q, arrFeatures, features.size(), nbins, ncoeff, featureType, + nHMMStates, histogramLength, nclusters, neighbourhoodLimit); + + // convert the cluster assignment sequence to a segmentation + makeSegmentation(q, features.size()); + + // de-allocate arrays + delete [] q; + for (int i = 0; i < features.size(); i++) + delete [] arrFeatures[i]; + delete [] arrFeatures; + + // clear the features + clear(); +} + +void ClusterMeltSegmenter::makeSegmentation(int* q, int len) +{ + segmentation.segments.clear(); + segmentation.nsegtypes = nclusters; + segmentation.samplerate = samplerate; + + Segment segment; + segment.start = 0; + segment.type = q[0]; + + for (int i = 1; i < len; i++) + { + if (q[i] != q[i-1]) + { + segment.end = i * getHopsize(); + segmentation.segments.push_back(segment); + segment.type = q[i]; + segment.start = segment.end; + } + } + segment.end = len * getHopsize(); + segmentation.segments.push_back(segment); +} + +/* +void ClusterMeltSegmenter::mpeg7ConstQ() +{ + // convert to dB scale + for (int i = 0; i < features.size(); i++) + for (int j = 0; j < ncoeff; j++) + features[i][j] = 10.0 * log10(features[i][j] + DBL_EPSILON); + + // normalise features and add the norm at the end as an extra feature dimension + double maxnorm = 0; // track the max of the norms + for (int i = 0; i < features.size(); i++) + { + double norm = 0; + for (int j = 0; j < ncoeff; j++) + norm += features[i][j] * features[i][j]; + norm = sqrt(norm); + for (int j = 0; j < ncoeff; j++) + features[i][j] /= norm; + features[i].push_back(norm); + if (norm > maxnorm) + maxnorm = norm; + } + + // normalise the norms + for (int i = 0; i < features.size(); i++) + features[i][ncoeff] /= maxnorm; +} +*/ diff -r 6060110dc3c6 -r dc30e3864ceb dsp/segmentation/ClusterMeltSegmenter.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dsp/segmentation/ClusterMeltSegmenter.h Wed Jan 09 10:46:25 2008 +0000 @@ -0,0 +1,83 @@ +/* + * ClusterMeltSegmenter.h + * soundbite + * + * Created by Mark Levy on 23/03/2006. + * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. All rights reserved. + * + */ + +#include + +#include "segment.h" +#include "Segmenter.h" +#include "hmm.h" +#include "lib_constQ.h" + +using std::vector; + +class ClusterMeltSegmenterParams // defaults are sensible for 11025Hz with 0.2 second hopsize +{ +public: + ClusterMeltSegmenterParams() : featureType(FEATURE_TYPE_CONSTQ), hopSize(0.2), windowSize(0.6), fmin(62), fmax(16000), + nbins(8), ncomponents(20), nHMMStates(40), nclusters(10), histogramLength(15), neighbourhoodLimit(20) { } + feature_types featureType; + double hopSize; // in secs + double windowSize; // in secs + int fmin; + int fmax; + int nbins; + int ncomponents; + int nHMMStates; + int nclusters; + int histogramLength; + int neighbourhoodLimit; +}; + +class ClusterMeltSegmenter : public Segmenter +{ +public: + ClusterMeltSegmenter(ClusterMeltSegmenterParams params); + virtual ~ClusterMeltSegmenter(); + virtual void initialise(int samplerate); + virtual int getWindowsize() { return static_cast(windowSize * samplerate); } + virtual int getHopsize() { return static_cast(hopSize * samplerate); } + virtual void extractFeatures(double* samples, int nsamples); + void setFeatures(const vector >& f); // provide the features yourself + virtual void segment(); // segment into default number of segment-types + void segment(int m); // segment into m segment-types + int getNSegmentTypes() { return nclusters; } +protected: + //void mpeg7ConstQ(); + void makeSegmentation(int* q, int len); + + double* window; + int windowLength; // in samples + constQ_t* constq; + model_t* model; // the HMM + //vector stateSequence; + //vector segmentTypeSequence; + int* q; // the decoded HMM state sequence + vector > histograms; + + feature_types featureType; + double hopSize; // in seconds + double windowSize; // in seconds + + // constant-Q parameters + int fmin; + int fmax; + int nbins; + int ncoeff; + + // PCA parameters + int ncomponents; + + // HMM parameters + int nHMMStates; + + // clustering parameters + int nclusters; + int histogramLength; + int neighbourhoodLimit; +}; diff -r 6060110dc3c6 -r dc30e3864ceb dsp/segmentation/SavedFeatureSegmenter.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dsp/segmentation/SavedFeatureSegmenter.cpp Wed Jan 09 10:46:25 2008 +0000 @@ -0,0 +1,97 @@ +/* + * SavedFeatureSegmenter.cpp + * soundbite + * + * Created by Mark Levy on 23/03/2006. + * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. All rights reserved. + * + */ + +#include +#include + +#include "SavedFeatureSegmenter.h" +#include "cluster_segmenter.h" +#include "segment.h" + +SavedFeatureSegmenter::SavedFeatureSegmenter(SavedFeatureSegmenterParams params) : windowSize(params.windowSize), +hopSize(params.hopSize), +nHMMStates(params.nHMMStates), +nclusters(params.nclusters), +histogramLength(params.histogramLength), +neighbourhoodLimit(params.neighbourhoodLimit) +{ +} + +void SavedFeatureSegmenter::initialise(int fs) +{ + samplerate = fs; +} + +SavedFeatureSegmenter::~SavedFeatureSegmenter() +{ +} + +void SavedFeatureSegmenter::segment(int m) +{ + nclusters = m; + segment(); +} + +void SavedFeatureSegmenter::setFeatures(const vector >& f) +{ + features = f; +} + +void SavedFeatureSegmenter::segment() +{ + // for now copy the features to a native array and use the existing C segmenter... + double** arrFeatures = new double*[features.size()]; + for (int i = 0; i < features.size(); i++) + { + arrFeatures[i] = new double[features[0].size()]; // allow space for the normalised envelope + for (int j = 0; j < features[0].size(); j++) + arrFeatures[i][j] = features[i][j]; + } + + q = new int[features.size()]; + + cluster_segment(q, arrFeatures, features.size(), features[0].size(), nHMMStates, histogramLength, + nclusters, neighbourhoodLimit); + // convert the cluster assignment sequence to a segmentation + makeSegmentation(q, features.size()); + + // de-allocate arrays + delete [] q; + for (int i = 0; i < features.size(); i++) + delete [] arrFeatures[i]; + delete [] arrFeatures; + + // clear the features + clear(); +} + +void SavedFeatureSegmenter::makeSegmentation(int* q, int len) +{ + segmentation.segments.clear(); + segmentation.nsegtypes = nclusters; + segmentation.samplerate = samplerate; + + Segment segment; + segment.start = 0; + segment.type = q[0]; + + for (int i = 1; i < len; i++) + { + if (q[i] != q[i-1]) + { + segment.end = i * getHopsize(); + segmentation.segments.push_back(segment); + segment.type = q[i]; + segment.start = segment.end; + } + } + segment.end = len * getHopsize(); + segmentation.segments.push_back(segment); +} + diff -r 6060110dc3c6 -r dc30e3864ceb dsp/segmentation/SavedFeatureSegmenter.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dsp/segmentation/SavedFeatureSegmenter.h Wed Jan 09 10:46:25 2008 +0000 @@ -0,0 +1,61 @@ +/* + * SavedFeatureSegmenter.h + * soundbite + * + * Created by Mark Levy on 23/03/2006. + * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. All rights reserved. + * + */ + +#include + +#include "segment.h" +#include "Segmenter.h" +#include "hmm.h" + +using std::vector; + +class SavedFeatureSegmenterParams +{ +public: + SavedFeatureSegmenterParams() : hopSize(0.2), windowSize(0.6), + nHMMStates(40), nclusters(10), histogramLength(15), neighbourhoodLimit(20) { } + double hopSize; // in secs + double windowSize; // in secs + int nHMMStates; + int nclusters; + int histogramLength; + int neighbourhoodLimit; +}; + +class SavedFeatureSegmenter : public Segmenter +{ +public: + SavedFeatureSegmenter(SavedFeatureSegmenterParams params); + virtual ~SavedFeatureSegmenter(); + virtual void initialise(int samplerate); + virtual int getWindowsize() { return static_cast(windowSize * samplerate); } + virtual int getHopsize() { return static_cast(hopSize * samplerate); } + virtual void extractFeatures(double* samples, int nsamples) { } + void setFeatures(const vector >& f); // provide the features yourself + virtual void segment(); // segment into default number of segment-types + void segment(int m); // segment into m segment-types + int getNSegmentTypes() { return nclusters; } +protected: + void makeSegmentation(int* q, int len); + + model_t* model; // the HMM + int* q; // the decoded HMM state sequence + vector > histograms; + + double hopSize; // in seconds + double windowSize; // in seconds + + // HMM parameters + int nHMMStates; + + // clustering parameters + int nclusters; + int histogramLength; + int neighbourhoodLimit; +}; \ No newline at end of file diff -r 6060110dc3c6 -r dc30e3864ceb dsp/segmentation/Segmenter.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dsp/segmentation/Segmenter.cpp Wed Jan 09 10:46:25 2008 +0000 @@ -0,0 +1,26 @@ +/* + * Segmenter.cpp + * soundbite + * + * Created by Mark Levy on 04/04/2006. + * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. All rights reserved. + * + */ + +#include + +#include "Segmenter.h" + +ostream& operator<<(ostream& os, const Segmentation& s) +{ + os << "structure_name : begin_time end_time\n"; + + for (int i = 0; i < s.segments.size(); i++) + { + Segment seg = s.segments[i]; + os << std::fixed << seg.type << ':' << '\t' << std::setprecision(6) << seg.start / static_cast(s.samplerate) + << '\t' << std::setprecision(6) << seg.end / static_cast(s.samplerate) << "\n"; + } + + return os; +} diff -r 6060110dc3c6 -r dc30e3864ceb dsp/segmentation/Segmenter.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dsp/segmentation/Segmenter.h Wed Jan 09 10:46:25 2008 +0000 @@ -0,0 +1,56 @@ +#ifndef _SEGMENTER_H +#define _SEGMENTER_H + +/* + * Segmenter.h + * soundbite + * + * Created by Mark Levy on 23/03/2006. + * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. All rights reserved. + * + */ + +#include +#include + +using std::vector; +using std::ostream; + +class Segment +{ +public: + int start; // in samples + int end; + int type; +}; + +class Segmentation +{ +public: + int nsegtypes; // number of segment types, so possible types are {0,1,...,nsegtypes-1} + int samplerate; + vector segments; +}; + +ostream& operator<<(ostream& os, const Segmentation& s); + +class Segmenter +{ +public: + Segmenter() {} + virtual ~Segmenter() {} + virtual void initialise(int samplerate) = 0; // must be called before any other methods + virtual int getWindowsize() = 0; // required window size for calls to extractFeatures() + virtual int getHopsize() = 0; // required hop size for calls to extractFeatures() + virtual void extractFeatures(double* samples, int nsamples) = 0; + virtual void segment() = 0; // call once all the features have been extracted + virtual void segment(int m) = 0; // specify desired number of segment-types + virtual void clear() { features.clear(); } + const Segmentation& getSegmentation() const { return segmentation; } +protected: + vector > features; + Segmentation segmentation; + int samplerate; +}; + +#endif diff -r 6060110dc3c6 -r dc30e3864ceb dsp/segmentation/cluster_melt.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dsp/segmentation/cluster_melt.c Wed Jan 09 10:46:25 2008 +0000 @@ -0,0 +1,219 @@ +/* + * cluster.c + * cluster_melt + * + * Created by Mark Levy on 21/02/2006. + * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. All rights reserved. + * + */ + +#include + +#include "cluster_melt.h" + +#define DEFAULT_LAMBDA 0.02; +#define DEFAULT_LIMIT 20; + +double kldist(double* a, double* b, int n) { + /* NB assume that all a[i], b[i] are non-negative + because a, b represent probability distributions */ + double q, d; + int i; + + d = 0; + for (i = 0; i < n; i++) + { + q = (a[i] + b[i]) / 2.0; + if (q > 0) + { + if (a[i] > 0) + d += a[i] * log(a[i] / q); + if (b[i] > 0) + d += b[i] * log(b[i] / q); + } + } + return d; +} + +void cluster_melt(double *h, int m, int n, double *Bsched, int t, int k, int l, int *c) { + double lambda, sum, beta, logsumexp, maxlp; + int i, j, a, b, b0, b1, limit, B, it, maxiter, maxiter0, maxiter1; + double** cl; /* reference histograms for each cluster */ + int** nc; /* neighbour counts for each histogram */ + double** lp; /* soft assignment probs for each histogram */ + int* oldc; /* previous hard assignments (to check convergence) */ + + /* NB h is passed as a 1d row major array */ + + /* parameter values */ + lambda = DEFAULT_LAMBDA; + if (l > 0) + limit = l; + else + limit = DEFAULT_LIMIT; /* use default if no valid neighbourhood limit supplied */ + B = 2 * limit + 1; + maxiter0 = 20; /* number of iterations at initial temperature */ + maxiter1 = 5; /* number of iterations at subsequent temperatures */ + + /* allocate memory */ + cl = (double**) malloc(k*sizeof(double*)); + for (i= 0; i < k; i++) + cl[i] = (double*) malloc(m*sizeof(double)); + + nc = (int**) malloc(n*sizeof(int*)); + for (i= 0; i < n; i++) + nc[i] = (int*) malloc(k*sizeof(int)); + + lp = (double**) malloc(n*sizeof(double*)); + for (i= 0; i < n; i++) + lp[i] = (double*) malloc(k*sizeof(double)); + + oldc = (int*) malloc(n * sizeof(int)); + + /* initialise */ + for (i = 0; i < k; i++) + { + sum = 0; + for (j = 0; j < m; j++) + { + cl[i][j] = rand(); /* random initial reference histograms */ + sum += cl[i][j] * cl[i][j]; + } + sum = sqrt(sum); + for (j = 0; j < m; j++) + { + cl[i][j] /= sum; /* normalise */ + } + } + //print_array(cl, k, m); + + for (i = 0; i < n; i++) + c[i] = 1; /* initially assign all histograms to cluster 1 */ + + for (a = 0; a < t; a++) + { + beta = Bsched[a]; + + if (a == 0) + maxiter = maxiter0; + else + maxiter = maxiter1; + + for (it = 0; it < maxiter; it++) + { + //if (it == maxiter - 1) + // mexPrintf("hasn't converged after %d iterations\n", maxiter); + + for (i = 0; i < n; i++) + { + /* save current hard assignments */ + oldc[i] = c[i]; + + /* calculate soft assignment logprobs for each cluster */ + sum = 0; + for (j = 0; j < k; j++) + { + lp[i][ j] = -beta * kldist(cl[j], &h[i*m], m); + + /* update matching neighbour counts for this histogram, based on current hard assignments */ + /* old version: + nc[i][j] = 0; + if (i >= limit && i <= n - 1 - limit) + { + for (b = i - limit; b <= i + limit; b++) + { + if (c[b] == j+1) + nc[i][j]++; + } + nc[i][j] = B - nc[i][j]; + } + */ + b0 = i - limit; + if (b0 < 0) + b0 = 0; + b1 = i + limit; + if (b1 >= n) + b1 = n - 1; + nc[i][j] = b1 - b0 + 1; /* = B except at edges */ + for (b = b0; b <= b1; b++) + if (c[b] == j+1) + nc[i][j]--; + + sum += exp(lp[i][j]); + } + + /* normalise responsibilities and add duration logprior */ + logsumexp = log(sum); + for (j = 0; j < k; j++) + lp[i][j] -= logsumexp + lambda * nc[i][j]; + } + //print_array(lp, n, k); + /* + for (i = 0; i < n; i++) + { + for (j = 0; j < k; j++) + mexPrintf("%d ", nc[i][j]); + mexPrintf("\n"); + } + */ + + + /* update the assignments now that we know the duration priors + based on the current assignments */ + for (i = 0; i < n; i++) + { + maxlp = lp[i][0]; + c[i] = 1; + for (j = 1; j < k; j++) + if (lp[i][j] > maxlp) + { + maxlp = lp[i][j]; + c[i] = j+1; + } + } + + /* break if assignments haven't changed */ + i = 0; + while (i < n && oldc[i] == c[i]) + i++; + if (i == n) + break; + + /* update reference histograms now we know new responsibilities */ + for (j = 0; j < k; j++) + { + for (b = 0; b < m; b++) + { + cl[j][b] = 0; + for (i = 0; i < n; i++) + { + cl[j][b] += exp(lp[i][j]) * h[i*m+b]; + } + } + + sum = 0; + for (i = 0; i < n; i++) + sum += exp(lp[i][j]); + for (b = 0; b < m; b++) + cl[j][b] /= sum; /* normalise */ + } + + //print_array(cl, k, m); + //mexPrintf("\n\n"); + } + } + + /* free memory */ + for (i = 0; i < k; i++) + free(cl[i]); + free(cl); + for (i = 0; i < n; i++) + free(nc[i]); + free(nc); + for (i = 0; i < n; i++) + free(lp[i]); + free(lp); + free(oldc); +} + + diff -r 6060110dc3c6 -r dc30e3864ceb dsp/segmentation/cluster_melt.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dsp/segmentation/cluster_melt.h Wed Jan 09 10:46:25 2008 +0000 @@ -0,0 +1,25 @@ +#ifndef _CLUSTER_MELT_H +#define _CLUSTER_MELT_H +/* + * cluster_melt.h + * cluster_melt + * + * Created by Mark Levy on 21/02/2006. + * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. All rights reserved. + * + */ + +#include +#include + +void cluster_melt(double *h, /* normalised histograms, as a vector in row major order */ + int m, /* number of dimensions (i.e. histogram bins) */ + int n, /* number of histograms */ + double *Bsched, /* inverse temperature schedule */ + int t, /* length of schedule */ + int k, /* number of clusters */ + int l, /* neighbourhood limit (supply zero to use default value) */ + int *c /* sequence of cluster assignments */ +); + +#endif \ No newline at end of file diff -r 6060110dc3c6 -r dc30e3864ceb dsp/segmentation/cluster_segmenter.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dsp/segmentation/cluster_segmenter.c Wed Jan 09 10:46:25 2008 +0000 @@ -0,0 +1,271 @@ +/* + * cluster_segmenter.c + * soundbite + * + * Created by Mark Levy on 06/04/2006. + * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. All rights reserved. + * + */ + +#include "cluster_segmenter.h" + +extern int readmatarray_size(const char *filepath, int n_array, int* t, int* d); +extern int readmatarray(const char *filepath, int n_array, int t, int d, double** arr); + +/* converts constant-Q features to normalised chroma */ +void cq2chroma(double** cq, int nframes, int ncoeff, int bins, double** chroma) +{ + int noct = ncoeff / bins; /* number of complete octaves in constant-Q */ + int t, b, oct, ix; + //double maxchroma; /* max chroma value at each time, for normalisation */ + //double sum; /* for normalisation */ + + for (t = 0; t < nframes; t++) + { + for (b = 0; b < bins; b++) + chroma[t][b] = 0; + for (oct = 0; oct < noct; oct++) + { + ix = oct * bins; + for (b = 0; b < bins; b++) + chroma[t][b] += fabs(cq[t][ix+b]); + } + /* normalise to unit sum + sum = 0; + for (b = 0; b < bins; b++) + sum += chroma[t][b]; + for (b = 0; b < bins; b++) + chroma[t][b] /= sum; + /* normalise to unit max - NO this made results much worse! + maxchroma = 0; + for (b = 0; b < bins; b++) + if (chroma[t][b] > maxchroma) + maxchroma = chroma[t][b]; + if (maxchroma > 0) + for (b = 0; b < bins; b++) + chroma[t][b] /= maxchroma; + */ + } +} + +/* applies MPEG-7 normalisation to constant-Q features, storing normalised envelope (norm) in last feature dimension */ +void mpeg7_constq(double** features, int nframes, int ncoeff) +{ + int i, j; + double ss; + double env; + double maxenv = 0; + + /* convert const-Q features to dB scale */ + for (i = 0; i < nframes; i++) + for (j = 0; j < ncoeff; j++) + features[i][j] = 10.0 * log10(features[i][j]+DBL_EPSILON); + + /* normalise each feature vector and add the norm as an extra feature dimension */ + for (i = 0; i < nframes; i++) + { + ss = 0; + for (j = 0; j < ncoeff; j++) + ss += features[i][j] * features[i][j]; + env = sqrt(ss); + for (j = 0; j < ncoeff; j++) + features[i][j] /= env; + features[i][ncoeff] = env; + if (env > maxenv) + maxenv = env; + } + /* normalise the envelopes */ + for (i = 0; i < nframes; i++) + features[i][ncoeff] /= maxenv; +} + +/* return histograms h[nx*m] of data x[nx] into m bins using a sliding window of length h_len (MUST BE ODD) */ +/* NB h is a vector in row major order, as required by cluster_melt() */ +/* for historical reasons we normalise the histograms by their norm (not to sum to one) */ +void create_histograms(int* x, int nx, int m, int hlen, double* h) +{ + int i, j, t; + double norm; + for (i = hlen/2; i < nx-hlen/2; i++) + { + for (j = 0; j < m; j++) + h[i*m+j] = 0; + for (t = i-hlen/2; t <= i+hlen/2; t++) + ++h[i*m+x[t]]; + norm = 0; + for (j = 0; j < m; j++) + norm += h[i*m+j] * h[i*m+j]; + for (j = 0; j < m; j++) + h[i*m+j] /= norm; + } + + /* duplicate histograms at beginning and end to create one histogram for each data value supplied */ + for (i = 0; i < hlen/2; i++) + for (j = 0; j < m; j++) + h[i*m+j] = h[hlen/2*m+j]; + for (i = nx-hlen/2; i < nx; i++) + for (j = 0; j < m; j++) + h[i*m+j] = h[(nx-hlen/2-1)*m+j]; +} + +/* segment using HMM and then histogram clustering */ +void cluster_segment(int* q, double** features, int frames_read, int feature_length, int nHMM_states, + int histogram_length, int nclusters, int neighbour_limit) +{ + int i, j; + + /*****************************/ + if (0) { + /* try just using the predominant bin number as a 'decoded state' */ + nHMM_states = feature_length + 1; /* allow a 'zero' state */ + double chroma_thresh = 0.05; + double maxval; + int maxbin; + for (i = 0; i < frames_read; i++) + { + maxval = 0; + for (j = 0; j < feature_length; j++) + { + if (features[i][j] > maxval) + { + maxval = features[i][j]; + maxbin = j; + } + } + if (maxval > chroma_thresh) + q[i] = maxbin; + else + q[i] = feature_length; + } + + } + if (1) { + /*****************************/ + + + /* scale all the features to 'balance covariances' during HMM training */ + double scale = 10; + for (i = 0; i < frames_read; i++) + for (j = 0; j < feature_length; j++) + features[i][j] *= scale; + + /* train an HMM on the features */ + + /* create a model */ + model_t* model = hmm_init(features, frames_read, feature_length, nHMM_states); + + /* train the model */ + hmm_train(features, frames_read, model); + + printf("\n\nafter training:\n"); + hmm_print(model); + + /* decode the hidden state sequence */ + viterbi_decode(features, frames_read, model, q); + hmm_close(model); + + /*****************************/ + } + /*****************************/ + + + fprintf(stderr, "HMM state sequence:\n"); + for (i = 0; i < frames_read; i++) + fprintf(stderr, "%d ", q[i]); + fprintf(stderr, "\n\n"); + + /* create histograms of states */ + double* h = (double*) malloc(frames_read*nHMM_states*sizeof(double)); /* vector in row major order */ + create_histograms(q, frames_read, nHMM_states, histogram_length, h); + + /* cluster the histograms */ + int nbsched = 20; /* length of inverse temperature schedule */ + double* bsched = (double*) malloc(nbsched*sizeof(double)); /* inverse temperature schedule */ + double b0 = 100; + double alpha = 0.7; + bsched[0] = b0; + for (i = 1; i < nbsched; i++) + bsched[i] = alpha * bsched[i-1]; + cluster_melt(h, nHMM_states, frames_read, bsched, nbsched, nclusters, neighbour_limit, q); + + /* now q holds a sequence of cluster assignments */ + + free(h); + free(bsched); +} + +/* segment constant-Q or chroma features */ +void constq_segment(int* q, double** features, int frames_read, int bins, int ncoeff, int feature_type, + int nHMM_states, int histogram_length, int nclusters, int neighbour_limit) +{ + int feature_length; + double** chroma; + int i; + + if (feature_type == FEATURE_TYPE_CONSTQ) + { + fprintf(stderr, "Converting to dB and normalising...\n"); + + mpeg7_constq(features, frames_read, ncoeff); + + fprintf(stderr, "Running PCA...\n"); + + /* do PCA on the features (but not the envelope) */ + int ncomponents = 20; + pca_project(features, frames_read, ncoeff, ncomponents); + + /* copy the envelope so that it immediatly follows the chosen components */ + for (i = 0; i < frames_read; i++) + features[i][ncomponents] = features[i][ncoeff]; + + feature_length = ncomponents + 1; + + /************************************** + //TEST + // feature file name + char* dir = "/Users/mark/documents/semma/audio/"; + char* file_name = (char*) malloc((strlen(dir) + strlen(trackname) + strlen("_features_c20r8h0.2f0.6.mat") + 1)*sizeof(char)); + strcpy(file_name, dir); + strcat(file_name, trackname); + strcat(file_name, "_features_c20r8h0.2f0.6.mat"); + + // get the features from Matlab from mat-file + int frames_in_file; + readmatarray_size(file_name, 2, &frames_in_file, &feature_length); + readmatarray(file_name, 2, frames_in_file, feature_length, features); + // copy final frame to ensure that we get as many as we expected + int missing_frames = frames_read - frames_in_file; + while (missing_frames > 0) + { + for (i = 0; i < feature_length; i++) + features[frames_read-missing_frames][i] = features[frames_read-missing_frames-1][i]; + --missing_frames; + } + + free(file_name); + ******************************************/ + + cluster_segment(q, features, frames_read, feature_length, nHMM_states, histogram_length, nclusters, neighbour_limit); + } + + if (feature_type == FEATURE_TYPE_CHROMA) + { + fprintf(stderr, "Converting to chroma features...\n"); + + /* convert constant-Q to normalised chroma features */ + chroma = (double**) malloc(frames_read*sizeof(double*)); + for (i = 0; i < frames_read; i++) + chroma[i] = (double*) malloc(bins*sizeof(double)); + cq2chroma(features, frames_read, ncoeff, bins, chroma); + feature_length = bins; + + cluster_segment(q, chroma, frames_read, feature_length, nHMM_states, histogram_length, nclusters, neighbour_limit); + + for (i = 0; i < frames_read; i++) + free(chroma[i]); + free(chroma); + } +} + + + diff -r 6060110dc3c6 -r dc30e3864ceb dsp/segmentation/cluster_segmenter.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dsp/segmentation/cluster_segmenter.h Wed Jan 09 10:46:25 2008 +0000 @@ -0,0 +1,38 @@ +#ifndef _CLUSTER_SEGMENTER_H +#define _CLUSTER_SEGMENTER_H + +/* + * cluster_segmenter.h + * soundbite + * + * Created by Mark Levy on 06/04/2006. + * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. All rights reserved. + * + */ + +#include +#include +#include +#include + +#include "segment.h" +#include "cluster_melt.h" +#include "hmm.h" +#include "pca.h" + +/* applies MPEG-7 normalisation to constant-Q features, storing normalised envelope (norm) in last feature dimension */ +void mpeg7_constq(double** features, int nframes, int ncoeff); + +/* converts constant-Q features to normalised chroma */ +void cq2chroma(double** cq, int nframes, int ncoeff, int bins, double** chroma); + +void create_histograms(int* x, int nx, int m, int hlen, double* h); + +void cluster_segment(int* q, double** features, int frames_read, int feature_length, int nHMM_states, + int histogram_length, int nclusters, int neighbour_limit); + +void constq_segment(int* q, double** features, int frames_read, int bins, int ncoeff, int feature_type, + int nHMM_states, int histogram_length, int nclusters, int neighbour_limit); + + +#endif \ No newline at end of file diff -r 6060110dc3c6 -r dc30e3864ceb dsp/segmentation/segment.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dsp/segmentation/segment.h Wed Jan 09 10:46:25 2008 +0000 @@ -0,0 +1,36 @@ +#ifndef _SEGMENT_H +#define _SEGMENT_H + +/* + * segment.h + * soundbite + * + * Created by Mark Levy on 06/04/2006. + * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. All rights reserved. + * + */ + +typedef struct segment_t +{ + long start; /* in samples */ + long end; + int type; +} segment_t; + +typedef struct segmentation_t +{ + int nsegs; /* number of segments */ + int nsegtypes; /* number of segment types, so possible types are {0,1,...,nsegtypes-1} */ + int samplerate; + segment_t* segments; +} segmentation_t; + +typedef enum +{ + FEATURE_TYPE_UNKNOWN = 0, + FEATURE_TYPE_CONSTQ = 1, + FEATURE_TYPE_CHROMA +} feature_types; + +#endif +