Mercurial > hg > qm-dsp

diff dsp/segmentation/cluster_segmenter.c @ 505:930b5b0f707d
Merge branch 'codestyle-and-tidy'
author: Chris Cannam <cannam@all-day-breakfast.com>
date: Wed, 05 Jun 2019 12:55:15 +0100
parents: d48276a3ae24
--- a/dsp/segmentation/cluster_segmenter.c	Thu May 30 16:18:13 2019 +0100
+++ b/dsp/segmentation/cluster_segmenter.c	Wed Jun 05 12:55:15 2019 +0100
@@ -1,3 +1,4 @@
+/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
 /*
  *  cluster_segmenter.c
  *  soundbite
@@ -5,11 +6,11 @@
  *  Created by Mark Levy on 06/04/2006.
  *  Copyright 2006 Centre for Digital Music, Queen Mary, University of London.
 
-    This program is free software; you can redistribute it and/or
-    modify it under the terms of the GNU General Public License as
-    published by the Free Software Foundation; either version 2 of the
-    License, or (at your option) any later version.  See the file
-    COPYING included with this distribution for more information.
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.  See the file
+ COPYING included with this distribution for more information.
  *
  */
 
@@ -21,69 +22,56 @@
 /* converts constant-Q features to normalised chroma */
 void cq2chroma(double** cq, int nframes, int ncoeff, int bins, double** chroma)
 {
-	int noct = ncoeff / bins;	/* number of complete octaves in constant-Q */
-	int t, b, oct, ix;
-	//double maxchroma;	/* max chroma value at each time, for normalisation */
-	//double sum;		/* for normalisation */
-	
-	for (t = 0; t < nframes; t++)
-	{
-		for (b = 0; b < bins; b++)
-			chroma[t][b] = 0;
-		for (oct = 0; oct < noct; oct++)
-		{
-			ix = oct * bins;
-			for (b = 0; b < bins; b++)
-				chroma[t][b] += fabs(cq[t][ix+b]);
-		}
-		/* normalise to unit sum
-		sum = 0;
-		for (b = 0; b < bins; b++)
-			sum += chroma[t][b];
-		for (b = 0; b < bins; b++)
-			chroma[t][b] /= sum;
-		*/
-		/* normalise to unit max - NO this made results much worse!
-		maxchroma = 0;
-		for (b = 0; b < bins; b++)
-			if (chroma[t][b] > maxchroma)
-				maxchroma = chroma[t][b];
-		if (maxchroma > 0)
-			for (b = 0; b < bins; b++)
-				chroma[t][b] /= maxchroma;	
-		*/
-	}
+    int noct = ncoeff / bins;       /* number of complete octaves in constant-Q */
+    int t, b, oct, ix;
+        
+    for (t = 0; t < nframes; t++) {
+        for (b = 0; b < bins; b++) {
+            chroma[t][b] = 0;
+        }
+        for (oct = 0; oct < noct; oct++) {
+            ix = oct * bins;
+            for (b = 0; b < bins; b++) {
+                chroma[t][b] += fabs(cq[t][ix+b]);
+            }
+        }
+    }
 }
 
 /* applies MPEG-7 normalisation to constant-Q features, storing normalised envelope (norm) in last feature dimension */
 void mpeg7_constq(double** features, int nframes, int ncoeff)
 {
-	int i, j;
-	double ss;
-	double env;
-	double maxenv = 0;
-	
-	/* convert const-Q features to dB scale */
-	for (i = 0; i < nframes; i++)
-		for (j = 0; j < ncoeff; j++)
-			features[i][j] = 10.0 * log10(features[i][j]+DBL_EPSILON);
-	
-	/* normalise each feature vector and add the norm as an extra feature dimension */	
-	for (i = 0; i < nframes; i++)
-	{
-		ss = 0;
-		for (j = 0; j < ncoeff; j++)
-			ss += features[i][j] * features[i][j];
-		env = sqrt(ss);
-		for (j = 0; j < ncoeff; j++)
-			features[i][j] /= env;
-		features[i][ncoeff] = env;
-		if (env > maxenv)
-			maxenv = env;
-	} 
-	/* normalise the envelopes */
-	for (i = 0; i < nframes; i++)
-		features[i][ncoeff] /= maxenv;	
+    int i, j;
+    double ss;
+    double env;
+    double maxenv = 0;
+        
+    /* convert const-Q features to dB scale */
+    for (i = 0; i < nframes; i++) {
+        for (j = 0; j < ncoeff; j++) {
+            features[i][j] = 10.0 * log10(features[i][j]+DBL_EPSILON);
+        }
+    }
+        
+    /* normalise each feature vector and add the norm as an extra feature dimension */      
+    for (i = 0; i < nframes; i++) {
+        ss = 0;
+        for (j = 0; j < ncoeff; j++) {
+            ss += features[i][j] * features[i][j];
+        }
+        env = sqrt(ss);
+        for (j = 0; j < ncoeff; j++) {
+            features[i][j] /= env;
+        }
+        features[i][ncoeff] = env;
+        if (env > maxenv) {
+            maxenv = env;
+        }
+    } 
+    /* normalise the envelopes */
+    for (i = 0; i < nframes; i++) {
+        features[i][ncoeff] /= maxenv;
+    }
 }
 
 /* return histograms h[nx*m] of data x[nx] into m bins using a sliding window of length h_len (MUST BE ODD) */
@@ -91,194 +79,175 @@
 /* for historical reasons we normalise the histograms by their norm (not to sum to one) */
 void create_histograms(int* x, int nx, int m, int hlen, double* h)
 {
-	int i, j, t;
-	double norm;
+    int i, j, t;
+    double norm;
 
-	for (i = 0; i < nx*m; i++) 
-	        h[i] = 0;
+    for (i = 0; i < nx*m; i++) {
+        h[i] = 0;
+    }
 
-	for (i = hlen/2; i < nx-hlen/2; i++)
-	{
-		for (j = 0; j < m; j++)
-			h[i*m+j] = 0;
-		for (t = i-hlen/2; t <= i+hlen/2; t++)
-			++h[i*m+x[t]];
-		norm = 0;
-		for (j = 0; j < m; j++)
-			norm += h[i*m+j] * h[i*m+j];
-		for (j = 0; j < m; j++)
-			h[i*m+j] /= norm;
-	}
-	
-	/* duplicate histograms at beginning and end to create one histogram for each data value supplied */
-	for (i = 0; i < hlen/2; i++)
-		for (j = 0; j < m; j++)
-			h[i*m+j] = h[hlen/2*m+j];
-	for (i = nx-hlen/2; i < nx; i++)
-		for (j = 0; j < m; j++)
-			h[i*m+j] = h[(nx-hlen/2-1)*m+j];
+    for (i = hlen/2; i < nx-hlen/2; i++) {
+        for (j = 0; j < m; j++) {
+            h[i*m+j] = 0;
+        }
+        for (t = i-hlen/2; t <= i+hlen/2; t++) {
+            ++h[i*m+x[t]];
+        }
+        norm = 0;
+        for (j = 0; j < m; j++) {
+            norm += h[i*m+j] * h[i*m+j];
+        }
+        for (j = 0; j < m; j++) {
+            h[i*m+j] /= norm;
+        }
+    }
+        
+    /* duplicate histograms at beginning and end to create one histogram for each data value supplied */
+    for (i = 0; i < hlen/2; i++) {
+        for (j = 0; j < m; j++) {
+            h[i*m+j] = h[hlen/2*m+j];
+        }
+    }
+    for (i = nx-hlen/2; i < nx; i++) {
+        for (j = 0; j < m; j++) {
+            h[i*m+j] = h[(nx-hlen/2-1)*m+j];
+        }
+    }
 }
 
 /* segment using HMM and then histogram clustering */
 void cluster_segment(int* q, double** features, int frames_read, int feature_length, int nHMM_states, 
-					 int histogram_length, int nclusters, int neighbour_limit)
+                     int histogram_length, int nclusters, int neighbour_limit)
 {
-	int i, j;
-	
-	/*****************************/
-	if (0) {
-	/* try just using the predominant bin number as a 'decoded state' */
-	nHMM_states = feature_length + 1;	/* allow a 'zero' state */
-	double chroma_thresh = 0.05;
-	double maxval;
-	int maxbin;
-	for (i = 0; i < frames_read; i++)
-	{
-		maxval = 0;
-		for (j = 0; j < feature_length; j++)
-		{
-			if (features[i][j] > maxval) 
-			{
-				maxval = features[i][j];
-				maxbin = j;
-			}				
-		}
-		if (maxval > chroma_thresh)
-			q[i] = maxbin;
-		else
-			q[i] = feature_length;
-	}
-	
-	}
-	if (1) {
-	/*****************************/
-		
-	
-	/* scale all the features to 'balance covariances' during HMM training */
-	double scale = 10;
-	for (i = 0; i < frames_read; i++)
-		for (j = 0; j < feature_length; j++)
-			features[i][j] *= scale;
-	
-	/* train an HMM on the features */
-	
-	/* create a model */
-	model_t* model = hmm_init(features, frames_read, feature_length, nHMM_states);
-	
-	/* train the model */
-	hmm_train(features, frames_read, model);
-/*	
-	printf("\n\nafter training:\n");
-	hmm_print(model);
-*/	
-	/* decode the hidden state sequence */
-	viterbi_decode(features, frames_read, model, q);  
-	hmm_close(model);
-	
-	/*****************************/
-	}
-	/*****************************/
-	
+    int i, j;
+        
+    /*****************************/
+    if (0) {
+        /* try just using the predominant bin number as a 'decoded state' */
+        nHMM_states = feature_length + 1;       /* allow a 'zero' state */
+        double chroma_thresh = 0.05;
+        double maxval;
+        int maxbin;
+        for (i = 0; i < frames_read; i++) {
+            maxval = 0;
+            for (j = 0; j < feature_length; j++) {
+                if (features[i][j] > maxval) {
+                    maxval = features[i][j];
+                    maxbin = j;
+                }                               
+            }
+            if (maxval > chroma_thresh) {
+                q[i] = maxbin;
+            } else {
+                q[i] = feature_length;
+            }
+        }
+        
+    }
+    if (1) {
+        /*****************************/
+        
+        /* scale all the features to 'balance covariances' during HMM training */
+        double scale = 10;
+        for (i = 0; i < frames_read; i++)
+            for (j = 0; j < feature_length; j++)
+                features[i][j] *= scale;
+        
+        /* train an HMM on the features */
+        
+        /* create a model */
+        model_t* model = hmm_init(features, frames_read, feature_length, nHMM_states);
+        
+        /* train the model */
+        hmm_train(features, frames_read, model);
+/*      
+        printf("\n\nafter training:\n");
+        hmm_print(model);
+*/      
+        /* decode the hidden state sequence */
+        viterbi_decode(features, frames_read, model, q);  
+        hmm_close(model);
+        
+        /*****************************/
+    }
+    /*****************************/
     
 /*
-	fprintf(stderr, "HMM state sequence:\n");
-	for (i = 0; i < frames_read; i++)
-		fprintf(stderr, "%d ", q[i]);
-	fprintf(stderr, "\n\n");
+  fprintf(stderr, "HMM state sequence:\n");
+  for (i = 0; i < frames_read; i++)
+  fprintf(stderr, "%d ", q[i]);
+  fprintf(stderr, "\n\n");
 */
-	
-	/* create histograms of states */
-	double* h = (double*) malloc(frames_read*nHMM_states*sizeof(double));	/* vector in row major order */
-	create_histograms(q, frames_read, nHMM_states, histogram_length, h);
-	
-	/* cluster the histograms */
-	int nbsched = 20;	/* length of inverse temperature schedule */
-	double* bsched = (double*) malloc(nbsched*sizeof(double));	/* inverse temperature schedule */
-	double b0 = 100;
-	double alpha = 0.7;
-	bsched[0] = b0;
-	for (i = 1; i < nbsched; i++)
-		bsched[i] = alpha * bsched[i-1];
-	cluster_melt(h, nHMM_states, frames_read, bsched, nbsched, nclusters, neighbour_limit, q);
-	
-	/* now q holds a sequence of cluster assignments */
-	
-	free(h);  
-	free(bsched);
+        
+    /* create histograms of states */
+    double* h = (double*) malloc(frames_read*nHMM_states*sizeof(double));   /* vector in row major order */
+    create_histograms(q, frames_read, nHMM_states, histogram_length, h);
+        
+    /* cluster the histograms */
+    int nbsched = 20;       /* length of inverse temperature schedule */
+    double* bsched = (double*) malloc(nbsched*sizeof(double));      /* inverse temperature schedule */
+    double b0 = 100;
+    double alpha = 0.7;
+    bsched[0] = b0;
+    for (i = 1; i < nbsched; i++) {
+        bsched[i] = alpha * bsched[i-1];
+    }
+    cluster_melt(h, nHMM_states, frames_read, bsched, nbsched, nclusters, neighbour_limit, q);
+        
+    /* now q holds a sequence of cluster assignments */
+        
+    free(h);  
+    free(bsched);
 }
 
 /* segment constant-Q or chroma features */
 void constq_segment(int* q, double** features, int frames_read, int bins, int ncoeff, int feature_type, 
-			 int nHMM_states, int histogram_length, int nclusters, int neighbour_limit)
+                    int nHMM_states, int histogram_length, int nclusters, int neighbour_limit)
 {
-	int feature_length;
-	double** chroma;
-	int i;
-	
-	if (feature_type == FEATURE_TYPE_CONSTQ)
-	{
-/*		fprintf(stderr, "Converting to dB and normalising...\n");
- */		
-		mpeg7_constq(features, frames_read, ncoeff);
-/*		
-		fprintf(stderr, "Running PCA...\n");
-*/		
-		/* do PCA on the features (but not the envelope) */
-		int ncomponents = 20;
-		pca_project(features, frames_read, ncoeff, ncomponents);
-		
-		/* copy the envelope so that it immediatly follows the chosen components */
-		for (i = 0; i < frames_read; i++)
-			features[i][ncomponents] = features[i][ncoeff];	
-		
-		feature_length = ncomponents + 1;
-		
-		/**************************************
-		//TEST
-		// feature file name
-		char* dir = "/Users/mark/documents/semma/audio/";
-		char* file_name = (char*) malloc((strlen(dir) + strlen(trackname) + strlen("_features_c20r8h0.2f0.6.mat") + 1)*sizeof(char));
-		strcpy(file_name, dir);
-		strcat(file_name, trackname);
-		strcat(file_name, "_features_c20r8h0.2f0.6.mat");
-		
-		// get the features from Matlab from mat-file
-		int frames_in_file;
-		readmatarray_size(file_name, 2, &frames_in_file, &feature_length);
-		readmatarray(file_name, 2, frames_in_file, feature_length, features);
-		// copy final frame to ensure that we get as many as we expected
-		int missing_frames = frames_read - frames_in_file;
-		while (missing_frames > 0)
-		{
-			for (i = 0; i < feature_length; i++)
-				features[frames_read-missing_frames][i] = features[frames_read-missing_frames-1][i];
-			--missing_frames;
-		}
-		
-		free(file_name);
-		******************************************/
-	
-		cluster_segment(q, features, frames_read, feature_length, nHMM_states, histogram_length, nclusters, neighbour_limit);
-	}
-	
-	if (feature_type == FEATURE_TYPE_CHROMA)
-	{
-/*
-		fprintf(stderr, "Converting to chroma features...\n");
-*/		
-		/* convert constant-Q to normalised chroma features */
-		chroma = (double**) malloc(frames_read*sizeof(double*));
-		for (i = 0; i < frames_read; i++)
-			chroma[i] = (double*) malloc(bins*sizeof(double));
-		cq2chroma(features, frames_read, ncoeff, bins, chroma);
-		feature_length = bins;
-		
-		cluster_segment(q, chroma, frames_read, feature_length, nHMM_states, histogram_length, nclusters, neighbour_limit);
-	
-		for (i = 0; i < frames_read; i++)
-			free(chroma[i]);
-		free(chroma);
-	}
+    int feature_length;
+    double** chroma;
+    int i;
+        
+    if (feature_type == FEATURE_TYPE_CONSTQ) {
+
+        mpeg7_constq(features, frames_read, ncoeff);
+
+        /* do PCA on the features (but not the envelope) */
+        int ncomponents = 20;
+        pca_project(features, frames_read, ncoeff, ncomponents);
+                
+        /* copy the envelope so that it immediatly follows the chosen components */
+        for (i = 0; i < frames_read; i++) {
+            features[i][ncomponents] = features[i][ncoeff];
+        }
+                
+        feature_length = ncomponents + 1;
+        
+        cluster_segment(q, features, frames_read, feature_length,
+                        nHMM_states, histogram_length, nclusters,
+                        neighbour_limit);
+    }
+        
+    if (feature_type == FEATURE_TYPE_CHROMA) {
+
+        /* convert constant-Q to normalised chroma features */
+        chroma = (double**) malloc(frames_read*sizeof(double*));
+        for (i = 0; i < frames_read; i++) {
+            chroma[i] = (double*) malloc(bins*sizeof(double));
+        }
+        
+        cq2chroma(features, frames_read, ncoeff, bins, chroma);
+        
+        feature_length = bins;
+                
+        cluster_segment(q, chroma, frames_read, feature_length,
+                        nHMM_states, histogram_length, nclusters,
+                        neighbour_limit);
+        
+        for (i = 0; i < frames_read; i++)
+            free(chroma[i]);
+        free(chroma);
+    }
 }
author	Chris Cannam <cannam@all-day-breakfast.com>
date	Wed, 05 Jun 2019 12:55:15 +0100
parents	d48276a3ae24
children