cannam@18: /*
cannam@18:  *  cluster_segmenter.c
cannam@18:  *  soundbite
cannam@18:  *
cannam@18:  *  Created by Mark Levy on 06/04/2006.
Chris@84:  *  Copyright 2006 Centre for Digital Music, Queen Mary, University of London.
Chris@84: 
Chris@84:     This program is free software; you can redistribute it and/or
Chris@84:     modify it under the terms of the GNU General Public License as
Chris@84:     published by the Free Software Foundation; either version 2 of the
Chris@84:     License, or (at your option) any later version.  See the file
Chris@84:     COPYING included with this distribution for more information.
cannam@18:  *
cannam@18:  */
cannam@18: 
cannam@18: #include "cluster_segmenter.h"
cannam@18: 
cannam@18: extern int readmatarray_size(const char *filepath, int n_array, int* t, int* d);
cannam@18: extern int readmatarray(const char *filepath, int n_array, int t, int d, double** arr);
cannam@18: 
cannam@18: /* converts constant-Q features to normalised chroma */
cannam@18: void cq2chroma(double** cq, int nframes, int ncoeff, int bins, double** chroma)
cannam@18: {
cannam@18: 	int noct = ncoeff / bins;	/* number of complete octaves in constant-Q */
cannam@18: 	int t, b, oct, ix;
cannam@18: 	//double maxchroma;	/* max chroma value at each time, for normalisation */
cannam@18: 	//double sum;		/* for normalisation */
cannam@18: 	
cannam@18: 	for (t = 0; t < nframes; t++)
cannam@18: 	{
cannam@18: 		for (b = 0; b < bins; b++)
cannam@18: 			chroma[t][b] = 0;
cannam@18: 		for (oct = 0; oct < noct; oct++)
cannam@18: 		{
cannam@18: 			ix = oct * bins;
cannam@18: 			for (b = 0; b < bins; b++)
cannam@18: 				chroma[t][b] += fabs(cq[t][ix+b]);
cannam@18: 		}
cannam@18: 		/* normalise to unit sum
cannam@18: 		sum = 0;
cannam@18: 		for (b = 0; b < bins; b++)
cannam@18: 			sum += chroma[t][b];
cannam@18: 		for (b = 0; b < bins; b++)
cannam@18: 			chroma[t][b] /= sum;
cannam@20: 		*/
cannam@18: 		/* normalise to unit max - NO this made results much worse!
cannam@18: 		maxchroma = 0;
cannam@18: 		for (b = 0; b < bins; b++)
cannam@18: 			if (chroma[t][b] > maxchroma)
cannam@18: 				maxchroma = chroma[t][b];
cannam@18: 		if (maxchroma > 0)
cannam@18: 			for (b = 0; b < bins; b++)
cannam@18: 				chroma[t][b] /= maxchroma;	
cannam@18: 		*/
cannam@18: 	}
cannam@18: }
cannam@18: 
cannam@18: /* applies MPEG-7 normalisation to constant-Q features, storing normalised envelope (norm) in last feature dimension */
cannam@18: void mpeg7_constq(double** features, int nframes, int ncoeff)
cannam@18: {
cannam@18: 	int i, j;
cannam@18: 	double ss;
cannam@18: 	double env;
cannam@18: 	double maxenv = 0;
cannam@18: 	
cannam@18: 	/* convert const-Q features to dB scale */
cannam@18: 	for (i = 0; i < nframes; i++)
cannam@18: 		for (j = 0; j < ncoeff; j++)
cannam@18: 			features[i][j] = 10.0 * log10(features[i][j]+DBL_EPSILON);
cannam@18: 	
cannam@18: 	/* normalise each feature vector and add the norm as an extra feature dimension */	
cannam@18: 	for (i = 0; i < nframes; i++)
cannam@18: 	{
cannam@18: 		ss = 0;
cannam@18: 		for (j = 0; j < ncoeff; j++)
cannam@18: 			ss += features[i][j] * features[i][j];
cannam@18: 		env = sqrt(ss);
cannam@18: 		for (j = 0; j < ncoeff; j++)
cannam@18: 			features[i][j] /= env;
cannam@18: 		features[i][ncoeff] = env;
cannam@18: 		if (env > maxenv)
cannam@18: 			maxenv = env;
cannam@18: 	} 
cannam@18: 	/* normalise the envelopes */
cannam@18: 	for (i = 0; i < nframes; i++)
cannam@18: 		features[i][ncoeff] /= maxenv;	
cannam@18: }
cannam@18: 
cannam@18: /* return histograms h[nx*m] of data x[nx] into m bins using a sliding window of length h_len (MUST BE ODD) */
cannam@18: /* NB h is a vector in row major order, as required by cluster_melt() */
cannam@18: /* for historical reasons we normalise the histograms by their norm (not to sum to one) */
cannam@18: void create_histograms(int* x, int nx, int m, int hlen, double* h)
cannam@18: {
cannam@18: 	int i, j, t;
cannam@18: 	double norm;
cannam@41: 
cannam@41: 	for (i = 0; i < nx*m; i++) 
cannam@41: 	        h[i] = 0;
cannam@41: 
cannam@18: 	for (i = hlen/2; i < nx-hlen/2; i++)
cannam@18: 	{
cannam@18: 		for (j = 0; j < m; j++)
cannam@18: 			h[i*m+j] = 0;
cannam@18: 		for (t = i-hlen/2; t <= i+hlen/2; t++)
cannam@18: 			++h[i*m+x[t]];
cannam@18: 		norm = 0;
cannam@18: 		for (j = 0; j < m; j++)
cannam@18: 			norm += h[i*m+j] * h[i*m+j];
cannam@18: 		for (j = 0; j < m; j++)
cannam@18: 			h[i*m+j] /= norm;
cannam@18: 	}
cannam@18: 	
cannam@18: 	/* duplicate histograms at beginning and end to create one histogram for each data value supplied */
cannam@18: 	for (i = 0; i < hlen/2; i++)
cannam@18: 		for (j = 0; j < m; j++)
cannam@18: 			h[i*m+j] = h[hlen/2*m+j];
cannam@18: 	for (i = nx-hlen/2; i < nx; i++)
cannam@18: 		for (j = 0; j < m; j++)
cannam@18: 			h[i*m+j] = h[(nx-hlen/2-1)*m+j];
cannam@18: }
cannam@18: 
cannam@18: /* segment using HMM and then histogram clustering */
cannam@18: void cluster_segment(int* q, double** features, int frames_read, int feature_length, int nHMM_states, 
cannam@18: 					 int histogram_length, int nclusters, int neighbour_limit)
cannam@18: {
cannam@18: 	int i, j;
cannam@18: 	
cannam@18: 	/*****************************/
cannam@18: 	if (0) {
cannam@18: 	/* try just using the predominant bin number as a 'decoded state' */
cannam@18: 	nHMM_states = feature_length + 1;	/* allow a 'zero' state */
cannam@18: 	double chroma_thresh = 0.05;
cannam@18: 	double maxval;
cannam@18: 	int maxbin;
cannam@18: 	for (i = 0; i < frames_read; i++)
cannam@18: 	{
cannam@18: 		maxval = 0;
cannam@18: 		for (j = 0; j < feature_length; j++)
cannam@18: 		{
cannam@18: 			if (features[i][j] > maxval) 
cannam@18: 			{
cannam@18: 				maxval = features[i][j];
cannam@18: 				maxbin = j;
cannam@18: 			}				
cannam@18: 		}
cannam@18: 		if (maxval > chroma_thresh)
cannam@18: 			q[i] = maxbin;
cannam@18: 		else
cannam@18: 			q[i] = feature_length;
cannam@18: 	}
cannam@18: 	
cannam@18: 	}
cannam@18: 	if (1) {
cannam@18: 	/*****************************/
cannam@18: 		
cannam@18: 	
cannam@18: 	/* scale all the features to 'balance covariances' during HMM training */
cannam@18: 	double scale = 10;
cannam@18: 	for (i = 0; i < frames_read; i++)
cannam@18: 		for (j = 0; j < feature_length; j++)
cannam@18: 			features[i][j] *= scale;
cannam@18: 	
cannam@18: 	/* train an HMM on the features */
cannam@18: 	
cannam@18: 	/* create a model */
cannam@18: 	model_t* model = hmm_init(features, frames_read, feature_length, nHMM_states);
cannam@18: 	
cannam@18: 	/* train the model */
cannam@18: 	hmm_train(features, frames_read, model);
cannam@58: /*	
cannam@18: 	printf("\n\nafter training:\n");
cannam@18: 	hmm_print(model);
cannam@58: */	
cannam@18: 	/* decode the hidden state sequence */
cannam@18: 	viterbi_decode(features, frames_read, model, q);  
cannam@18: 	hmm_close(model);
cannam@18: 	
cannam@18: 	/*****************************/
cannam@18: 	}
cannam@18: 	/*****************************/
cannam@18: 	
cannam@18:     
cannam@58: /*
cannam@18: 	fprintf(stderr, "HMM state sequence:\n");
cannam@18: 	for (i = 0; i < frames_read; i++)
cannam@18: 		fprintf(stderr, "%d ", q[i]);
cannam@18: 	fprintf(stderr, "\n\n");
cannam@58: */
cannam@18: 	
cannam@18: 	/* create histograms of states */
cannam@18: 	double* h = (double*) malloc(frames_read*nHMM_states*sizeof(double));	/* vector in row major order */
cannam@18: 	create_histograms(q, frames_read, nHMM_states, histogram_length, h);
cannam@18: 	
cannam@18: 	/* cluster the histograms */
cannam@18: 	int nbsched = 20;	/* length of inverse temperature schedule */
cannam@18: 	double* bsched = (double*) malloc(nbsched*sizeof(double));	/* inverse temperature schedule */
cannam@18: 	double b0 = 100;
cannam@18: 	double alpha = 0.7;
cannam@18: 	bsched[0] = b0;
cannam@18: 	for (i = 1; i < nbsched; i++)
cannam@18: 		bsched[i] = alpha * bsched[i-1];
cannam@18: 	cluster_melt(h, nHMM_states, frames_read, bsched, nbsched, nclusters, neighbour_limit, q);
cannam@18: 	
cannam@18: 	/* now q holds a sequence of cluster assignments */
cannam@18: 	
cannam@18: 	free(h);  
cannam@18: 	free(bsched);
cannam@18: }
cannam@18: 
cannam@18: /* segment constant-Q or chroma features */
cannam@18: void constq_segment(int* q, double** features, int frames_read, int bins, int ncoeff, int feature_type, 
cannam@18: 			 int nHMM_states, int histogram_length, int nclusters, int neighbour_limit)
cannam@18: {
cannam@18: 	int feature_length;
cannam@18: 	double** chroma;
cannam@18: 	int i;
cannam@18: 	
cannam@18: 	if (feature_type == FEATURE_TYPE_CONSTQ)
cannam@18: 	{
cannam@58: /*		fprintf(stderr, "Converting to dB and normalising...\n");
cannam@58:  */		
cannam@18: 		mpeg7_constq(features, frames_read, ncoeff);
cannam@58: /*		
cannam@18: 		fprintf(stderr, "Running PCA...\n");
cannam@58: */		
cannam@18: 		/* do PCA on the features (but not the envelope) */
cannam@18: 		int ncomponents = 20;
cannam@18: 		pca_project(features, frames_read, ncoeff, ncomponents);
cannam@18: 		
cannam@18: 		/* copy the envelope so that it immediatly follows the chosen components */
cannam@18: 		for (i = 0; i < frames_read; i++)
cannam@18: 			features[i][ncomponents] = features[i][ncoeff];	
cannam@18: 		
cannam@18: 		feature_length = ncomponents + 1;
cannam@18: 		
cannam@18: 		/**************************************
cannam@18: 		//TEST
cannam@18: 		// feature file name
cannam@18: 		char* dir = "/Users/mark/documents/semma/audio/";
cannam@18: 		char* file_name = (char*) malloc((strlen(dir) + strlen(trackname) + strlen("_features_c20r8h0.2f0.6.mat") + 1)*sizeof(char));
cannam@18: 		strcpy(file_name, dir);
cannam@18: 		strcat(file_name, trackname);
cannam@18: 		strcat(file_name, "_features_c20r8h0.2f0.6.mat");
cannam@18: 		
cannam@18: 		// get the features from Matlab from mat-file
cannam@18: 		int frames_in_file;
cannam@18: 		readmatarray_size(file_name, 2, &frames_in_file, &feature_length);
cannam@18: 		readmatarray(file_name, 2, frames_in_file, feature_length, features);
cannam@18: 		// copy final frame to ensure that we get as many as we expected
cannam@18: 		int missing_frames = frames_read - frames_in_file;
cannam@18: 		while (missing_frames > 0)
cannam@18: 		{
cannam@18: 			for (i = 0; i < feature_length; i++)
cannam@18: 				features[frames_read-missing_frames][i] = features[frames_read-missing_frames-1][i];
cannam@18: 			--missing_frames;
cannam@18: 		}
cannam@18: 		
cannam@18: 		free(file_name);
cannam@18: 		******************************************/
cannam@18: 	
cannam@18: 		cluster_segment(q, features, frames_read, feature_length, nHMM_states, histogram_length, nclusters, neighbour_limit);
cannam@18: 	}
cannam@18: 	
cannam@18: 	if (feature_type == FEATURE_TYPE_CHROMA)
cannam@18: 	{
cannam@58: /*
cannam@18: 		fprintf(stderr, "Converting to chroma features...\n");
cannam@58: */		
cannam@18: 		/* convert constant-Q to normalised chroma features */
cannam@18: 		chroma = (double**) malloc(frames_read*sizeof(double*));
cannam@18: 		for (i = 0; i < frames_read; i++)
cannam@18: 			chroma[i] = (double*) malloc(bins*sizeof(double));
cannam@18: 		cq2chroma(features, frames_read, ncoeff, bins, chroma);
cannam@18: 		feature_length = bins;
cannam@18: 		
cannam@18: 		cluster_segment(q, chroma, frames_read, feature_length, nHMM_states, histogram_length, nclusters, neighbour_limit);
cannam@18: 	
cannam@18: 		for (i = 0; i < frames_read; i++)
cannam@18: 			free(chroma[i]);
cannam@18: 		free(chroma);
cannam@18: 	}
cannam@18: }
cannam@18: 
cannam@18: 
cannam@18: