# HG changeset patch # User Chris Cannam # Date 1199875688 0 # Node ID f599563a4663e9e7ecb4e6d5a9b998213c4a112f # Parent dc30e3864ceb160d8e41fff7f1da53660a517a60 * reverting names to lower-case for C (as opposed to mixed-case for C++) -- this is how Mark L had it and he was probably right diff -r dc30e3864ceb -r f599563a4663 hmm/HMM.c --- a/hmm/HMM.c Wed Jan 09 10:46:25 2008 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,800 +0,0 @@ -/* - * hmm.c - * soundbite - * - * Created by Mark Levy on 12/02/2006. - * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. All rights reserved. - * - */ - -#include -#include -#include -#include -#include /* to seed random number generator */ -#include /* LAPACK for matrix inversion */ -#ifdef _MAC_OS_X -#include -#else -#include /* BLAS for matrix multiplication */ -#endif - -#include "HMM.h" - -model_t* hmm_init(double** x, int T, int L, int N) -{ - int i, j, d, e, t; - double s, ss; - - model_t* model; - model = (model_t*) malloc(sizeof(model_t)); - model->N = N; - model->L = L; - model->p0 = (double*) malloc(N*sizeof(double)); - model->a = (double**) malloc(N*sizeof(double*)); - model->mu = (double**) malloc(N*sizeof(double*)); - for (i = 0; i < N; i++) - { - model->a[i] = (double*) malloc(N*sizeof(double)); - model->mu[i] = (double*) malloc(L*sizeof(double)); - } - model->cov = (double**) malloc(L*sizeof(double*)); - for (i = 0; i < L; i++) - model->cov[i] = (double*) malloc(L*sizeof(double)); - - srand(time(0)); - double* global_mean = (double*) malloc(L*sizeof(double)); - - /* find global mean */ - for (d = 0; d < L; d++) - { - global_mean[d] = 0; - for (t = 0; t < T; t++) - global_mean[d] += x[t][d]; - global_mean[d] /= T; - } - - /* calculate global diagonal covariance */ - for (d = 0; d < L; d++) - { - for (e = 0; e < L; e++) - model->cov[d][e] = 0; - for (t = 0; t < T; t++) - model->cov[d][d] += (x[t][d] - global_mean[d]) * (x[t][d] - global_mean[d]); - model->cov[d][d] /= T-1; - } - - /* set all means close to global mean */ - for (i = 0; i < N; i++) - { - for (d = 0; d < L; d++) - { - /* add some random noise related to covariance */ - /* ideally the random number would be Gaussian(0,1), as a hack we make it uniform on [-0.25,0.25] */ - model->mu[i][d] = global_mean[d] + (0.5 * rand() / (double) RAND_MAX - 0.25) * sqrt(model->cov[d][d]); - } - } - - /* random intial and transition probs */ - s = 0; - for (i = 0; i < N; i++) - { - model->p0[i] = 1 + rand() / (double) RAND_MAX; - s += model->p0[i]; - ss = 0; - for (j = 0; j < N; j++) - { - model->a[i][j] = 1 + rand() / (double) RAND_MAX; - ss += model->a[i][j]; - } - for (j = 0; j < N; j++) - { - model->a[i][j] /= ss; - } - } - for (i = 0; i < N; i++) - model->p0[i] /= s; - - free(global_mean); - - return model; -} - -void hmm_close(model_t* model) -{ - int i; - - for (i = 0; i < model->N; i++) - { - free(model->a[i]); - free(model->mu[i]); - } - free(model->a); - free(model->mu); - for (i = 0; i < model->L; i++) - free(model->cov[i]); - free(model->cov); - free(model); -} - -void hmm_train(double** x, int T, model_t* model) -{ - int i, t; - double loglik; /* overall log-likelihood at each iteration */ - - int N = model->N; - int L = model->L; - double* p0 = model->p0; - double** a = model->a; - double** mu = model->mu; - double** cov = model->cov; - - /* allocate memory */ - double** gamma = (double**) malloc(T*sizeof(double*)); - double*** xi = (double***) malloc(T*sizeof(double**)); - for (t = 0; t < T; t++) - { - gamma[t] = (double*) malloc(N*sizeof(double)); - xi[t] = (double**) malloc(N*sizeof(double*)); - for (i = 0; i < N; i++) - xi[t][i] = (double*) malloc(N*sizeof(double)); - } - - /* temporary memory */ - double* gauss_y = (double*) malloc(L*sizeof(double)); - double* gauss_z = (double*) malloc(L*sizeof(double)); - - /* obs probs P(j|{x}) */ - double** b = (double**) malloc(T*sizeof(double*)); - for (t = 0; t < T; t++) - b[t] = (double*) malloc(N*sizeof(double)); - - /* inverse covariance and its determinant */ - double** icov = (double**) malloc(L*sizeof(double*)); - for (i = 0; i < L; i++) - icov[i] = (double*) malloc(L*sizeof(double)); - double detcov; - - double thresh = 0.0001; - int niter = 50; - int iter = 0; - double loglik1, loglik2; - while(iter < niter && !(iter > 1 && (loglik - loglik1) < thresh * (loglik1 - loglik2))) - { - ++iter; - - fprintf(stderr, "calculating obsprobs...\n"); - fflush(stderr); - - /* precalculate obs probs */ - invert(cov, L, icov, &detcov); - - for (t = 0; t < T; t++) - { - //int allzero = 1; - for (i = 0; i < N; i++) - { - b[t][i] = exp(loggauss(x[t], L, mu[i], icov, detcov, gauss_y, gauss_z)); - - //if (b[t][i] != 0) - // allzero = 0; - } - /* - if (allzero) - { - printf("all the b[t][i] were zero for t = %d, correcting...\n", t); - for (i = 0; i < N; i++) - { - b[t][i] = 0.00001; - } - } - */ - } - - fprintf(stderr, "forwards-backwards...\n"); - fflush(stderr); - - forward_backwards(xi, gamma, &loglik, &loglik1, &loglik2, iter, N, T, p0, a, b); - - fprintf(stderr, "iteration %d: loglik = %f\n", iter, loglik); - fprintf(stderr, "re-estimation...\n"); - fflush(stderr); - - baum_welch(p0, a, mu, cov, N, T, L, x, xi, gamma); - - /* - printf("a:\n"); - for (i = 0; i < model->N; i++) - { - for (j = 0; j < model->N; j++) - printf("%f ", model->a[i][j]); - printf("\n"); - } - printf("\n\n"); - */ - //hmm_print(model); - } - - /* deallocate memory */ - for (t = 0; t < T; t++) - { - free(gamma[t]); - free(b[t]); - for (i = 0; i < N; i++) - free(xi[t][i]); - free(xi[t]); - } - free(gamma); - free(xi); - free(b); - - for (i = 0; i < L; i++) - free(icov[i]); - free(icov); - - free(gauss_y); - free(gauss_z); -} - -void mlss_reestimate(double* p0, double** a, double** mu, double** cov, int N, int T, int L, int* q, double** x) -{ - /* fit a single Gaussian to observations in each state */ - - /* calculate the mean observation in each state */ - - /* calculate the overall covariance */ - - /* count transitions */ - - /* estimate initial probs from transitions (???) */ -} - -void baum_welch(double* p0, double** a, double** mu, double** cov, int N, int T, int L, double** x, double*** xi, double** gamma) -{ - int i, j, t; - - double* sum_gamma = (double*) malloc(N*sizeof(double)); - - /* temporary memory */ - double* u = (double*) malloc(L*L*sizeof(double)); - double* yy = (double*) malloc(T*L*sizeof(double)); - double* yy2 = (double*) malloc(T*L*sizeof(double)); - - /* re-estimate transition probs */ - for (i = 0; i < N; i++) - { - sum_gamma[i] = 0; - for (t = 0; t < T-1; t++) - sum_gamma[i] += gamma[t][i]; - } - - for (i = 0; i < N; i++) - { - if (sum_gamma[i] == 0) - { - fprintf(stderr, "sum_gamma[%d] was zero...\n", i); - } - //double s = 0; - for (j = 0; j < N; j++) - { - a[i][j] = 0; - for (t = 0; t < T-1; t++) - a[i][j] += xi[t][i][j]; - //s += a[i][j]; - a[i][j] /= sum_gamma[i]; - } - /* - for (j = 0; j < N; j++) - { - a[i][j] /= s; - } - */ - } - - /* NB: now we need to sum gamma over all t */ - for (i = 0; i < N; i++) - sum_gamma[i] += gamma[T-1][i]; - - /* re-estimate initial probs */ - for (i = 0; i < N; i++) - p0[i] = gamma[0][i]; - - /* re-estimate covariance */ - int d, e; - double sum_sum_gamma = 0; - for (i = 0; i < N; i++) - sum_sum_gamma += sum_gamma[i]; - - /* - for (d = 0; d < L; d++) - { - for (e = d; e < L; e++) - { - cov[d][e] = 0; - for (t = 0; t < T; t++) - for (j = 0; j < N; j++) - cov[d][e] += gamma[t][j] * (x[t][d] - mu[j][d]) * (x[t][e] - mu[j][e]); - - cov[d][e] /= sum_sum_gamma; - - if (isnan(cov[d][e])) - { - printf("cov[%d][%d] was nan\n", d, e); - for (j = 0; j < N; j++) - for (i = 0; i < L; i++) - if (isnan(mu[j][i])) - printf("mu[%d][%d] was nan\n", j, i); - for (t = 0; t < T; t++) - for (j = 0; j < N; j++) - if (isnan(gamma[t][j])) - printf("gamma[%d][%d] was nan\n", t, j); - exit(-1); - } - } - } - for (d = 0; d < L; d++) - for (e = 0; e < d; e++) - cov[d][e] = cov[e][d]; - */ - - /* using BLAS */ - for (d = 0; d < L; d++) - for (e = 0; e < L; e++) - cov[d][e] = 0; - - for (j = 0; j < N; j++) - { - for (d = 0; d < L; d++) - for (t = 0; t < T; t++) - { - yy[d*T+t] = x[t][d] - mu[j][d]; - yy2[d*T+t] = gamma[t][j] * (x[t][d] - mu[j][d]); - } - - cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, L, L, T, 1.0, yy, T, yy2, T, 0, u, L); - - for (e = 0; e < L; e++) - for (d = 0; d < L; d++) - cov[d][e] += u[e*L+d]; - } - - for (d = 0; d < L; d++) - for (e = 0; e < L; e++) - cov[d][e] /= T; /* sum_sum_gamma; */ - - //printf("sum_sum_gamma = %f\n", sum_sum_gamma); /* fine, = T IS THIS ALWAYS TRUE with pooled cov?? */ - - /* re-estimate means */ - for (j = 0; j < N; j++) - { - for (d = 0; d < L; d++) - { - mu[j][d] = 0; - for (t = 0; t < T; t++) - mu[j][d] += gamma[t][j] * x[t][d]; - mu[j][d] /= sum_gamma[j]; - } - } - - /* deallocate memory */ - free(sum_gamma); - free(yy); - free(yy2); - free(u); -} - -void forward_backwards(double*** xi, double** gamma, double* loglik, double* loglik1, double* loglik2, int iter, int N, int T, double* p0, double** a, double** b) -{ - /* forwards-backwards with scaling */ - int i, j, t; - - double** alpha = (double**) malloc(T*sizeof(double*)); - double** beta = (double**) malloc(T*sizeof(double*)); - for (t = 0; t < T; t++) - { - alpha[t] = (double*) malloc(N*sizeof(double)); - beta[t] = (double*) malloc(N*sizeof(double)); - } - - /* scaling coefficients */ - double* c = (double*) malloc(T*sizeof(double)); - - /* calculate forward probs and scale coefficients */ - c[0] = 0; - for (i = 0; i < N; i++) - { - alpha[0][i] = p0[i] * b[0][i]; - c[0] += alpha[0][i]; - - //printf("p0[%d] = %f, b[0][%d] = %f\n", i, p0[i], i, b[0][i]); - } - c[0] = 1 / c[0]; - for (i = 0; i < N; i++) - { - alpha[0][i] *= c[0]; - - //printf("alpha[0][%d] = %f\n", i, alpha[0][i]); /* OK agrees with Matlab */ - } - - *loglik1 = *loglik; - *loglik = -log(c[0]); - if (iter == 2) - *loglik2 = *loglik; - - for (t = 1; t < T; t++) - { - c[t] = 0; - for (j = 0; j < N; j++) - { - alpha[t][j] = 0; - for (i = 0; i < N; i++) - alpha[t][j] += alpha[t-1][i] * a[i][j]; - alpha[t][j] *= b[t][j]; - - c[t] += alpha[t][j]; - } - - /* - if (c[t] == 0) - { - printf("c[%d] = 0, going to blow up so exiting\n", t); - for (i = 0; i < N; i++) - if (b[t][i] == 0) - fprintf(stderr, "b[%d][%d] was zero\n", t, i); - fprintf(stderr, "x[t] was \n"); - for (i = 0; i < L; i++) - fprintf(stderr, "%f ", x[t][i]); - fprintf(stderr, "\n\n"); - exit(-1); - } - */ - - c[t] = 1 / c[t]; - for (j = 0; j < N; j++) - alpha[t][j] *= c[t]; - - //printf("c[%d] = %e\n", t, c[t]); - - *loglik -= log(c[t]); - } - - /* calculate backwards probs using same coefficients */ - for (i = 0; i < N; i++) - beta[T-1][i] = 1; - t = T - 1; - while (1) - { - for (i = 0; i < N; i++) - beta[t][i] *= c[t]; - - if (t == 0) - break; - - for (i = 0; i < N; i++) - { - beta[t-1][i] = 0; - for (j = 0; j < N; j++) - beta[t-1][i] += a[i][j] * b[t][j] * beta[t][j]; - } - - t--; - } - - /* - printf("alpha:\n"); - for (t = 0; t < T; t++) - { - for (i = 0; i < N; i++) - printf("%4.4e\t\t", alpha[t][i]); - printf("\n"); - } - printf("\n\n");printf("beta:\n"); - for (t = 0; t < T; t++) - { - for (i = 0; i < N; i++) - printf("%4.4e\t\t", beta[t][i]); - printf("\n"); - } - printf("\n\n"); - */ - - /* calculate posterior probs */ - double tot; - for (t = 0; t < T; t++) - { - tot = 0; - for (i = 0; i < N; i++) - { - gamma[t][i] = alpha[t][i] * beta[t][i]; - tot += gamma[t][i]; - } - for (i = 0; i < N; i++) - { - gamma[t][i] /= tot; - - //printf("gamma[%d][%d] = %f\n", t, i, gamma[t][i]); - } - } - - for (t = 0; t < T-1; t++) - { - tot = 0; - for (i = 0; i < N; i++) - { - for (j = 0; j < N; j++) - { - xi[t][i][j] = alpha[t][i] * a[i][j] * b[t+1][j] * beta[t+1][j]; - tot += xi[t][i][j]; - } - } - for (i = 0; i < N; i++) - for (j = 0; j < N; j++) - xi[t][i][j] /= tot; - } - - /* - // CHECK - fine - // gamma[t][i] = \sum_j{xi[t][i][j]} - tot = 0; - for (j = 0; j < N; j++) - tot += xi[3][1][j]; - printf("gamma[3][1] = %f, sum_j(xi[3][1][j]) = %f\n", gamma[3][1], tot); - */ - - for (t = 0; t < T; t++) - { - free(alpha[t]); - free(beta[t]); - } - free(alpha); - free(beta); - free(c); -} - -void viterbi_decode(double** x, int T, model_t* model, int* q) -{ - int i, j, t; - double max; - - int N = model->N; - int L = model->L; - double* p0 = model->p0; - double** a = model->a; - double** mu = model->mu; - double** cov = model->cov; - - /* inverse covariance and its determinant */ - double** icov = (double**) malloc(L*sizeof(double*)); - for (i = 0; i < L; i++) - icov[i] = (double*) malloc(L*sizeof(double)); - double detcov; - - double** logb = (double**) malloc(T*sizeof(double*)); - double** phi = (double**) malloc(T*sizeof(double*)); - int** psi = (int**) malloc(T*sizeof(int*)); - for (t = 0; t < T; t++) - { - logb[t] = (double*) malloc(N*sizeof(double)); - phi[t] = (double*) malloc(N*sizeof(double)); - psi[t] = (int*) malloc(N*sizeof(int)); - } - - /* temporary memory */ - double* gauss_y = (double*) malloc(L*sizeof(double)); - double* gauss_z = (double*) malloc(L*sizeof(double)); - - /* calculate observation logprobs */ - invert(cov, L, icov, &detcov); - for (t = 0; t < T; t++) - for (i = 0; i < N; i++) - logb[t][i] = loggauss(x[t], L, mu[i], icov, detcov, gauss_y, gauss_z); - - /* initialise */ - for (i = 0; i < N; i++) - { - phi[0][i] = log(p0[i]) + logb[0][i]; - psi[0][i] = 0; - } - - for (t = 1; t < T; t++) - { - for (j = 0; j < N; j++) - { - max = -1000000; // TODO: what should this be?? = smallest possible sumlogprob - psi[t][j] = 0; - for (i = 0; i < N; i++) - { - if (phi[t-1][i] + log(a[i][j]) > max) - { - max = phi[t-1][i] + log(a[i][j]); - phi[t][j] = max + logb[t][j]; - psi[t][j] = i; - } - } - } - } - - /* find maximising state at time T-1 */ - max = phi[T-1][0]; - q[T-1] = 0; - for (i = 1; i < N; i++) - { - if (phi[T-1][i] > max) - { - max = phi[T-1][i]; - q[T-1] = i; - } - } - - - /* track back */ - t = T - 2; - while (t >= 0) - { - q[t] = psi[t+1][q[t+1]]; - t--; - } - - /* de-allocate memory */ - for (i = 0; i < L; i++) - free(icov[i]); - free(icov); - for (t = 0; t < T; t++) - { - free(logb[t]); - free(phi[t]); - free(psi[t]); - } - free(logb); - free(phi); - free(psi); - - free(gauss_y); - free(gauss_z); -} - -/* invert matrix and calculate determinant using LAPACK */ -void invert(double** cov, int L, double** icov, double* detcov) -{ - /* copy square matrix into a vector in column-major order */ - double* a = (double*) malloc(L*L*sizeof(double)); - int i, j; - for(j=0; j < L; j++) - for (i=0; i < L; i++) - a[j*L+i] = cov[i][j]; - - long M = (long) L; - long* ipiv = (long*) malloc(L*L*sizeof(int)); - long ret; - - /* LU decomposition */ - ret = dgetrf_(&M, &M, a, &M, ipiv, &ret); /* ret should be zero, negative if cov is singular */ - if (ret < 0) - { - fprintf(stderr, "Covariance matrix was singular, couldn't invert\n"); - exit(-1); - } - - /* find determinant */ - double det = 1; - for(i = 0; i < L; i++) - det *= a[i*L+i]; - // TODO: get this to work!!! If detcov < 0 then cov is bad anyway... - /* - int sign = 1; - for (i = 0; i < L; i++) - if (ipiv[i] != i) - sign = -sign; - det *= sign; - */ - if (det < 0) - det = -det; - *detcov = det; - - /* allocate required working storage */ - long lwork = -1; - double lwbest; - dgetri_(&M, a, &M, ipiv, &lwbest, &lwork, &ret); - lwork = (long) lwbest; - double* work = (double*) malloc(lwork*sizeof(double)); - - /* find inverse */ - dgetri_(&M, a, &M, ipiv, work, &lwork, &ret); - - for(j=0; j < L; j++) - for (i=0; i < L; i++) - icov[i][j] = a[j*L+i]; - - free(work); - free(a); -} - -/* probability of multivariate Gaussian given mean, inverse and determinant of covariance */ -double gauss(double* x, int L, double* mu, double** icov, double detcov, double* y, double* z) -{ - int i, j; - double s = 0; - for (i = 0; i < L; i++) - y[i] = x[i] - mu[i]; - for (i = 0; i < L; i++) - { - //z[i] = 0; - //for (j = 0; j < L; j++) - // z[i] += icov[i][j] * y[j]; - z[i] = cblas_ddot(L, &icov[i][0], 1, y, 1); - } - s = cblas_ddot(L, z, 1, y, 1); - //for (i = 0; i < L; i++) - // s += z[i] * y[i]; - - return exp(-s/2.0) / (pow(2*PI, L/2.0) * sqrt(detcov)); -} - -/* log probability of multivariate Gaussian given mean, inverse and determinant of covariance */ -double loggauss(double* x, int L, double* mu, double** icov, double detcov, double* y, double* z) -{ - int i, j; - double s = 0; - double ret; - for (i = 0; i < L; i++) - y[i] = x[i] - mu[i]; - for (i = 0; i < L; i++) - { - //z[i] = 0; - //for (j = 0; j < L; j++) - // z[i] += icov[i][j] * y[j]; - z[i] = cblas_ddot(L, &icov[i][0], 1, y, 1); - } - s = cblas_ddot(L, z, 1, y, 1); - //for (i = 0; i < L; i++) - // s += z[i] * y[i]; - - ret = -0.5 * (s + L * log(2*PI) + log(detcov)); - - /* - // TEST - if (isinf(ret) > 0) - printf("loggauss returning infinity\n"); - if (isinf(ret) < 0) - printf("loggauss returning -infinity\n"); - if (isnan(ret)) - printf("loggauss returning nan\n"); - */ - - return ret; -} - -void hmm_print(model_t* model) -{ - int i, j; - printf("p0:\n"); - for (i = 0; i < model->N; i++) - printf("%f ", model->p0[i]); - printf("\n\n"); - printf("a:\n"); - for (i = 0; i < model->N; i++) - { - for (j = 0; j < model->N; j++) - printf("%f ", model->a[i][j]); - printf("\n"); - } - printf("\n\n"); - printf("mu:\n"); - for (i = 0; i < model->N; i++) - { - for (j = 0; j < model->L; j++) - printf("%f ", model->mu[i][j]); - printf("\n"); - } - printf("\n\n"); - printf("cov:\n"); - for (i = 0; i < model->L; i++) - { - for (j = 0; j < model->L; j++) - printf("%f ", model->cov[i][j]); - printf("\n"); - } - printf("\n\n"); -} - - diff -r dc30e3864ceb -r f599563a4663 hmm/HMM.h --- a/hmm/HMM.h Wed Jan 09 10:46:25 2008 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,39 +0,0 @@ -#ifndef _HMM_H -#define _HMM_H - -/* - * hmm.h - * soundbite - * - * Created by Mark Levy on 12/02/2006. - * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. All rights reserved. - * - */ - -#ifndef PI -#define PI 3.14159265358979323846264338327950288 -#endif - -typedef struct _model_t { - int N; /* number of states */ - double* p0; /* initial probs */ - double** a; /* transition probs */ - int L; /* dimensionality of data */ - double** mu; /* state means */ - double** cov; /* covariance, tied between all states */ -} model_t; - -void hmm_train(double** x, int T, model_t* model); /* with scaling */ -void forward_backwards(double*** xi, double** gamma, double* loglik, double* loglik1, double* loglik2, int iter, - int N, int T, double* p0, double** a, double** b); -void baum_welch(double* p0, double** a, double** mu, double** cov, int N, int T, int L, double** x, double*** xi, double** gamma); -void viterbi_decode(double** x, int T, model_t* model, int* q); /* using logs */ -model_t* hmm_init(double** x, int T, int L, int N); -void hmm_close(model_t* model); -void invert(double** cov, int L, double** icov, double* detcov); /* uses LAPACK (included with Mac OSX) */ -double gauss(double* x, int L, double* mu, double** icov, double detcov, double* y, double* z); -double loggauss(double* x, int L, double* mu, double** icov, double detcov, double* y, double* z); -void hmm_print(model_t* model); - -#endif - diff -r dc30e3864ceb -r f599563a4663 hmm/hmm.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hmm/hmm.c Wed Jan 09 10:48:08 2008 +0000 @@ -0,0 +1,800 @@ +/* + * hmm.c + * soundbite + * + * Created by Mark Levy on 12/02/2006. + * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. All rights reserved. + * + */ + +#include +#include +#include +#include +#include /* to seed random number generator */ +#include /* LAPACK for matrix inversion */ +#ifdef _MAC_OS_X +#include +#else +#include /* BLAS for matrix multiplication */ +#endif + +#include "hmm.h" + +model_t* hmm_init(double** x, int T, int L, int N) +{ + int i, j, d, e, t; + double s, ss; + + model_t* model; + model = (model_t*) malloc(sizeof(model_t)); + model->N = N; + model->L = L; + model->p0 = (double*) malloc(N*sizeof(double)); + model->a = (double**) malloc(N*sizeof(double*)); + model->mu = (double**) malloc(N*sizeof(double*)); + for (i = 0; i < N; i++) + { + model->a[i] = (double*) malloc(N*sizeof(double)); + model->mu[i] = (double*) malloc(L*sizeof(double)); + } + model->cov = (double**) malloc(L*sizeof(double*)); + for (i = 0; i < L; i++) + model->cov[i] = (double*) malloc(L*sizeof(double)); + + srand(time(0)); + double* global_mean = (double*) malloc(L*sizeof(double)); + + /* find global mean */ + for (d = 0; d < L; d++) + { + global_mean[d] = 0; + for (t = 0; t < T; t++) + global_mean[d] += x[t][d]; + global_mean[d] /= T; + } + + /* calculate global diagonal covariance */ + for (d = 0; d < L; d++) + { + for (e = 0; e < L; e++) + model->cov[d][e] = 0; + for (t = 0; t < T; t++) + model->cov[d][d] += (x[t][d] - global_mean[d]) * (x[t][d] - global_mean[d]); + model->cov[d][d] /= T-1; + } + + /* set all means close to global mean */ + for (i = 0; i < N; i++) + { + for (d = 0; d < L; d++) + { + /* add some random noise related to covariance */ + /* ideally the random number would be Gaussian(0,1), as a hack we make it uniform on [-0.25,0.25] */ + model->mu[i][d] = global_mean[d] + (0.5 * rand() / (double) RAND_MAX - 0.25) * sqrt(model->cov[d][d]); + } + } + + /* random intial and transition probs */ + s = 0; + for (i = 0; i < N; i++) + { + model->p0[i] = 1 + rand() / (double) RAND_MAX; + s += model->p0[i]; + ss = 0; + for (j = 0; j < N; j++) + { + model->a[i][j] = 1 + rand() / (double) RAND_MAX; + ss += model->a[i][j]; + } + for (j = 0; j < N; j++) + { + model->a[i][j] /= ss; + } + } + for (i = 0; i < N; i++) + model->p0[i] /= s; + + free(global_mean); + + return model; +} + +void hmm_close(model_t* model) +{ + int i; + + for (i = 0; i < model->N; i++) + { + free(model->a[i]); + free(model->mu[i]); + } + free(model->a); + free(model->mu); + for (i = 0; i < model->L; i++) + free(model->cov[i]); + free(model->cov); + free(model); +} + +void hmm_train(double** x, int T, model_t* model) +{ + int i, t; + double loglik; /* overall log-likelihood at each iteration */ + + int N = model->N; + int L = model->L; + double* p0 = model->p0; + double** a = model->a; + double** mu = model->mu; + double** cov = model->cov; + + /* allocate memory */ + double** gamma = (double**) malloc(T*sizeof(double*)); + double*** xi = (double***) malloc(T*sizeof(double**)); + for (t = 0; t < T; t++) + { + gamma[t] = (double*) malloc(N*sizeof(double)); + xi[t] = (double**) malloc(N*sizeof(double*)); + for (i = 0; i < N; i++) + xi[t][i] = (double*) malloc(N*sizeof(double)); + } + + /* temporary memory */ + double* gauss_y = (double*) malloc(L*sizeof(double)); + double* gauss_z = (double*) malloc(L*sizeof(double)); + + /* obs probs P(j|{x}) */ + double** b = (double**) malloc(T*sizeof(double*)); + for (t = 0; t < T; t++) + b[t] = (double*) malloc(N*sizeof(double)); + + /* inverse covariance and its determinant */ + double** icov = (double**) malloc(L*sizeof(double*)); + for (i = 0; i < L; i++) + icov[i] = (double*) malloc(L*sizeof(double)); + double detcov; + + double thresh = 0.0001; + int niter = 50; + int iter = 0; + double loglik1, loglik2; + while(iter < niter && !(iter > 1 && (loglik - loglik1) < thresh * (loglik1 - loglik2))) + { + ++iter; + + fprintf(stderr, "calculating obsprobs...\n"); + fflush(stderr); + + /* precalculate obs probs */ + invert(cov, L, icov, &detcov); + + for (t = 0; t < T; t++) + { + //int allzero = 1; + for (i = 0; i < N; i++) + { + b[t][i] = exp(loggauss(x[t], L, mu[i], icov, detcov, gauss_y, gauss_z)); + + //if (b[t][i] != 0) + // allzero = 0; + } + /* + if (allzero) + { + printf("all the b[t][i] were zero for t = %d, correcting...\n", t); + for (i = 0; i < N; i++) + { + b[t][i] = 0.00001; + } + } + */ + } + + fprintf(stderr, "forwards-backwards...\n"); + fflush(stderr); + + forward_backwards(xi, gamma, &loglik, &loglik1, &loglik2, iter, N, T, p0, a, b); + + fprintf(stderr, "iteration %d: loglik = %f\n", iter, loglik); + fprintf(stderr, "re-estimation...\n"); + fflush(stderr); + + baum_welch(p0, a, mu, cov, N, T, L, x, xi, gamma); + + /* + printf("a:\n"); + for (i = 0; i < model->N; i++) + { + for (j = 0; j < model->N; j++) + printf("%f ", model->a[i][j]); + printf("\n"); + } + printf("\n\n"); + */ + //hmm_print(model); + } + + /* deallocate memory */ + for (t = 0; t < T; t++) + { + free(gamma[t]); + free(b[t]); + for (i = 0; i < N; i++) + free(xi[t][i]); + free(xi[t]); + } + free(gamma); + free(xi); + free(b); + + for (i = 0; i < L; i++) + free(icov[i]); + free(icov); + + free(gauss_y); + free(gauss_z); +} + +void mlss_reestimate(double* p0, double** a, double** mu, double** cov, int N, int T, int L, int* q, double** x) +{ + /* fit a single Gaussian to observations in each state */ + + /* calculate the mean observation in each state */ + + /* calculate the overall covariance */ + + /* count transitions */ + + /* estimate initial probs from transitions (???) */ +} + +void baum_welch(double* p0, double** a, double** mu, double** cov, int N, int T, int L, double** x, double*** xi, double** gamma) +{ + int i, j, t; + + double* sum_gamma = (double*) malloc(N*sizeof(double)); + + /* temporary memory */ + double* u = (double*) malloc(L*L*sizeof(double)); + double* yy = (double*) malloc(T*L*sizeof(double)); + double* yy2 = (double*) malloc(T*L*sizeof(double)); + + /* re-estimate transition probs */ + for (i = 0; i < N; i++) + { + sum_gamma[i] = 0; + for (t = 0; t < T-1; t++) + sum_gamma[i] += gamma[t][i]; + } + + for (i = 0; i < N; i++) + { + if (sum_gamma[i] == 0) + { + fprintf(stderr, "sum_gamma[%d] was zero...\n", i); + } + //double s = 0; + for (j = 0; j < N; j++) + { + a[i][j] = 0; + for (t = 0; t < T-1; t++) + a[i][j] += xi[t][i][j]; + //s += a[i][j]; + a[i][j] /= sum_gamma[i]; + } + /* + for (j = 0; j < N; j++) + { + a[i][j] /= s; + } + */ + } + + /* NB: now we need to sum gamma over all t */ + for (i = 0; i < N; i++) + sum_gamma[i] += gamma[T-1][i]; + + /* re-estimate initial probs */ + for (i = 0; i < N; i++) + p0[i] = gamma[0][i]; + + /* re-estimate covariance */ + int d, e; + double sum_sum_gamma = 0; + for (i = 0; i < N; i++) + sum_sum_gamma += sum_gamma[i]; + + /* + for (d = 0; d < L; d++) + { + for (e = d; e < L; e++) + { + cov[d][e] = 0; + for (t = 0; t < T; t++) + for (j = 0; j < N; j++) + cov[d][e] += gamma[t][j] * (x[t][d] - mu[j][d]) * (x[t][e] - mu[j][e]); + + cov[d][e] /= sum_sum_gamma; + + if (isnan(cov[d][e])) + { + printf("cov[%d][%d] was nan\n", d, e); + for (j = 0; j < N; j++) + for (i = 0; i < L; i++) + if (isnan(mu[j][i])) + printf("mu[%d][%d] was nan\n", j, i); + for (t = 0; t < T; t++) + for (j = 0; j < N; j++) + if (isnan(gamma[t][j])) + printf("gamma[%d][%d] was nan\n", t, j); + exit(-1); + } + } + } + for (d = 0; d < L; d++) + for (e = 0; e < d; e++) + cov[d][e] = cov[e][d]; + */ + + /* using BLAS */ + for (d = 0; d < L; d++) + for (e = 0; e < L; e++) + cov[d][e] = 0; + + for (j = 0; j < N; j++) + { + for (d = 0; d < L; d++) + for (t = 0; t < T; t++) + { + yy[d*T+t] = x[t][d] - mu[j][d]; + yy2[d*T+t] = gamma[t][j] * (x[t][d] - mu[j][d]); + } + + cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, L, L, T, 1.0, yy, T, yy2, T, 0, u, L); + + for (e = 0; e < L; e++) + for (d = 0; d < L; d++) + cov[d][e] += u[e*L+d]; + } + + for (d = 0; d < L; d++) + for (e = 0; e < L; e++) + cov[d][e] /= T; /* sum_sum_gamma; */ + + //printf("sum_sum_gamma = %f\n", sum_sum_gamma); /* fine, = T IS THIS ALWAYS TRUE with pooled cov?? */ + + /* re-estimate means */ + for (j = 0; j < N; j++) + { + for (d = 0; d < L; d++) + { + mu[j][d] = 0; + for (t = 0; t < T; t++) + mu[j][d] += gamma[t][j] * x[t][d]; + mu[j][d] /= sum_gamma[j]; + } + } + + /* deallocate memory */ + free(sum_gamma); + free(yy); + free(yy2); + free(u); +} + +void forward_backwards(double*** xi, double** gamma, double* loglik, double* loglik1, double* loglik2, int iter, int N, int T, double* p0, double** a, double** b) +{ + /* forwards-backwards with scaling */ + int i, j, t; + + double** alpha = (double**) malloc(T*sizeof(double*)); + double** beta = (double**) malloc(T*sizeof(double*)); + for (t = 0; t < T; t++) + { + alpha[t] = (double*) malloc(N*sizeof(double)); + beta[t] = (double*) malloc(N*sizeof(double)); + } + + /* scaling coefficients */ + double* c = (double*) malloc(T*sizeof(double)); + + /* calculate forward probs and scale coefficients */ + c[0] = 0; + for (i = 0; i < N; i++) + { + alpha[0][i] = p0[i] * b[0][i]; + c[0] += alpha[0][i]; + + //printf("p0[%d] = %f, b[0][%d] = %f\n", i, p0[i], i, b[0][i]); + } + c[0] = 1 / c[0]; + for (i = 0; i < N; i++) + { + alpha[0][i] *= c[0]; + + //printf("alpha[0][%d] = %f\n", i, alpha[0][i]); /* OK agrees with Matlab */ + } + + *loglik1 = *loglik; + *loglik = -log(c[0]); + if (iter == 2) + *loglik2 = *loglik; + + for (t = 1; t < T; t++) + { + c[t] = 0; + for (j = 0; j < N; j++) + { + alpha[t][j] = 0; + for (i = 0; i < N; i++) + alpha[t][j] += alpha[t-1][i] * a[i][j]; + alpha[t][j] *= b[t][j]; + + c[t] += alpha[t][j]; + } + + /* + if (c[t] == 0) + { + printf("c[%d] = 0, going to blow up so exiting\n", t); + for (i = 0; i < N; i++) + if (b[t][i] == 0) + fprintf(stderr, "b[%d][%d] was zero\n", t, i); + fprintf(stderr, "x[t] was \n"); + for (i = 0; i < L; i++) + fprintf(stderr, "%f ", x[t][i]); + fprintf(stderr, "\n\n"); + exit(-1); + } + */ + + c[t] = 1 / c[t]; + for (j = 0; j < N; j++) + alpha[t][j] *= c[t]; + + //printf("c[%d] = %e\n", t, c[t]); + + *loglik -= log(c[t]); + } + + /* calculate backwards probs using same coefficients */ + for (i = 0; i < N; i++) + beta[T-1][i] = 1; + t = T - 1; + while (1) + { + for (i = 0; i < N; i++) + beta[t][i] *= c[t]; + + if (t == 0) + break; + + for (i = 0; i < N; i++) + { + beta[t-1][i] = 0; + for (j = 0; j < N; j++) + beta[t-1][i] += a[i][j] * b[t][j] * beta[t][j]; + } + + t--; + } + + /* + printf("alpha:\n"); + for (t = 0; t < T; t++) + { + for (i = 0; i < N; i++) + printf("%4.4e\t\t", alpha[t][i]); + printf("\n"); + } + printf("\n\n");printf("beta:\n"); + for (t = 0; t < T; t++) + { + for (i = 0; i < N; i++) + printf("%4.4e\t\t", beta[t][i]); + printf("\n"); + } + printf("\n\n"); + */ + + /* calculate posterior probs */ + double tot; + for (t = 0; t < T; t++) + { + tot = 0; + for (i = 0; i < N; i++) + { + gamma[t][i] = alpha[t][i] * beta[t][i]; + tot += gamma[t][i]; + } + for (i = 0; i < N; i++) + { + gamma[t][i] /= tot; + + //printf("gamma[%d][%d] = %f\n", t, i, gamma[t][i]); + } + } + + for (t = 0; t < T-1; t++) + { + tot = 0; + for (i = 0; i < N; i++) + { + for (j = 0; j < N; j++) + { + xi[t][i][j] = alpha[t][i] * a[i][j] * b[t+1][j] * beta[t+1][j]; + tot += xi[t][i][j]; + } + } + for (i = 0; i < N; i++) + for (j = 0; j < N; j++) + xi[t][i][j] /= tot; + } + + /* + // CHECK - fine + // gamma[t][i] = \sum_j{xi[t][i][j]} + tot = 0; + for (j = 0; j < N; j++) + tot += xi[3][1][j]; + printf("gamma[3][1] = %f, sum_j(xi[3][1][j]) = %f\n", gamma[3][1], tot); + */ + + for (t = 0; t < T; t++) + { + free(alpha[t]); + free(beta[t]); + } + free(alpha); + free(beta); + free(c); +} + +void viterbi_decode(double** x, int T, model_t* model, int* q) +{ + int i, j, t; + double max; + + int N = model->N; + int L = model->L; + double* p0 = model->p0; + double** a = model->a; + double** mu = model->mu; + double** cov = model->cov; + + /* inverse covariance and its determinant */ + double** icov = (double**) malloc(L*sizeof(double*)); + for (i = 0; i < L; i++) + icov[i] = (double*) malloc(L*sizeof(double)); + double detcov; + + double** logb = (double**) malloc(T*sizeof(double*)); + double** phi = (double**) malloc(T*sizeof(double*)); + int** psi = (int**) malloc(T*sizeof(int*)); + for (t = 0; t < T; t++) + { + logb[t] = (double*) malloc(N*sizeof(double)); + phi[t] = (double*) malloc(N*sizeof(double)); + psi[t] = (int*) malloc(N*sizeof(int)); + } + + /* temporary memory */ + double* gauss_y = (double*) malloc(L*sizeof(double)); + double* gauss_z = (double*) malloc(L*sizeof(double)); + + /* calculate observation logprobs */ + invert(cov, L, icov, &detcov); + for (t = 0; t < T; t++) + for (i = 0; i < N; i++) + logb[t][i] = loggauss(x[t], L, mu[i], icov, detcov, gauss_y, gauss_z); + + /* initialise */ + for (i = 0; i < N; i++) + { + phi[0][i] = log(p0[i]) + logb[0][i]; + psi[0][i] = 0; + } + + for (t = 1; t < T; t++) + { + for (j = 0; j < N; j++) + { + max = -1000000; // TODO: what should this be?? = smallest possible sumlogprob + psi[t][j] = 0; + for (i = 0; i < N; i++) + { + if (phi[t-1][i] + log(a[i][j]) > max) + { + max = phi[t-1][i] + log(a[i][j]); + phi[t][j] = max + logb[t][j]; + psi[t][j] = i; + } + } + } + } + + /* find maximising state at time T-1 */ + max = phi[T-1][0]; + q[T-1] = 0; + for (i = 1; i < N; i++) + { + if (phi[T-1][i] > max) + { + max = phi[T-1][i]; + q[T-1] = i; + } + } + + + /* track back */ + t = T - 2; + while (t >= 0) + { + q[t] = psi[t+1][q[t+1]]; + t--; + } + + /* de-allocate memory */ + for (i = 0; i < L; i++) + free(icov[i]); + free(icov); + for (t = 0; t < T; t++) + { + free(logb[t]); + free(phi[t]); + free(psi[t]); + } + free(logb); + free(phi); + free(psi); + + free(gauss_y); + free(gauss_z); +} + +/* invert matrix and calculate determinant using LAPACK */ +void invert(double** cov, int L, double** icov, double* detcov) +{ + /* copy square matrix into a vector in column-major order */ + double* a = (double*) malloc(L*L*sizeof(double)); + int i, j; + for(j=0; j < L; j++) + for (i=0; i < L; i++) + a[j*L+i] = cov[i][j]; + + long M = (long) L; + long* ipiv = (long*) malloc(L*L*sizeof(int)); + long ret; + + /* LU decomposition */ + ret = dgetrf_(&M, &M, a, &M, ipiv, &ret); /* ret should be zero, negative if cov is singular */ + if (ret < 0) + { + fprintf(stderr, "Covariance matrix was singular, couldn't invert\n"); + exit(-1); + } + + /* find determinant */ + double det = 1; + for(i = 0; i < L; i++) + det *= a[i*L+i]; + // TODO: get this to work!!! If detcov < 0 then cov is bad anyway... + /* + int sign = 1; + for (i = 0; i < L; i++) + if (ipiv[i] != i) + sign = -sign; + det *= sign; + */ + if (det < 0) + det = -det; + *detcov = det; + + /* allocate required working storage */ + long lwork = -1; + double lwbest; + dgetri_(&M, a, &M, ipiv, &lwbest, &lwork, &ret); + lwork = (long) lwbest; + double* work = (double*) malloc(lwork*sizeof(double)); + + /* find inverse */ + dgetri_(&M, a, &M, ipiv, work, &lwork, &ret); + + for(j=0; j < L; j++) + for (i=0; i < L; i++) + icov[i][j] = a[j*L+i]; + + free(work); + free(a); +} + +/* probability of multivariate Gaussian given mean, inverse and determinant of covariance */ +double gauss(double* x, int L, double* mu, double** icov, double detcov, double* y, double* z) +{ + int i, j; + double s = 0; + for (i = 0; i < L; i++) + y[i] = x[i] - mu[i]; + for (i = 0; i < L; i++) + { + //z[i] = 0; + //for (j = 0; j < L; j++) + // z[i] += icov[i][j] * y[j]; + z[i] = cblas_ddot(L, &icov[i][0], 1, y, 1); + } + s = cblas_ddot(L, z, 1, y, 1); + //for (i = 0; i < L; i++) + // s += z[i] * y[i]; + + return exp(-s/2.0) / (pow(2*PI, L/2.0) * sqrt(detcov)); +} + +/* log probability of multivariate Gaussian given mean, inverse and determinant of covariance */ +double loggauss(double* x, int L, double* mu, double** icov, double detcov, double* y, double* z) +{ + int i, j; + double s = 0; + double ret; + for (i = 0; i < L; i++) + y[i] = x[i] - mu[i]; + for (i = 0; i < L; i++) + { + //z[i] = 0; + //for (j = 0; j < L; j++) + // z[i] += icov[i][j] * y[j]; + z[i] = cblas_ddot(L, &icov[i][0], 1, y, 1); + } + s = cblas_ddot(L, z, 1, y, 1); + //for (i = 0; i < L; i++) + // s += z[i] * y[i]; + + ret = -0.5 * (s + L * log(2*PI) + log(detcov)); + + /* + // TEST + if (isinf(ret) > 0) + printf("loggauss returning infinity\n"); + if (isinf(ret) < 0) + printf("loggauss returning -infinity\n"); + if (isnan(ret)) + printf("loggauss returning nan\n"); + */ + + return ret; +} + +void hmm_print(model_t* model) +{ + int i, j; + printf("p0:\n"); + for (i = 0; i < model->N; i++) + printf("%f ", model->p0[i]); + printf("\n\n"); + printf("a:\n"); + for (i = 0; i < model->N; i++) + { + for (j = 0; j < model->N; j++) + printf("%f ", model->a[i][j]); + printf("\n"); + } + printf("\n\n"); + printf("mu:\n"); + for (i = 0; i < model->N; i++) + { + for (j = 0; j < model->L; j++) + printf("%f ", model->mu[i][j]); + printf("\n"); + } + printf("\n\n"); + printf("cov:\n"); + for (i = 0; i < model->L; i++) + { + for (j = 0; j < model->L; j++) + printf("%f ", model->cov[i][j]); + printf("\n"); + } + printf("\n\n"); +} + + diff -r dc30e3864ceb -r f599563a4663 hmm/hmm.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hmm/hmm.h Wed Jan 09 10:48:08 2008 +0000 @@ -0,0 +1,39 @@ +#ifndef _HMM_H +#define _HMM_H + +/* + * hmm.h + * soundbite + * + * Created by Mark Levy on 12/02/2006. + * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. All rights reserved. + * + */ + +#ifndef PI +#define PI 3.14159265358979323846264338327950288 +#endif + +typedef struct _model_t { + int N; /* number of states */ + double* p0; /* initial probs */ + double** a; /* transition probs */ + int L; /* dimensionality of data */ + double** mu; /* state means */ + double** cov; /* covariance, tied between all states */ +} model_t; + +void hmm_train(double** x, int T, model_t* model); /* with scaling */ +void forward_backwards(double*** xi, double** gamma, double* loglik, double* loglik1, double* loglik2, int iter, + int N, int T, double* p0, double** a, double** b); +void baum_welch(double* p0, double** a, double** mu, double** cov, int N, int T, int L, double** x, double*** xi, double** gamma); +void viterbi_decode(double** x, int T, model_t* model, int* q); /* using logs */ +model_t* hmm_init(double** x, int T, int L, int N); +void hmm_close(model_t* model); +void invert(double** cov, int L, double** icov, double* detcov); /* uses LAPACK (included with Mac OSX) */ +double gauss(double* x, int L, double* mu, double** icov, double detcov, double* y, double* z); +double loggauss(double* x, int L, double* mu, double** icov, double detcov, double* y, double* z); +void hmm_print(model_t* model); + +#endif + diff -r dc30e3864ceb -r f599563a4663 maths/pca/PCA.c --- a/maths/pca/PCA.c Wed Jan 09 10:46:25 2008 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,356 +0,0 @@ -/*********************************/ -/* Principal Components Analysis */ -/*********************************/ - -/*********************************************************************/ -/* Principal Components Analysis or the Karhunen-Loeve expansion is a - classical method for dimensionality reduction or exploratory data - analysis. One reference among many is: F. Murtagh and A. Heck, - Multivariate Data Analysis, Kluwer Academic, Dordrecht, 1987. - - Author: - F. Murtagh - Phone: + 49 89 32006298 (work) - + 49 89 965307 (home) - Earn/Bitnet: fionn@dgaeso51, fim@dgaipp1s, murtagh@stsci - Span: esomc1::fionn - Internet: murtagh@scivax.stsci.edu - - F. Murtagh, Munich, 6 June 1989 */ -/*********************************************************************/ - -#include -#include -#include - -#include "PCA.h" - -#define SIGN(a, b) ( (b) < 0 ? -fabs(a) : fabs(a) ) - -/** Variance-covariance matrix: creation *****************************/ - -/* Create m * m covariance matrix from given n * m data matrix. */ -void covcol(double** data, int n, int m, double** symmat) -{ -double *mean; -int i, j, j1, j2; - -/* Allocate storage for mean vector */ - -mean = (double*) malloc(m*sizeof(double)); - -/* Determine mean of column vectors of input data matrix */ - -for (j = 0; j < m; j++) - { - mean[j] = 0.0; - for (i = 0; i < n; i++) - { - mean[j] += data[i][j]; - } - mean[j] /= (double)n; - } - -/* -printf("\nMeans of column vectors:\n"); -for (j = 0; j < m; j++) { - printf("%12.1f",mean[j]); } printf("\n"); - */ - -/* Center the column vectors. */ - -for (i = 0; i < n; i++) - { - for (j = 0; j < m; j++) - { - data[i][j] -= mean[j]; - } - } - -/* Calculate the m * m covariance matrix. */ -for (j1 = 0; j1 < m; j1++) - { - for (j2 = j1; j2 < m; j2++) - { - symmat[j1][j2] = 0.0; - for (i = 0; i < n; i++) - { - symmat[j1][j2] += data[i][j1] * data[i][j2]; - } - symmat[j2][j1] = symmat[j1][j2]; - } - } - -free(mean); - -return; - -} - -/** Error handler **************************************************/ - -void erhand(char* err_msg) -{ - fprintf(stderr,"Run-time error:\n"); - fprintf(stderr,"%s\n", err_msg); - fprintf(stderr,"Exiting to system.\n"); - exit(1); -} - - -/** Reduce a real, symmetric matrix to a symmetric, tridiag. matrix. */ - -/* Householder reduction of matrix a to tridiagonal form. -Algorithm: Martin et al., Num. Math. 11, 181-195, 1968. -Ref: Smith et al., Matrix Eigensystem Routines -- EISPACK Guide -Springer-Verlag, 1976, pp. 489-494. -W H Press et al., Numerical Recipes in C, Cambridge U P, -1988, pp. 373-374. */ -void tred2(double** a, int n, double* d, double* e) -{ - int l, k, j, i; - double scale, hh, h, g, f; - - for (i = n-1; i >= 1; i--) - { - l = i - 1; - h = scale = 0.0; - if (l > 0) - { - for (k = 0; k <= l; k++) - scale += fabs(a[i][k]); - if (scale == 0.0) - e[i] = a[i][l]; - else - { - for (k = 0; k <= l; k++) - { - a[i][k] /= scale; - h += a[i][k] * a[i][k]; - } - f = a[i][l]; - g = f>0 ? -sqrt(h) : sqrt(h); - e[i] = scale * g; - h -= f * g; - a[i][l] = f - g; - f = 0.0; - for (j = 0; j <= l; j++) - { - a[j][i] = a[i][j]/h; - g = 0.0; - for (k = 0; k <= j; k++) - g += a[j][k] * a[i][k]; - for (k = j+1; k <= l; k++) - g += a[k][j] * a[i][k]; - e[j] = g / h; - f += e[j] * a[i][j]; - } - hh = f / (h + h); - for (j = 0; j <= l; j++) - { - f = a[i][j]; - e[j] = g = e[j] - hh * f; - for (k = 0; k <= j; k++) - a[j][k] -= (f * e[k] + g * a[i][k]); - } - } - } - else - e[i] = a[i][l]; - d[i] = h; - } - d[0] = 0.0; - e[0] = 0.0; - for (i = 0; i < n; i++) - { - l = i - 1; - if (d[i]) - { - for (j = 0; j <= l; j++) - { - g = 0.0; - for (k = 0; k <= l; k++) - g += a[i][k] * a[k][j]; - for (k = 0; k <= l; k++) - a[k][j] -= g * a[k][i]; - } - } - d[i] = a[i][i]; - a[i][i] = 1.0; - for (j = 0; j <= l; j++) - a[j][i] = a[i][j] = 0.0; - } -} - -/** Tridiagonal QL algorithm -- Implicit **********************/ - -void tqli(double* d, double* e, int n, double** z) -{ - int m, l, iter, i, k; - double s, r, p, g, f, dd, c, b; - - for (i = 1; i < n; i++) - e[i-1] = e[i]; - e[n-1] = 0.0; - for (l = 0; l < n; l++) - { - iter = 0; - do - { - for (m = l; m < n-1; m++) - { - dd = fabs(d[m]) + fabs(d[m+1]); - if (fabs(e[m]) + dd == dd) break; - } - if (m != l) - { - if (iter++ == 30) erhand("No convergence in TLQI."); - g = (d[l+1] - d[l]) / (2.0 * e[l]); - r = sqrt((g * g) + 1.0); - g = d[m] - d[l] + e[l] / (g + SIGN(r, g)); - s = c = 1.0; - p = 0.0; - for (i = m-1; i >= l; i--) - { - f = s * e[i]; - b = c * e[i]; - if (fabs(f) >= fabs(g)) - { - c = g / f; - r = sqrt((c * c) + 1.0); - e[i+1] = f * r; - c *= (s = 1.0/r); - } - else - { - s = f / g; - r = sqrt((s * s) + 1.0); - e[i+1] = g * r; - s *= (c = 1.0/r); - } - g = d[i+1] - p; - r = (d[i] - g) * s + 2.0 * c * b; - p = s * r; - d[i+1] = g + p; - g = c * r - b; - for (k = 0; k < n; k++) - { - f = z[k][i+1]; - z[k][i+1] = s * z[k][i] + c * f; - z[k][i] = c * z[k][i] - s * f; - } - } - d[l] = d[l] - p; - e[l] = g; - e[m] = 0.0; - } - } while (m != l); - } -} - -/* In place projection onto basis vectors */ -void pca_project(double** data, int n, int m, int ncomponents) -{ - int i, j, k, k2; - double **symmat, **symmat2, *evals, *interm; - - //TODO: assert ncomponents < m - - symmat = (double**) malloc(m*sizeof(double*)); - for (i = 0; i < m; i++) - symmat[i] = (double*) malloc(m*sizeof(double)); - - covcol(data, n, m, symmat); - - /********************************************************************* - Eigen-reduction - **********************************************************************/ - - /* Allocate storage for dummy and new vectors. */ - evals = (double*) malloc(m*sizeof(double)); /* Storage alloc. for vector of eigenvalues */ - interm = (double*) malloc(m*sizeof(double)); /* Storage alloc. for 'intermediate' vector */ - //MALLOC_ARRAY(symmat2,m,m,double); - //for (i = 0; i < m; i++) { - // for (j = 0; j < m; j++) { - // symmat2[i][j] = symmat[i][j]; /* Needed below for col. projections */ - // } - //} - tred2(symmat, m, evals, interm); /* Triangular decomposition */ -tqli(evals, interm, m, symmat); /* Reduction of sym. trid. matrix */ -/* evals now contains the eigenvalues, -columns of symmat now contain the associated eigenvectors. */ - -/* - printf("\nEigenvalues:\n"); - for (j = m-1; j >= 0; j--) { - printf("%18.5f\n", evals[j]); } - printf("\n(Eigenvalues should be strictly positive; limited\n"); - printf("precision machine arithmetic may affect this.\n"); - printf("Eigenvalues are often expressed as cumulative\n"); - printf("percentages, representing the 'percentage variance\n"); - printf("explained' by the associated axis or principal component.)\n"); - - printf("\nEigenvectors:\n"); - printf("(First three; their definition in terms of original vbes.)\n"); - for (j = 0; j < m; j++) { - for (i = 1; i <= 3; i++) { - printf("%12.4f", symmat[j][m-i]); } - printf("\n"); } - */ - -/* Form projections of row-points on prin. components. */ -/* Store in 'data', overwriting original data. */ -for (i = 0; i < n; i++) { - for (j = 0; j < m; j++) { - interm[j] = data[i][j]; } /* data[i][j] will be overwritten */ - for (k = 0; k < ncomponents; k++) { - data[i][k] = 0.0; - for (k2 = 0; k2 < m; k2++) { - data[i][k] += interm[k2] * symmat[k2][m-k-1]; } - } -} - -/* -printf("\nProjections of row-points on first 3 prin. comps.:\n"); - for (i = 0; i < n; i++) { - for (j = 0; j < 3; j++) { - printf("%12.4f", data[i][j]); } - printf("\n"); } - */ - -/* Form projections of col.-points on first three prin. components. */ -/* Store in 'symmat2', overwriting what was stored in this. */ -//for (j = 0; j < m; j++) { -// for (k = 0; k < m; k++) { -// interm[k] = symmat2[j][k]; } /*symmat2[j][k] will be overwritten*/ -// for (i = 0; i < 3; i++) { -// symmat2[j][i] = 0.0; -// for (k2 = 0; k2 < m; k2++) { -// symmat2[j][i] += interm[k2] * symmat[k2][m-i-1]; } -// if (evals[m-i-1] > 0.0005) /* Guard against zero eigenvalue */ -// symmat2[j][i] /= sqrt(evals[m-i-1]); /* Rescale */ -// else -// symmat2[j][i] = 0.0; /* Standard kludge */ -// } -// } - -/* - printf("\nProjections of column-points on first 3 prin. comps.:\n"); - for (j = 0; j < m; j++) { - for (k = 0; k < 3; k++) { - printf("%12.4f", symmat2[j][k]); } - printf("\n"); } - */ - - -for (i = 0; i < m; i++) - free(symmat[i]); -free(symmat); -//FREE_ARRAY(symmat2,m); -free(evals); -free(interm); - -} - - - diff -r dc30e3864ceb -r f599563a4663 maths/pca/PCA.h --- a/maths/pca/PCA.h Wed Jan 09 10:46:25 2008 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,17 +0,0 @@ -#ifndef _PCA_H -#define _PCA_H - -/* - * pca.h - * soundbite - * - * Created by Mark Levy on 08/02/2006. - * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. All rights reserved. - * - */ - -void pca_project(double** data, int n, int m, int ncomponents); - - -#endif - diff -r dc30e3864ceb -r f599563a4663 maths/pca/pca.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/maths/pca/pca.c Wed Jan 09 10:48:08 2008 +0000 @@ -0,0 +1,356 @@ +/*********************************/ +/* Principal Components Analysis */ +/*********************************/ + +/*********************************************************************/ +/* Principal Components Analysis or the Karhunen-Loeve expansion is a + classical method for dimensionality reduction or exploratory data + analysis. One reference among many is: F. Murtagh and A. Heck, + Multivariate Data Analysis, Kluwer Academic, Dordrecht, 1987. + + Author: + F. Murtagh + Phone: + 49 89 32006298 (work) + + 49 89 965307 (home) + Earn/Bitnet: fionn@dgaeso51, fim@dgaipp1s, murtagh@stsci + Span: esomc1::fionn + Internet: murtagh@scivax.stsci.edu + + F. Murtagh, Munich, 6 June 1989 */ +/*********************************************************************/ + +#include +#include +#include + +#include "pca.h" + +#define SIGN(a, b) ( (b) < 0 ? -fabs(a) : fabs(a) ) + +/** Variance-covariance matrix: creation *****************************/ + +/* Create m * m covariance matrix from given n * m data matrix. */ +void covcol(double** data, int n, int m, double** symmat) +{ +double *mean; +int i, j, j1, j2; + +/* Allocate storage for mean vector */ + +mean = (double*) malloc(m*sizeof(double)); + +/* Determine mean of column vectors of input data matrix */ + +for (j = 0; j < m; j++) + { + mean[j] = 0.0; + for (i = 0; i < n; i++) + { + mean[j] += data[i][j]; + } + mean[j] /= (double)n; + } + +/* +printf("\nMeans of column vectors:\n"); +for (j = 0; j < m; j++) { + printf("%12.1f",mean[j]); } printf("\n"); + */ + +/* Center the column vectors. */ + +for (i = 0; i < n; i++) + { + for (j = 0; j < m; j++) + { + data[i][j] -= mean[j]; + } + } + +/* Calculate the m * m covariance matrix. */ +for (j1 = 0; j1 < m; j1++) + { + for (j2 = j1; j2 < m; j2++) + { + symmat[j1][j2] = 0.0; + for (i = 0; i < n; i++) + { + symmat[j1][j2] += data[i][j1] * data[i][j2]; + } + symmat[j2][j1] = symmat[j1][j2]; + } + } + +free(mean); + +return; + +} + +/** Error handler **************************************************/ + +void erhand(char* err_msg) +{ + fprintf(stderr,"Run-time error:\n"); + fprintf(stderr,"%s\n", err_msg); + fprintf(stderr,"Exiting to system.\n"); + exit(1); +} + + +/** Reduce a real, symmetric matrix to a symmetric, tridiag. matrix. */ + +/* Householder reduction of matrix a to tridiagonal form. +Algorithm: Martin et al., Num. Math. 11, 181-195, 1968. +Ref: Smith et al., Matrix Eigensystem Routines -- EISPACK Guide +Springer-Verlag, 1976, pp. 489-494. +W H Press et al., Numerical Recipes in C, Cambridge U P, +1988, pp. 373-374. */ +void tred2(double** a, int n, double* d, double* e) +{ + int l, k, j, i; + double scale, hh, h, g, f; + + for (i = n-1; i >= 1; i--) + { + l = i - 1; + h = scale = 0.0; + if (l > 0) + { + for (k = 0; k <= l; k++) + scale += fabs(a[i][k]); + if (scale == 0.0) + e[i] = a[i][l]; + else + { + for (k = 0; k <= l; k++) + { + a[i][k] /= scale; + h += a[i][k] * a[i][k]; + } + f = a[i][l]; + g = f>0 ? -sqrt(h) : sqrt(h); + e[i] = scale * g; + h -= f * g; + a[i][l] = f - g; + f = 0.0; + for (j = 0; j <= l; j++) + { + a[j][i] = a[i][j]/h; + g = 0.0; + for (k = 0; k <= j; k++) + g += a[j][k] * a[i][k]; + for (k = j+1; k <= l; k++) + g += a[k][j] * a[i][k]; + e[j] = g / h; + f += e[j] * a[i][j]; + } + hh = f / (h + h); + for (j = 0; j <= l; j++) + { + f = a[i][j]; + e[j] = g = e[j] - hh * f; + for (k = 0; k <= j; k++) + a[j][k] -= (f * e[k] + g * a[i][k]); + } + } + } + else + e[i] = a[i][l]; + d[i] = h; + } + d[0] = 0.0; + e[0] = 0.0; + for (i = 0; i < n; i++) + { + l = i - 1; + if (d[i]) + { + for (j = 0; j <= l; j++) + { + g = 0.0; + for (k = 0; k <= l; k++) + g += a[i][k] * a[k][j]; + for (k = 0; k <= l; k++) + a[k][j] -= g * a[k][i]; + } + } + d[i] = a[i][i]; + a[i][i] = 1.0; + for (j = 0; j <= l; j++) + a[j][i] = a[i][j] = 0.0; + } +} + +/** Tridiagonal QL algorithm -- Implicit **********************/ + +void tqli(double* d, double* e, int n, double** z) +{ + int m, l, iter, i, k; + double s, r, p, g, f, dd, c, b; + + for (i = 1; i < n; i++) + e[i-1] = e[i]; + e[n-1] = 0.0; + for (l = 0; l < n; l++) + { + iter = 0; + do + { + for (m = l; m < n-1; m++) + { + dd = fabs(d[m]) + fabs(d[m+1]); + if (fabs(e[m]) + dd == dd) break; + } + if (m != l) + { + if (iter++ == 30) erhand("No convergence in TLQI."); + g = (d[l+1] - d[l]) / (2.0 * e[l]); + r = sqrt((g * g) + 1.0); + g = d[m] - d[l] + e[l] / (g + SIGN(r, g)); + s = c = 1.0; + p = 0.0; + for (i = m-1; i >= l; i--) + { + f = s * e[i]; + b = c * e[i]; + if (fabs(f) >= fabs(g)) + { + c = g / f; + r = sqrt((c * c) + 1.0); + e[i+1] = f * r; + c *= (s = 1.0/r); + } + else + { + s = f / g; + r = sqrt((s * s) + 1.0); + e[i+1] = g * r; + s *= (c = 1.0/r); + } + g = d[i+1] - p; + r = (d[i] - g) * s + 2.0 * c * b; + p = s * r; + d[i+1] = g + p; + g = c * r - b; + for (k = 0; k < n; k++) + { + f = z[k][i+1]; + z[k][i+1] = s * z[k][i] + c * f; + z[k][i] = c * z[k][i] - s * f; + } + } + d[l] = d[l] - p; + e[l] = g; + e[m] = 0.0; + } + } while (m != l); + } +} + +/* In place projection onto basis vectors */ +void pca_project(double** data, int n, int m, int ncomponents) +{ + int i, j, k, k2; + double **symmat, **symmat2, *evals, *interm; + + //TODO: assert ncomponents < m + + symmat = (double**) malloc(m*sizeof(double*)); + for (i = 0; i < m; i++) + symmat[i] = (double*) malloc(m*sizeof(double)); + + covcol(data, n, m, symmat); + + /********************************************************************* + Eigen-reduction + **********************************************************************/ + + /* Allocate storage for dummy and new vectors. */ + evals = (double*) malloc(m*sizeof(double)); /* Storage alloc. for vector of eigenvalues */ + interm = (double*) malloc(m*sizeof(double)); /* Storage alloc. for 'intermediate' vector */ + //MALLOC_ARRAY(symmat2,m,m,double); + //for (i = 0; i < m; i++) { + // for (j = 0; j < m; j++) { + // symmat2[i][j] = symmat[i][j]; /* Needed below for col. projections */ + // } + //} + tred2(symmat, m, evals, interm); /* Triangular decomposition */ +tqli(evals, interm, m, symmat); /* Reduction of sym. trid. matrix */ +/* evals now contains the eigenvalues, +columns of symmat now contain the associated eigenvectors. */ + +/* + printf("\nEigenvalues:\n"); + for (j = m-1; j >= 0; j--) { + printf("%18.5f\n", evals[j]); } + printf("\n(Eigenvalues should be strictly positive; limited\n"); + printf("precision machine arithmetic may affect this.\n"); + printf("Eigenvalues are often expressed as cumulative\n"); + printf("percentages, representing the 'percentage variance\n"); + printf("explained' by the associated axis or principal component.)\n"); + + printf("\nEigenvectors:\n"); + printf("(First three; their definition in terms of original vbes.)\n"); + for (j = 0; j < m; j++) { + for (i = 1; i <= 3; i++) { + printf("%12.4f", symmat[j][m-i]); } + printf("\n"); } + */ + +/* Form projections of row-points on prin. components. */ +/* Store in 'data', overwriting original data. */ +for (i = 0; i < n; i++) { + for (j = 0; j < m; j++) { + interm[j] = data[i][j]; } /* data[i][j] will be overwritten */ + for (k = 0; k < ncomponents; k++) { + data[i][k] = 0.0; + for (k2 = 0; k2 < m; k2++) { + data[i][k] += interm[k2] * symmat[k2][m-k-1]; } + } +} + +/* +printf("\nProjections of row-points on first 3 prin. comps.:\n"); + for (i = 0; i < n; i++) { + for (j = 0; j < 3; j++) { + printf("%12.4f", data[i][j]); } + printf("\n"); } + */ + +/* Form projections of col.-points on first three prin. components. */ +/* Store in 'symmat2', overwriting what was stored in this. */ +//for (j = 0; j < m; j++) { +// for (k = 0; k < m; k++) { +// interm[k] = symmat2[j][k]; } /*symmat2[j][k] will be overwritten*/ +// for (i = 0; i < 3; i++) { +// symmat2[j][i] = 0.0; +// for (k2 = 0; k2 < m; k2++) { +// symmat2[j][i] += interm[k2] * symmat[k2][m-i-1]; } +// if (evals[m-i-1] > 0.0005) /* Guard against zero eigenvalue */ +// symmat2[j][i] /= sqrt(evals[m-i-1]); /* Rescale */ +// else +// symmat2[j][i] = 0.0; /* Standard kludge */ +// } +// } + +/* + printf("\nProjections of column-points on first 3 prin. comps.:\n"); + for (j = 0; j < m; j++) { + for (k = 0; k < 3; k++) { + printf("%12.4f", symmat2[j][k]); } + printf("\n"); } + */ + + +for (i = 0; i < m; i++) + free(symmat[i]); +free(symmat); +//FREE_ARRAY(symmat2,m); +free(evals); +free(interm); + +} + + + diff -r dc30e3864ceb -r f599563a4663 maths/pca/pca.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/maths/pca/pca.h Wed Jan 09 10:48:08 2008 +0000 @@ -0,0 +1,17 @@ +#ifndef _PCA_H +#define _PCA_H + +/* + * pca.h + * soundbite + * + * Created by Mark Levy on 08/02/2006. + * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. All rights reserved. + * + */ + +void pca_project(double** data, int n, int m, int ncomponents); + + +#endif + diff -r dc30e3864ceb -r f599563a4663 qm-dsp.pro --- a/qm-dsp.pro Wed Jan 09 10:46:25 2008 +0000 +++ b/qm-dsp.pro Wed Jan 09 10:48:08 2008 +0000 @@ -37,13 +37,13 @@ dsp/tonal/TCSgram.h \ dsp/tonal/TonalEstimator.h \ dsp/transforms/FFT.h \ - hmm/HMM.h \ + hmm/hmm.h \ maths/Correlation.h \ maths/Histogram.h \ maths/MathAliases.h \ maths/MathUtilities.h \ maths/Polyfit.h \ - maths/pca/PCA.h + maths/pca/pca.h SOURCES += base/Pitch.cpp \ dsp/chromagram/Chromagram.cpp \ dsp/chromagram/ChromaProcess.cpp \ @@ -62,7 +62,7 @@ dsp/tonal/TCSgram.cpp \ dsp/tonal/TonalEstimator.cpp \ dsp/transforms/FFT.cpp \ - hmm/HMM.c \ + hmm/hmm.c \ maths/Correlation.cpp \ maths/MathUtilities.cpp \ - maths/pca/PCA.c + maths/pca/pca.c