Mercurial > hg > qm-dsp
diff hmm/hmm.c @ 241:a98dd8ec96f8
* Move dsp/maths to maths ; bring PCA and HMM across from Soundbite
author | Chris Cannam <c.cannam@qmul.ac.uk> |
---|---|
date | Wed, 09 Jan 2008 10:31:29 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hmm/hmm.c Wed Jan 09 10:31:29 2008 +0000 @@ -0,0 +1,800 @@ +/* + * hmm.c + * soundbite + * + * Created by Mark Levy on 12/02/2006. + * Copyright 2006 Centre for Digital Music, Queen Mary, University of London. All rights reserved. + * + */ + +#include <stdio.h> +#include <math.h> +#include <stdlib.h> +#include <float.h> +#include <time.h> /* to seed random number generator */ +#include "clapack.h" /* LAPACK for matrix inversion */ +#ifdef _MAC_OS_X +#include <vecLib/cblas.h> +#else +#include <cblas.h> /* BLAS for matrix multiplication */ +#endif + +#include "hmm.h" + +model_t* hmm_init(double** x, int T, int L, int N) +{ + int i, j, d, e, t; + double s, ss; + + model_t* model; + model = (model_t*) malloc(sizeof(model_t)); + model->N = N; + model->L = L; + model->p0 = (double*) malloc(N*sizeof(double)); + model->a = (double**) malloc(N*sizeof(double*)); + model->mu = (double**) malloc(N*sizeof(double*)); + for (i = 0; i < N; i++) + { + model->a[i] = (double*) malloc(N*sizeof(double)); + model->mu[i] = (double*) malloc(L*sizeof(double)); + } + model->cov = (double**) malloc(L*sizeof(double*)); + for (i = 0; i < L; i++) + model->cov[i] = (double*) malloc(L*sizeof(double)); + + srand(time(0)); + double* global_mean = (double*) malloc(L*sizeof(double)); + + /* find global mean */ + for (d = 0; d < L; d++) + { + global_mean[d] = 0; + for (t = 0; t < T; t++) + global_mean[d] += x[t][d]; + global_mean[d] /= T; + } + + /* calculate global diagonal covariance */ + for (d = 0; d < L; d++) + { + for (e = 0; e < L; e++) + model->cov[d][e] = 0; + for (t = 0; t < T; t++) + model->cov[d][d] += (x[t][d] - global_mean[d]) * (x[t][d] - global_mean[d]); + model->cov[d][d] /= T-1; + } + + /* set all means close to global mean */ + for (i = 0; i < N; i++) + { + for (d = 0; d < L; d++) + { + /* add some random noise related to covariance */ + /* ideally the random number would be Gaussian(0,1), as a hack we make it uniform on [-0.25,0.25] */ + model->mu[i][d] = global_mean[d] + (0.5 * rand() / (double) RAND_MAX - 0.25) * sqrt(model->cov[d][d]); + } + } + + /* random intial and transition probs */ + s = 0; + for (i = 0; i < N; i++) + { + model->p0[i] = 1 + rand() / (double) RAND_MAX; + s += model->p0[i]; + ss = 0; + for (j = 0; j < N; j++) + { + model->a[i][j] = 1 + rand() / (double) RAND_MAX; + ss += model->a[i][j]; + } + for (j = 0; j < N; j++) + { + model->a[i][j] /= ss; + } + } + for (i = 0; i < N; i++) + model->p0[i] /= s; + + free(global_mean); + + return model; +} + +void hmm_close(model_t* model) +{ + int i; + + for (i = 0; i < model->N; i++) + { + free(model->a[i]); + free(model->mu[i]); + } + free(model->a); + free(model->mu); + for (i = 0; i < model->L; i++) + free(model->cov[i]); + free(model->cov); + free(model); +} + +void hmm_train(double** x, int T, model_t* model) +{ + int i, t; + double loglik; /* overall log-likelihood at each iteration */ + + int N = model->N; + int L = model->L; + double* p0 = model->p0; + double** a = model->a; + double** mu = model->mu; + double** cov = model->cov; + + /* allocate memory */ + double** gamma = (double**) malloc(T*sizeof(double*)); + double*** xi = (double***) malloc(T*sizeof(double**)); + for (t = 0; t < T; t++) + { + gamma[t] = (double*) malloc(N*sizeof(double)); + xi[t] = (double**) malloc(N*sizeof(double*)); + for (i = 0; i < N; i++) + xi[t][i] = (double*) malloc(N*sizeof(double)); + } + + /* temporary memory */ + double* gauss_y = (double*) malloc(L*sizeof(double)); + double* gauss_z = (double*) malloc(L*sizeof(double)); + + /* obs probs P(j|{x}) */ + double** b = (double**) malloc(T*sizeof(double*)); + for (t = 0; t < T; t++) + b[t] = (double*) malloc(N*sizeof(double)); + + /* inverse covariance and its determinant */ + double** icov = (double**) malloc(L*sizeof(double*)); + for (i = 0; i < L; i++) + icov[i] = (double*) malloc(L*sizeof(double)); + double detcov; + + double thresh = 0.0001; + int niter = 50; + int iter = 0; + double loglik1, loglik2; + while(iter < niter && !(iter > 1 && (loglik - loglik1) < thresh * (loglik1 - loglik2))) + { + ++iter; + + fprintf(stderr, "calculating obsprobs...\n"); + fflush(stderr); + + /* precalculate obs probs */ + invert(cov, L, icov, &detcov); + + for (t = 0; t < T; t++) + { + //int allzero = 1; + for (i = 0; i < N; i++) + { + b[t][i] = exp(loggauss(x[t], L, mu[i], icov, detcov, gauss_y, gauss_z)); + + //if (b[t][i] != 0) + // allzero = 0; + } + /* + if (allzero) + { + printf("all the b[t][i] were zero for t = %d, correcting...\n", t); + for (i = 0; i < N; i++) + { + b[t][i] = 0.00001; + } + } + */ + } + + fprintf(stderr, "forwards-backwards...\n"); + fflush(stderr); + + forward_backwards(xi, gamma, &loglik, &loglik1, &loglik2, iter, N, T, p0, a, b); + + fprintf(stderr, "iteration %d: loglik = %f\n", iter, loglik); + fprintf(stderr, "re-estimation...\n"); + fflush(stderr); + + baum_welch(p0, a, mu, cov, N, T, L, x, xi, gamma); + + /* + printf("a:\n"); + for (i = 0; i < model->N; i++) + { + for (j = 0; j < model->N; j++) + printf("%f ", model->a[i][j]); + printf("\n"); + } + printf("\n\n"); + */ + //hmm_print(model); + } + + /* deallocate memory */ + for (t = 0; t < T; t++) + { + free(gamma[t]); + free(b[t]); + for (i = 0; i < N; i++) + free(xi[t][i]); + free(xi[t]); + } + free(gamma); + free(xi); + free(b); + + for (i = 0; i < L; i++) + free(icov[i]); + free(icov); + + free(gauss_y); + free(gauss_z); +} + +void mlss_reestimate(double* p0, double** a, double** mu, double** cov, int N, int T, int L, int* q, double** x) +{ + /* fit a single Gaussian to observations in each state */ + + /* calculate the mean observation in each state */ + + /* calculate the overall covariance */ + + /* count transitions */ + + /* estimate initial probs from transitions (???) */ +} + +void baum_welch(double* p0, double** a, double** mu, double** cov, int N, int T, int L, double** x, double*** xi, double** gamma) +{ + int i, j, t; + + double* sum_gamma = (double*) malloc(N*sizeof(double)); + + /* temporary memory */ + double* u = (double*) malloc(L*L*sizeof(double)); + double* yy = (double*) malloc(T*L*sizeof(double)); + double* yy2 = (double*) malloc(T*L*sizeof(double)); + + /* re-estimate transition probs */ + for (i = 0; i < N; i++) + { + sum_gamma[i] = 0; + for (t = 0; t < T-1; t++) + sum_gamma[i] += gamma[t][i]; + } + + for (i = 0; i < N; i++) + { + if (sum_gamma[i] == 0) + { + fprintf(stderr, "sum_gamma[%d] was zero...\n", i); + } + //double s = 0; + for (j = 0; j < N; j++) + { + a[i][j] = 0; + for (t = 0; t < T-1; t++) + a[i][j] += xi[t][i][j]; + //s += a[i][j]; + a[i][j] /= sum_gamma[i]; + } + /* + for (j = 0; j < N; j++) + { + a[i][j] /= s; + } + */ + } + + /* NB: now we need to sum gamma over all t */ + for (i = 0; i < N; i++) + sum_gamma[i] += gamma[T-1][i]; + + /* re-estimate initial probs */ + for (i = 0; i < N; i++) + p0[i] = gamma[0][i]; + + /* re-estimate covariance */ + int d, e; + double sum_sum_gamma = 0; + for (i = 0; i < N; i++) + sum_sum_gamma += sum_gamma[i]; + + /* + for (d = 0; d < L; d++) + { + for (e = d; e < L; e++) + { + cov[d][e] = 0; + for (t = 0; t < T; t++) + for (j = 0; j < N; j++) + cov[d][e] += gamma[t][j] * (x[t][d] - mu[j][d]) * (x[t][e] - mu[j][e]); + + cov[d][e] /= sum_sum_gamma; + + if (isnan(cov[d][e])) + { + printf("cov[%d][%d] was nan\n", d, e); + for (j = 0; j < N; j++) + for (i = 0; i < L; i++) + if (isnan(mu[j][i])) + printf("mu[%d][%d] was nan\n", j, i); + for (t = 0; t < T; t++) + for (j = 0; j < N; j++) + if (isnan(gamma[t][j])) + printf("gamma[%d][%d] was nan\n", t, j); + exit(-1); + } + } + } + for (d = 0; d < L; d++) + for (e = 0; e < d; e++) + cov[d][e] = cov[e][d]; + */ + + /* using BLAS */ + for (d = 0; d < L; d++) + for (e = 0; e < L; e++) + cov[d][e] = 0; + + for (j = 0; j < N; j++) + { + for (d = 0; d < L; d++) + for (t = 0; t < T; t++) + { + yy[d*T+t] = x[t][d] - mu[j][d]; + yy2[d*T+t] = gamma[t][j] * (x[t][d] - mu[j][d]); + } + + cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, L, L, T, 1.0, yy, T, yy2, T, 0, u, L); + + for (e = 0; e < L; e++) + for (d = 0; d < L; d++) + cov[d][e] += u[e*L+d]; + } + + for (d = 0; d < L; d++) + for (e = 0; e < L; e++) + cov[d][e] /= T; /* sum_sum_gamma; */ + + //printf("sum_sum_gamma = %f\n", sum_sum_gamma); /* fine, = T IS THIS ALWAYS TRUE with pooled cov?? */ + + /* re-estimate means */ + for (j = 0; j < N; j++) + { + for (d = 0; d < L; d++) + { + mu[j][d] = 0; + for (t = 0; t < T; t++) + mu[j][d] += gamma[t][j] * x[t][d]; + mu[j][d] /= sum_gamma[j]; + } + } + + /* deallocate memory */ + free(sum_gamma); + free(yy); + free(yy2); + free(u); +} + +void forward_backwards(double*** xi, double** gamma, double* loglik, double* loglik1, double* loglik2, int iter, int N, int T, double* p0, double** a, double** b) +{ + /* forwards-backwards with scaling */ + int i, j, t; + + double** alpha = (double**) malloc(T*sizeof(double*)); + double** beta = (double**) malloc(T*sizeof(double*)); + for (t = 0; t < T; t++) + { + alpha[t] = (double*) malloc(N*sizeof(double)); + beta[t] = (double*) malloc(N*sizeof(double)); + } + + /* scaling coefficients */ + double* c = (double*) malloc(T*sizeof(double)); + + /* calculate forward probs and scale coefficients */ + c[0] = 0; + for (i = 0; i < N; i++) + { + alpha[0][i] = p0[i] * b[0][i]; + c[0] += alpha[0][i]; + + //printf("p0[%d] = %f, b[0][%d] = %f\n", i, p0[i], i, b[0][i]); + } + c[0] = 1 / c[0]; + for (i = 0; i < N; i++) + { + alpha[0][i] *= c[0]; + + //printf("alpha[0][%d] = %f\n", i, alpha[0][i]); /* OK agrees with Matlab */ + } + + *loglik1 = *loglik; + *loglik = -log(c[0]); + if (iter == 2) + *loglik2 = *loglik; + + for (t = 1; t < T; t++) + { + c[t] = 0; + for (j = 0; j < N; j++) + { + alpha[t][j] = 0; + for (i = 0; i < N; i++) + alpha[t][j] += alpha[t-1][i] * a[i][j]; + alpha[t][j] *= b[t][j]; + + c[t] += alpha[t][j]; + } + + /* + if (c[t] == 0) + { + printf("c[%d] = 0, going to blow up so exiting\n", t); + for (i = 0; i < N; i++) + if (b[t][i] == 0) + fprintf(stderr, "b[%d][%d] was zero\n", t, i); + fprintf(stderr, "x[t] was \n"); + for (i = 0; i < L; i++) + fprintf(stderr, "%f ", x[t][i]); + fprintf(stderr, "\n\n"); + exit(-1); + } + */ + + c[t] = 1 / c[t]; + for (j = 0; j < N; j++) + alpha[t][j] *= c[t]; + + //printf("c[%d] = %e\n", t, c[t]); + + *loglik -= log(c[t]); + } + + /* calculate backwards probs using same coefficients */ + for (i = 0; i < N; i++) + beta[T-1][i] = 1; + t = T - 1; + while (1) + { + for (i = 0; i < N; i++) + beta[t][i] *= c[t]; + + if (t == 0) + break; + + for (i = 0; i < N; i++) + { + beta[t-1][i] = 0; + for (j = 0; j < N; j++) + beta[t-1][i] += a[i][j] * b[t][j] * beta[t][j]; + } + + t--; + } + + /* + printf("alpha:\n"); + for (t = 0; t < T; t++) + { + for (i = 0; i < N; i++) + printf("%4.4e\t\t", alpha[t][i]); + printf("\n"); + } + printf("\n\n");printf("beta:\n"); + for (t = 0; t < T; t++) + { + for (i = 0; i < N; i++) + printf("%4.4e\t\t", beta[t][i]); + printf("\n"); + } + printf("\n\n"); + */ + + /* calculate posterior probs */ + double tot; + for (t = 0; t < T; t++) + { + tot = 0; + for (i = 0; i < N; i++) + { + gamma[t][i] = alpha[t][i] * beta[t][i]; + tot += gamma[t][i]; + } + for (i = 0; i < N; i++) + { + gamma[t][i] /= tot; + + //printf("gamma[%d][%d] = %f\n", t, i, gamma[t][i]); + } + } + + for (t = 0; t < T-1; t++) + { + tot = 0; + for (i = 0; i < N; i++) + { + for (j = 0; j < N; j++) + { + xi[t][i][j] = alpha[t][i] * a[i][j] * b[t+1][j] * beta[t+1][j]; + tot += xi[t][i][j]; + } + } + for (i = 0; i < N; i++) + for (j = 0; j < N; j++) + xi[t][i][j] /= tot; + } + + /* + // CHECK - fine + // gamma[t][i] = \sum_j{xi[t][i][j]} + tot = 0; + for (j = 0; j < N; j++) + tot += xi[3][1][j]; + printf("gamma[3][1] = %f, sum_j(xi[3][1][j]) = %f\n", gamma[3][1], tot); + */ + + for (t = 0; t < T; t++) + { + free(alpha[t]); + free(beta[t]); + } + free(alpha); + free(beta); + free(c); +} + +void viterbi_decode(double** x, int T, model_t* model, int* q) +{ + int i, j, t; + double max; + + int N = model->N; + int L = model->L; + double* p0 = model->p0; + double** a = model->a; + double** mu = model->mu; + double** cov = model->cov; + + /* inverse covariance and its determinant */ + double** icov = (double**) malloc(L*sizeof(double*)); + for (i = 0; i < L; i++) + icov[i] = (double*) malloc(L*sizeof(double)); + double detcov; + + double** logb = (double**) malloc(T*sizeof(double*)); + double** phi = (double**) malloc(T*sizeof(double*)); + int** psi = (int**) malloc(T*sizeof(int*)); + for (t = 0; t < T; t++) + { + logb[t] = (double*) malloc(N*sizeof(double)); + phi[t] = (double*) malloc(N*sizeof(double)); + psi[t] = (int*) malloc(N*sizeof(int)); + } + + /* temporary memory */ + double* gauss_y = (double*) malloc(L*sizeof(double)); + double* gauss_z = (double*) malloc(L*sizeof(double)); + + /* calculate observation logprobs */ + invert(cov, L, icov, &detcov); + for (t = 0; t < T; t++) + for (i = 0; i < N; i++) + logb[t][i] = loggauss(x[t], L, mu[i], icov, detcov, gauss_y, gauss_z); + + /* initialise */ + for (i = 0; i < N; i++) + { + phi[0][i] = log(p0[i]) + logb[0][i]; + psi[0][i] = 0; + } + + for (t = 1; t < T; t++) + { + for (j = 0; j < N; j++) + { + max = -1000000; // TODO: what should this be?? = smallest possible sumlogprob + psi[t][j] = 0; + for (i = 0; i < N; i++) + { + if (phi[t-1][i] + log(a[i][j]) > max) + { + max = phi[t-1][i] + log(a[i][j]); + phi[t][j] = max + logb[t][j]; + psi[t][j] = i; + } + } + } + } + + /* find maximising state at time T-1 */ + max = phi[T-1][0]; + q[T-1] = 0; + for (i = 1; i < N; i++) + { + if (phi[T-1][i] > max) + { + max = phi[T-1][i]; + q[T-1] = i; + } + } + + + /* track back */ + t = T - 2; + while (t >= 0) + { + q[t] = psi[t+1][q[t+1]]; + t--; + } + + /* de-allocate memory */ + for (i = 0; i < L; i++) + free(icov[i]); + free(icov); + for (t = 0; t < T; t++) + { + free(logb[t]); + free(phi[t]); + free(psi[t]); + } + free(logb); + free(phi); + free(psi); + + free(gauss_y); + free(gauss_z); +} + +/* invert matrix and calculate determinant using LAPACK */ +void invert(double** cov, int L, double** icov, double* detcov) +{ + /* copy square matrix into a vector in column-major order */ + double* a = (double*) malloc(L*L*sizeof(double)); + int i, j; + for(j=0; j < L; j++) + for (i=0; i < L; i++) + a[j*L+i] = cov[i][j]; + + long M = (long) L; + long* ipiv = (long*) malloc(L*L*sizeof(int)); + long ret; + + /* LU decomposition */ + ret = dgetrf_(&M, &M, a, &M, ipiv, &ret); /* ret should be zero, negative if cov is singular */ + if (ret < 0) + { + fprintf(stderr, "Covariance matrix was singular, couldn't invert\n"); + exit(-1); + } + + /* find determinant */ + double det = 1; + for(i = 0; i < L; i++) + det *= a[i*L+i]; + // TODO: get this to work!!! If detcov < 0 then cov is bad anyway... + /* + int sign = 1; + for (i = 0; i < L; i++) + if (ipiv[i] != i) + sign = -sign; + det *= sign; + */ + if (det < 0) + det = -det; + *detcov = det; + + /* allocate required working storage */ + long lwork = -1; + double lwbest; + dgetri_(&M, a, &M, ipiv, &lwbest, &lwork, &ret); + lwork = (long) lwbest; + double* work = (double*) malloc(lwork*sizeof(double)); + + /* find inverse */ + dgetri_(&M, a, &M, ipiv, work, &lwork, &ret); + + for(j=0; j < L; j++) + for (i=0; i < L; i++) + icov[i][j] = a[j*L+i]; + + free(work); + free(a); +} + +/* probability of multivariate Gaussian given mean, inverse and determinant of covariance */ +double gauss(double* x, int L, double* mu, double** icov, double detcov, double* y, double* z) +{ + int i, j; + double s = 0; + for (i = 0; i < L; i++) + y[i] = x[i] - mu[i]; + for (i = 0; i < L; i++) + { + //z[i] = 0; + //for (j = 0; j < L; j++) + // z[i] += icov[i][j] * y[j]; + z[i] = cblas_ddot(L, &icov[i][0], 1, y, 1); + } + s = cblas_ddot(L, z, 1, y, 1); + //for (i = 0; i < L; i++) + // s += z[i] * y[i]; + + return exp(-s/2.0) / (pow(2*PI, L/2.0) * sqrt(detcov)); +} + +/* log probability of multivariate Gaussian given mean, inverse and determinant of covariance */ +double loggauss(double* x, int L, double* mu, double** icov, double detcov, double* y, double* z) +{ + int i, j; + double s = 0; + double ret; + for (i = 0; i < L; i++) + y[i] = x[i] - mu[i]; + for (i = 0; i < L; i++) + { + //z[i] = 0; + //for (j = 0; j < L; j++) + // z[i] += icov[i][j] * y[j]; + z[i] = cblas_ddot(L, &icov[i][0], 1, y, 1); + } + s = cblas_ddot(L, z, 1, y, 1); + //for (i = 0; i < L; i++) + // s += z[i] * y[i]; + + ret = -0.5 * (s + L * log(2*PI) + log(detcov)); + + /* + // TEST + if (isinf(ret) > 0) + printf("loggauss returning infinity\n"); + if (isinf(ret) < 0) + printf("loggauss returning -infinity\n"); + if (isnan(ret)) + printf("loggauss returning nan\n"); + */ + + return ret; +} + +void hmm_print(model_t* model) +{ + int i, j; + printf("p0:\n"); + for (i = 0; i < model->N; i++) + printf("%f ", model->p0[i]); + printf("\n\n"); + printf("a:\n"); + for (i = 0; i < model->N; i++) + { + for (j = 0; j < model->N; j++) + printf("%f ", model->a[i][j]); + printf("\n"); + } + printf("\n\n"); + printf("mu:\n"); + for (i = 0; i < model->N; i++) + { + for (j = 0; j < model->L; j++) + printf("%f ", model->mu[i][j]); + printf("\n"); + } + printf("\n\n"); + printf("cov:\n"); + for (i = 0; i < model->L; i++) + { + for (j = 0; j < model->L; j++) + printf("%f ", model->cov[i][j]); + printf("\n"); + } + printf("\n\n"); +} + +