annotate maths/pca/pca.c @ 321:f1e6be2de9a5

A threshold (delta) is added in the peak picking parameters structure (PPickParams). It is used as an offset when computing the smoothed detection function. A constructor for the structure PPickParams is also added to set the parameters to 0 when a structure instance is created. Hence programmes using the peak picking parameter structure and which do not set the delta parameter (e.g. QM Vamp note onset detector) won't be affected by the modifications. Functions modified: - dsp/onsets/PeakPicking.cpp - dsp/onsets/PeakPicking.h - dsp/signalconditioning/DFProcess.cpp - dsp/signalconditioning/DFProcess.h
author mathieub <mathieu.barthet@eecs.qmul.ac.uk>
date Mon, 20 Jun 2011 19:01:48 +0100
parents f599563a4663
children e4a57215ddee
rev   line source
c@244 1 /*********************************/
c@244 2 /* Principal Components Analysis */
c@244 3 /*********************************/
c@244 4
c@244 5 /*********************************************************************/
c@244 6 /* Principal Components Analysis or the Karhunen-Loeve expansion is a
c@244 7 classical method for dimensionality reduction or exploratory data
c@244 8 analysis. One reference among many is: F. Murtagh and A. Heck,
c@244 9 Multivariate Data Analysis, Kluwer Academic, Dordrecht, 1987.
c@244 10
c@244 11 Author:
c@244 12 F. Murtagh
c@244 13 Phone: + 49 89 32006298 (work)
c@244 14 + 49 89 965307 (home)
c@244 15 Earn/Bitnet: fionn@dgaeso51, fim@dgaipp1s, murtagh@stsci
c@244 16 Span: esomc1::fionn
c@244 17 Internet: murtagh@scivax.stsci.edu
c@244 18
c@244 19 F. Murtagh, Munich, 6 June 1989 */
c@244 20 /*********************************************************************/
c@244 21
c@244 22 #include <stdio.h>
c@244 23 #include <stdlib.h>
c@244 24 #include <math.h>
c@244 25
c@244 26 #include "pca.h"
c@244 27
c@244 28 #define SIGN(a, b) ( (b) < 0 ? -fabs(a) : fabs(a) )
c@244 29
c@244 30 /** Variance-covariance matrix: creation *****************************/
c@244 31
c@244 32 /* Create m * m covariance matrix from given n * m data matrix. */
c@244 33 void covcol(double** data, int n, int m, double** symmat)
c@244 34 {
c@244 35 double *mean;
c@244 36 int i, j, j1, j2;
c@244 37
c@244 38 /* Allocate storage for mean vector */
c@244 39
c@244 40 mean = (double*) malloc(m*sizeof(double));
c@244 41
c@244 42 /* Determine mean of column vectors of input data matrix */
c@244 43
c@244 44 for (j = 0; j < m; j++)
c@244 45 {
c@244 46 mean[j] = 0.0;
c@244 47 for (i = 0; i < n; i++)
c@244 48 {
c@244 49 mean[j] += data[i][j];
c@244 50 }
c@244 51 mean[j] /= (double)n;
c@244 52 }
c@244 53
c@244 54 /*
c@244 55 printf("\nMeans of column vectors:\n");
c@244 56 for (j = 0; j < m; j++) {
c@244 57 printf("%12.1f",mean[j]); } printf("\n");
c@244 58 */
c@244 59
c@244 60 /* Center the column vectors. */
c@244 61
c@244 62 for (i = 0; i < n; i++)
c@244 63 {
c@244 64 for (j = 0; j < m; j++)
c@244 65 {
c@244 66 data[i][j] -= mean[j];
c@244 67 }
c@244 68 }
c@244 69
c@244 70 /* Calculate the m * m covariance matrix. */
c@244 71 for (j1 = 0; j1 < m; j1++)
c@244 72 {
c@244 73 for (j2 = j1; j2 < m; j2++)
c@244 74 {
c@244 75 symmat[j1][j2] = 0.0;
c@244 76 for (i = 0; i < n; i++)
c@244 77 {
c@244 78 symmat[j1][j2] += data[i][j1] * data[i][j2];
c@244 79 }
c@244 80 symmat[j2][j1] = symmat[j1][j2];
c@244 81 }
c@244 82 }
c@244 83
c@244 84 free(mean);
c@244 85
c@244 86 return;
c@244 87
c@244 88 }
c@244 89
c@244 90 /** Error handler **************************************************/
c@244 91
c@244 92 void erhand(char* err_msg)
c@244 93 {
c@244 94 fprintf(stderr,"Run-time error:\n");
c@244 95 fprintf(stderr,"%s\n", err_msg);
c@244 96 fprintf(stderr,"Exiting to system.\n");
c@244 97 exit(1);
c@244 98 }
c@244 99
c@244 100
c@244 101 /** Reduce a real, symmetric matrix to a symmetric, tridiag. matrix. */
c@244 102
c@244 103 /* Householder reduction of matrix a to tridiagonal form.
c@244 104 Algorithm: Martin et al., Num. Math. 11, 181-195, 1968.
c@244 105 Ref: Smith et al., Matrix Eigensystem Routines -- EISPACK Guide
c@244 106 Springer-Verlag, 1976, pp. 489-494.
c@244 107 W H Press et al., Numerical Recipes in C, Cambridge U P,
c@244 108 1988, pp. 373-374. */
c@244 109 void tred2(double** a, int n, double* d, double* e)
c@244 110 {
c@244 111 int l, k, j, i;
c@244 112 double scale, hh, h, g, f;
c@244 113
c@244 114 for (i = n-1; i >= 1; i--)
c@244 115 {
c@244 116 l = i - 1;
c@244 117 h = scale = 0.0;
c@244 118 if (l > 0)
c@244 119 {
c@244 120 for (k = 0; k <= l; k++)
c@244 121 scale += fabs(a[i][k]);
c@244 122 if (scale == 0.0)
c@244 123 e[i] = a[i][l];
c@244 124 else
c@244 125 {
c@244 126 for (k = 0; k <= l; k++)
c@244 127 {
c@244 128 a[i][k] /= scale;
c@244 129 h += a[i][k] * a[i][k];
c@244 130 }
c@244 131 f = a[i][l];
c@244 132 g = f>0 ? -sqrt(h) : sqrt(h);
c@244 133 e[i] = scale * g;
c@244 134 h -= f * g;
c@244 135 a[i][l] = f - g;
c@244 136 f = 0.0;
c@244 137 for (j = 0; j <= l; j++)
c@244 138 {
c@244 139 a[j][i] = a[i][j]/h;
c@244 140 g = 0.0;
c@244 141 for (k = 0; k <= j; k++)
c@244 142 g += a[j][k] * a[i][k];
c@244 143 for (k = j+1; k <= l; k++)
c@244 144 g += a[k][j] * a[i][k];
c@244 145 e[j] = g / h;
c@244 146 f += e[j] * a[i][j];
c@244 147 }
c@244 148 hh = f / (h + h);
c@244 149 for (j = 0; j <= l; j++)
c@244 150 {
c@244 151 f = a[i][j];
c@244 152 e[j] = g = e[j] - hh * f;
c@244 153 for (k = 0; k <= j; k++)
c@244 154 a[j][k] -= (f * e[k] + g * a[i][k]);
c@244 155 }
c@244 156 }
c@244 157 }
c@244 158 else
c@244 159 e[i] = a[i][l];
c@244 160 d[i] = h;
c@244 161 }
c@244 162 d[0] = 0.0;
c@244 163 e[0] = 0.0;
c@244 164 for (i = 0; i < n; i++)
c@244 165 {
c@244 166 l = i - 1;
c@244 167 if (d[i])
c@244 168 {
c@244 169 for (j = 0; j <= l; j++)
c@244 170 {
c@244 171 g = 0.0;
c@244 172 for (k = 0; k <= l; k++)
c@244 173 g += a[i][k] * a[k][j];
c@244 174 for (k = 0; k <= l; k++)
c@244 175 a[k][j] -= g * a[k][i];
c@244 176 }
c@244 177 }
c@244 178 d[i] = a[i][i];
c@244 179 a[i][i] = 1.0;
c@244 180 for (j = 0; j <= l; j++)
c@244 181 a[j][i] = a[i][j] = 0.0;
c@244 182 }
c@244 183 }
c@244 184
c@244 185 /** Tridiagonal QL algorithm -- Implicit **********************/
c@244 186
c@244 187 void tqli(double* d, double* e, int n, double** z)
c@244 188 {
c@244 189 int m, l, iter, i, k;
c@244 190 double s, r, p, g, f, dd, c, b;
c@244 191
c@244 192 for (i = 1; i < n; i++)
c@244 193 e[i-1] = e[i];
c@244 194 e[n-1] = 0.0;
c@244 195 for (l = 0; l < n; l++)
c@244 196 {
c@244 197 iter = 0;
c@244 198 do
c@244 199 {
c@244 200 for (m = l; m < n-1; m++)
c@244 201 {
c@244 202 dd = fabs(d[m]) + fabs(d[m+1]);
c@244 203 if (fabs(e[m]) + dd == dd) break;
c@244 204 }
c@244 205 if (m != l)
c@244 206 {
c@244 207 if (iter++ == 30) erhand("No convergence in TLQI.");
c@244 208 g = (d[l+1] - d[l]) / (2.0 * e[l]);
c@244 209 r = sqrt((g * g) + 1.0);
c@244 210 g = d[m] - d[l] + e[l] / (g + SIGN(r, g));
c@244 211 s = c = 1.0;
c@244 212 p = 0.0;
c@244 213 for (i = m-1; i >= l; i--)
c@244 214 {
c@244 215 f = s * e[i];
c@244 216 b = c * e[i];
c@244 217 if (fabs(f) >= fabs(g))
c@244 218 {
c@244 219 c = g / f;
c@244 220 r = sqrt((c * c) + 1.0);
c@244 221 e[i+1] = f * r;
c@244 222 c *= (s = 1.0/r);
c@244 223 }
c@244 224 else
c@244 225 {
c@244 226 s = f / g;
c@244 227 r = sqrt((s * s) + 1.0);
c@244 228 e[i+1] = g * r;
c@244 229 s *= (c = 1.0/r);
c@244 230 }
c@244 231 g = d[i+1] - p;
c@244 232 r = (d[i] - g) * s + 2.0 * c * b;
c@244 233 p = s * r;
c@244 234 d[i+1] = g + p;
c@244 235 g = c * r - b;
c@244 236 for (k = 0; k < n; k++)
c@244 237 {
c@244 238 f = z[k][i+1];
c@244 239 z[k][i+1] = s * z[k][i] + c * f;
c@244 240 z[k][i] = c * z[k][i] - s * f;
c@244 241 }
c@244 242 }
c@244 243 d[l] = d[l] - p;
c@244 244 e[l] = g;
c@244 245 e[m] = 0.0;
c@244 246 }
c@244 247 } while (m != l);
c@244 248 }
c@244 249 }
c@244 250
c@244 251 /* In place projection onto basis vectors */
c@244 252 void pca_project(double** data, int n, int m, int ncomponents)
c@244 253 {
c@244 254 int i, j, k, k2;
c@244 255 double **symmat, **symmat2, *evals, *interm;
c@244 256
c@244 257 //TODO: assert ncomponents < m
c@244 258
c@244 259 symmat = (double**) malloc(m*sizeof(double*));
c@244 260 for (i = 0; i < m; i++)
c@244 261 symmat[i] = (double*) malloc(m*sizeof(double));
c@244 262
c@244 263 covcol(data, n, m, symmat);
c@244 264
c@244 265 /*********************************************************************
c@244 266 Eigen-reduction
c@244 267 **********************************************************************/
c@244 268
c@244 269 /* Allocate storage for dummy and new vectors. */
c@244 270 evals = (double*) malloc(m*sizeof(double)); /* Storage alloc. for vector of eigenvalues */
c@244 271 interm = (double*) malloc(m*sizeof(double)); /* Storage alloc. for 'intermediate' vector */
c@244 272 //MALLOC_ARRAY(symmat2,m,m,double);
c@244 273 //for (i = 0; i < m; i++) {
c@244 274 // for (j = 0; j < m; j++) {
c@244 275 // symmat2[i][j] = symmat[i][j]; /* Needed below for col. projections */
c@244 276 // }
c@244 277 //}
c@244 278 tred2(symmat, m, evals, interm); /* Triangular decomposition */
c@244 279 tqli(evals, interm, m, symmat); /* Reduction of sym. trid. matrix */
c@244 280 /* evals now contains the eigenvalues,
c@244 281 columns of symmat now contain the associated eigenvectors. */
c@244 282
c@244 283 /*
c@244 284 printf("\nEigenvalues:\n");
c@244 285 for (j = m-1; j >= 0; j--) {
c@244 286 printf("%18.5f\n", evals[j]); }
c@244 287 printf("\n(Eigenvalues should be strictly positive; limited\n");
c@244 288 printf("precision machine arithmetic may affect this.\n");
c@244 289 printf("Eigenvalues are often expressed as cumulative\n");
c@244 290 printf("percentages, representing the 'percentage variance\n");
c@244 291 printf("explained' by the associated axis or principal component.)\n");
c@244 292
c@244 293 printf("\nEigenvectors:\n");
c@244 294 printf("(First three; their definition in terms of original vbes.)\n");
c@244 295 for (j = 0; j < m; j++) {
c@244 296 for (i = 1; i <= 3; i++) {
c@244 297 printf("%12.4f", symmat[j][m-i]); }
c@244 298 printf("\n"); }
c@244 299 */
c@244 300
c@244 301 /* Form projections of row-points on prin. components. */
c@244 302 /* Store in 'data', overwriting original data. */
c@244 303 for (i = 0; i < n; i++) {
c@244 304 for (j = 0; j < m; j++) {
c@244 305 interm[j] = data[i][j]; } /* data[i][j] will be overwritten */
c@244 306 for (k = 0; k < ncomponents; k++) {
c@244 307 data[i][k] = 0.0;
c@244 308 for (k2 = 0; k2 < m; k2++) {
c@244 309 data[i][k] += interm[k2] * symmat[k2][m-k-1]; }
c@244 310 }
c@244 311 }
c@244 312
c@244 313 /*
c@244 314 printf("\nProjections of row-points on first 3 prin. comps.:\n");
c@244 315 for (i = 0; i < n; i++) {
c@244 316 for (j = 0; j < 3; j++) {
c@244 317 printf("%12.4f", data[i][j]); }
c@244 318 printf("\n"); }
c@244 319 */
c@244 320
c@244 321 /* Form projections of col.-points on first three prin. components. */
c@244 322 /* Store in 'symmat2', overwriting what was stored in this. */
c@244 323 //for (j = 0; j < m; j++) {
c@244 324 // for (k = 0; k < m; k++) {
c@244 325 // interm[k] = symmat2[j][k]; } /*symmat2[j][k] will be overwritten*/
c@244 326 // for (i = 0; i < 3; i++) {
c@244 327 // symmat2[j][i] = 0.0;
c@244 328 // for (k2 = 0; k2 < m; k2++) {
c@244 329 // symmat2[j][i] += interm[k2] * symmat[k2][m-i-1]; }
c@244 330 // if (evals[m-i-1] > 0.0005) /* Guard against zero eigenvalue */
c@244 331 // symmat2[j][i] /= sqrt(evals[m-i-1]); /* Rescale */
c@244 332 // else
c@244 333 // symmat2[j][i] = 0.0; /* Standard kludge */
c@244 334 // }
c@244 335 // }
c@244 336
c@244 337 /*
c@244 338 printf("\nProjections of column-points on first 3 prin. comps.:\n");
c@244 339 for (j = 0; j < m; j++) {
c@244 340 for (k = 0; k < 3; k++) {
c@244 341 printf("%12.4f", symmat2[j][k]); }
c@244 342 printf("\n"); }
c@244 343 */
c@244 344
c@244 345
c@244 346 for (i = 0; i < m; i++)
c@244 347 free(symmat[i]);
c@244 348 free(symmat);
c@244 349 //FREE_ARRAY(symmat2,m);
c@244 350 free(evals);
c@244 351 free(interm);
c@244 352
c@244 353 }
c@244 354
c@244 355
c@244 356