annotate maths/pca/pca.c @ 209:ccd2019190bf msvc

Some MSVC fixes, including (temporarily, probably) renaming the FFT source file to avoid getting it mixed up with the Vamp SDK one in our object dir
author Chris Cannam
date Thu, 01 Feb 2018 16:34:08 +0000
parents e4a57215ddee
children fdaa63607c15
rev   line source
cannam@16 1 /*********************************/
cannam@16 2 /* Principal Components Analysis */
cannam@16 3 /*********************************/
cannam@16 4
cannam@16 5 /*********************************************************************/
cannam@16 6 /* Principal Components Analysis or the Karhunen-Loeve expansion is a
cannam@16 7 classical method for dimensionality reduction or exploratory data
cannam@16 8 analysis. One reference among many is: F. Murtagh and A. Heck,
cannam@16 9 Multivariate Data Analysis, Kluwer Academic, Dordrecht, 1987.
cannam@16 10
cannam@16 11 Author:
cannam@16 12 F. Murtagh
cannam@16 13 Phone: + 49 89 32006298 (work)
cannam@16 14 + 49 89 965307 (home)
cannam@16 15 Earn/Bitnet: fionn@dgaeso51, fim@dgaipp1s, murtagh@stsci
cannam@16 16 Span: esomc1::fionn
cannam@16 17 Internet: murtagh@scivax.stsci.edu
cannam@16 18
cannam@16 19 F. Murtagh, Munich, 6 June 1989 */
cannam@16 20 /*********************************************************************/
cannam@16 21
cannam@16 22 #include <stdio.h>
cannam@16 23 #include <stdlib.h>
cannam@16 24 #include <math.h>
cannam@16 25
cannam@16 26 #include "pca.h"
cannam@16 27
cannam@16 28 #define SIGN(a, b) ( (b) < 0 ? -fabs(a) : fabs(a) )
cannam@16 29
cannam@16 30 /** Variance-covariance matrix: creation *****************************/
cannam@16 31
cannam@16 32 /* Create m * m covariance matrix from given n * m data matrix. */
cannam@16 33 void covcol(double** data, int n, int m, double** symmat)
cannam@16 34 {
cannam@16 35 double *mean;
cannam@16 36 int i, j, j1, j2;
cannam@16 37
cannam@16 38 /* Allocate storage for mean vector */
cannam@16 39
cannam@16 40 mean = (double*) malloc(m*sizeof(double));
cannam@16 41
cannam@16 42 /* Determine mean of column vectors of input data matrix */
cannam@16 43
cannam@16 44 for (j = 0; j < m; j++)
cannam@16 45 {
cannam@16 46 mean[j] = 0.0;
cannam@16 47 for (i = 0; i < n; i++)
cannam@16 48 {
cannam@16 49 mean[j] += data[i][j];
cannam@16 50 }
cannam@16 51 mean[j] /= (double)n;
cannam@16 52 }
cannam@16 53
cannam@16 54 /*
cannam@16 55 printf("\nMeans of column vectors:\n");
cannam@16 56 for (j = 0; j < m; j++) {
cannam@16 57 printf("%12.1f",mean[j]); } printf("\n");
cannam@16 58 */
cannam@16 59
cannam@16 60 /* Center the column vectors. */
cannam@16 61
cannam@16 62 for (i = 0; i < n; i++)
cannam@16 63 {
cannam@16 64 for (j = 0; j < m; j++)
cannam@16 65 {
cannam@16 66 data[i][j] -= mean[j];
cannam@16 67 }
cannam@16 68 }
cannam@16 69
cannam@16 70 /* Calculate the m * m covariance matrix. */
cannam@16 71 for (j1 = 0; j1 < m; j1++)
cannam@16 72 {
cannam@16 73 for (j2 = j1; j2 < m; j2++)
cannam@16 74 {
cannam@16 75 symmat[j1][j2] = 0.0;
cannam@16 76 for (i = 0; i < n; i++)
cannam@16 77 {
cannam@16 78 symmat[j1][j2] += data[i][j1] * data[i][j2];
cannam@16 79 }
cannam@16 80 symmat[j2][j1] = symmat[j1][j2];
cannam@16 81 }
cannam@16 82 }
cannam@16 83
cannam@16 84 free(mean);
cannam@16 85
cannam@16 86 return;
cannam@16 87
cannam@16 88 }
cannam@16 89
cannam@16 90 /** Error handler **************************************************/
cannam@16 91
cannam@16 92 void erhand(char* err_msg)
cannam@16 93 {
cannam@16 94 fprintf(stderr,"Run-time error:\n");
cannam@16 95 fprintf(stderr,"%s\n", err_msg);
cannam@16 96 fprintf(stderr,"Exiting to system.\n");
cannam@16 97 exit(1);
cannam@16 98 }
cannam@16 99
cannam@16 100
cannam@16 101 /** Reduce a real, symmetric matrix to a symmetric, tridiag. matrix. */
cannam@16 102
cannam@16 103 /* Householder reduction of matrix a to tridiagonal form.
cannam@16 104 Algorithm: Martin et al., Num. Math. 11, 181-195, 1968.
cannam@16 105 Ref: Smith et al., Matrix Eigensystem Routines -- EISPACK Guide
cannam@16 106 Springer-Verlag, 1976, pp. 489-494.
cannam@16 107 W H Press et al., Numerical Recipes in C, Cambridge U P,
cannam@16 108 1988, pp. 373-374. */
cannam@16 109 void tred2(double** a, int n, double* d, double* e)
cannam@16 110 {
cannam@16 111 int l, k, j, i;
cannam@16 112 double scale, hh, h, g, f;
cannam@16 113
cannam@16 114 for (i = n-1; i >= 1; i--)
cannam@16 115 {
cannam@16 116 l = i - 1;
cannam@16 117 h = scale = 0.0;
cannam@16 118 if (l > 0)
cannam@16 119 {
cannam@16 120 for (k = 0; k <= l; k++)
cannam@16 121 scale += fabs(a[i][k]);
cannam@16 122 if (scale == 0.0)
cannam@16 123 e[i] = a[i][l];
cannam@16 124 else
cannam@16 125 {
cannam@16 126 for (k = 0; k <= l; k++)
cannam@16 127 {
cannam@16 128 a[i][k] /= scale;
cannam@16 129 h += a[i][k] * a[i][k];
cannam@16 130 }
cannam@16 131 f = a[i][l];
cannam@16 132 g = f>0 ? -sqrt(h) : sqrt(h);
cannam@16 133 e[i] = scale * g;
cannam@16 134 h -= f * g;
cannam@16 135 a[i][l] = f - g;
cannam@16 136 f = 0.0;
cannam@16 137 for (j = 0; j <= l; j++)
cannam@16 138 {
cannam@16 139 a[j][i] = a[i][j]/h;
cannam@16 140 g = 0.0;
cannam@16 141 for (k = 0; k <= j; k++)
cannam@16 142 g += a[j][k] * a[i][k];
cannam@16 143 for (k = j+1; k <= l; k++)
cannam@16 144 g += a[k][j] * a[i][k];
cannam@16 145 e[j] = g / h;
cannam@16 146 f += e[j] * a[i][j];
cannam@16 147 }
cannam@16 148 hh = f / (h + h);
cannam@16 149 for (j = 0; j <= l; j++)
cannam@16 150 {
cannam@16 151 f = a[i][j];
cannam@16 152 e[j] = g = e[j] - hh * f;
cannam@16 153 for (k = 0; k <= j; k++)
cannam@16 154 a[j][k] -= (f * e[k] + g * a[i][k]);
cannam@16 155 }
cannam@16 156 }
cannam@16 157 }
cannam@16 158 else
cannam@16 159 e[i] = a[i][l];
cannam@16 160 d[i] = h;
cannam@16 161 }
cannam@16 162 d[0] = 0.0;
cannam@16 163 e[0] = 0.0;
cannam@16 164 for (i = 0; i < n; i++)
cannam@16 165 {
cannam@16 166 l = i - 1;
cannam@16 167 if (d[i])
cannam@16 168 {
cannam@16 169 for (j = 0; j <= l; j++)
cannam@16 170 {
cannam@16 171 g = 0.0;
cannam@16 172 for (k = 0; k <= l; k++)
cannam@16 173 g += a[i][k] * a[k][j];
cannam@16 174 for (k = 0; k <= l; k++)
cannam@16 175 a[k][j] -= g * a[k][i];
cannam@16 176 }
cannam@16 177 }
cannam@16 178 d[i] = a[i][i];
cannam@16 179 a[i][i] = 1.0;
cannam@16 180 for (j = 0; j <= l; j++)
cannam@16 181 a[j][i] = a[i][j] = 0.0;
cannam@16 182 }
cannam@16 183 }
cannam@16 184
cannam@16 185 /** Tridiagonal QL algorithm -- Implicit **********************/
cannam@16 186
cannam@16 187 void tqli(double* d, double* e, int n, double** z)
cannam@16 188 {
cannam@16 189 int m, l, iter, i, k;
cannam@16 190 double s, r, p, g, f, dd, c, b;
cannam@16 191
cannam@16 192 for (i = 1; i < n; i++)
cannam@16 193 e[i-1] = e[i];
cannam@16 194 e[n-1] = 0.0;
cannam@16 195 for (l = 0; l < n; l++)
cannam@16 196 {
cannam@16 197 iter = 0;
cannam@16 198 do
cannam@16 199 {
cannam@16 200 for (m = l; m < n-1; m++)
cannam@16 201 {
cannam@16 202 dd = fabs(d[m]) + fabs(d[m+1]);
cannam@16 203 if (fabs(e[m]) + dd == dd) break;
cannam@16 204 }
cannam@16 205 if (m != l)
cannam@16 206 {
cannam@16 207 if (iter++ == 30) erhand("No convergence in TLQI.");
cannam@16 208 g = (d[l+1] - d[l]) / (2.0 * e[l]);
cannam@16 209 r = sqrt((g * g) + 1.0);
cannam@16 210 g = d[m] - d[l] + e[l] / (g + SIGN(r, g));
cannam@16 211 s = c = 1.0;
cannam@16 212 p = 0.0;
cannam@16 213 for (i = m-1; i >= l; i--)
cannam@16 214 {
cannam@16 215 f = s * e[i];
cannam@16 216 b = c * e[i];
cannam@16 217 if (fabs(f) >= fabs(g))
cannam@16 218 {
cannam@16 219 c = g / f;
cannam@16 220 r = sqrt((c * c) + 1.0);
cannam@16 221 e[i+1] = f * r;
cannam@16 222 c *= (s = 1.0/r);
cannam@16 223 }
cannam@16 224 else
cannam@16 225 {
cannam@16 226 s = f / g;
cannam@16 227 r = sqrt((s * s) + 1.0);
cannam@16 228 e[i+1] = g * r;
cannam@16 229 s *= (c = 1.0/r);
cannam@16 230 }
cannam@16 231 g = d[i+1] - p;
cannam@16 232 r = (d[i] - g) * s + 2.0 * c * b;
cannam@16 233 p = s * r;
cannam@16 234 d[i+1] = g + p;
cannam@16 235 g = c * r - b;
cannam@16 236 for (k = 0; k < n; k++)
cannam@16 237 {
cannam@16 238 f = z[k][i+1];
cannam@16 239 z[k][i+1] = s * z[k][i] + c * f;
cannam@16 240 z[k][i] = c * z[k][i] - s * f;
cannam@16 241 }
cannam@16 242 }
cannam@16 243 d[l] = d[l] - p;
cannam@16 244 e[l] = g;
cannam@16 245 e[m] = 0.0;
cannam@16 246 }
cannam@16 247 } while (m != l);
cannam@16 248 }
cannam@16 249 }
cannam@16 250
cannam@16 251 /* In place projection onto basis vectors */
cannam@16 252 void pca_project(double** data, int n, int m, int ncomponents)
cannam@16 253 {
cannam@16 254 int i, j, k, k2;
Chris@189 255 double **symmat, /* **symmat2, */ *evals, *interm;
cannam@16 256
cannam@16 257 //TODO: assert ncomponents < m
cannam@16 258
cannam@16 259 symmat = (double**) malloc(m*sizeof(double*));
cannam@16 260 for (i = 0; i < m; i++)
cannam@16 261 symmat[i] = (double*) malloc(m*sizeof(double));
cannam@16 262
cannam@16 263 covcol(data, n, m, symmat);
cannam@16 264
cannam@16 265 /*********************************************************************
cannam@16 266 Eigen-reduction
cannam@16 267 **********************************************************************/
cannam@16 268
cannam@16 269 /* Allocate storage for dummy and new vectors. */
cannam@16 270 evals = (double*) malloc(m*sizeof(double)); /* Storage alloc. for vector of eigenvalues */
cannam@16 271 interm = (double*) malloc(m*sizeof(double)); /* Storage alloc. for 'intermediate' vector */
cannam@16 272 //MALLOC_ARRAY(symmat2,m,m,double);
cannam@16 273 //for (i = 0; i < m; i++) {
cannam@16 274 // for (j = 0; j < m; j++) {
cannam@16 275 // symmat2[i][j] = symmat[i][j]; /* Needed below for col. projections */
cannam@16 276 // }
cannam@16 277 //}
cannam@16 278 tred2(symmat, m, evals, interm); /* Triangular decomposition */
cannam@16 279 tqli(evals, interm, m, symmat); /* Reduction of sym. trid. matrix */
cannam@16 280 /* evals now contains the eigenvalues,
cannam@16 281 columns of symmat now contain the associated eigenvectors. */
cannam@16 282
cannam@16 283 /*
cannam@16 284 printf("\nEigenvalues:\n");
cannam@16 285 for (j = m-1; j >= 0; j--) {
cannam@16 286 printf("%18.5f\n", evals[j]); }
cannam@16 287 printf("\n(Eigenvalues should be strictly positive; limited\n");
cannam@16 288 printf("precision machine arithmetic may affect this.\n");
cannam@16 289 printf("Eigenvalues are often expressed as cumulative\n");
cannam@16 290 printf("percentages, representing the 'percentage variance\n");
cannam@16 291 printf("explained' by the associated axis or principal component.)\n");
cannam@16 292
cannam@16 293 printf("\nEigenvectors:\n");
cannam@16 294 printf("(First three; their definition in terms of original vbes.)\n");
cannam@16 295 for (j = 0; j < m; j++) {
cannam@16 296 for (i = 1; i <= 3; i++) {
cannam@16 297 printf("%12.4f", symmat[j][m-i]); }
cannam@16 298 printf("\n"); }
cannam@16 299 */
cannam@16 300
cannam@16 301 /* Form projections of row-points on prin. components. */
cannam@16 302 /* Store in 'data', overwriting original data. */
cannam@16 303 for (i = 0; i < n; i++) {
cannam@16 304 for (j = 0; j < m; j++) {
cannam@16 305 interm[j] = data[i][j]; } /* data[i][j] will be overwritten */
cannam@16 306 for (k = 0; k < ncomponents; k++) {
cannam@16 307 data[i][k] = 0.0;
cannam@16 308 for (k2 = 0; k2 < m; k2++) {
cannam@16 309 data[i][k] += interm[k2] * symmat[k2][m-k-1]; }
cannam@16 310 }
cannam@16 311 }
cannam@16 312
cannam@16 313 /*
cannam@16 314 printf("\nProjections of row-points on first 3 prin. comps.:\n");
cannam@16 315 for (i = 0; i < n; i++) {
cannam@16 316 for (j = 0; j < 3; j++) {
cannam@16 317 printf("%12.4f", data[i][j]); }
cannam@16 318 printf("\n"); }
cannam@16 319 */
cannam@16 320
cannam@16 321 /* Form projections of col.-points on first three prin. components. */
cannam@16 322 /* Store in 'symmat2', overwriting what was stored in this. */
cannam@16 323 //for (j = 0; j < m; j++) {
cannam@16 324 // for (k = 0; k < m; k++) {
cannam@16 325 // interm[k] = symmat2[j][k]; } /*symmat2[j][k] will be overwritten*/
cannam@16 326 // for (i = 0; i < 3; i++) {
cannam@16 327 // symmat2[j][i] = 0.0;
cannam@16 328 // for (k2 = 0; k2 < m; k2++) {
cannam@16 329 // symmat2[j][i] += interm[k2] * symmat[k2][m-i-1]; }
cannam@16 330 // if (evals[m-i-1] > 0.0005) /* Guard against zero eigenvalue */
cannam@16 331 // symmat2[j][i] /= sqrt(evals[m-i-1]); /* Rescale */
cannam@16 332 // else
cannam@16 333 // symmat2[j][i] = 0.0; /* Standard kludge */
cannam@16 334 // }
cannam@16 335 // }
cannam@16 336
cannam@16 337 /*
cannam@16 338 printf("\nProjections of column-points on first 3 prin. comps.:\n");
cannam@16 339 for (j = 0; j < m; j++) {
cannam@16 340 for (k = 0; k < 3; k++) {
cannam@16 341 printf("%12.4f", symmat2[j][k]); }
cannam@16 342 printf("\n"); }
cannam@16 343 */
cannam@16 344
cannam@16 345
cannam@16 346 for (i = 0; i < m; i++)
cannam@16 347 free(symmat[i]);
cannam@16 348 free(symmat);
cannam@16 349 //FREE_ARRAY(symmat2,m);
cannam@16 350 free(evals);
cannam@16 351 free(interm);
cannam@16 352
cannam@16 353 }
cannam@16 354
cannam@16 355
cannam@16 356