annotate maths/pca/pca.c @ 414:7e8d1f26b098

Fix compiler warnings with -Wall -Wextra
author Chris Cannam <c.cannam@qmul.ac.uk>
date Mon, 28 Sep 2015 12:33:17 +0100
parents f599563a4663
children fdaa63607c15
rev   line source
c@244 1 /*********************************/
c@244 2 /* Principal Components Analysis */
c@244 3 /*********************************/
c@244 4
c@244 5 /*********************************************************************/
c@244 6 /* Principal Components Analysis or the Karhunen-Loeve expansion is a
c@244 7 classical method for dimensionality reduction or exploratory data
c@244 8 analysis. One reference among many is: F. Murtagh and A. Heck,
c@244 9 Multivariate Data Analysis, Kluwer Academic, Dordrecht, 1987.
c@244 10
c@244 11 Author:
c@244 12 F. Murtagh
c@244 13 Phone: + 49 89 32006298 (work)
c@244 14 + 49 89 965307 (home)
c@244 15 Earn/Bitnet: fionn@dgaeso51, fim@dgaipp1s, murtagh@stsci
c@244 16 Span: esomc1::fionn
c@244 17 Internet: murtagh@scivax.stsci.edu
c@244 18
c@244 19 F. Murtagh, Munich, 6 June 1989 */
c@244 20 /*********************************************************************/
c@244 21
c@244 22 #include <stdio.h>
c@244 23 #include <stdlib.h>
c@244 24 #include <math.h>
c@244 25
c@244 26 #include "pca.h"
c@244 27
c@244 28 #define SIGN(a, b) ( (b) < 0 ? -fabs(a) : fabs(a) )
c@244 29
c@244 30 /** Variance-covariance matrix: creation *****************************/
c@244 31
c@244 32 /* Create m * m covariance matrix from given n * m data matrix. */
c@244 33 void covcol(double** data, int n, int m, double** symmat)
c@244 34 {
c@244 35 double *mean;
c@244 36 int i, j, j1, j2;
c@244 37
c@244 38 /* Allocate storage for mean vector */
c@244 39
c@244 40 mean = (double*) malloc(m*sizeof(double));
c@244 41
c@244 42 /* Determine mean of column vectors of input data matrix */
c@244 43
c@244 44 for (j = 0; j < m; j++)
c@244 45 {
c@244 46 mean[j] = 0.0;
c@244 47 for (i = 0; i < n; i++)
c@244 48 {
c@244 49 mean[j] += data[i][j];
c@244 50 }
c@244 51 mean[j] /= (double)n;
c@244 52 }
c@244 53
c@244 54 /*
c@244 55 printf("\nMeans of column vectors:\n");
c@244 56 for (j = 0; j < m; j++) {
c@244 57 printf("%12.1f",mean[j]); } printf("\n");
c@244 58 */
c@244 59
c@244 60 /* Center the column vectors. */
c@244 61
c@244 62 for (i = 0; i < n; i++)
c@244 63 {
c@244 64 for (j = 0; j < m; j++)
c@244 65 {
c@244 66 data[i][j] -= mean[j];
c@244 67 }
c@244 68 }
c@244 69
c@244 70 /* Calculate the m * m covariance matrix. */
c@244 71 for (j1 = 0; j1 < m; j1++)
c@244 72 {
c@244 73 for (j2 = j1; j2 < m; j2++)
c@244 74 {
c@244 75 symmat[j1][j2] = 0.0;
c@244 76 for (i = 0; i < n; i++)
c@244 77 {
c@244 78 symmat[j1][j2] += data[i][j1] * data[i][j2];
c@244 79 }
c@244 80 symmat[j2][j1] = symmat[j1][j2];
c@244 81 }
c@244 82 }
c@244 83
c@244 84 free(mean);
c@244 85
c@244 86 return;
c@244 87
c@244 88 }
c@244 89
c@244 90 /** Error handler **************************************************/
c@244 91
c@244 92 void erhand(char* err_msg)
c@244 93 {
c@244 94 fprintf(stderr,"Run-time error:\n");
c@244 95 fprintf(stderr,"%s\n", err_msg);
c@244 96 fprintf(stderr,"Exiting to system.\n");
c@244 97 exit(1);
c@244 98 }
c@244 99
c@244 100
c@244 101 /** Reduce a real, symmetric matrix to a symmetric, tridiag. matrix. */
c@244 102
c@244 103 /* Householder reduction of matrix a to tridiagonal form.
c@244 104 Algorithm: Martin et al., Num. Math. 11, 181-195, 1968.
c@244 105 Ref: Smith et al., Matrix Eigensystem Routines -- EISPACK Guide
c@244 106 Springer-Verlag, 1976, pp. 489-494.
c@244 107 W H Press et al., Numerical Recipes in C, Cambridge U P,
c@244 108 1988, pp. 373-374. */
c@244 109 void tred2(double** a, int n, double* d, double* e)
c@244 110 {
c@244 111 int l, k, j, i;
c@244 112 double scale, hh, h, g, f;
c@244 113
c@244 114 for (i = n-1; i >= 1; i--)
c@244 115 {
c@244 116 l = i - 1;
c@244 117 h = scale = 0.0;
c@244 118 if (l > 0)
c@244 119 {
c@244 120 for (k = 0; k <= l; k++)
c@244 121 scale += fabs(a[i][k]);
c@244 122 if (scale == 0.0)
c@244 123 e[i] = a[i][l];
c@244 124 else
c@244 125 {
c@244 126 for (k = 0; k <= l; k++)
c@244 127 {
c@244 128 a[i][k] /= scale;
c@244 129 h += a[i][k] * a[i][k];
c@244 130 }
c@244 131 f = a[i][l];
c@244 132 g = f>0 ? -sqrt(h) : sqrt(h);
c@244 133 e[i] = scale * g;
c@244 134 h -= f * g;
c@244 135 a[i][l] = f - g;
c@244 136 f = 0.0;
c@244 137 for (j = 0; j <= l; j++)
c@244 138 {
c@244 139 a[j][i] = a[i][j]/h;
c@244 140 g = 0.0;
c@244 141 for (k = 0; k <= j; k++)
c@244 142 g += a[j][k] * a[i][k];
c@244 143 for (k = j+1; k <= l; k++)
c@244 144 g += a[k][j] * a[i][k];
c@244 145 e[j] = g / h;
c@244 146 f += e[j] * a[i][j];
c@244 147 }
c@244 148 hh = f / (h + h);
c@244 149 for (j = 0; j <= l; j++)
c@244 150 {
c@244 151 f = a[i][j];
c@244 152 e[j] = g = e[j] - hh * f;
c@244 153 for (k = 0; k <= j; k++)
c@244 154 a[j][k] -= (f * e[k] + g * a[i][k]);
c@244 155 }
c@244 156 }
c@244 157 }
c@244 158 else
c@244 159 e[i] = a[i][l];
c@244 160 d[i] = h;
c@244 161 }
c@244 162 d[0] = 0.0;
c@244 163 e[0] = 0.0;
c@244 164 for (i = 0; i < n; i++)
c@244 165 {
c@244 166 l = i - 1;
c@244 167 if (d[i])
c@244 168 {
c@244 169 for (j = 0; j <= l; j++)
c@244 170 {
c@244 171 g = 0.0;
c@244 172 for (k = 0; k <= l; k++)
c@244 173 g += a[i][k] * a[k][j];
c@244 174 for (k = 0; k <= l; k++)
c@244 175 a[k][j] -= g * a[k][i];
c@244 176 }
c@244 177 }
c@244 178 d[i] = a[i][i];
c@244 179 a[i][i] = 1.0;
c@244 180 for (j = 0; j <= l; j++)
c@244 181 a[j][i] = a[i][j] = 0.0;
c@244 182 }
c@244 183 }
c@244 184
c@244 185 /** Tridiagonal QL algorithm -- Implicit **********************/
c@244 186
c@244 187 void tqli(double* d, double* e, int n, double** z)
c@244 188 {
c@244 189 int m, l, iter, i, k;
c@244 190 double s, r, p, g, f, dd, c, b;
c@244 191
c@244 192 for (i = 1; i < n; i++)
c@244 193 e[i-1] = e[i];
c@244 194 e[n-1] = 0.0;
c@244 195 for (l = 0; l < n; l++)
c@244 196 {
c@244 197 iter = 0;
c@244 198 do
c@244 199 {
c@244 200 for (m = l; m < n-1; m++)
c@244 201 {
c@244 202 dd = fabs(d[m]) + fabs(d[m+1]);
c@244 203 if (fabs(e[m]) + dd == dd) break;
c@244 204 }
c@244 205 if (m != l)
c@244 206 {
c@244 207 if (iter++ == 30) erhand("No convergence in TLQI.");
c@244 208 g = (d[l+1] - d[l]) / (2.0 * e[l]);
c@244 209 r = sqrt((g * g) + 1.0);
c@244 210 g = d[m] - d[l] + e[l] / (g + SIGN(r, g));
c@244 211 s = c = 1.0;
c@244 212 p = 0.0;
c@244 213 for (i = m-1; i >= l; i--)
c@244 214 {
c@244 215 f = s * e[i];
c@244 216 b = c * e[i];
c@244 217 if (fabs(f) >= fabs(g))
c@244 218 {
c@244 219 c = g / f;
c@244 220 r = sqrt((c * c) + 1.0);
c@244 221 e[i+1] = f * r;
c@244 222 c *= (s = 1.0/r);
c@244 223 }
c@244 224 else
c@244 225 {
c@244 226 s = f / g;
c@244 227 r = sqrt((s * s) + 1.0);
c@244 228 e[i+1] = g * r;
c@244 229 s *= (c = 1.0/r);
c@244 230 }
c@244 231 g = d[i+1] - p;
c@244 232 r = (d[i] - g) * s + 2.0 * c * b;
c@244 233 p = s * r;
c@244 234 d[i+1] = g + p;
c@244 235 g = c * r - b;
c@244 236 for (k = 0; k < n; k++)
c@244 237 {
c@244 238 f = z[k][i+1];
c@244 239 z[k][i+1] = s * z[k][i] + c * f;
c@244 240 z[k][i] = c * z[k][i] - s * f;
c@244 241 }
c@244 242 }
c@244 243 d[l] = d[l] - p;
c@244 244 e[l] = g;
c@244 245 e[m] = 0.0;
c@244 246 }
c@244 247 } while (m != l);
c@244 248 }
c@244 249 }
c@244 250
c@244 251 /* In place projection onto basis vectors */
c@244 252 void pca_project(double** data, int n, int m, int ncomponents)
c@244 253 {
c@244 254 int i, j, k, k2;
c@414 255 double **symmat, /* **symmat2, */ *evals, *interm;
c@244 256
c@244 257 //TODO: assert ncomponents < m
c@244 258
c@244 259 symmat = (double**) malloc(m*sizeof(double*));
c@244 260 for (i = 0; i < m; i++)
c@244 261 symmat[i] = (double*) malloc(m*sizeof(double));
c@244 262
c@244 263 covcol(data, n, m, symmat);
c@244 264
c@244 265 /*********************************************************************
c@244 266 Eigen-reduction
c@244 267 **********************************************************************/
c@244 268
c@244 269 /* Allocate storage for dummy and new vectors. */
c@244 270 evals = (double*) malloc(m*sizeof(double)); /* Storage alloc. for vector of eigenvalues */
c@244 271 interm = (double*) malloc(m*sizeof(double)); /* Storage alloc. for 'intermediate' vector */
c@244 272 //MALLOC_ARRAY(symmat2,m,m,double);
c@244 273 //for (i = 0; i < m; i++) {
c@244 274 // for (j = 0; j < m; j++) {
c@244 275 // symmat2[i][j] = symmat[i][j]; /* Needed below for col. projections */
c@244 276 // }
c@244 277 //}
c@244 278 tred2(symmat, m, evals, interm); /* Triangular decomposition */
c@244 279 tqli(evals, interm, m, symmat); /* Reduction of sym. trid. matrix */
c@244 280 /* evals now contains the eigenvalues,
c@244 281 columns of symmat now contain the associated eigenvectors. */
c@244 282
c@244 283 /*
c@244 284 printf("\nEigenvalues:\n");
c@244 285 for (j = m-1; j >= 0; j--) {
c@244 286 printf("%18.5f\n", evals[j]); }
c@244 287 printf("\n(Eigenvalues should be strictly positive; limited\n");
c@244 288 printf("precision machine arithmetic may affect this.\n");
c@244 289 printf("Eigenvalues are often expressed as cumulative\n");
c@244 290 printf("percentages, representing the 'percentage variance\n");
c@244 291 printf("explained' by the associated axis or principal component.)\n");
c@244 292
c@244 293 printf("\nEigenvectors:\n");
c@244 294 printf("(First three; their definition in terms of original vbes.)\n");
c@244 295 for (j = 0; j < m; j++) {
c@244 296 for (i = 1; i <= 3; i++) {
c@244 297 printf("%12.4f", symmat[j][m-i]); }
c@244 298 printf("\n"); }
c@244 299 */
c@244 300
c@244 301 /* Form projections of row-points on prin. components. */
c@244 302 /* Store in 'data', overwriting original data. */
c@244 303 for (i = 0; i < n; i++) {
c@244 304 for (j = 0; j < m; j++) {
c@244 305 interm[j] = data[i][j]; } /* data[i][j] will be overwritten */
c@244 306 for (k = 0; k < ncomponents; k++) {
c@244 307 data[i][k] = 0.0;
c@244 308 for (k2 = 0; k2 < m; k2++) {
c@244 309 data[i][k] += interm[k2] * symmat[k2][m-k-1]; }
c@244 310 }
c@244 311 }
c@244 312
c@244 313 /*
c@244 314 printf("\nProjections of row-points on first 3 prin. comps.:\n");
c@244 315 for (i = 0; i < n; i++) {
c@244 316 for (j = 0; j < 3; j++) {
c@244 317 printf("%12.4f", data[i][j]); }
c@244 318 printf("\n"); }
c@244 319 */
c@244 320
c@244 321 /* Form projections of col.-points on first three prin. components. */
c@244 322 /* Store in 'symmat2', overwriting what was stored in this. */
c@244 323 //for (j = 0; j < m; j++) {
c@244 324 // for (k = 0; k < m; k++) {
c@244 325 // interm[k] = symmat2[j][k]; } /*symmat2[j][k] will be overwritten*/
c@244 326 // for (i = 0; i < 3; i++) {
c@244 327 // symmat2[j][i] = 0.0;
c@244 328 // for (k2 = 0; k2 < m; k2++) {
c@244 329 // symmat2[j][i] += interm[k2] * symmat[k2][m-i-1]; }
c@244 330 // if (evals[m-i-1] > 0.0005) /* Guard against zero eigenvalue */
c@244 331 // symmat2[j][i] /= sqrt(evals[m-i-1]); /* Rescale */
c@244 332 // else
c@244 333 // symmat2[j][i] = 0.0; /* Standard kludge */
c@244 334 // }
c@244 335 // }
c@244 336
c@244 337 /*
c@244 338 printf("\nProjections of column-points on first 3 prin. comps.:\n");
c@244 339 for (j = 0; j < m; j++) {
c@244 340 for (k = 0; k < 3; k++) {
c@244 341 printf("%12.4f", symmat2[j][k]); }
c@244 342 printf("\n"); }
c@244 343 */
c@244 344
c@244 345
c@244 346 for (i = 0; i < m; i++)
c@244 347 free(symmat[i]);
c@244 348 free(symmat);
c@244 349 //FREE_ARRAY(symmat2,m);
c@244 350 free(evals);
c@244 351 free(interm);
c@244 352
c@244 353 }
c@244 354
c@244 355
c@244 356