annotate maths/pca/pca.c @ 189:e4a57215ddee

Fix compiler warnings with -Wall -Wextra
author Chris Cannam
date Mon, 28 Sep 2015 12:33:17 +0100
parents 2e3f5d2d62c1
children fdaa63607c15
rev   line source
cannam@16 1 /*********************************/
cannam@16 2 /* Principal Components Analysis */
cannam@16 3 /*********************************/
cannam@16 4
cannam@16 5 /*********************************************************************/
cannam@16 6 /* Principal Components Analysis or the Karhunen-Loeve expansion is a
cannam@16 7 classical method for dimensionality reduction or exploratory data
cannam@16 8 analysis. One reference among many is: F. Murtagh and A. Heck,
cannam@16 9 Multivariate Data Analysis, Kluwer Academic, Dordrecht, 1987.
cannam@16 10
cannam@16 11 Author:
cannam@16 12 F. Murtagh
cannam@16 13 Phone: + 49 89 32006298 (work)
cannam@16 14 + 49 89 965307 (home)
cannam@16 15 Earn/Bitnet: fionn@dgaeso51, fim@dgaipp1s, murtagh@stsci
cannam@16 16 Span: esomc1::fionn
cannam@16 17 Internet: murtagh@scivax.stsci.edu
cannam@16 18
cannam@16 19 F. Murtagh, Munich, 6 June 1989 */
cannam@16 20 /*********************************************************************/
cannam@16 21
cannam@16 22 #include <stdio.h>
cannam@16 23 #include <stdlib.h>
cannam@16 24 #include <math.h>
cannam@16 25
cannam@16 26 #include "pca.h"
cannam@16 27
cannam@16 28 #define SIGN(a, b) ( (b) < 0 ? -fabs(a) : fabs(a) )
cannam@16 29
cannam@16 30 /** Variance-covariance matrix: creation *****************************/
cannam@16 31
cannam@16 32 /* Create m * m covariance matrix from given n * m data matrix. */
cannam@16 33 void covcol(double** data, int n, int m, double** symmat)
cannam@16 34 {
cannam@16 35 double *mean;
cannam@16 36 int i, j, j1, j2;
cannam@16 37
cannam@16 38 /* Allocate storage for mean vector */
cannam@16 39
cannam@16 40 mean = (double*) malloc(m*sizeof(double));
cannam@16 41
cannam@16 42 /* Determine mean of column vectors of input data matrix */
cannam@16 43
cannam@16 44 for (j = 0; j < m; j++)
cannam@16 45 {
cannam@16 46 mean[j] = 0.0;
cannam@16 47 for (i = 0; i < n; i++)
cannam@16 48 {
cannam@16 49 mean[j] += data[i][j];
cannam@16 50 }
cannam@16 51 mean[j] /= (double)n;
cannam@16 52 }
cannam@16 53
cannam@16 54 /*
cannam@16 55 printf("\nMeans of column vectors:\n");
cannam@16 56 for (j = 0; j < m; j++) {
cannam@16 57 printf("%12.1f",mean[j]); } printf("\n");
cannam@16 58 */
cannam@16 59
cannam@16 60 /* Center the column vectors. */
cannam@16 61
cannam@16 62 for (i = 0; i < n; i++)
cannam@16 63 {
cannam@16 64 for (j = 0; j < m; j++)
cannam@16 65 {
cannam@16 66 data[i][j] -= mean[j];
cannam@16 67 }
cannam@16 68 }
cannam@16 69
cannam@16 70 /* Calculate the m * m covariance matrix. */
cannam@16 71 for (j1 = 0; j1 < m; j1++)
cannam@16 72 {
cannam@16 73 for (j2 = j1; j2 < m; j2++)
cannam@16 74 {
cannam@16 75 symmat[j1][j2] = 0.0;
cannam@16 76 for (i = 0; i < n; i++)
cannam@16 77 {
cannam@16 78 symmat[j1][j2] += data[i][j1] * data[i][j2];
cannam@16 79 }
cannam@16 80 symmat[j2][j1] = symmat[j1][j2];
cannam@16 81 }
cannam@16 82 }
cannam@16 83
cannam@16 84 free(mean);
cannam@16 85
cannam@16 86 return;
cannam@16 87
cannam@16 88 }
cannam@16 89
cannam@16 90 /** Error handler **************************************************/
cannam@16 91
cannam@16 92 void erhand(char* err_msg)
cannam@16 93 {
cannam@16 94 fprintf(stderr,"Run-time error:\n");
cannam@16 95 fprintf(stderr,"%s\n", err_msg);
cannam@16 96 fprintf(stderr,"Exiting to system.\n");
cannam@16 97 exit(1);
cannam@16 98 }
cannam@16 99
cannam@16 100
cannam@16 101 /** Reduce a real, symmetric matrix to a symmetric, tridiag. matrix. */
cannam@16 102
cannam@16 103 /* Householder reduction of matrix a to tridiagonal form.
cannam@16 104 Algorithm: Martin et al., Num. Math. 11, 181-195, 1968.
cannam@16 105 Ref: Smith et al., Matrix Eigensystem Routines -- EISPACK Guide
cannam@16 106 Springer-Verlag, 1976, pp. 489-494.
cannam@16 107 W H Press et al., Numerical Recipes in C, Cambridge U P,
cannam@16 108 1988, pp. 373-374. */
cannam@16 109 void tred2(double** a, int n, double* d, double* e)
cannam@16 110 {
cannam@16 111 int l, k, j, i;
cannam@16 112 double scale, hh, h, g, f;
cannam@16 113
cannam@16 114 for (i = n-1; i >= 1; i--)
cannam@16 115 {
cannam@16 116 l = i - 1;
cannam@16 117 h = scale = 0.0;
cannam@16 118 if (l > 0)
cannam@16 119 {
cannam@16 120 for (k = 0; k <= l; k++)
cannam@16 121 scale += fabs(a[i][k]);
cannam@16 122 if (scale == 0.0)
cannam@16 123 e[i] = a[i][l];
cannam@16 124 else
cannam@16 125 {
cannam@16 126 for (k = 0; k <= l; k++)
cannam@16 127 {
cannam@16 128 a[i][k] /= scale;
cannam@16 129 h += a[i][k] * a[i][k];
cannam@16 130 }
cannam@16 131 f = a[i][l];
cannam@16 132 g = f>0 ? -sqrt(h) : sqrt(h);
cannam@16 133 e[i] = scale * g;
cannam@16 134 h -= f * g;
cannam@16 135 a[i][l] = f - g;
cannam@16 136 f = 0.0;
cannam@16 137 for (j = 0; j <= l; j++)
cannam@16 138 {
cannam@16 139 a[j][i] = a[i][j]/h;
cannam@16 140 g = 0.0;
cannam@16 141 for (k = 0; k <= j; k++)
cannam@16 142 g += a[j][k] * a[i][k];
cannam@16 143 for (k = j+1; k <= l; k++)
cannam@16 144 g += a[k][j] * a[i][k];
cannam@16 145 e[j] = g / h;
cannam@16 146 f += e[j] * a[i][j];
cannam@16 147 }
cannam@16 148 hh = f / (h + h);
cannam@16 149 for (j = 0; j <= l; j++)
cannam@16 150 {
cannam@16 151 f = a[i][j];
cannam@16 152 e[j] = g = e[j] - hh * f;
cannam@16 153 for (k = 0; k <= j; k++)
cannam@16 154 a[j][k] -= (f * e[k] + g * a[i][k]);
cannam@16 155 }
cannam@16 156 }
cannam@16 157 }
cannam@16 158 else
cannam@16 159 e[i] = a[i][l];
cannam@16 160 d[i] = h;
cannam@16 161 }
cannam@16 162 d[0] = 0.0;
cannam@16 163 e[0] = 0.0;
cannam@16 164 for (i = 0; i < n; i++)
cannam@16 165 {
cannam@16 166 l = i - 1;
cannam@16 167 if (d[i])
cannam@16 168 {
cannam@16 169 for (j = 0; j <= l; j++)
cannam@16 170 {
cannam@16 171 g = 0.0;
cannam@16 172 for (k = 0; k <= l; k++)
cannam@16 173 g += a[i][k] * a[k][j];
cannam@16 174 for (k = 0; k <= l; k++)
cannam@16 175 a[k][j] -= g * a[k][i];
cannam@16 176 }
cannam@16 177 }
cannam@16 178 d[i] = a[i][i];
cannam@16 179 a[i][i] = 1.0;
cannam@16 180 for (j = 0; j <= l; j++)
cannam@16 181 a[j][i] = a[i][j] = 0.0;
cannam@16 182 }
cannam@16 183 }
cannam@16 184
cannam@16 185 /** Tridiagonal QL algorithm -- Implicit **********************/
cannam@16 186
cannam@16 187 void tqli(double* d, double* e, int n, double** z)
cannam@16 188 {
cannam@16 189 int m, l, iter, i, k;
cannam@16 190 double s, r, p, g, f, dd, c, b;
cannam@16 191
cannam@16 192 for (i = 1; i < n; i++)
cannam@16 193 e[i-1] = e[i];
cannam@16 194 e[n-1] = 0.0;
cannam@16 195 for (l = 0; l < n; l++)
cannam@16 196 {
cannam@16 197 iter = 0;
cannam@16 198 do
cannam@16 199 {
cannam@16 200 for (m = l; m < n-1; m++)
cannam@16 201 {
cannam@16 202 dd = fabs(d[m]) + fabs(d[m+1]);
cannam@16 203 if (fabs(e[m]) + dd == dd) break;
cannam@16 204 }
cannam@16 205 if (m != l)
cannam@16 206 {
cannam@16 207 if (iter++ == 30) erhand("No convergence in TLQI.");
cannam@16 208 g = (d[l+1] - d[l]) / (2.0 * e[l]);
cannam@16 209 r = sqrt((g * g) + 1.0);
cannam@16 210 g = d[m] - d[l] + e[l] / (g + SIGN(r, g));
cannam@16 211 s = c = 1.0;
cannam@16 212 p = 0.0;
cannam@16 213 for (i = m-1; i >= l; i--)
cannam@16 214 {
cannam@16 215 f = s * e[i];
cannam@16 216 b = c * e[i];
cannam@16 217 if (fabs(f) >= fabs(g))
cannam@16 218 {
cannam@16 219 c = g / f;
cannam@16 220 r = sqrt((c * c) + 1.0);
cannam@16 221 e[i+1] = f * r;
cannam@16 222 c *= (s = 1.0/r);
cannam@16 223 }
cannam@16 224 else
cannam@16 225 {
cannam@16 226 s = f / g;
cannam@16 227 r = sqrt((s * s) + 1.0);
cannam@16 228 e[i+1] = g * r;
cannam@16 229 s *= (c = 1.0/r);
cannam@16 230 }
cannam@16 231 g = d[i+1] - p;
cannam@16 232 r = (d[i] - g) * s + 2.0 * c * b;
cannam@16 233 p = s * r;
cannam@16 234 d[i+1] = g + p;
cannam@16 235 g = c * r - b;
cannam@16 236 for (k = 0; k < n; k++)
cannam@16 237 {
cannam@16 238 f = z[k][i+1];
cannam@16 239 z[k][i+1] = s * z[k][i] + c * f;
cannam@16 240 z[k][i] = c * z[k][i] - s * f;
cannam@16 241 }
cannam@16 242 }
cannam@16 243 d[l] = d[l] - p;
cannam@16 244 e[l] = g;
cannam@16 245 e[m] = 0.0;
cannam@16 246 }
cannam@16 247 } while (m != l);
cannam@16 248 }
cannam@16 249 }
cannam@16 250
cannam@16 251 /* In place projection onto basis vectors */
cannam@16 252 void pca_project(double** data, int n, int m, int ncomponents)
cannam@16 253 {
cannam@16 254 int i, j, k, k2;
Chris@189 255 double **symmat, /* **symmat2, */ *evals, *interm;
cannam@16 256
cannam@16 257 //TODO: assert ncomponents < m
cannam@16 258
cannam@16 259 symmat = (double**) malloc(m*sizeof(double*));
cannam@16 260 for (i = 0; i < m; i++)
cannam@16 261 symmat[i] = (double*) malloc(m*sizeof(double));
cannam@16 262
cannam@16 263 covcol(data, n, m, symmat);
cannam@16 264
cannam@16 265 /*********************************************************************
cannam@16 266 Eigen-reduction
cannam@16 267 **********************************************************************/
cannam@16 268
cannam@16 269 /* Allocate storage for dummy and new vectors. */
cannam@16 270 evals = (double*) malloc(m*sizeof(double)); /* Storage alloc. for vector of eigenvalues */
cannam@16 271 interm = (double*) malloc(m*sizeof(double)); /* Storage alloc. for 'intermediate' vector */
cannam@16 272 //MALLOC_ARRAY(symmat2,m,m,double);
cannam@16 273 //for (i = 0; i < m; i++) {
cannam@16 274 // for (j = 0; j < m; j++) {
cannam@16 275 // symmat2[i][j] = symmat[i][j]; /* Needed below for col. projections */
cannam@16 276 // }
cannam@16 277 //}
cannam@16 278 tred2(symmat, m, evals, interm); /* Triangular decomposition */
cannam@16 279 tqli(evals, interm, m, symmat); /* Reduction of sym. trid. matrix */
cannam@16 280 /* evals now contains the eigenvalues,
cannam@16 281 columns of symmat now contain the associated eigenvectors. */
cannam@16 282
cannam@16 283 /*
cannam@16 284 printf("\nEigenvalues:\n");
cannam@16 285 for (j = m-1; j >= 0; j--) {
cannam@16 286 printf("%18.5f\n", evals[j]); }
cannam@16 287 printf("\n(Eigenvalues should be strictly positive; limited\n");
cannam@16 288 printf("precision machine arithmetic may affect this.\n");
cannam@16 289 printf("Eigenvalues are often expressed as cumulative\n");
cannam@16 290 printf("percentages, representing the 'percentage variance\n");
cannam@16 291 printf("explained' by the associated axis or principal component.)\n");
cannam@16 292
cannam@16 293 printf("\nEigenvectors:\n");
cannam@16 294 printf("(First three; their definition in terms of original vbes.)\n");
cannam@16 295 for (j = 0; j < m; j++) {
cannam@16 296 for (i = 1; i <= 3; i++) {
cannam@16 297 printf("%12.4f", symmat[j][m-i]); }
cannam@16 298 printf("\n"); }
cannam@16 299 */
cannam@16 300
cannam@16 301 /* Form projections of row-points on prin. components. */
cannam@16 302 /* Store in 'data', overwriting original data. */
cannam@16 303 for (i = 0; i < n; i++) {
cannam@16 304 for (j = 0; j < m; j++) {
cannam@16 305 interm[j] = data[i][j]; } /* data[i][j] will be overwritten */
cannam@16 306 for (k = 0; k < ncomponents; k++) {
cannam@16 307 data[i][k] = 0.0;
cannam@16 308 for (k2 = 0; k2 < m; k2++) {
cannam@16 309 data[i][k] += interm[k2] * symmat[k2][m-k-1]; }
cannam@16 310 }
cannam@16 311 }
cannam@16 312
cannam@16 313 /*
cannam@16 314 printf("\nProjections of row-points on first 3 prin. comps.:\n");
cannam@16 315 for (i = 0; i < n; i++) {
cannam@16 316 for (j = 0; j < 3; j++) {
cannam@16 317 printf("%12.4f", data[i][j]); }
cannam@16 318 printf("\n"); }
cannam@16 319 */
cannam@16 320
cannam@16 321 /* Form projections of col.-points on first three prin. components. */
cannam@16 322 /* Store in 'symmat2', overwriting what was stored in this. */
cannam@16 323 //for (j = 0; j < m; j++) {
cannam@16 324 // for (k = 0; k < m; k++) {
cannam@16 325 // interm[k] = symmat2[j][k]; } /*symmat2[j][k] will be overwritten*/
cannam@16 326 // for (i = 0; i < 3; i++) {
cannam@16 327 // symmat2[j][i] = 0.0;
cannam@16 328 // for (k2 = 0; k2 < m; k2++) {
cannam@16 329 // symmat2[j][i] += interm[k2] * symmat[k2][m-i-1]; }
cannam@16 330 // if (evals[m-i-1] > 0.0005) /* Guard against zero eigenvalue */
cannam@16 331 // symmat2[j][i] /= sqrt(evals[m-i-1]); /* Rescale */
cannam@16 332 // else
cannam@16 333 // symmat2[j][i] = 0.0; /* Standard kludge */
cannam@16 334 // }
cannam@16 335 // }
cannam@16 336
cannam@16 337 /*
cannam@16 338 printf("\nProjections of column-points on first 3 prin. comps.:\n");
cannam@16 339 for (j = 0; j < m; j++) {
cannam@16 340 for (k = 0; k < 3; k++) {
cannam@16 341 printf("%12.4f", symmat2[j][k]); }
cannam@16 342 printf("\n"); }
cannam@16 343 */
cannam@16 344
cannam@16 345
cannam@16 346 for (i = 0; i < m; i++)
cannam@16 347 free(symmat[i]);
cannam@16 348 free(symmat);
cannam@16 349 //FREE_ARRAY(symmat2,m);
cannam@16 350 free(evals);
cannam@16 351 free(interm);
cannam@16 352
cannam@16 353 }
cannam@16 354
cannam@16 355
cannam@16 356