annotate ext/cblas/src/dgemm.c @ 515:08bcc06c38ec tip master

Remove fast-math
author Chris Cannam <cannam@all-day-breakfast.com>
date Tue, 28 Jan 2020 15:27:37 +0000
parents 905e45637745
children
rev   line source
c@427 1 /* dgemm.f -- translated by f2c (version 20061008).
c@427 2 You must link the resulting object file with libf2c:
c@427 3 on Microsoft Windows system, link with libf2c.lib;
c@427 4 on Linux or Unix systems, link with .../path/to/libf2c.a -lm
c@427 5 or, if you install libf2c.a in a standard place, with -lf2c -lm
c@427 6 -- in that order, at the end of the command line, as in
c@427 7 cc *.o -lf2c -lm
c@427 8 Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
c@427 9
c@427 10 http://www.netlib.org/f2c/libf2c.zip
c@427 11 */
c@427 12
c@427 13 #include "f2c.h"
c@427 14 #include "blaswrap.h"
c@427 15
c@427 16 /* Subroutine */ int dgemm_(char *transa, char *transb, integer *m, integer *
c@427 17 n, integer *k, doublereal *alpha, doublereal *a, integer *lda,
c@427 18 doublereal *b, integer *ldb, doublereal *beta, doublereal *c__,
c@427 19 integer *ldc)
c@427 20 {
c@427 21 /* System generated locals */
c@427 22 integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2,
c@427 23 i__3;
c@427 24
c@427 25 /* Local variables */
c@427 26 integer i__, j, l, info;
c@427 27 logical nota, notb;
c@427 28 doublereal temp;
c@427 29 integer ncola;
c@427 30 extern logical lsame_(char *, char *);
c@427 31 integer nrowa, nrowb;
c@427 32 extern /* Subroutine */ int xerbla_(char *, integer *);
c@427 33
c@427 34 /* .. Scalar Arguments .. */
c@427 35 /* .. */
c@427 36 /* .. Array Arguments .. */
c@427 37 /* .. */
c@427 38
c@427 39 /* Purpose */
c@427 40 /* ======= */
c@427 41
c@427 42 /* DGEMM performs one of the matrix-matrix operations */
c@427 43
c@427 44 /* C := alpha*op( A )*op( B ) + beta*C, */
c@427 45
c@427 46 /* where op( X ) is one of */
c@427 47
c@427 48 /* op( X ) = X or op( X ) = X', */
c@427 49
c@427 50 /* alpha and beta are scalars, and A, B and C are matrices, with op( A ) */
c@427 51 /* an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. */
c@427 52
c@427 53 /* Arguments */
c@427 54 /* ========== */
c@427 55
c@427 56 /* TRANSA - CHARACTER*1. */
c@427 57 /* On entry, TRANSA specifies the form of op( A ) to be used in */
c@427 58 /* the matrix multiplication as follows: */
c@427 59
c@427 60 /* TRANSA = 'N' or 'n', op( A ) = A. */
c@427 61
c@427 62 /* TRANSA = 'T' or 't', op( A ) = A'. */
c@427 63
c@427 64 /* TRANSA = 'C' or 'c', op( A ) = A'. */
c@427 65
c@427 66 /* Unchanged on exit. */
c@427 67
c@427 68 /* TRANSB - CHARACTER*1. */
c@427 69 /* On entry, TRANSB specifies the form of op( B ) to be used in */
c@427 70 /* the matrix multiplication as follows: */
c@427 71
c@427 72 /* TRANSB = 'N' or 'n', op( B ) = B. */
c@427 73
c@427 74 /* TRANSB = 'T' or 't', op( B ) = B'. */
c@427 75
c@427 76 /* TRANSB = 'C' or 'c', op( B ) = B'. */
c@427 77
c@427 78 /* Unchanged on exit. */
c@427 79
c@427 80 /* M - INTEGER. */
c@427 81 /* On entry, M specifies the number of rows of the matrix */
c@427 82 /* op( A ) and of the matrix C. M must be at least zero. */
c@427 83 /* Unchanged on exit. */
c@427 84
c@427 85 /* N - INTEGER. */
c@427 86 /* On entry, N specifies the number of columns of the matrix */
c@427 87 /* op( B ) and the number of columns of the matrix C. N must be */
c@427 88 /* at least zero. */
c@427 89 /* Unchanged on exit. */
c@427 90
c@427 91 /* K - INTEGER. */
c@427 92 /* On entry, K specifies the number of columns of the matrix */
c@427 93 /* op( A ) and the number of rows of the matrix op( B ). K must */
c@427 94 /* be at least zero. */
c@427 95 /* Unchanged on exit. */
c@427 96
c@427 97 /* ALPHA - DOUBLE PRECISION. */
c@427 98 /* On entry, ALPHA specifies the scalar alpha. */
c@427 99 /* Unchanged on exit. */
c@427 100
c@427 101 /* A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is */
c@427 102 /* k when TRANSA = 'N' or 'n', and is m otherwise. */
c@427 103 /* Before entry with TRANSA = 'N' or 'n', the leading m by k */
c@427 104 /* part of the array A must contain the matrix A, otherwise */
c@427 105 /* the leading k by m part of the array A must contain the */
c@427 106 /* matrix A. */
c@427 107 /* Unchanged on exit. */
c@427 108
c@427 109 /* LDA - INTEGER. */
c@427 110 /* On entry, LDA specifies the first dimension of A as declared */
c@427 111 /* in the calling (sub) program. When TRANSA = 'N' or 'n' then */
c@427 112 /* LDA must be at least max( 1, m ), otherwise LDA must be at */
c@427 113 /* least max( 1, k ). */
c@427 114 /* Unchanged on exit. */
c@427 115
c@427 116 /* B - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is */
c@427 117 /* n when TRANSB = 'N' or 'n', and is k otherwise. */
c@427 118 /* Before entry with TRANSB = 'N' or 'n', the leading k by n */
c@427 119 /* part of the array B must contain the matrix B, otherwise */
c@427 120 /* the leading n by k part of the array B must contain the */
c@427 121 /* matrix B. */
c@427 122 /* Unchanged on exit. */
c@427 123
c@427 124 /* LDB - INTEGER. */
c@427 125 /* On entry, LDB specifies the first dimension of B as declared */
c@427 126 /* in the calling (sub) program. When TRANSB = 'N' or 'n' then */
c@427 127 /* LDB must be at least max( 1, k ), otherwise LDB must be at */
c@427 128 /* least max( 1, n ). */
c@427 129 /* Unchanged on exit. */
c@427 130
c@427 131 /* BETA - DOUBLE PRECISION. */
c@427 132 /* On entry, BETA specifies the scalar beta. When BETA is */
c@427 133 /* supplied as zero then C need not be set on input. */
c@427 134 /* Unchanged on exit. */
c@427 135
c@427 136 /* C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). */
c@427 137 /* Before entry, the leading m by n part of the array C must */
c@427 138 /* contain the matrix C, except when beta is zero, in which */
c@427 139 /* case C need not be set on entry. */
c@427 140 /* On exit, the array C is overwritten by the m by n matrix */
c@427 141 /* ( alpha*op( A )*op( B ) + beta*C ). */
c@427 142
c@427 143 /* LDC - INTEGER. */
c@427 144 /* On entry, LDC specifies the first dimension of C as declared */
c@427 145 /* in the calling (sub) program. LDC must be at least */
c@427 146 /* max( 1, m ). */
c@427 147 /* Unchanged on exit. */
c@427 148
c@427 149
c@427 150 /* Level 3 Blas routine. */
c@427 151
c@427 152 /* -- Written on 8-February-1989. */
c@427 153 /* Jack Dongarra, Argonne National Laboratory. */
c@427 154 /* Iain Duff, AERE Harwell. */
c@427 155 /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */
c@427 156 /* Sven Hammarling, Numerical Algorithms Group Ltd. */
c@427 157
c@427 158
c@427 159 /* .. External Functions .. */
c@427 160 /* .. */
c@427 161 /* .. External Subroutines .. */
c@427 162 /* .. */
c@427 163 /* .. Intrinsic Functions .. */
c@427 164 /* .. */
c@427 165 /* .. Local Scalars .. */
c@427 166 /* .. */
c@427 167 /* .. Parameters .. */
c@427 168 /* .. */
c@427 169
c@427 170 /* Set NOTA and NOTB as true if A and B respectively are not */
c@427 171 /* transposed and set NROWA, NCOLA and NROWB as the number of rows */
c@427 172 /* and columns of A and the number of rows of B respectively. */
c@427 173
c@427 174 /* Parameter adjustments */
c@427 175 a_dim1 = *lda;
c@427 176 a_offset = 1 + a_dim1;
c@427 177 a -= a_offset;
c@427 178 b_dim1 = *ldb;
c@427 179 b_offset = 1 + b_dim1;
c@427 180 b -= b_offset;
c@427 181 c_dim1 = *ldc;
c@427 182 c_offset = 1 + c_dim1;
c@427 183 c__ -= c_offset;
c@427 184
c@427 185 /* Function Body */
c@427 186 nota = lsame_(transa, "N");
c@427 187 notb = lsame_(transb, "N");
c@427 188 if (nota) {
c@427 189 nrowa = *m;
c@427 190 ncola = *k;
c@427 191 } else {
c@427 192 nrowa = *k;
c@427 193 ncola = *m;
c@427 194 }
c@427 195 if (notb) {
c@427 196 nrowb = *k;
c@427 197 } else {
c@427 198 nrowb = *n;
c@427 199 }
c@427 200
c@427 201 /* Test the input parameters. */
c@427 202
c@427 203 info = 0;
c@427 204 if (! nota && ! lsame_(transa, "C") && ! lsame_(
c@427 205 transa, "T")) {
c@427 206 info = 1;
c@427 207 } else if (! notb && ! lsame_(transb, "C") && !
c@427 208 lsame_(transb, "T")) {
c@427 209 info = 2;
c@427 210 } else if (*m < 0) {
c@427 211 info = 3;
c@427 212 } else if (*n < 0) {
c@427 213 info = 4;
c@427 214 } else if (*k < 0) {
c@427 215 info = 5;
c@427 216 } else if (*lda < max(1,nrowa)) {
c@427 217 info = 8;
c@427 218 } else if (*ldb < max(1,nrowb)) {
c@427 219 info = 10;
c@427 220 } else if (*ldc < max(1,*m)) {
c@427 221 info = 13;
c@427 222 }
c@427 223 if (info != 0) {
c@427 224 xerbla_("DGEMM ", &info);
c@427 225 return 0;
c@427 226 }
c@427 227
c@427 228 /* Quick return if possible. */
c@427 229
c@427 230 if (*m == 0 || *n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) {
c@427 231 return 0;
c@427 232 }
c@427 233
c@427 234 /* And if alpha.eq.zero. */
c@427 235
c@427 236 if (*alpha == 0.) {
c@427 237 if (*beta == 0.) {
c@427 238 i__1 = *n;
c@427 239 for (j = 1; j <= i__1; ++j) {
c@427 240 i__2 = *m;
c@427 241 for (i__ = 1; i__ <= i__2; ++i__) {
c@427 242 c__[i__ + j * c_dim1] = 0.;
c@427 243 /* L10: */
c@427 244 }
c@427 245 /* L20: */
c@427 246 }
c@427 247 } else {
c@427 248 i__1 = *n;
c@427 249 for (j = 1; j <= i__1; ++j) {
c@427 250 i__2 = *m;
c@427 251 for (i__ = 1; i__ <= i__2; ++i__) {
c@427 252 c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
c@427 253 /* L30: */
c@427 254 }
c@427 255 /* L40: */
c@427 256 }
c@427 257 }
c@427 258 return 0;
c@427 259 }
c@427 260
c@427 261 /* Start the operations. */
c@427 262
c@427 263 if (notb) {
c@427 264 if (nota) {
c@427 265
c@427 266 /* Form C := alpha*A*B + beta*C. */
c@427 267
c@427 268 i__1 = *n;
c@427 269 for (j = 1; j <= i__1; ++j) {
c@427 270 if (*beta == 0.) {
c@427 271 i__2 = *m;
c@427 272 for (i__ = 1; i__ <= i__2; ++i__) {
c@427 273 c__[i__ + j * c_dim1] = 0.;
c@427 274 /* L50: */
c@427 275 }
c@427 276 } else if (*beta != 1.) {
c@427 277 i__2 = *m;
c@427 278 for (i__ = 1; i__ <= i__2; ++i__) {
c@427 279 c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
c@427 280 /* L60: */
c@427 281 }
c@427 282 }
c@427 283 i__2 = *k;
c@427 284 for (l = 1; l <= i__2; ++l) {
c@427 285 if (b[l + j * b_dim1] != 0.) {
c@427 286 temp = *alpha * b[l + j * b_dim1];
c@427 287 i__3 = *m;
c@427 288 for (i__ = 1; i__ <= i__3; ++i__) {
c@427 289 c__[i__ + j * c_dim1] += temp * a[i__ + l *
c@427 290 a_dim1];
c@427 291 /* L70: */
c@427 292 }
c@427 293 }
c@427 294 /* L80: */
c@427 295 }
c@427 296 /* L90: */
c@427 297 }
c@427 298 } else {
c@427 299
c@427 300 /* Form C := alpha*A'*B + beta*C */
c@427 301
c@427 302 i__1 = *n;
c@427 303 for (j = 1; j <= i__1; ++j) {
c@427 304 i__2 = *m;
c@427 305 for (i__ = 1; i__ <= i__2; ++i__) {
c@427 306 temp = 0.;
c@427 307 i__3 = *k;
c@427 308 for (l = 1; l <= i__3; ++l) {
c@427 309 temp += a[l + i__ * a_dim1] * b[l + j * b_dim1];
c@427 310 /* L100: */
c@427 311 }
c@427 312 if (*beta == 0.) {
c@427 313 c__[i__ + j * c_dim1] = *alpha * temp;
c@427 314 } else {
c@427 315 c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[
c@427 316 i__ + j * c_dim1];
c@427 317 }
c@427 318 /* L110: */
c@427 319 }
c@427 320 /* L120: */
c@427 321 }
c@427 322 }
c@427 323 } else {
c@427 324 if (nota) {
c@427 325
c@427 326 /* Form C := alpha*A*B' + beta*C */
c@427 327
c@427 328 i__1 = *n;
c@427 329 for (j = 1; j <= i__1; ++j) {
c@427 330 if (*beta == 0.) {
c@427 331 i__2 = *m;
c@427 332 for (i__ = 1; i__ <= i__2; ++i__) {
c@427 333 c__[i__ + j * c_dim1] = 0.;
c@427 334 /* L130: */
c@427 335 }
c@427 336 } else if (*beta != 1.) {
c@427 337 i__2 = *m;
c@427 338 for (i__ = 1; i__ <= i__2; ++i__) {
c@427 339 c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
c@427 340 /* L140: */
c@427 341 }
c@427 342 }
c@427 343 i__2 = *k;
c@427 344 for (l = 1; l <= i__2; ++l) {
c@427 345 if (b[j + l * b_dim1] != 0.) {
c@427 346 temp = *alpha * b[j + l * b_dim1];
c@427 347 i__3 = *m;
c@427 348 for (i__ = 1; i__ <= i__3; ++i__) {
c@427 349 c__[i__ + j * c_dim1] += temp * a[i__ + l *
c@427 350 a_dim1];
c@427 351 /* L150: */
c@427 352 }
c@427 353 }
c@427 354 /* L160: */
c@427 355 }
c@427 356 /* L170: */
c@427 357 }
c@427 358 } else {
c@427 359
c@427 360 /* Form C := alpha*A'*B' + beta*C */
c@427 361
c@427 362 i__1 = *n;
c@427 363 for (j = 1; j <= i__1; ++j) {
c@427 364 i__2 = *m;
c@427 365 for (i__ = 1; i__ <= i__2; ++i__) {
c@427 366 temp = 0.;
c@427 367 i__3 = *k;
c@427 368 for (l = 1; l <= i__3; ++l) {
c@427 369 temp += a[l + i__ * a_dim1] * b[j + l * b_dim1];
c@427 370 /* L180: */
c@427 371 }
c@427 372 if (*beta == 0.) {
c@427 373 c__[i__ + j * c_dim1] = *alpha * temp;
c@427 374 } else {
c@427 375 c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[
c@427 376 i__ + j * c_dim1];
c@427 377 }
c@427 378 /* L190: */
c@427 379 }
c@427 380 /* L200: */
c@427 381 }
c@427 382 }
c@427 383 }
c@427 384
c@427 385 return 0;
c@427 386
c@427 387 /* End of DGEMM . */
c@427 388
c@427 389 } /* dgemm_ */