qm-dsp: ext/cblas/src/dgemm.c annotate

annotate ext/cblas/src/dgemm.c @ 515:08bcc06c38ec tip master

Remove fast-math

author	Chris Cannam <cannam@all-day-breakfast.com>
date	Tue, 28 Jan 2020 15:27:37 +0000
parents	905e45637745
children

rev	line source
c@427	1 /* dgemm.f -- translated by f2c (version 20061008).
c@427	2 You must link the resulting object file with libf2c:
c@427	3 on Microsoft Windows system, link with libf2c.lib;
c@427	4 on Linux or Unix systems, link with .../path/to/libf2c.a -lm
c@427	5 or, if you install libf2c.a in a standard place, with -lf2c -lm
c@427	6 -- in that order, at the end of the command line, as in
c@427	7 cc *.o -lf2c -lm
c@427	8 Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
c@427	9
c@427	10 http://www.netlib.org/f2c/libf2c.zip
c@427	11 */
c@427	12
c@427	13 #include "f2c.h"
c@427	14 #include "blaswrap.h"
c@427	15
c@427	16 /* Subroutine / int dgemm_(char transa, char transb, integer m, integer *
c@427	17 n, integer k, doublereal alpha, doublereal a, integer lda,
c@427	18 doublereal b, integer ldb, doublereal beta, doublereal c__,
c@427	19 integer *ldc)
c@427	20 {
c@427	21 /* System generated locals */
c@427	22 integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2,
c@427	23 i__3;
c@427	24
c@427	25 /* Local variables */
c@427	26 integer i__, j, l, info;
c@427	27 logical nota, notb;
c@427	28 doublereal temp;
c@427	29 integer ncola;
c@427	30 extern logical lsame_(char , char );
c@427	31 integer nrowa, nrowb;
c@427	32 extern /* Subroutine / int xerbla_(char , integer *);
c@427	33
c@427	34 /* .. Scalar Arguments .. */
c@427	35 /* .. */
c@427	36 /* .. Array Arguments .. */
c@427	37 /* .. */
c@427	38
c@427	39 /* Purpose */
c@427	40 /* ======= */
c@427	41
c@427	42 /* DGEMM performs one of the matrix-matrix operations */
c@427	43
c@427	44 /* C := alphaop( A )op( B ) + betaC, /
c@427	45
c@427	46 /* where op( X ) is one of */
c@427	47
c@427	48 /* op( X ) = X or op( X ) = X', */
c@427	49
c@427	50 /* alpha and beta are scalars, and A, B and C are matrices, with op( A ) */
c@427	51 /* an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. */
c@427	52
c@427	53 /* Arguments */
c@427	54 /* ========== */
c@427	55
c@427	56 /* TRANSA - CHARACTER1. /
c@427	57 /* On entry, TRANSA specifies the form of op( A ) to be used in */
c@427	58 /* the matrix multiplication as follows: */
c@427	59
c@427	60 /* TRANSA = 'N' or 'n', op( A ) = A. */
c@427	61
c@427	62 /* TRANSA = 'T' or 't', op( A ) = A'. */
c@427	63
c@427	64 /* TRANSA = 'C' or 'c', op( A ) = A'. */
c@427	65
c@427	66 /* Unchanged on exit. */
c@427	67
c@427	68 /* TRANSB - CHARACTER1. /
c@427	69 /* On entry, TRANSB specifies the form of op( B ) to be used in */
c@427	70 /* the matrix multiplication as follows: */
c@427	71
c@427	72 /* TRANSB = 'N' or 'n', op( B ) = B. */
c@427	73
c@427	74 /* TRANSB = 'T' or 't', op( B ) = B'. */
c@427	75
c@427	76 /* TRANSB = 'C' or 'c', op( B ) = B'. */
c@427	77
c@427	78 /* Unchanged on exit. */
c@427	79
c@427	80 /* M - INTEGER. */
c@427	81 /* On entry, M specifies the number of rows of the matrix */
c@427	82 /* op( A ) and of the matrix C. M must be at least zero. */
c@427	83 /* Unchanged on exit. */
c@427	84
c@427	85 /* N - INTEGER. */
c@427	86 /* On entry, N specifies the number of columns of the matrix */
c@427	87 /* op( B ) and the number of columns of the matrix C. N must be */
c@427	88 /* at least zero. */
c@427	89 /* Unchanged on exit. */
c@427	90
c@427	91 /* K - INTEGER. */
c@427	92 /* On entry, K specifies the number of columns of the matrix */
c@427	93 /* op( A ) and the number of rows of the matrix op( B ). K must */
c@427	94 /* be at least zero. */
c@427	95 /* Unchanged on exit. */
c@427	96
c@427	97 /* ALPHA - DOUBLE PRECISION. */
c@427	98 /* On entry, ALPHA specifies the scalar alpha. */
c@427	99 /* Unchanged on exit. */
c@427	100
c@427	101 /* A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is */
c@427	102 /* k when TRANSA = 'N' or 'n', and is m otherwise. */
c@427	103 /* Before entry with TRANSA = 'N' or 'n', the leading m by k */
c@427	104 /* part of the array A must contain the matrix A, otherwise */
c@427	105 /* the leading k by m part of the array A must contain the */
c@427	106 /* matrix A. */
c@427	107 /* Unchanged on exit. */
c@427	108
c@427	109 /* LDA - INTEGER. */
c@427	110 /* On entry, LDA specifies the first dimension of A as declared */
c@427	111 /* in the calling (sub) program. When TRANSA = 'N' or 'n' then */
c@427	112 /* LDA must be at least max( 1, m ), otherwise LDA must be at */
c@427	113 /* least max( 1, k ). */
c@427	114 /* Unchanged on exit. */
c@427	115
c@427	116 /* B - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is */
c@427	117 /* n when TRANSB = 'N' or 'n', and is k otherwise. */
c@427	118 /* Before entry with TRANSB = 'N' or 'n', the leading k by n */
c@427	119 /* part of the array B must contain the matrix B, otherwise */
c@427	120 /* the leading n by k part of the array B must contain the */
c@427	121 /* matrix B. */
c@427	122 /* Unchanged on exit. */
c@427	123
c@427	124 /* LDB - INTEGER. */
c@427	125 /* On entry, LDB specifies the first dimension of B as declared */
c@427	126 /* in the calling (sub) program. When TRANSB = 'N' or 'n' then */
c@427	127 /* LDB must be at least max( 1, k ), otherwise LDB must be at */
c@427	128 /* least max( 1, n ). */
c@427	129 /* Unchanged on exit. */
c@427	130
c@427	131 /* BETA - DOUBLE PRECISION. */
c@427	132 /* On entry, BETA specifies the scalar beta. When BETA is */
c@427	133 /* supplied as zero then C need not be set on input. */
c@427	134 /* Unchanged on exit. */
c@427	135
c@427	136 /* C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). */
c@427	137 /* Before entry, the leading m by n part of the array C must */
c@427	138 /* contain the matrix C, except when beta is zero, in which */
c@427	139 /* case C need not be set on entry. */
c@427	140 /* On exit, the array C is overwritten by the m by n matrix */
c@427	141 /* ( alphaop( A )op( B ) + betaC ). /
c@427	142
c@427	143 /* LDC - INTEGER. */
c@427	144 /* On entry, LDC specifies the first dimension of C as declared */
c@427	145 /* in the calling (sub) program. LDC must be at least */
c@427	146 /* max( 1, m ). */
c@427	147 /* Unchanged on exit. */
c@427	148
c@427	149
c@427	150 /* Level 3 Blas routine. */
c@427	151
c@427	152 /* -- Written on 8-February-1989. */
c@427	153 /* Jack Dongarra, Argonne National Laboratory. */
c@427	154 /* Iain Duff, AERE Harwell. */
c@427	155 /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */
c@427	156 /* Sven Hammarling, Numerical Algorithms Group Ltd. */
c@427	157
c@427	158
c@427	159 /* .. External Functions .. */
c@427	160 /* .. */
c@427	161 /* .. External Subroutines .. */
c@427	162 /* .. */
c@427	163 /* .. Intrinsic Functions .. */
c@427	164 /* .. */
c@427	165 /* .. Local Scalars .. */
c@427	166 /* .. */
c@427	167 /* .. Parameters .. */
c@427	168 /* .. */
c@427	169
c@427	170 /* Set NOTA and NOTB as true if A and B respectively are not */
c@427	171 /* transposed and set NROWA, NCOLA and NROWB as the number of rows */
c@427	172 /* and columns of A and the number of rows of B respectively. */
c@427	173
c@427	174 /* Parameter adjustments */
c@427	175 a_dim1 = *lda;
c@427	176 a_offset = 1 + a_dim1;
c@427	177 a -= a_offset;
c@427	178 b_dim1 = *ldb;
c@427	179 b_offset = 1 + b_dim1;
c@427	180 b -= b_offset;
c@427	181 c_dim1 = *ldc;
c@427	182 c_offset = 1 + c_dim1;
c@427	183 c__ -= c_offset;
c@427	184
c@427	185 /* Function Body */
c@427	186 nota = lsame_(transa, "N");
c@427	187 notb = lsame_(transb, "N");
c@427	188 if (nota) {
c@427	189 nrowa = *m;
c@427	190 ncola = *k;
c@427	191 } else {
c@427	192 nrowa = *k;
c@427	193 ncola = *m;
c@427	194 }
c@427	195 if (notb) {
c@427	196 nrowb = *k;
c@427	197 } else {
c@427	198 nrowb = *n;
c@427	199 }
c@427	200
c@427	201 /* Test the input parameters. */
c@427	202
c@427	203 info = 0;
c@427	204 if (! nota && ! lsame_(transa, "C") && ! lsame_(
c@427	205 transa, "T")) {
c@427	206 info = 1;
c@427	207 } else if (! notb && ! lsame_(transb, "C") && !
c@427	208 lsame_(transb, "T")) {
c@427	209 info = 2;
c@427	210 } else if (*m < 0) {
c@427	211 info = 3;
c@427	212 } else if (*n < 0) {
c@427	213 info = 4;
c@427	214 } else if (*k < 0) {
c@427	215 info = 5;
c@427	216 } else if (*lda < max(1,nrowa)) {
c@427	217 info = 8;
c@427	218 } else if (*ldb < max(1,nrowb)) {
c@427	219 info = 10;
c@427	220 } else if (ldc < max(1,m)) {
c@427	221 info = 13;
c@427	222 }
c@427	223 if (info != 0) {
c@427	224 xerbla_("DGEMM ", &info);
c@427	225 return 0;
c@427	226 }
c@427	227
c@427	228 /* Quick return if possible. */
c@427	229
c@427	230 if (m == 0 \|\| n == 0 \|\| (alpha == 0. \|\| k == 0) && *beta == 1.) {
c@427	231 return 0;
c@427	232 }
c@427	233
c@427	234 /* And if alpha.eq.zero. */
c@427	235
c@427	236 if (*alpha == 0.) {
c@427	237 if (*beta == 0.) {
c@427	238 i__1 = *n;
c@427	239 for (j = 1; j <= i__1; ++j) {
c@427	240 i__2 = *m;
c@427	241 for (i__ = 1; i__ <= i__2; ++i__) {
c@427	242 c__[i__ + j * c_dim1] = 0.;
c@427	243 /* L10: */
c@427	244 }
c@427	245 /* L20: */
c@427	246 }
c@427	247 } else {
c@427	248 i__1 = *n;
c@427	249 for (j = 1; j <= i__1; ++j) {
c@427	250 i__2 = *m;
c@427	251 for (i__ = 1; i__ <= i__2; ++i__) {
c@427	252 c__[i__ + j * c_dim1] = beta c__[i__ + j * c_dim1];
c@427	253 /* L30: */
c@427	254 }
c@427	255 /* L40: */
c@427	256 }
c@427	257 }
c@427	258 return 0;
c@427	259 }
c@427	260
c@427	261 /* Start the operations. */
c@427	262
c@427	263 if (notb) {
c@427	264 if (nota) {
c@427	265
c@427	266 /* Form C := alphaAB + betaC. /
c@427	267
c@427	268 i__1 = *n;
c@427	269 for (j = 1; j <= i__1; ++j) {
c@427	270 if (*beta == 0.) {
c@427	271 i__2 = *m;
c@427	272 for (i__ = 1; i__ <= i__2; ++i__) {
c@427	273 c__[i__ + j * c_dim1] = 0.;
c@427	274 /* L50: */
c@427	275 }
c@427	276 } else if (*beta != 1.) {
c@427	277 i__2 = *m;
c@427	278 for (i__ = 1; i__ <= i__2; ++i__) {
c@427	279 c__[i__ + j * c_dim1] = beta c__[i__ + j * c_dim1];
c@427	280 /* L60: */
c@427	281 }
c@427	282 }
c@427	283 i__2 = *k;
c@427	284 for (l = 1; l <= i__2; ++l) {
c@427	285 if (b[l + j * b_dim1] != 0.) {
c@427	286 temp = alpha b[l + j * b_dim1];
c@427	287 i__3 = *m;
c@427	288 for (i__ = 1; i__ <= i__3; ++i__) {
c@427	289 c__[i__ + j * c_dim1] += temp * a[i__ + l *
c@427	290 a_dim1];
c@427	291 /* L70: */
c@427	292 }
c@427	293 }
c@427	294 /* L80: */
c@427	295 }
c@427	296 /* L90: */
c@427	297 }
c@427	298 } else {
c@427	299
c@427	300 /* Form C := alphaA'B + betaC /
c@427	301
c@427	302 i__1 = *n;
c@427	303 for (j = 1; j <= i__1; ++j) {
c@427	304 i__2 = *m;
c@427	305 for (i__ = 1; i__ <= i__2; ++i__) {
c@427	306 temp = 0.;
c@427	307 i__3 = *k;
c@427	308 for (l = 1; l <= i__3; ++l) {
c@427	309 temp += a[l + i__ * a_dim1] * b[l + j * b_dim1];
c@427	310 /* L100: */
c@427	311 }
c@427	312 if (*beta == 0.) {
c@427	313 c__[i__ + j * c_dim1] = alpha temp;
c@427	314 } else {
c@427	315 c__[i__ + j * c_dim1] = alpha temp + beta c__[
c@427	316 i__ + j * c_dim1];
c@427	317 }
c@427	318 /* L110: */
c@427	319 }
c@427	320 /* L120: */
c@427	321 }
c@427	322 }
c@427	323 } else {
c@427	324 if (nota) {
c@427	325
c@427	326 /* Form C := alphaAB' + betaC /
c@427	327
c@427	328 i__1 = *n;
c@427	329 for (j = 1; j <= i__1; ++j) {
c@427	330 if (*beta == 0.) {
c@427	331 i__2 = *m;
c@427	332 for (i__ = 1; i__ <= i__2; ++i__) {
c@427	333 c__[i__ + j * c_dim1] = 0.;
c@427	334 /* L130: */
c@427	335 }
c@427	336 } else if (*beta != 1.) {
c@427	337 i__2 = *m;
c@427	338 for (i__ = 1; i__ <= i__2; ++i__) {
c@427	339 c__[i__ + j * c_dim1] = beta c__[i__ + j * c_dim1];
c@427	340 /* L140: */
c@427	341 }
c@427	342 }
c@427	343 i__2 = *k;
c@427	344 for (l = 1; l <= i__2; ++l) {
c@427	345 if (b[j + l * b_dim1] != 0.) {
c@427	346 temp = alpha b[j + l * b_dim1];
c@427	347 i__3 = *m;
c@427	348 for (i__ = 1; i__ <= i__3; ++i__) {
c@427	349 c__[i__ + j * c_dim1] += temp * a[i__ + l *
c@427	350 a_dim1];
c@427	351 /* L150: */
c@427	352 }
c@427	353 }
c@427	354 /* L160: */
c@427	355 }
c@427	356 /* L170: */
c@427	357 }
c@427	358 } else {
c@427	359
c@427	360 /* Form C := alphaA'B' + betaC /
c@427	361
c@427	362 i__1 = *n;
c@427	363 for (j = 1; j <= i__1; ++j) {
c@427	364 i__2 = *m;
c@427	365 for (i__ = 1; i__ <= i__2; ++i__) {
c@427	366 temp = 0.;
c@427	367 i__3 = *k;
c@427	368 for (l = 1; l <= i__3; ++l) {
c@427	369 temp += a[l + i__ * a_dim1] * b[j + l * b_dim1];
c@427	370 /* L180: */
c@427	371 }
c@427	372 if (*beta == 0.) {
c@427	373 c__[i__ + j * c_dim1] = alpha temp;
c@427	374 } else {
c@427	375 c__[i__ + j * c_dim1] = alpha temp + beta c__[
c@427	376 i__ + j * c_dim1];
c@427	377 }
c@427	378 /* L190: */
c@427	379 }
c@427	380 /* L200: */
c@427	381 }
c@427	382 }
c@427	383 }
c@427	384
c@427	385 return 0;
c@427	386
c@427	387 /* End of DGEMM . */
c@427	388
c@427	389 } /* dgemm_ */

Mercurial > hg > qm-dsp

annotate ext/cblas/src/dgemm.c @ 515:08bcc06c38ec tip master