qm-dsp: ext/cblas/src/dgemm.c annotate

annotate ext/cblas/src/dgemm.c @ 202:45330e0d2819 clapack-included

Add the CLAPACK and CBLAS/F2C-BLAS files we use

author	Chris Cannam
date	Fri, 30 Sep 2016 15:51:22 +0100
parents
children

rev	line source
Chris@202	1 /* dgemm.f -- translated by f2c (version 20061008).
Chris@202	2 You must link the resulting object file with libf2c:
Chris@202	3 on Microsoft Windows system, link with libf2c.lib;
Chris@202	4 on Linux or Unix systems, link with .../path/to/libf2c.a -lm
Chris@202	5 or, if you install libf2c.a in a standard place, with -lf2c -lm
Chris@202	6 -- in that order, at the end of the command line, as in
Chris@202	7 cc *.o -lf2c -lm
Chris@202	8 Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
Chris@202	9
Chris@202	10 http://www.netlib.org/f2c/libf2c.zip
Chris@202	11 */
Chris@202	12
Chris@202	13 #include "f2c.h"
Chris@202	14 #include "blaswrap.h"
Chris@202	15
Chris@202	16 /* Subroutine / int dgemm_(char transa, char transb, integer m, integer *
Chris@202	17 n, integer k, doublereal alpha, doublereal a, integer lda,
Chris@202	18 doublereal b, integer ldb, doublereal beta, doublereal c__,
Chris@202	19 integer *ldc)
Chris@202	20 {
Chris@202	21 /* System generated locals */
Chris@202	22 integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2,
Chris@202	23 i__3;
Chris@202	24
Chris@202	25 /* Local variables */
Chris@202	26 integer i__, j, l, info;
Chris@202	27 logical nota, notb;
Chris@202	28 doublereal temp;
Chris@202	29 integer ncola;
Chris@202	30 extern logical lsame_(char , char );
Chris@202	31 integer nrowa, nrowb;
Chris@202	32 extern /* Subroutine / int xerbla_(char , integer *);
Chris@202	33
Chris@202	34 /* .. Scalar Arguments .. */
Chris@202	35 /* .. */
Chris@202	36 /* .. Array Arguments .. */
Chris@202	37 /* .. */
Chris@202	38
Chris@202	39 /* Purpose */
Chris@202	40 /* ======= */
Chris@202	41
Chris@202	42 /* DGEMM performs one of the matrix-matrix operations */
Chris@202	43
Chris@202	44 /* C := alphaop( A )op( B ) + betaC, /
Chris@202	45
Chris@202	46 /* where op( X ) is one of */
Chris@202	47
Chris@202	48 /* op( X ) = X or op( X ) = X', */
Chris@202	49
Chris@202	50 /* alpha and beta are scalars, and A, B and C are matrices, with op( A ) */
Chris@202	51 /* an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. */
Chris@202	52
Chris@202	53 /* Arguments */
Chris@202	54 /* ========== */
Chris@202	55
Chris@202	56 /* TRANSA - CHARACTER1. /
Chris@202	57 /* On entry, TRANSA specifies the form of op( A ) to be used in */
Chris@202	58 /* the matrix multiplication as follows: */
Chris@202	59
Chris@202	60 /* TRANSA = 'N' or 'n', op( A ) = A. */
Chris@202	61
Chris@202	62 /* TRANSA = 'T' or 't', op( A ) = A'. */
Chris@202	63
Chris@202	64 /* TRANSA = 'C' or 'c', op( A ) = A'. */
Chris@202	65
Chris@202	66 /* Unchanged on exit. */
Chris@202	67
Chris@202	68 /* TRANSB - CHARACTER1. /
Chris@202	69 /* On entry, TRANSB specifies the form of op( B ) to be used in */
Chris@202	70 /* the matrix multiplication as follows: */
Chris@202	71
Chris@202	72 /* TRANSB = 'N' or 'n', op( B ) = B. */
Chris@202	73
Chris@202	74 /* TRANSB = 'T' or 't', op( B ) = B'. */
Chris@202	75
Chris@202	76 /* TRANSB = 'C' or 'c', op( B ) = B'. */
Chris@202	77
Chris@202	78 /* Unchanged on exit. */
Chris@202	79
Chris@202	80 /* M - INTEGER. */
Chris@202	81 /* On entry, M specifies the number of rows of the matrix */
Chris@202	82 /* op( A ) and of the matrix C. M must be at least zero. */
Chris@202	83 /* Unchanged on exit. */
Chris@202	84
Chris@202	85 /* N - INTEGER. */
Chris@202	86 /* On entry, N specifies the number of columns of the matrix */
Chris@202	87 /* op( B ) and the number of columns of the matrix C. N must be */
Chris@202	88 /* at least zero. */
Chris@202	89 /* Unchanged on exit. */
Chris@202	90
Chris@202	91 /* K - INTEGER. */
Chris@202	92 /* On entry, K specifies the number of columns of the matrix */
Chris@202	93 /* op( A ) and the number of rows of the matrix op( B ). K must */
Chris@202	94 /* be at least zero. */
Chris@202	95 /* Unchanged on exit. */
Chris@202	96
Chris@202	97 /* ALPHA - DOUBLE PRECISION. */
Chris@202	98 /* On entry, ALPHA specifies the scalar alpha. */
Chris@202	99 /* Unchanged on exit. */
Chris@202	100
Chris@202	101 /* A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is */
Chris@202	102 /* k when TRANSA = 'N' or 'n', and is m otherwise. */
Chris@202	103 /* Before entry with TRANSA = 'N' or 'n', the leading m by k */
Chris@202	104 /* part of the array A must contain the matrix A, otherwise */
Chris@202	105 /* the leading k by m part of the array A must contain the */
Chris@202	106 /* matrix A. */
Chris@202	107 /* Unchanged on exit. */
Chris@202	108
Chris@202	109 /* LDA - INTEGER. */
Chris@202	110 /* On entry, LDA specifies the first dimension of A as declared */
Chris@202	111 /* in the calling (sub) program. When TRANSA = 'N' or 'n' then */
Chris@202	112 /* LDA must be at least max( 1, m ), otherwise LDA must be at */
Chris@202	113 /* least max( 1, k ). */
Chris@202	114 /* Unchanged on exit. */
Chris@202	115
Chris@202	116 /* B - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is */
Chris@202	117 /* n when TRANSB = 'N' or 'n', and is k otherwise. */
Chris@202	118 /* Before entry with TRANSB = 'N' or 'n', the leading k by n */
Chris@202	119 /* part of the array B must contain the matrix B, otherwise */
Chris@202	120 /* the leading n by k part of the array B must contain the */
Chris@202	121 /* matrix B. */
Chris@202	122 /* Unchanged on exit. */
Chris@202	123
Chris@202	124 /* LDB - INTEGER. */
Chris@202	125 /* On entry, LDB specifies the first dimension of B as declared */
Chris@202	126 /* in the calling (sub) program. When TRANSB = 'N' or 'n' then */
Chris@202	127 /* LDB must be at least max( 1, k ), otherwise LDB must be at */
Chris@202	128 /* least max( 1, n ). */
Chris@202	129 /* Unchanged on exit. */
Chris@202	130
Chris@202	131 /* BETA - DOUBLE PRECISION. */
Chris@202	132 /* On entry, BETA specifies the scalar beta. When BETA is */
Chris@202	133 /* supplied as zero then C need not be set on input. */
Chris@202	134 /* Unchanged on exit. */
Chris@202	135
Chris@202	136 /* C - DOUBLE PRECISION array of DIMENSION ( LDC, n ). */
Chris@202	137 /* Before entry, the leading m by n part of the array C must */
Chris@202	138 /* contain the matrix C, except when beta is zero, in which */
Chris@202	139 /* case C need not be set on entry. */
Chris@202	140 /* On exit, the array C is overwritten by the m by n matrix */
Chris@202	141 /* ( alphaop( A )op( B ) + betaC ). /
Chris@202	142
Chris@202	143 /* LDC - INTEGER. */
Chris@202	144 /* On entry, LDC specifies the first dimension of C as declared */
Chris@202	145 /* in the calling (sub) program. LDC must be at least */
Chris@202	146 /* max( 1, m ). */
Chris@202	147 /* Unchanged on exit. */
Chris@202	148
Chris@202	149
Chris@202	150 /* Level 3 Blas routine. */
Chris@202	151
Chris@202	152 /* -- Written on 8-February-1989. */
Chris@202	153 /* Jack Dongarra, Argonne National Laboratory. */
Chris@202	154 /* Iain Duff, AERE Harwell. */
Chris@202	155 /* Jeremy Du Croz, Numerical Algorithms Group Ltd. */
Chris@202	156 /* Sven Hammarling, Numerical Algorithms Group Ltd. */
Chris@202	157
Chris@202	158
Chris@202	159 /* .. External Functions .. */
Chris@202	160 /* .. */
Chris@202	161 /* .. External Subroutines .. */
Chris@202	162 /* .. */
Chris@202	163 /* .. Intrinsic Functions .. */
Chris@202	164 /* .. */
Chris@202	165 /* .. Local Scalars .. */
Chris@202	166 /* .. */
Chris@202	167 /* .. Parameters .. */
Chris@202	168 /* .. */
Chris@202	169
Chris@202	170 /* Set NOTA and NOTB as true if A and B respectively are not */
Chris@202	171 /* transposed and set NROWA, NCOLA and NROWB as the number of rows */
Chris@202	172 /* and columns of A and the number of rows of B respectively. */
Chris@202	173
Chris@202	174 /* Parameter adjustments */
Chris@202	175 a_dim1 = *lda;
Chris@202	176 a_offset = 1 + a_dim1;
Chris@202	177 a -= a_offset;
Chris@202	178 b_dim1 = *ldb;
Chris@202	179 b_offset = 1 + b_dim1;
Chris@202	180 b -= b_offset;
Chris@202	181 c_dim1 = *ldc;
Chris@202	182 c_offset = 1 + c_dim1;
Chris@202	183 c__ -= c_offset;
Chris@202	184
Chris@202	185 /* Function Body */
Chris@202	186 nota = lsame_(transa, "N");
Chris@202	187 notb = lsame_(transb, "N");
Chris@202	188 if (nota) {
Chris@202	189 nrowa = *m;
Chris@202	190 ncola = *k;
Chris@202	191 } else {
Chris@202	192 nrowa = *k;
Chris@202	193 ncola = *m;
Chris@202	194 }
Chris@202	195 if (notb) {
Chris@202	196 nrowb = *k;
Chris@202	197 } else {
Chris@202	198 nrowb = *n;
Chris@202	199 }
Chris@202	200
Chris@202	201 /* Test the input parameters. */
Chris@202	202
Chris@202	203 info = 0;
Chris@202	204 if (! nota && ! lsame_(transa, "C") && ! lsame_(
Chris@202	205 transa, "T")) {
Chris@202	206 info = 1;
Chris@202	207 } else if (! notb && ! lsame_(transb, "C") && !
Chris@202	208 lsame_(transb, "T")) {
Chris@202	209 info = 2;
Chris@202	210 } else if (*m < 0) {
Chris@202	211 info = 3;
Chris@202	212 } else if (*n < 0) {
Chris@202	213 info = 4;
Chris@202	214 } else if (*k < 0) {
Chris@202	215 info = 5;
Chris@202	216 } else if (*lda < max(1,nrowa)) {
Chris@202	217 info = 8;
Chris@202	218 } else if (*ldb < max(1,nrowb)) {
Chris@202	219 info = 10;
Chris@202	220 } else if (ldc < max(1,m)) {
Chris@202	221 info = 13;
Chris@202	222 }
Chris@202	223 if (info != 0) {
Chris@202	224 xerbla_("DGEMM ", &info);
Chris@202	225 return 0;
Chris@202	226 }
Chris@202	227
Chris@202	228 /* Quick return if possible. */
Chris@202	229
Chris@202	230 if (m == 0 \|\| n == 0 \|\| (alpha == 0. \|\| k == 0) && *beta == 1.) {
Chris@202	231 return 0;
Chris@202	232 }
Chris@202	233
Chris@202	234 /* And if alpha.eq.zero. */
Chris@202	235
Chris@202	236 if (*alpha == 0.) {
Chris@202	237 if (*beta == 0.) {
Chris@202	238 i__1 = *n;
Chris@202	239 for (j = 1; j <= i__1; ++j) {
Chris@202	240 i__2 = *m;
Chris@202	241 for (i__ = 1; i__ <= i__2; ++i__) {
Chris@202	242 c__[i__ + j * c_dim1] = 0.;
Chris@202	243 /* L10: */
Chris@202	244 }
Chris@202	245 /* L20: */
Chris@202	246 }
Chris@202	247 } else {
Chris@202	248 i__1 = *n;
Chris@202	249 for (j = 1; j <= i__1; ++j) {
Chris@202	250 i__2 = *m;
Chris@202	251 for (i__ = 1; i__ <= i__2; ++i__) {
Chris@202	252 c__[i__ + j * c_dim1] = beta c__[i__ + j * c_dim1];
Chris@202	253 /* L30: */
Chris@202	254 }
Chris@202	255 /* L40: */
Chris@202	256 }
Chris@202	257 }
Chris@202	258 return 0;
Chris@202	259 }
Chris@202	260
Chris@202	261 /* Start the operations. */
Chris@202	262
Chris@202	263 if (notb) {
Chris@202	264 if (nota) {
Chris@202	265
Chris@202	266 /* Form C := alphaAB + betaC. /
Chris@202	267
Chris@202	268 i__1 = *n;
Chris@202	269 for (j = 1; j <= i__1; ++j) {
Chris@202	270 if (*beta == 0.) {
Chris@202	271 i__2 = *m;
Chris@202	272 for (i__ = 1; i__ <= i__2; ++i__) {
Chris@202	273 c__[i__ + j * c_dim1] = 0.;
Chris@202	274 /* L50: */
Chris@202	275 }
Chris@202	276 } else if (*beta != 1.) {
Chris@202	277 i__2 = *m;
Chris@202	278 for (i__ = 1; i__ <= i__2; ++i__) {
Chris@202	279 c__[i__ + j * c_dim1] = beta c__[i__ + j * c_dim1];
Chris@202	280 /* L60: */
Chris@202	281 }
Chris@202	282 }
Chris@202	283 i__2 = *k;
Chris@202	284 for (l = 1; l <= i__2; ++l) {
Chris@202	285 if (b[l + j * b_dim1] != 0.) {
Chris@202	286 temp = alpha b[l + j * b_dim1];
Chris@202	287 i__3 = *m;
Chris@202	288 for (i__ = 1; i__ <= i__3; ++i__) {
Chris@202	289 c__[i__ + j * c_dim1] += temp * a[i__ + l *
Chris@202	290 a_dim1];
Chris@202	291 /* L70: */
Chris@202	292 }
Chris@202	293 }
Chris@202	294 /* L80: */
Chris@202	295 }
Chris@202	296 /* L90: */
Chris@202	297 }
Chris@202	298 } else {
Chris@202	299
Chris@202	300 /* Form C := alphaA'B + betaC /
Chris@202	301
Chris@202	302 i__1 = *n;
Chris@202	303 for (j = 1; j <= i__1; ++j) {
Chris@202	304 i__2 = *m;
Chris@202	305 for (i__ = 1; i__ <= i__2; ++i__) {
Chris@202	306 temp = 0.;
Chris@202	307 i__3 = *k;
Chris@202	308 for (l = 1; l <= i__3; ++l) {
Chris@202	309 temp += a[l + i__ * a_dim1] * b[l + j * b_dim1];
Chris@202	310 /* L100: */
Chris@202	311 }
Chris@202	312 if (*beta == 0.) {
Chris@202	313 c__[i__ + j * c_dim1] = alpha temp;
Chris@202	314 } else {
Chris@202	315 c__[i__ + j * c_dim1] = alpha temp + beta c__[
Chris@202	316 i__ + j * c_dim1];
Chris@202	317 }
Chris@202	318 /* L110: */
Chris@202	319 }
Chris@202	320 /* L120: */
Chris@202	321 }
Chris@202	322 }
Chris@202	323 } else {
Chris@202	324 if (nota) {
Chris@202	325
Chris@202	326 /* Form C := alphaAB' + betaC /
Chris@202	327
Chris@202	328 i__1 = *n;
Chris@202	329 for (j = 1; j <= i__1; ++j) {
Chris@202	330 if (*beta == 0.) {
Chris@202	331 i__2 = *m;
Chris@202	332 for (i__ = 1; i__ <= i__2; ++i__) {
Chris@202	333 c__[i__ + j * c_dim1] = 0.;
Chris@202	334 /* L130: */
Chris@202	335 }
Chris@202	336 } else if (*beta != 1.) {
Chris@202	337 i__2 = *m;
Chris@202	338 for (i__ = 1; i__ <= i__2; ++i__) {
Chris@202	339 c__[i__ + j * c_dim1] = beta c__[i__ + j * c_dim1];
Chris@202	340 /* L140: */
Chris@202	341 }
Chris@202	342 }
Chris@202	343 i__2 = *k;
Chris@202	344 for (l = 1; l <= i__2; ++l) {
Chris@202	345 if (b[j + l * b_dim1] != 0.) {
Chris@202	346 temp = alpha b[j + l * b_dim1];
Chris@202	347 i__3 = *m;
Chris@202	348 for (i__ = 1; i__ <= i__3; ++i__) {
Chris@202	349 c__[i__ + j * c_dim1] += temp * a[i__ + l *
Chris@202	350 a_dim1];
Chris@202	351 /* L150: */
Chris@202	352 }
Chris@202	353 }
Chris@202	354 /* L160: */
Chris@202	355 }
Chris@202	356 /* L170: */
Chris@202	357 }
Chris@202	358 } else {
Chris@202	359
Chris@202	360 /* Form C := alphaA'B' + betaC /
Chris@202	361
Chris@202	362 i__1 = *n;
Chris@202	363 for (j = 1; j <= i__1; ++j) {
Chris@202	364 i__2 = *m;
Chris@202	365 for (i__ = 1; i__ <= i__2; ++i__) {
Chris@202	366 temp = 0.;
Chris@202	367 i__3 = *k;
Chris@202	368 for (l = 1; l <= i__3; ++l) {
Chris@202	369 temp += a[l + i__ * a_dim1] * b[j + l * b_dim1];
Chris@202	370 /* L180: */
Chris@202	371 }
Chris@202	372 if (*beta == 0.) {
Chris@202	373 c__[i__ + j * c_dim1] = alpha temp;
Chris@202	374 } else {
Chris@202	375 c__[i__ + j * c_dim1] = alpha temp + beta c__[
Chris@202	376 i__ + j * c_dim1];
Chris@202	377 }
Chris@202	378 /* L190: */
Chris@202	379 }
Chris@202	380 /* L200: */
Chris@202	381 }
Chris@202	382 }
Chris@202	383 }
Chris@202	384
Chris@202	385 return 0;
Chris@202	386
Chris@202	387 /* End of DGEMM . */
Chris@202	388
Chris@202	389 } /* dgemm_ */

Mercurial > hg > qm-dsp

annotate ext/cblas/src/dgemm.c @ 202:45330e0d2819 clapack-included