annotate build/linux/amd64/atlas/cmm.h @ 136:011d4fc47ebf

* Some Makefile bits
author Chris Cannam <c.cannam@qmul.ac.uk>
date Wed, 09 Feb 2011 14:26:13 +0000
parents ddea89113517
children
rev   line source
c@64 1 #ifndef CMM_H
c@64 2 #define CMM_H
c@64 3
/*
 * Blocking constants for this build.
 * MB, NB and KB are all 72. The remaining values are precomputed products of
 * them, written as plain literals:
 *   NBNB = MBNB = MBKB = NBKB = 72 * 72 = 5184
 *   NB2   = 2 * 72   = 144
 *   NBNB2 = 2 * 5184 = 10368
 * ATL_mmMULADD is defined with no value, so it can only be tested with
 * #ifdef / defined().
 * NOTE(review): the ATL_mm* names look like kernel tuning parameters
 * (multiply-add flag, latency, M/N/K unroll factors) -- confirm against the
 * ATLAS sources; nothing in the visible part of this header reads them.
 * MB, NB, KB and friends are short, unprefixed macro names, so they will
 * silently replace identically named identifiers in any including file.
 */
c@64 4 #define ATL_mmMULADD
c@64 5 #define ATL_mmLAT 1
c@64 6 #define ATL_mmMU 12
c@64 7 #define ATL_mmNU 1
c@64 8 #define ATL_mmKU 72
c@64 9 #define MB 72
c@64 10 #define NB 72
c@64 11 #define KB 72
c@64 12 #define NBNB 5184
c@64 13 #define MBNB 5184
c@64 14 #define MBKB 5184
c@64 15 #define NBKB 5184
c@64 16 #define NB2 144
c@64 17 #define NBNB2 10368
c@64 18
/*
 * Helpers that multiply or divide a count by the block size.
 * The literals 72 and 5184 repeat the values of NB and NBNB instead of
 * referencing those macros, so they have to be kept in sync with them.
 * The argument is parenthesized and expanded exactly once.
 * ATL_DivByNB uses plain C division, so it truncates for integer operands.
 */
c@64 19 #define ATL_MulByNB(N_) ((N_) * 72)
c@64 20 #define ATL_DivByNB(N_) ((N_) / 72)
c@64 21 #define ATL_MulByNBNB(N_) ((N_) * 5184)
/*
 * Prototypes for the three kernels called by the NBmm_b1 / NBmm_b0 / NBmm_bX
 * macros in this header. All three share one signature:
 *   M, N, K        - integer dimensions
 *   alpha, beta    - scalars of type TYPE
 *   A, lda / B, ldb - read-only (const) operand pointers with an int each
 *                     (lda/ldb/ldc are presumably leading dimensions -- confirm)
 *   C, ldc         - the only non-const pointer, i.e. the only operand the
 *                     kernel is allowed to write through
 * TYPE is not defined in this header; the including file must define it
 * before this point.
 * NOTE(review): the _a1 and _b0/_b1/_bX suffixes presumably mean the kernel is
 * specialized for alpha == 1 and beta == 0 / 1 / arbitrary -- confirm against
 * the kernel sources, which are not visible here.
 */
c@64 22 void ATL_cJIK72x72x72TN72x72x0_a1_b0(const int M, const int N, const int K, const TYPE alpha, const TYPE *A, const int lda, const TYPE *B, const int ldb, const TYPE beta, TYPE *C, const int ldc);
c@64 23 void ATL_cJIK72x72x72TN72x72x0_a1_b1(const int M, const int N, const int K, const TYPE alpha, const TYPE *A, const int lda, const TYPE *B, const int ldb, const TYPE beta, TYPE *C, const int ldc);
c@64 24 void ATL_cJIK72x72x72TN72x72x0_a1_bX(const int M, const int N, const int K, const TYPE alpha, const TYPE *A, const int lda, const TYPE *B, const int ldb, const TYPE beta, TYPE *C, const int ldc);
c@64 25
/*
 * NBmm_b1: expands to a brace block of four kernel calls that pair the two
 * halves of A (A_ and A_ + NBNB) with the two halves of B (B_ and B_ + NBNB),
 * writing alternately to C_ and C_ + 1:
 *   1. _bX kernel: A_         with B_         into C_,     beta = ATL_rnone
 *   2. _b1 kernel: A_         with B_ + NBNB  into C_ + 1, beta = ATL_rone
 *   3. _bX kernel: A_ + NBNB  with B_ + NBNB  into C_,     beta = ATL_rnone
 *   4. _b1 kernel: A_ + NBNB  with B_         into C_ + 1, beta = ATL_rone
 * The be_ argument is accepted but never used; the beta values are fixed.
 * ATL_rnone and ATL_rone are not defined in this header.
 *
 * NOTE(review): this looks like a complex block multiply assembled from four
 * real products, with the two parts of A and B stored NBNB elements apart and
 * the two parts of C interleaved (C_ and C_ + 1). If the kernel computes
 * C <- A*B + beta*C and ATL_rnone is -1, applying it to C_ twice leaves
 * (product 3) - (product 1) + original C_. Confirm against the kernel and the
 * ATL_r* definitions.
 *
 * Caveats visible from the expansion:
 *  - It is a bare { ... } block, not do { ... } while (0), so "NBmm_b1(...);"
 *    leaves an extra empty statement and is unsafe as the unbraced body of an
 *    if that has an else.
 *  - Every argument except be_ is expanded four times, so arguments with side
 *    effects are evaluated repeatedly.
 */
c@64 26 #define NBmm_b1(m_, n_, k_, al_, A_, lda_, B_, ldb_, be_, C_, ldc_) \
c@64 27 { \
c@64 28 ATL_cJIK72x72x72TN72x72x0_a1_bX(m_, n_, k_, al_, (A_), lda_, (B_), ldb_, ATL_rnone, C_, ldc_); \
c@64 29 ATL_cJIK72x72x72TN72x72x0_a1_b1(m_, n_, k_, al_, (A_), lda_, (B_)+NBNB, ldb_, ATL_rone, (C_)+1, ldc_); \
c@64 30 ATL_cJIK72x72x72TN72x72x0_a1_bX(m_, n_, k_, al_, (A_)+NBNB, lda_, (B_)+NBNB, ldb_, ATL_rnone, C_, ldc_); \
c@64 31 ATL_cJIK72x72x72TN72x72x0_a1_b1(m_, n_, k_, al_, (A_)+NBNB, lda_, (B_), ldb_, ATL_rone, (C_)+1, ldc_); \
c@64 32 }
/*
 * NBmm_b0: expands to a brace block of four kernel calls that pair the two
 * halves of A (A_ and A_ + NBNB) with the two halves of B (B_ and B_ + NBNB),
 * writing alternately to C_ and C_ + 1:
 *   1. _b0 kernel: A_         with B_         into C_,     beta = ATL_rzero
 *   2. _b0 kernel: A_         with B_ + NBNB  into C_ + 1, beta = ATL_rzero
 *   3. _bX kernel: A_ + NBNB  with B_ + NBNB  into C_,     beta = ATL_rnone
 *   4. _b1 kernel: A_ + NBNB  with B_         into C_ + 1, beta = ATL_rone
 * Calls 3 and 4 are the same as the last two calls of NBmm_b1; only the first
 * two differ, using the _b0 kernel with ATL_rzero.
 * The be_ argument is accepted but never used; the beta values are fixed.
 * ATL_rzero, ATL_rnone and ATL_rone are not defined in this header.
 *
 * NOTE(review): presumably the _b0 kernel ignores the incoming contents of C,
 * so calls 1 and 2 initialize both parts of C and calls 3 and 4 accumulate
 * into them -- confirm against the kernel sources.
 *
 * Caveats visible from the expansion:
 *  - It is a bare { ... } block, not do { ... } while (0), so "NBmm_b0(...);"
 *    leaves an extra empty statement and is unsafe as the unbraced body of an
 *    if that has an else.
 *  - Every argument except be_ is expanded four times, so arguments with side
 *    effects are evaluated repeatedly.
 */
c@64 33 #define NBmm_b0(m_, n_, k_, al_, A_, lda_, B_, ldb_, be_, C_, ldc_) \
c@64 34 { \
c@64 35 ATL_cJIK72x72x72TN72x72x0_a1_b0(m_, n_, k_, al_, (A_), lda_, (B_), ldb_, ATL_rzero, C_, ldc_); \
c@64 36 ATL_cJIK72x72x72TN72x72x0_a1_b0(m_, n_, k_, al_, (A_), lda_, (B_)+NBNB, ldb_, ATL_rzero, (C_)+1, ldc_); \
c@64 37 ATL_cJIK72x72x72TN72x72x0_a1_bX(m_, n_, k_, al_, (A_)+NBNB, lda_, (B_)+NBNB, ldb_, ATL_rnone, C_, ldc_); \
c@64 38 ATL_cJIK72x72x72TN72x72x0_a1_b1(m_, n_, k_, al_, (A_)+NBNB, lda_, (B_), ldb_, ATL_rone, (C_)+1, ldc_); \
c@64 39 }
/*
 * NBmm_bX: expands to a brace block of four kernel calls that pair the two
 * halves of A (A_ and A_ + NBNB) with the two halves of B (B_ and B_ + NBNB),
 * writing alternately to C_ and C_ + 1. Unlike NBmm_b1 and NBmm_b0, the
 * caller's be_ is actually used, in the first two calls:
 *   1. _bX kernel: A_         with B_         into C_,     beta = -(be_)
 *   2. _bX kernel: A_         with B_ + NBNB  into C_ + 1, beta = be_
 *   3. _bX kernel: A_ + NBNB  with B_ + NBNB  into C_,     beta = ATL_rnone
 *   4. _b1 kernel: A_ + NBNB  with B_         into C_ + 1, beta = ATL_rone
 * ATL_rnone and ATL_rone are not defined in this header.
 *
 * NOTE(review): if the kernel computes C <- A*B + beta*C and ATL_rnone is -1,
 * the -(be_) in call 1 followed by ATL_rnone in call 3 leaves
 * (product 3) - (product 1) + be_ * original C_. Confirm against the kernel
 * and the ATL_r* definitions.
 *
 * Caveats visible from the expansion:
 *  - It is a bare { ... } block, not do { ... } while (0), so "NBmm_bX(...);"
 *    leaves an extra empty statement and is unsafe as the unbraced body of an
 *    if that has an else.
 *  - be_ is expanded twice and every other argument four times, so arguments
 *    with side effects are evaluated repeatedly.
 */
c@64 40 #define NBmm_bX(m_, n_, k_, al_, A_, lda_, B_, ldb_, be_, C_, ldc_) \
c@64 41 { \
c@64 42 ATL_cJIK72x72x72TN72x72x0_a1_bX(m_, n_, k_, al_, (A_), lda_, (B_), ldb_, -(be_), C_, ldc_); \
c@64 43 ATL_cJIK72x72x72TN72x72x0_a1_bX(m_, n_, k_, al_, (A_), lda_, (B_)+NBNB, ldb_, be_, (C_)+1, ldc_); \
c@64 44 ATL_cJIK72x72x72TN72x72x0_a1_bX(m_, n_, k_, al_, (A_)+NBNB, lda_, (B_)+NBNB, ldb_, ATL_rnone, C_, ldc_); \
c@64 45 ATL_cJIK72x72x72TN72x72x0_a1_b1(m_, n_, k_, al_, (A_)+NBNB, lda_, (B_), ldb_, ATL_rone, (C_)+1, ldc_); \
c@64 46 }
/*
 * Object-like aliases: rNBmm_b1 / rNBmm_b0 / rNBmm_bX expand to the bare names
 * of the ATL_sJIK72x72x72TN72x72x0_a1_* functions, so they can be called or
 * used wherever a function name is valid.
 * NOTE(review): these targets use the ATL_s prefix, whereas the prototypes in
 * this header declare only the ATL_c-prefixed functions. Declarations for the
 * ATL_s symbols must therefore come from somewhere else before these aliases
 * are called -- confirm where. Presumably the leading "r" marks the real
 * (non-complex) kernel variants.
 */
c@64 47 #define rNBmm_b1 ATL_sJIK72x72x72TN72x72x0_a1_b1
c@64 48 #define rNBmm_b0 ATL_sJIK72x72x72TN72x72x0_a1_b0
c@64 49 #define rNBmm_bX ATL_sJIK72x72x72TN72x72x0_a1_bX
c@64 50
c@64 51 #endif