annotate build/linux/amd64/atlas/zmm.h @ 172:40d53428bb85

Metadata bits
author Chris Cannam <c.cannam@qmul.ac.uk>
date Mon, 07 Sep 2015 12:14:55 +0100
parents ddea89113517
children
rev   line source
/*
 * Include guard and blocking constants for the matrix-multiply kernels
 * declared in this header.
 *
 * Relationships visible in the values below:
 *   MB = NB = KB = 48
 *   NBNB = MBNB = MBKB = NBKB = 48 * 48 = 2304
 *   NB2 = 2 * 48 = 96,  NBNB2 = 2 * 2304 = 4608
 *
 * NOTE(review): ATL_mmMULADD / ATL_mmLAT / ATL_mmMU / ATL_mmNU / ATL_mmKU
 * look like kernel tuning parameters (multiply-add flag, latency, M/N/K
 * unrolling); they are not referenced elsewhere in the visible part of this
 * header, so confirm their meaning against the code that consumes them.
 * NOTE(review): the path (build/.../atlas/zmm.h) suggests this file is
 * generated by the library build; hand edits may be overwritten - confirm.
 */
c@64 1 #ifndef ZMM_H
c@64 2 #define ZMM_H
c@64 3
c@64 4 #define ATL_mmMULADD
c@64 5 #define ATL_mmLAT 1
c@64 6 #define ATL_mmMU 12
c@64 7 #define ATL_mmNU 1
c@64 8 #define ATL_mmKU 48
c@64 9 #define MB 48
c@64 10 #define NB 48
c@64 11 #define KB 48
c@64 12 #define NBNB 2304
c@64 13 #define MBNB 2304
c@64 14 #define MBKB 2304
c@64 15 #define NBKB 2304
c@64 16 #define NB2 96
c@64 17 #define NBNB2 4608
c@64 18
/*
 * Arithmetic helpers with the block size written in as literals:
 *   ATL_MulByNB(N_)   -> (N_) * 48
 *   ATL_DivByNB(N_)   -> (N_) / 48   (C integer division when N_ is an integer)
 *   ATL_MulByNBNB(N_) -> (N_) * 2304
 * The literals 48 and 2304 equal the NB and NBNB definitions in this header;
 * they are not expressed in terms of those macros, so the two must be kept in
 * sync if either is ever changed.
 */
c@64 19 #define ATL_MulByNB(N_) ((N_) * 48)
c@64 20 #define ATL_DivByNB(N_) ((N_) / 48)
c@64 21 #define ATL_MulByNBNB(N_) ((N_) * 2304)
/*
 * Prototypes of the three kernels used by the NBmm_* macros in this header.
 * All three share one parameter list (M, N, K, alpha, A, lda, B, ldb, beta,
 * C, ldc) and differ only in the _b0 / _b1 / _bX name suffix.
 *
 * How the macros in this header call them:
 *   _b0 is always passed ATL_rzero as beta,
 *   _b1 is always passed ATL_rone as beta,
 *   _bX is passed ATL_rnone, be_ or -(be_).
 * NOTE(review): this suggests the suffix names a beta specialisation
 * (0, 1, arbitrary) and "a1" an alpha of 1 - confirm with the kernel sources.
 *
 * TYPE is not defined in the visible part of this header; it must be defined
 * before this header is included.
 */
c@64 22 void ATL_zJIK48x48x48TN48x48x0_a1_b0(const int M, const int N, const int K, const TYPE alpha, const TYPE *A, const int lda, const TYPE *B, const int ldb, const TYPE beta, TYPE *C, const int ldc);
c@64 23 void ATL_zJIK48x48x48TN48x48x0_a1_b1(const int M, const int N, const int K, const TYPE alpha, const TYPE *A, const int lda, const TYPE *B, const int ldb, const TYPE beta, TYPE *C, const int ldc);
c@64 24 void ATL_zJIK48x48x48TN48x48x0_a1_bX(const int M, const int N, const int K, const TYPE alpha, const TYPE *A, const int lda, const TYPE *B, const int ldb, const TYPE beta, TYPE *C, const int ldc);
c@64 25
/*
 * NBmm_b1: expands to a brace-enclosed block of four kernel calls.
 *
 *   call  kernel  A operand    B operand    beta passed  C operand
 *   1     _bX     (A_)         (B_)         ATL_rnone    C_
 *   2     _b1     (A_)         (B_)+NBNB    ATL_rone     (C_)+1
 *   3     _bX     (A_)+NBNB    (B_)+NBNB    ATL_rnone    C_
 *   4     _b1     (A_)+NBNB    (B_)         ATL_rone     (C_)+1
 *
 * The be_ parameter is accepted but never used in the expansion.
 * A_, B_ and C_ are each expanded several times, so arguments with side
 * effects would be evaluated more than once.
 * The expansion is a bare { ... } block, not do { ... } while (0): writing
 * "NBmm_b1(...);" as the unbraced body of an if that has an else will not
 * compile.
 *
 * Calls 1 and 3 both update C_ and calls 2 and 4 both update (C_)+1, so the
 * order of the calls must be preserved.
 * NOTE(review): presumably this builds a complex block product from real
 * kernels, with the two halves of A and B stored NBNB elements apart and C
 * interleaved (C_ vs (C_)+1) - confirm against the copy routines that
 * prepare A_ and B_.
 */
c@64 26 #define NBmm_b1(m_, n_, k_, al_, A_, lda_, B_, ldb_, be_, C_, ldc_) \
c@64 27 { \
c@64 28 ATL_zJIK48x48x48TN48x48x0_a1_bX(m_, n_, k_, al_, (A_), lda_, (B_), ldb_, ATL_rnone, C_, ldc_); \
c@64 29 ATL_zJIK48x48x48TN48x48x0_a1_b1(m_, n_, k_, al_, (A_), lda_, (B_)+NBNB, ldb_, ATL_rone, (C_)+1, ldc_); \
c@64 30 ATL_zJIK48x48x48TN48x48x0_a1_bX(m_, n_, k_, al_, (A_)+NBNB, lda_, (B_)+NBNB, ldb_, ATL_rnone, C_, ldc_); \
c@64 31 ATL_zJIK48x48x48TN48x48x0_a1_b1(m_, n_, k_, al_, (A_)+NBNB, lda_, (B_), ldb_, ATL_rone, (C_)+1, ldc_); \
c@64 32 }
/*
 * NBmm_b0: expands to a brace-enclosed block of four kernel calls.
 *
 *   call  kernel  A operand    B operand    beta passed  C operand
 *   1     _b0     (A_)         (B_)         ATL_rzero    C_
 *   2     _b0     (A_)         (B_)+NBNB    ATL_rzero    (C_)+1
 *   3     _bX     (A_)+NBNB    (B_)+NBNB    ATL_rnone    C_
 *   4     _b1     (A_)+NBNB    (B_)         ATL_rone     (C_)+1
 *
 * The first two calls use the _b0 kernel with ATL_rzero; the last two then
 * update the same C_ and (C_)+1 targets, so call order must be preserved.
 * The be_ parameter is accepted but never used in the expansion.
 * A_, B_ and C_ are each expanded several times (beware side effects), and
 * the expansion is a bare { ... } block rather than do { ... } while (0).
 * NOTE(review): presumably the beta == 0 variant of a complex block product
 * assembled from real kernels - confirm with the callers.
 */
c@64 33 #define NBmm_b0(m_, n_, k_, al_, A_, lda_, B_, ldb_, be_, C_, ldc_) \
c@64 34 { \
c@64 35 ATL_zJIK48x48x48TN48x48x0_a1_b0(m_, n_, k_, al_, (A_), lda_, (B_), ldb_, ATL_rzero, C_, ldc_); \
c@64 36 ATL_zJIK48x48x48TN48x48x0_a1_b0(m_, n_, k_, al_, (A_), lda_, (B_)+NBNB, ldb_, ATL_rzero, (C_)+1, ldc_); \
c@64 37 ATL_zJIK48x48x48TN48x48x0_a1_bX(m_, n_, k_, al_, (A_)+NBNB, lda_, (B_)+NBNB, ldb_, ATL_rnone, C_, ldc_); \
c@64 38 ATL_zJIK48x48x48TN48x48x0_a1_b1(m_, n_, k_, al_, (A_)+NBNB, lda_, (B_), ldb_, ATL_rone, (C_)+1, ldc_); \
c@64 39 }
/*
 * NBmm_bX: expands to a brace-enclosed block of four kernel calls.
 *
 *   call  kernel  A operand    B operand    beta passed  C operand
 *   1     _bX     (A_)         (B_)         -(be_)       C_
 *   2     _bX     (A_)         (B_)+NBNB    be_          (C_)+1
 *   3     _bX     (A_)+NBNB    (B_)+NBNB    ATL_rnone    C_
 *   4     _b1     (A_)+NBNB    (B_)         ATL_rone     (C_)+1
 *
 * This is the only NBmm_* macro in this header that uses its be_ argument:
 * negated for the first call on C_, unchanged for the first call on (C_)+1.
 * be_, A_, B_ and C_ are each expanded more than once (beware side effects).
 * Calls 1 and 3 both update C_ and calls 2 and 4 both update (C_)+1, so the
 * call order must be preserved.
 * The expansion is a bare { ... } block rather than do { ... } while (0).
 * NOTE(review): presumably the general-beta variant of a complex block
 * product assembled from real kernels - confirm with the callers.
 */
c@64 40 #define NBmm_bX(m_, n_, k_, al_, A_, lda_, B_, ldb_, be_, C_, ldc_) \
c@64 41 { \
c@64 42 ATL_zJIK48x48x48TN48x48x0_a1_bX(m_, n_, k_, al_, (A_), lda_, (B_), ldb_, -(be_), C_, ldc_); \
c@64 43 ATL_zJIK48x48x48TN48x48x0_a1_bX(m_, n_, k_, al_, (A_), lda_, (B_)+NBNB, ldb_, be_, (C_)+1, ldc_); \
c@64 44 ATL_zJIK48x48x48TN48x48x0_a1_bX(m_, n_, k_, al_, (A_)+NBNB, lda_, (B_)+NBNB, ldb_, ATL_rnone, C_, ldc_); \
c@64 45 ATL_zJIK48x48x48TN48x48x0_a1_b1(m_, n_, k_, al_, (A_)+NBNB, lda_, (B_), ldb_, ATL_rone, (C_)+1, ldc_); \
c@64 46 }
/*
 * Object-like aliases mapping rNBmm_b1 / rNBmm_b0 / rNBmm_bX onto the
 * ATL_dJIK48x48x48TN48x48x0_a1_* names (note the "d" prefix, versus the "z"
 * prefix of the prototypes in this header).
 * The ATL_dJIK... functions are not declared in the visible part of this
 * header, so any code using these aliases needs their prototypes from
 * elsewhere.
 * NOTE(review): the leading "r" presumably means "real" - confirm.
 */
c@64 47 #define rNBmm_b1 ATL_dJIK48x48x48TN48x48x0_a1_b1
c@64 48 #define rNBmm_b0 ATL_dJIK48x48x48TN48x48x0_a1_b0
c@64 49 #define rNBmm_bX ATL_dJIK48x48x48TN48x48x0_a1_bX
c@64 50
c@64 51 #endif /* ZMM_H */