c@64: #ifndef CMM_H c@64: #define CMM_H c@64: c@64: #define ATL_mmMULADD c@64: #define ATL_mmLAT 1 c@64: #define ATL_mmMU 12 c@64: #define ATL_mmNU 1 c@64: #define ATL_mmKU 72 c@64: #define MB 72 c@64: #define NB 72 c@64: #define KB 72 c@64: #define NBNB 5184 c@64: #define MBNB 5184 c@64: #define MBKB 5184 c@64: #define NBKB 5184 c@64: #define NB2 144 c@64: #define NBNB2 10368 c@64: c@64: #define ATL_MulByNB(N_) ((N_) * 72) c@64: #define ATL_DivByNB(N_) ((N_) / 72) c@64: #define ATL_MulByNBNB(N_) ((N_) * 5184) c@64: void ATL_cJIK72x72x72TN72x72x0_a1_b0(const int M, const int N, const int K, const TYPE alpha, const TYPE *A, const int lda, const TYPE *B, const int ldb, const TYPE beta, TYPE *C, const int ldc); c@64: void ATL_cJIK72x72x72TN72x72x0_a1_b1(const int M, const int N, const int K, const TYPE alpha, const TYPE *A, const int lda, const TYPE *B, const int ldb, const TYPE beta, TYPE *C, const int ldc); c@64: void ATL_cJIK72x72x72TN72x72x0_a1_bX(const int M, const int N, const int K, const TYPE alpha, const TYPE *A, const int lda, const TYPE *B, const int ldb, const TYPE beta, TYPE *C, const int ldc); c@64: c@64: #define NBmm_b1(m_, n_, k_, al_, A_, lda_, B_, ldb_, be_, C_, ldc_) \ c@64: { \ c@64: ATL_cJIK72x72x72TN72x72x0_a1_bX(m_, n_, k_, al_, (A_), lda_, (B_), ldb_, ATL_rnone, C_, ldc_); \ c@64: ATL_cJIK72x72x72TN72x72x0_a1_b1(m_, n_, k_, al_, (A_), lda_, (B_)+NBNB, ldb_, ATL_rone, (C_)+1, ldc_); \ c@64: ATL_cJIK72x72x72TN72x72x0_a1_bX(m_, n_, k_, al_, (A_)+NBNB, lda_, (B_)+NBNB, ldb_, ATL_rnone, C_, ldc_); \ c@64: ATL_cJIK72x72x72TN72x72x0_a1_b1(m_, n_, k_, al_, (A_)+NBNB, lda_, (B_), ldb_, ATL_rone, (C_)+1, ldc_); \ c@64: } c@64: #define NBmm_b0(m_, n_, k_, al_, A_, lda_, B_, ldb_, be_, C_, ldc_) \ c@64: { \ c@64: ATL_cJIK72x72x72TN72x72x0_a1_b0(m_, n_, k_, al_, (A_), lda_, (B_), ldb_, ATL_rzero, C_, ldc_); \ c@64: ATL_cJIK72x72x72TN72x72x0_a1_b0(m_, n_, k_, al_, (A_), lda_, (B_)+NBNB, ldb_, ATL_rzero, (C_)+1, ldc_); \ c@64: ATL_cJIK72x72x72TN72x72x0_a1_bX(m_, n_, k_, al_, (A_)+NBNB, lda_, (B_)+NBNB, ldb_, ATL_rnone, C_, ldc_); \ c@64: ATL_cJIK72x72x72TN72x72x0_a1_b1(m_, n_, k_, al_, (A_)+NBNB, lda_, (B_), ldb_, ATL_rone, (C_)+1, ldc_); \ c@64: } c@64: #define NBmm_bX(m_, n_, k_, al_, A_, lda_, B_, ldb_, be_, C_, ldc_) \ c@64: { \ c@64: ATL_cJIK72x72x72TN72x72x0_a1_bX(m_, n_, k_, al_, (A_), lda_, (B_), ldb_, -(be_), C_, ldc_); \ c@64: ATL_cJIK72x72x72TN72x72x0_a1_bX(m_, n_, k_, al_, (A_), lda_, (B_)+NBNB, ldb_, be_, (C_)+1, ldc_); \ c@64: ATL_cJIK72x72x72TN72x72x0_a1_bX(m_, n_, k_, al_, (A_)+NBNB, lda_, (B_)+NBNB, ldb_, ATL_rnone, C_, ldc_); \ c@64: ATL_cJIK72x72x72TN72x72x0_a1_b1(m_, n_, k_, al_, (A_)+NBNB, lda_, (B_), ldb_, ATL_rone, (C_)+1, ldc_); \ c@64: } c@64: #define rNBmm_b1 ATL_sJIK72x72x72TN72x72x0_a1_b1 c@64: #define rNBmm_b0 ATL_sJIK72x72x72TN72x72x0_a1_b0 c@64: #define rNBmm_bX ATL_sJIK72x72x72TN72x72x0_a1_bX c@64: c@64: #endif