Chris@49: // Copyright (C) 2008-2013 NICTA (www.nicta.com.au) Chris@49: // Copyright (C) 2008-2013 Conrad Sanderson Chris@49: // Copyright (C) 2012 Ryan Curtin Chris@49: // Chris@49: // This Source Code Form is subject to the terms of the Mozilla Public Chris@49: // License, v. 2.0. If a copy of the MPL was not distributed with this Chris@49: // file, You can obtain one at http://mozilla.org/MPL/2.0/. Chris@49: Chris@49: Chris@49: //! \addtogroup op_strans Chris@49: //! @{ Chris@49: Chris@49: Chris@49: Chris@49: //! for tiny square matrices (size <= 4x4) Chris@49: template Chris@49: arma_hot Chris@49: inline Chris@49: void Chris@49: op_strans::apply_noalias_tinysq(Mat& out, const TA& A) Chris@49: { Chris@49: const eT* Am = A.memptr(); Chris@49: eT* outm = out.memptr(); Chris@49: Chris@49: switch(A.n_rows) Chris@49: { Chris@49: case 1: Chris@49: { Chris@49: outm[0] = Am[0]; Chris@49: } Chris@49: break; Chris@49: Chris@49: case 2: Chris@49: { Chris@49: outm[pos::n2] = Am[pos::n2]; Chris@49: outm[pos::n2] = Am[pos::n2]; Chris@49: Chris@49: outm[pos::n2] = Am[pos::n2]; Chris@49: outm[pos::n2] = Am[pos::n2]; Chris@49: } Chris@49: break; Chris@49: Chris@49: case 3: Chris@49: { Chris@49: outm[pos::n3] = Am[pos::n3]; Chris@49: outm[pos::n3] = Am[pos::n3]; Chris@49: outm[pos::n3] = Am[pos::n3]; Chris@49: Chris@49: outm[pos::n3] = Am[pos::n3]; Chris@49: outm[pos::n3] = Am[pos::n3]; Chris@49: outm[pos::n3] = Am[pos::n3]; Chris@49: Chris@49: outm[pos::n3] = Am[pos::n3]; Chris@49: outm[pos::n3] = Am[pos::n3]; Chris@49: outm[pos::n3] = Am[pos::n3]; Chris@49: } Chris@49: break; Chris@49: Chris@49: case 4: Chris@49: { Chris@49: outm[pos::n4] = Am[pos::n4]; Chris@49: outm[pos::n4] = Am[pos::n4]; Chris@49: outm[pos::n4] = Am[pos::n4]; Chris@49: outm[pos::n4] = Am[pos::n4]; Chris@49: Chris@49: outm[pos::n4] = Am[pos::n4]; Chris@49: outm[pos::n4] = Am[pos::n4]; Chris@49: outm[pos::n4] = Am[pos::n4]; Chris@49: outm[pos::n4] = Am[pos::n4]; Chris@49: Chris@49: outm[pos::n4] = Am[pos::n4]; Chris@49: outm[pos::n4] = Am[pos::n4]; Chris@49: outm[pos::n4] = Am[pos::n4]; Chris@49: outm[pos::n4] = Am[pos::n4]; Chris@49: Chris@49: outm[pos::n4] = Am[pos::n4]; Chris@49: outm[pos::n4] = Am[pos::n4]; Chris@49: outm[pos::n4] = Am[pos::n4]; Chris@49: outm[pos::n4] = Am[pos::n4]; Chris@49: } Chris@49: break; Chris@49: Chris@49: default: Chris@49: ; Chris@49: } Chris@49: Chris@49: } Chris@49: Chris@49: Chris@49: Chris@49: //! Immediate transpose of a dense matrix Chris@49: template Chris@49: arma_hot Chris@49: inline Chris@49: void Chris@49: op_strans::apply_noalias(Mat& out, const TA& A) Chris@49: { Chris@49: arma_extra_debug_sigprint(); Chris@49: Chris@49: const uword A_n_cols = A.n_cols; Chris@49: const uword A_n_rows = A.n_rows; Chris@49: Chris@49: out.set_size(A_n_cols, A_n_rows); Chris@49: Chris@49: if( (TA::is_row) || (TA::is_col) || (A_n_cols == 1) || (A_n_rows == 1) ) Chris@49: { Chris@49: arrayops::copy( out.memptr(), A.memptr(), A.n_elem ); Chris@49: } Chris@49: else Chris@49: { Chris@49: if( (A_n_rows <= 4) && (A_n_rows == A_n_cols) ) Chris@49: { Chris@49: op_strans::apply_noalias_tinysq(out, A); Chris@49: } Chris@49: else Chris@49: { Chris@49: for(uword k=0; k < A_n_cols; ++k) Chris@49: { Chris@49: uword i, j; Chris@49: Chris@49: const eT* colptr = A.colptr(k); Chris@49: Chris@49: for(i=0, j=1; j < A_n_rows; i+=2, j+=2) Chris@49: { Chris@49: const eT tmp_i = colptr[i]; Chris@49: const eT tmp_j = colptr[j]; Chris@49: Chris@49: out.at(k, i) = tmp_i; Chris@49: out.at(k, j) = tmp_j; Chris@49: } Chris@49: Chris@49: if(i < A_n_rows) Chris@49: { Chris@49: out.at(k, i) = colptr[i]; Chris@49: } Chris@49: } Chris@49: } Chris@49: } Chris@49: } Chris@49: Chris@49: Chris@49: Chris@49: template Chris@49: arma_hot Chris@49: inline Chris@49: void Chris@49: op_strans::apply(Mat& out, const TA& A) Chris@49: { Chris@49: arma_extra_debug_sigprint(); Chris@49: Chris@49: if(&out != &A) Chris@49: { Chris@49: op_strans::apply_noalias(out, A); Chris@49: } Chris@49: else Chris@49: { Chris@49: const uword n_rows = A.n_rows; Chris@49: const uword n_cols = A.n_cols; Chris@49: Chris@49: if(n_rows == n_cols) Chris@49: { Chris@49: arma_extra_debug_print("op_strans::apply(): doing in-place transpose of a square matrix"); Chris@49: Chris@49: const uword N = n_rows; Chris@49: Chris@49: for(uword k=0; k < N; ++k) Chris@49: { Chris@49: eT* colptr = out.colptr(k); Chris@49: Chris@49: uword i,j; Chris@49: Chris@49: for(i=(k+1), j=(k+2); j < N; i+=2, j+=2) Chris@49: { Chris@49: std::swap(out.at(k,i), colptr[i]); Chris@49: std::swap(out.at(k,j), colptr[j]); Chris@49: } Chris@49: Chris@49: if(i < N) Chris@49: { Chris@49: std::swap(out.at(k,i), colptr[i]); Chris@49: } Chris@49: } Chris@49: } Chris@49: else Chris@49: { Chris@49: Mat tmp; Chris@49: op_strans::apply_noalias(tmp, A); Chris@49: Chris@49: out.steal_mem(tmp); Chris@49: } Chris@49: } Chris@49: } Chris@49: Chris@49: Chris@49: Chris@49: template Chris@49: arma_hot Chris@49: inline Chris@49: void Chris@49: op_strans::apply_proxy(Mat& out, const T1& X) Chris@49: { Chris@49: arma_extra_debug_sigprint(); Chris@49: Chris@49: typedef typename T1::elem_type eT; Chris@49: Chris@49: const Proxy P(X); Chris@49: Chris@49: // allow detection of in-place transpose Chris@49: if( (is_Mat::stored_type>::value == true) && (Proxy::fake_mat == false) ) Chris@49: { Chris@49: const unwrap::stored_type> tmp(P.Q); Chris@49: Chris@49: op_strans::apply(out, tmp.M); Chris@49: } Chris@49: else Chris@49: { Chris@49: const uword n_rows = P.get_n_rows(); Chris@49: const uword n_cols = P.get_n_cols(); Chris@49: Chris@49: const bool is_alias = P.is_alias(out); Chris@49: Chris@49: if( (resolves_to_vector::value == true) && (Proxy::prefer_at_accessor == false) ) Chris@49: { Chris@49: if(is_alias == false) Chris@49: { Chris@49: out.set_size(n_cols, n_rows); Chris@49: Chris@49: eT* out_mem = out.memptr(); Chris@49: Chris@49: const uword n_elem = P.get_n_elem(); Chris@49: Chris@49: typename Proxy::ea_type Pea = P.get_ea(); Chris@49: Chris@49: uword i,j; Chris@49: for(i=0, j=1; j < n_elem; i+=2, j+=2) Chris@49: { Chris@49: const eT tmp_i = Pea[i]; Chris@49: const eT tmp_j = Pea[j]; Chris@49: Chris@49: out_mem[i] = tmp_i; Chris@49: out_mem[j] = tmp_j; Chris@49: } Chris@49: Chris@49: if(i < n_elem) Chris@49: { Chris@49: out_mem[i] = Pea[i]; Chris@49: } Chris@49: } Chris@49: else // aliasing Chris@49: { Chris@49: Mat out2(n_cols, n_rows); Chris@49: Chris@49: eT* out_mem = out2.memptr(); Chris@49: Chris@49: const uword n_elem = P.get_n_elem(); Chris@49: Chris@49: typename Proxy::ea_type Pea = P.get_ea(); Chris@49: Chris@49: uword i,j; Chris@49: for(i=0, j=1; j < n_elem; i+=2, j+=2) Chris@49: { Chris@49: const eT tmp_i = Pea[i]; Chris@49: const eT tmp_j = Pea[j]; Chris@49: Chris@49: out_mem[i] = tmp_i; Chris@49: out_mem[j] = tmp_j; Chris@49: } Chris@49: Chris@49: if(i < n_elem) Chris@49: { Chris@49: out_mem[i] = Pea[i]; Chris@49: } Chris@49: Chris@49: out.steal_mem(out2); Chris@49: } Chris@49: } Chris@49: else // general matrix transpose Chris@49: { Chris@49: if(is_alias == false) Chris@49: { Chris@49: out.set_size(n_cols, n_rows); Chris@49: Chris@49: for(uword k=0; k < n_cols; ++k) Chris@49: { Chris@49: uword i, j; Chris@49: Chris@49: for(i=0, j=1; j < n_rows; i+=2, j+=2) Chris@49: { Chris@49: const eT tmp_i = P.at(i,k); Chris@49: const eT tmp_j = P.at(j,k); Chris@49: Chris@49: out.at(k,i) = tmp_i; Chris@49: out.at(k,j) = tmp_j; Chris@49: } Chris@49: Chris@49: if(i < n_rows) Chris@49: { Chris@49: out.at(k,i) = P.at(i,k); Chris@49: } Chris@49: } Chris@49: } Chris@49: else // aliasing Chris@49: { Chris@49: Mat out2(n_cols, n_rows); Chris@49: Chris@49: for(uword k=0; k < n_cols; ++k) Chris@49: { Chris@49: uword i, j; Chris@49: Chris@49: for(i=0, j=1; j < n_rows; i+=2, j+=2) Chris@49: { Chris@49: const eT tmp_i = P.at(i,k); Chris@49: const eT tmp_j = P.at(j,k); Chris@49: Chris@49: out2.at(k,i) = tmp_i; Chris@49: out2.at(k,j) = tmp_j; Chris@49: } Chris@49: Chris@49: if(i < n_rows) Chris@49: { Chris@49: out2.at(k,i) = P.at(i,k); Chris@49: } Chris@49: } Chris@49: Chris@49: out.steal_mem(out2); Chris@49: } Chris@49: } Chris@49: } Chris@49: } Chris@49: Chris@49: Chris@49: Chris@49: template Chris@49: arma_hot Chris@49: inline Chris@49: void Chris@49: op_strans::apply(Mat& out, const Op& in) Chris@49: { Chris@49: arma_extra_debug_sigprint(); Chris@49: Chris@49: op_strans::apply_proxy(out, in.m); Chris@49: } Chris@49: Chris@49: Chris@49: Chris@49: // Chris@49: // op_strans2 Chris@49: Chris@49: Chris@49: Chris@49: //! for tiny square matrices (size <= 4x4) Chris@49: template Chris@49: arma_hot Chris@49: inline Chris@49: void Chris@49: op_strans2::apply_noalias_tinysq(Mat& out, const TA& A, const eT val) Chris@49: { Chris@49: const eT* Am = A.memptr(); Chris@49: eT* outm = out.memptr(); Chris@49: Chris@49: switch(A.n_rows) Chris@49: { Chris@49: case 1: Chris@49: { Chris@49: outm[0] = val * Am[0]; Chris@49: } Chris@49: break; Chris@49: Chris@49: case 2: Chris@49: { Chris@49: outm[pos::n2] = val * Am[pos::n2]; Chris@49: outm[pos::n2] = val * Am[pos::n2]; Chris@49: Chris@49: outm[pos::n2] = val * Am[pos::n2]; Chris@49: outm[pos::n2] = val * Am[pos::n2]; Chris@49: } Chris@49: break; Chris@49: Chris@49: case 3: Chris@49: { Chris@49: outm[pos::n3] = val * Am[pos::n3]; Chris@49: outm[pos::n3] = val * Am[pos::n3]; Chris@49: outm[pos::n3] = val * Am[pos::n3]; Chris@49: Chris@49: outm[pos::n3] = val * Am[pos::n3]; Chris@49: outm[pos::n3] = val * Am[pos::n3]; Chris@49: outm[pos::n3] = val * Am[pos::n3]; Chris@49: Chris@49: outm[pos::n3] = val * Am[pos::n3]; Chris@49: outm[pos::n3] = val * Am[pos::n3]; Chris@49: outm[pos::n3] = val * Am[pos::n3]; Chris@49: } Chris@49: break; Chris@49: Chris@49: case 4: Chris@49: { Chris@49: outm[pos::n4] = val * Am[pos::n4]; Chris@49: outm[pos::n4] = val * Am[pos::n4]; Chris@49: outm[pos::n4] = val * Am[pos::n4]; Chris@49: outm[pos::n4] = val * Am[pos::n4]; Chris@49: Chris@49: outm[pos::n4] = val * Am[pos::n4]; Chris@49: outm[pos::n4] = val * Am[pos::n4]; Chris@49: outm[pos::n4] = val * Am[pos::n4]; Chris@49: outm[pos::n4] = val * Am[pos::n4]; Chris@49: Chris@49: outm[pos::n4] = val * Am[pos::n4]; Chris@49: outm[pos::n4] = val * Am[pos::n4]; Chris@49: outm[pos::n4] = val * Am[pos::n4]; Chris@49: outm[pos::n4] = val * Am[pos::n4]; Chris@49: Chris@49: outm[pos::n4] = val * Am[pos::n4]; Chris@49: outm[pos::n4] = val * Am[pos::n4]; Chris@49: outm[pos::n4] = val * Am[pos::n4]; Chris@49: outm[pos::n4] = val * Am[pos::n4]; Chris@49: } Chris@49: break; Chris@49: Chris@49: default: Chris@49: ; Chris@49: } Chris@49: Chris@49: } Chris@49: Chris@49: Chris@49: Chris@49: template Chris@49: arma_hot Chris@49: inline Chris@49: void Chris@49: op_strans2::apply_noalias(Mat& out, const TA& A, const eT val) Chris@49: { Chris@49: arma_extra_debug_sigprint(); Chris@49: Chris@49: const uword A_n_cols = A.n_cols; Chris@49: const uword A_n_rows = A.n_rows; Chris@49: Chris@49: out.set_size(A_n_cols, A_n_rows); Chris@49: Chris@49: if( (TA::is_col) || (TA::is_row) || (A_n_cols == 1) || (A_n_rows == 1) ) Chris@49: { Chris@49: const uword N = A.n_elem; Chris@49: Chris@49: const eT* A_mem = A.memptr(); Chris@49: eT* out_mem = out.memptr(); Chris@49: Chris@49: uword i,j; Chris@49: for(i=0, j=1; j < N; i+=2, j+=2) Chris@49: { Chris@49: const eT tmp_i = A_mem[i]; Chris@49: const eT tmp_j = A_mem[j]; Chris@49: Chris@49: out_mem[i] = val * tmp_i; Chris@49: out_mem[j] = val * tmp_j; Chris@49: } Chris@49: Chris@49: if(i < N) Chris@49: { Chris@49: out_mem[i] = val * A_mem[i]; Chris@49: } Chris@49: } Chris@49: else Chris@49: { Chris@49: if( (A_n_rows <= 4) && (A_n_rows == A_n_cols) ) Chris@49: { Chris@49: op_strans2::apply_noalias_tinysq(out, A, val); Chris@49: } Chris@49: else Chris@49: { Chris@49: for(uword k=0; k < A_n_cols; ++k) Chris@49: { Chris@49: uword i, j; Chris@49: Chris@49: const eT* colptr = A.colptr(k); Chris@49: Chris@49: for(i=0, j=1; j < A_n_rows; i+=2, j+=2) Chris@49: { Chris@49: const eT tmp_i = colptr[i]; Chris@49: const eT tmp_j = colptr[j]; Chris@49: Chris@49: out.at(k, i) = val * tmp_i; Chris@49: out.at(k, j) = val * tmp_j; Chris@49: } Chris@49: Chris@49: if(i < A_n_rows) Chris@49: { Chris@49: out.at(k, i) = val * colptr[i]; Chris@49: } Chris@49: } Chris@49: } Chris@49: } Chris@49: } Chris@49: Chris@49: Chris@49: Chris@49: template Chris@49: arma_hot Chris@49: inline Chris@49: void Chris@49: op_strans2::apply(Mat& out, const TA& A, const eT val) Chris@49: { Chris@49: arma_extra_debug_sigprint(); Chris@49: Chris@49: if(&out != &A) Chris@49: { Chris@49: op_strans2::apply_noalias(out, A, val); Chris@49: } Chris@49: else Chris@49: { Chris@49: const uword n_rows = out.n_rows; Chris@49: const uword n_cols = out.n_cols; Chris@49: Chris@49: if(n_rows == n_cols) Chris@49: { Chris@49: arma_extra_debug_print("op_strans2::apply(): doing in-place transpose of a square matrix"); Chris@49: Chris@49: const uword N = n_rows; Chris@49: Chris@49: // TODO: do multiplication while swapping Chris@49: Chris@49: for(uword k=0; k < N; ++k) Chris@49: { Chris@49: eT* colptr = out.colptr(k); Chris@49: Chris@49: uword i,j; Chris@49: Chris@49: for(i=(k+1), j=(k+2); j < N; i+=2, j+=2) Chris@49: { Chris@49: std::swap(out.at(k,i), colptr[i]); Chris@49: std::swap(out.at(k,j), colptr[j]); Chris@49: } Chris@49: Chris@49: if(i < N) Chris@49: { Chris@49: std::swap(out.at(k,i), colptr[i]); Chris@49: } Chris@49: } Chris@49: Chris@49: arrayops::inplace_mul( out.memptr(), val, out.n_elem ); Chris@49: } Chris@49: else Chris@49: { Chris@49: Mat tmp; Chris@49: op_strans2::apply_noalias(tmp, A, val); Chris@49: Chris@49: out.steal_mem(tmp); Chris@49: } Chris@49: } Chris@49: } Chris@49: Chris@49: Chris@49: Chris@49: template Chris@49: arma_hot Chris@49: inline Chris@49: void Chris@49: op_strans2::apply_proxy(Mat& out, const T1& X, const typename T1::elem_type val) Chris@49: { Chris@49: arma_extra_debug_sigprint(); Chris@49: Chris@49: typedef typename T1::elem_type eT; Chris@49: Chris@49: const Proxy P(X); Chris@49: Chris@49: // allow detection of in-place transpose Chris@49: if( (is_Mat::stored_type>::value == true) && (Proxy::fake_mat == false) ) Chris@49: { Chris@49: const unwrap::stored_type> tmp(P.Q); Chris@49: Chris@49: op_strans2::apply(out, tmp.M, val); Chris@49: } Chris@49: else Chris@49: { Chris@49: const uword n_rows = P.get_n_rows(); Chris@49: const uword n_cols = P.get_n_cols(); Chris@49: Chris@49: const bool is_alias = P.is_alias(out); Chris@49: Chris@49: if( (resolves_to_vector::value == true) && (Proxy::prefer_at_accessor == false) ) Chris@49: { Chris@49: if(is_alias == false) Chris@49: { Chris@49: out.set_size(n_cols, n_rows); Chris@49: Chris@49: eT* out_mem = out.memptr(); Chris@49: Chris@49: const uword n_elem = P.get_n_elem(); Chris@49: Chris@49: typename Proxy::ea_type Pea = P.get_ea(); Chris@49: Chris@49: uword i,j; Chris@49: for(i=0, j=1; j < n_elem; i+=2, j+=2) Chris@49: { Chris@49: const eT tmp_i = Pea[i]; Chris@49: const eT tmp_j = Pea[j]; Chris@49: Chris@49: out_mem[i] = val * tmp_i; Chris@49: out_mem[j] = val * tmp_j; Chris@49: } Chris@49: Chris@49: if(i < n_elem) Chris@49: { Chris@49: out_mem[i] = val * Pea[i]; Chris@49: } Chris@49: } Chris@49: else // aliasing Chris@49: { Chris@49: Mat out2(n_cols, n_rows); Chris@49: Chris@49: eT* out_mem = out2.memptr(); Chris@49: Chris@49: const uword n_elem = P.get_n_elem(); Chris@49: Chris@49: typename Proxy::ea_type Pea = P.get_ea(); Chris@49: Chris@49: uword i,j; Chris@49: for(i=0, j=1; j < n_elem; i+=2, j+=2) Chris@49: { Chris@49: const eT tmp_i = Pea[i]; Chris@49: const eT tmp_j = Pea[j]; Chris@49: Chris@49: out_mem[i] = val * tmp_i; Chris@49: out_mem[j] = val * tmp_j; Chris@49: } Chris@49: Chris@49: if(i < n_elem) Chris@49: { Chris@49: out_mem[i] = val * Pea[i]; Chris@49: } Chris@49: Chris@49: out.steal_mem(out2); Chris@49: } Chris@49: } Chris@49: else // general matrix transpose Chris@49: { Chris@49: if(is_alias == false) Chris@49: { Chris@49: out.set_size(n_cols, n_rows); Chris@49: Chris@49: for(uword k=0; k < n_cols; ++k) Chris@49: { Chris@49: uword i, j; Chris@49: Chris@49: for(i=0, j=1; j < n_rows; i+=2, j+=2) Chris@49: { Chris@49: const eT tmp_i = P.at(i,k); Chris@49: const eT tmp_j = P.at(j,k); Chris@49: Chris@49: out.at(k,i) = val * tmp_i; Chris@49: out.at(k,j) = val * tmp_j; Chris@49: } Chris@49: Chris@49: if(i < n_rows) Chris@49: { Chris@49: out.at(k,i) = val * P.at(i,k); Chris@49: } Chris@49: } Chris@49: } Chris@49: else // aliasing Chris@49: { Chris@49: Mat out2(n_cols, n_rows); Chris@49: Chris@49: for(uword k=0; k < n_cols; ++k) Chris@49: { Chris@49: uword i, j; Chris@49: Chris@49: for(i=0, j=1; j < n_rows; i+=2, j+=2) Chris@49: { Chris@49: const eT tmp_i = P.at(i,k); Chris@49: const eT tmp_j = P.at(j,k); Chris@49: Chris@49: out2.at(k,i) = val * tmp_i; Chris@49: out2.at(k,j) = val * tmp_j; Chris@49: } Chris@49: Chris@49: if(i < n_rows) Chris@49: { Chris@49: out2.at(k,i) = val * P.at(i,k); Chris@49: } Chris@49: } Chris@49: Chris@49: out.steal_mem(out2); Chris@49: } Chris@49: } Chris@49: } Chris@49: } Chris@49: Chris@49: Chris@49: Chris@49: //! @}