view armadillo-3.900.4/include/armadillo_bits/op_strans_meat.hpp @ 84:55a047986812 tip

Update library URI so as not to be document-local
author Chris Cannam
date Wed, 22 Apr 2020 14:21:57 +0100
parents 1ec0e2823891
children
line wrap: on
line source
// Copyright (C) 2008-2013 NICTA (www.nicta.com.au)
// Copyright (C) 2008-2013 Conrad Sanderson
// Copyright (C) 2012 Ryan Curtin
// 
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.


//! \addtogroup op_strans
//! @{



//! for tiny square matrices (size <= 4x4)
template<typename eT, typename TA>
arma_hot
inline
void
op_strans::apply_noalias_tinysq(Mat<eT>& out, const TA& A)
  {
  const eT*   Am =   A.memptr();
        eT* outm = out.memptr();
  
  switch(A.n_rows)
    {
    case 1:
      {
      outm[0] = Am[0];
      }
      break;
      
    case 2:
      {
      outm[pos<false,0,0>::n2] = Am[pos<true,0,0>::n2];
      outm[pos<false,1,0>::n2] = Am[pos<true,1,0>::n2];
      
      outm[pos<false,0,1>::n2] = Am[pos<true,0,1>::n2];
      outm[pos<false,1,1>::n2] = Am[pos<true,1,1>::n2];
      }
      break;
    
    case 3:
      {
      outm[pos<false,0,0>::n3] = Am[pos<true,0,0>::n3];
      outm[pos<false,1,0>::n3] = Am[pos<true,1,0>::n3];
      outm[pos<false,2,0>::n3] = Am[pos<true,2,0>::n3];
      
      outm[pos<false,0,1>::n3] = Am[pos<true,0,1>::n3];
      outm[pos<false,1,1>::n3] = Am[pos<true,1,1>::n3];
      outm[pos<false,2,1>::n3] = Am[pos<true,2,1>::n3];
      
      outm[pos<false,0,2>::n3] = Am[pos<true,0,2>::n3];
      outm[pos<false,1,2>::n3] = Am[pos<true,1,2>::n3];
      outm[pos<false,2,2>::n3] = Am[pos<true,2,2>::n3];
      }
      break;
    
    case 4:
      {
      outm[pos<false,0,0>::n4] = Am[pos<true,0,0>::n4];
      outm[pos<false,1,0>::n4] = Am[pos<true,1,0>::n4];
      outm[pos<false,2,0>::n4] = Am[pos<true,2,0>::n4];
      outm[pos<false,3,0>::n4] = Am[pos<true,3,0>::n4];
      
      outm[pos<false,0,1>::n4] = Am[pos<true,0,1>::n4];
      outm[pos<false,1,1>::n4] = Am[pos<true,1,1>::n4];
      outm[pos<false,2,1>::n4] = Am[pos<true,2,1>::n4];
      outm[pos<false,3,1>::n4] = Am[pos<true,3,1>::n4];
      
      outm[pos<false,0,2>::n4] = Am[pos<true,0,2>::n4];
      outm[pos<false,1,2>::n4] = Am[pos<true,1,2>::n4];
      outm[pos<false,2,2>::n4] = Am[pos<true,2,2>::n4];
      outm[pos<false,3,2>::n4] = Am[pos<true,3,2>::n4];
      
      outm[pos<false,0,3>::n4] = Am[pos<true,0,3>::n4];
      outm[pos<false,1,3>::n4] = Am[pos<true,1,3>::n4];
      outm[pos<false,2,3>::n4] = Am[pos<true,2,3>::n4];
      outm[pos<false,3,3>::n4] = Am[pos<true,3,3>::n4];
      }
      break;
    
    default:
      ;
    }
  
  }



//! Immediate transpose of a dense matrix
template<typename eT, typename TA>
arma_hot
inline
void
op_strans::apply_noalias(Mat<eT>& out, const TA& A)
  {
  arma_extra_debug_sigprint();
  
  const uword A_n_cols = A.n_cols;
  const uword A_n_rows = A.n_rows;
  
  out.set_size(A_n_cols, A_n_rows);
  
  if( (TA::is_row) || (TA::is_col) || (A_n_cols == 1) || (A_n_rows == 1) )
    {
    arrayops::copy( out.memptr(), A.memptr(), A.n_elem );
    }
  else
    {
    if( (A_n_rows <= 4) && (A_n_rows == A_n_cols) )
      {
      op_strans::apply_noalias_tinysq(out, A);
      }
    else
      {
      for(uword k=0; k < A_n_cols; ++k)
        {
        uword i, j;
        
        const eT* colptr = A.colptr(k);
        
        for(i=0, j=1; j < A_n_rows; i+=2, j+=2)
          {
          const eT tmp_i = colptr[i];
          const eT tmp_j = colptr[j];
          
          out.at(k, i) = tmp_i;
          out.at(k, j) = tmp_j;
          }
        
        if(i < A_n_rows)
          {
          out.at(k, i) = colptr[i];
          }
        }
      }
    }
  }



template<typename eT, typename TA>
arma_hot
inline
void
op_strans::apply(Mat<eT>& out, const TA& A)
  {
  arma_extra_debug_sigprint();
  
  if(&out != &A)
    {
    op_strans::apply_noalias(out, A);
    }
  else
    {
    const uword n_rows = A.n_rows;
    const uword n_cols = A.n_cols;
    
    if(n_rows == n_cols)
      {
      arma_extra_debug_print("op_strans::apply(): doing in-place transpose of a square matrix");
      
      const uword N = n_rows;
      
      for(uword k=0; k < N; ++k)
        {
        eT* colptr = out.colptr(k);
        
        uword i,j;
        
        for(i=(k+1), j=(k+2); j < N; i+=2, j+=2)
          {
          std::swap(out.at(k,i), colptr[i]);
          std::swap(out.at(k,j), colptr[j]);
          }
        
        if(i < N)
          {
          std::swap(out.at(k,i), colptr[i]);
          }
        }
      }
    else
      {
      Mat<eT> tmp;
      op_strans::apply_noalias(tmp, A);
      
      out.steal_mem(tmp);
      }
    }
  }



template<typename T1>
arma_hot
inline
void
op_strans::apply_proxy(Mat<typename T1::elem_type>& out, const T1& X)
  {
  arma_extra_debug_sigprint();
  
  typedef typename T1::elem_type eT;
  
  const Proxy<T1> P(X);
  
  // allow detection of in-place transpose
  if( (is_Mat<typename Proxy<T1>::stored_type>::value == true) && (Proxy<T1>::fake_mat == false) )
    {
    const unwrap<typename Proxy<T1>::stored_type> tmp(P.Q);
    
    op_strans::apply(out, tmp.M);
    }
  else
    {
    const uword n_rows = P.get_n_rows();
    const uword n_cols = P.get_n_cols();
    
    const bool is_alias = P.is_alias(out);
    
    if( (resolves_to_vector<T1>::value == true) && (Proxy<T1>::prefer_at_accessor == false) )
      {
      if(is_alias == false)
        {
        out.set_size(n_cols, n_rows);
        
        eT* out_mem = out.memptr();
        
        const uword n_elem = P.get_n_elem();
        
        typename Proxy<T1>::ea_type Pea = P.get_ea();
        
        uword i,j;
        for(i=0, j=1; j < n_elem; i+=2, j+=2)
          {
          const eT tmp_i = Pea[i];
          const eT tmp_j = Pea[j];
          
          out_mem[i] = tmp_i;
          out_mem[j] = tmp_j;
          }
        
        if(i < n_elem)
          {
          out_mem[i] = Pea[i];
          }
        }
      else  // aliasing
        {
        Mat<eT> out2(n_cols, n_rows);
        
        eT* out_mem = out2.memptr();
        
        const uword n_elem = P.get_n_elem();
        
        typename Proxy<T1>::ea_type Pea = P.get_ea();
        
        uword i,j;
        for(i=0, j=1; j < n_elem; i+=2, j+=2)
          {
          const eT tmp_i = Pea[i];
          const eT tmp_j = Pea[j];
          
          out_mem[i] = tmp_i;
          out_mem[j] = tmp_j;
          }
        
        if(i < n_elem)
          {
          out_mem[i] = Pea[i];
          }
        
        out.steal_mem(out2);
        }
      }
    else   // general matrix transpose
      {
      if(is_alias == false)
        {
        out.set_size(n_cols, n_rows);
        
        for(uword k=0; k < n_cols; ++k)
          {
          uword i, j;
          
          for(i=0, j=1; j < n_rows; i+=2, j+=2)
            {
            const eT tmp_i = P.at(i,k);
            const eT tmp_j = P.at(j,k);
            
            out.at(k,i) = tmp_i;
            out.at(k,j) = tmp_j;
            }
          
          if(i < n_rows)
            {
            out.at(k,i) = P.at(i,k);
            }
          }
        }
      else // aliasing
        {
        Mat<eT> out2(n_cols, n_rows);
        
        for(uword k=0; k < n_cols; ++k)
          {
          uword i, j;
          
          for(i=0, j=1; j < n_rows; i+=2, j+=2)
            {
            const eT tmp_i = P.at(i,k);
            const eT tmp_j = P.at(j,k);
            
            out2.at(k,i) = tmp_i;
            out2.at(k,j) = tmp_j;
            }
          
          if(i < n_rows)
            {
            out2.at(k,i) = P.at(i,k);
            }
          }
        
        out.steal_mem(out2);
        }
      }
    }
  }



template<typename T1>
arma_hot
inline
void
op_strans::apply(Mat<typename T1::elem_type>& out, const Op<T1,op_strans>& in)
  {
  arma_extra_debug_sigprint();
  
  op_strans::apply_proxy(out, in.m);
  }



//
// op_strans2



//! for tiny square matrices (size <= 4x4)
template<typename eT, typename TA>
arma_hot
inline
void
op_strans2::apply_noalias_tinysq(Mat<eT>& out, const TA& A, const eT val)
  {
  const eT* Am   =   A.memptr();
        eT* outm = out.memptr();
  
  switch(A.n_rows)
    {
    case 1:
      {
      outm[0] = val * Am[0];
      }
      break;
      
    case 2:
      {
      outm[pos<false,0,0>::n2] = val * Am[pos<true,0,0>::n2];
      outm[pos<false,1,0>::n2] = val * Am[pos<true,1,0>::n2];
      
      outm[pos<false,0,1>::n2] = val * Am[pos<true,0,1>::n2];
      outm[pos<false,1,1>::n2] = val * Am[pos<true,1,1>::n2];
      }
      break;
    
    case 3:
      {
      outm[pos<false,0,0>::n3] = val * Am[pos<true,0,0>::n3];
      outm[pos<false,1,0>::n3] = val * Am[pos<true,1,0>::n3];
      outm[pos<false,2,0>::n3] = val * Am[pos<true,2,0>::n3];
      
      outm[pos<false,0,1>::n3] = val * Am[pos<true,0,1>::n3];
      outm[pos<false,1,1>::n3] = val * Am[pos<true,1,1>::n3];
      outm[pos<false,2,1>::n3] = val * Am[pos<true,2,1>::n3];
      
      outm[pos<false,0,2>::n3] = val * Am[pos<true,0,2>::n3];
      outm[pos<false,1,2>::n3] = val * Am[pos<true,1,2>::n3];
      outm[pos<false,2,2>::n3] = val * Am[pos<true,2,2>::n3];
      }
      break;
    
    case 4:
      {
      outm[pos<false,0,0>::n4] = val * Am[pos<true,0,0>::n4];
      outm[pos<false,1,0>::n4] = val * Am[pos<true,1,0>::n4];
      outm[pos<false,2,0>::n4] = val * Am[pos<true,2,0>::n4];
      outm[pos<false,3,0>::n4] = val * Am[pos<true,3,0>::n4];
      
      outm[pos<false,0,1>::n4] = val * Am[pos<true,0,1>::n4];
      outm[pos<false,1,1>::n4] = val * Am[pos<true,1,1>::n4];
      outm[pos<false,2,1>::n4] = val * Am[pos<true,2,1>::n4];
      outm[pos<false,3,1>::n4] = val * Am[pos<true,3,1>::n4];
      
      outm[pos<false,0,2>::n4] = val * Am[pos<true,0,2>::n4];
      outm[pos<false,1,2>::n4] = val * Am[pos<true,1,2>::n4];
      outm[pos<false,2,2>::n4] = val * Am[pos<true,2,2>::n4];
      outm[pos<false,3,2>::n4] = val * Am[pos<true,3,2>::n4];
      
      outm[pos<false,0,3>::n4] = val * Am[pos<true,0,3>::n4];
      outm[pos<false,1,3>::n4] = val * Am[pos<true,1,3>::n4];
      outm[pos<false,2,3>::n4] = val * Am[pos<true,2,3>::n4];
      outm[pos<false,3,3>::n4] = val * Am[pos<true,3,3>::n4];
      }
      break;
    
    default:
      ;
    }
  
  }



template<typename eT, typename TA>
arma_hot
inline
void
op_strans2::apply_noalias(Mat<eT>& out, const TA& A, const eT val)
  {
  arma_extra_debug_sigprint();
  
  const uword A_n_cols = A.n_cols;
  const uword A_n_rows = A.n_rows;
  
  out.set_size(A_n_cols, A_n_rows);
  
  if( (TA::is_col) || (TA::is_row) || (A_n_cols == 1) || (A_n_rows == 1) )
    {
    const uword N = A.n_elem;
    
    const eT*   A_mem =   A.memptr();
          eT* out_mem = out.memptr();
    
    uword i,j;
    for(i=0, j=1; j < N; i+=2, j+=2)
      {
      const eT tmp_i = A_mem[i];
      const eT tmp_j = A_mem[j];
      
      out_mem[i] = val * tmp_i;
      out_mem[j] = val * tmp_j;
      }
    
    if(i < N)
      {
      out_mem[i] = val * A_mem[i];
      }
    }
  else
    {
    if( (A_n_rows <= 4) && (A_n_rows == A_n_cols) )
      {
      op_strans2::apply_noalias_tinysq(out, A, val);
      }
    else
      {
      for(uword k=0; k < A_n_cols; ++k)
        {
        uword i, j;
        
        const eT* colptr = A.colptr(k);
        
        for(i=0, j=1; j < A_n_rows; i+=2, j+=2)
          {
          const eT tmp_i = colptr[i];
          const eT tmp_j = colptr[j];
          
          out.at(k, i) = val * tmp_i;
          out.at(k, j) = val * tmp_j;
          }
        
        if(i < A_n_rows)
          {
          out.at(k, i) = val * colptr[i];
          }
        }
      }
    }
  }



template<typename eT, typename TA>
arma_hot
inline
void
op_strans2::apply(Mat<eT>& out, const TA& A, const eT val)
  {
  arma_extra_debug_sigprint();
  
  if(&out != &A)
    {
    op_strans2::apply_noalias(out, A, val);
    }
  else
    {
    const uword n_rows = out.n_rows;
    const uword n_cols = out.n_cols;
    
    if(n_rows == n_cols)
      {
      arma_extra_debug_print("op_strans2::apply(): doing in-place transpose of a square matrix");
      
      const uword N = n_rows;
      
      // TODO: do multiplication while swapping
      
      for(uword k=0; k < N; ++k)
        {
        eT* colptr = out.colptr(k);
        
        uword i,j;
        
        for(i=(k+1), j=(k+2); j < N; i+=2, j+=2)
          {
          std::swap(out.at(k,i), colptr[i]);
          std::swap(out.at(k,j), colptr[j]);
          }
        
        if(i < N)
          {
          std::swap(out.at(k,i), colptr[i]);
          }
        }
      
      arrayops::inplace_mul( out.memptr(), val, out.n_elem );
      }
    else
      {
      Mat<eT> tmp;
      op_strans2::apply_noalias(tmp, A, val);
      
      out.steal_mem(tmp);
      }
    }
  }



template<typename T1>
arma_hot
inline
void
op_strans2::apply_proxy(Mat<typename T1::elem_type>& out, const T1& X, const typename T1::elem_type val)
  {
  arma_extra_debug_sigprint();
  
  typedef typename T1::elem_type eT;
  
  const Proxy<T1> P(X);
  
  // allow detection of in-place transpose
  if( (is_Mat<typename Proxy<T1>::stored_type>::value == true) && (Proxy<T1>::fake_mat == false) )
    {
    const unwrap<typename Proxy<T1>::stored_type> tmp(P.Q);
    
    op_strans2::apply(out, tmp.M, val);
    }
  else
    {
    const uword n_rows = P.get_n_rows();
    const uword n_cols = P.get_n_cols();
    
    const bool is_alias = P.is_alias(out);
    
    if( (resolves_to_vector<T1>::value == true) && (Proxy<T1>::prefer_at_accessor == false) )
      {
      if(is_alias == false)
        {
        out.set_size(n_cols, n_rows);
        
        eT* out_mem = out.memptr();
        
        const uword n_elem = P.get_n_elem();
        
        typename Proxy<T1>::ea_type Pea = P.get_ea();
        
        uword i,j;
        for(i=0, j=1; j < n_elem; i+=2, j+=2)
          {
          const eT tmp_i = Pea[i];
          const eT tmp_j = Pea[j];
          
          out_mem[i] = val * tmp_i;
          out_mem[j] = val * tmp_j;
          }
        
        if(i < n_elem)
          {
          out_mem[i] = val * Pea[i];
          }
        }
      else  // aliasing
        {
        Mat<eT> out2(n_cols, n_rows);
        
        eT* out_mem = out2.memptr();
        
        const uword n_elem = P.get_n_elem();
        
        typename Proxy<T1>::ea_type Pea = P.get_ea();
        
        uword i,j;
        for(i=0, j=1; j < n_elem; i+=2, j+=2)
          {
          const eT tmp_i = Pea[i];
          const eT tmp_j = Pea[j];
          
          out_mem[i] = val * tmp_i;
          out_mem[j] = val * tmp_j;
          }
        
        if(i < n_elem)
          {
          out_mem[i] = val * Pea[i];
          }
        
        out.steal_mem(out2);
        }
      }
    else   // general matrix transpose
      {
      if(is_alias == false)
        {
        out.set_size(n_cols, n_rows);
        
        for(uword k=0; k < n_cols; ++k)
          {
          uword i, j;
          
          for(i=0, j=1; j < n_rows; i+=2, j+=2)
            {
            const eT tmp_i = P.at(i,k);
            const eT tmp_j = P.at(j,k);
            
            out.at(k,i) = val * tmp_i;
            out.at(k,j) = val * tmp_j;
            }
          
          if(i < n_rows)
            {
            out.at(k,i) = val * P.at(i,k);
            }
          }
        }
      else // aliasing
        {
        Mat<eT> out2(n_cols, n_rows);
        
        for(uword k=0; k < n_cols; ++k)
          {
          uword i, j;
          
          for(i=0, j=1; j < n_rows; i+=2, j+=2)
            {
            const eT tmp_i = P.at(i,k);
            const eT tmp_j = P.at(j,k);
            
            out2.at(k,i) = val * tmp_i;
            out2.at(k,j) = val * tmp_j;
            }
          
          if(i < n_rows)
            {
            out2.at(k,i) = val * P.at(i,k);
            }
          }
        
        out.steal_mem(out2);
        }
      }
    }
  }



//! @}