diff armadillo-3.900.4/include/armadillo_bits/op_strans_meat.hpp @ 49:1ec0e2823891

Switch to using subrepo copies of qm-dsp, nnls-chroma, vamp-plugin-sdk; update Armadillo version; assume build without external BLAS/LAPACK
author Chris Cannam
date Thu, 13 Jun 2013 10:25:24 +0100
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/armadillo-3.900.4/include/armadillo_bits/op_strans_meat.hpp	Thu Jun 13 10:25:24 2013 +0100
@@ -0,0 +1,690 @@
+// Copyright (C) 2008-2013 NICTA (www.nicta.com.au)
+// Copyright (C) 2008-2013 Conrad Sanderson
+// Copyright (C) 2012 Ryan Curtin
+// 
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+//! \addtogroup op_strans
+//! @{
+
+
+
+//! for tiny square matrices (size <= 4x4)
+template<typename eT, typename TA>
+arma_hot
+inline
+void
+op_strans::apply_noalias_tinysq(Mat<eT>& out, const TA& A)
+  {
+  const eT*   Am =   A.memptr();
+        eT* outm = out.memptr();
+  
+  switch(A.n_rows)
+    {
+    case 1:
+      {
+      outm[0] = Am[0];
+      }
+      break;
+      
+    case 2:
+      {
+      outm[pos<false,0,0>::n2] = Am[pos<true,0,0>::n2];
+      outm[pos<false,1,0>::n2] = Am[pos<true,1,0>::n2];
+      
+      outm[pos<false,0,1>::n2] = Am[pos<true,0,1>::n2];
+      outm[pos<false,1,1>::n2] = Am[pos<true,1,1>::n2];
+      }
+      break;
+    
+    case 3:
+      {
+      outm[pos<false,0,0>::n3] = Am[pos<true,0,0>::n3];
+      outm[pos<false,1,0>::n3] = Am[pos<true,1,0>::n3];
+      outm[pos<false,2,0>::n3] = Am[pos<true,2,0>::n3];
+      
+      outm[pos<false,0,1>::n3] = Am[pos<true,0,1>::n3];
+      outm[pos<false,1,1>::n3] = Am[pos<true,1,1>::n3];
+      outm[pos<false,2,1>::n3] = Am[pos<true,2,1>::n3];
+      
+      outm[pos<false,0,2>::n3] = Am[pos<true,0,2>::n3];
+      outm[pos<false,1,2>::n3] = Am[pos<true,1,2>::n3];
+      outm[pos<false,2,2>::n3] = Am[pos<true,2,2>::n3];
+      }
+      break;
+    
+    case 4:
+      {
+      outm[pos<false,0,0>::n4] = Am[pos<true,0,0>::n4];
+      outm[pos<false,1,0>::n4] = Am[pos<true,1,0>::n4];
+      outm[pos<false,2,0>::n4] = Am[pos<true,2,0>::n4];
+      outm[pos<false,3,0>::n4] = Am[pos<true,3,0>::n4];
+      
+      outm[pos<false,0,1>::n4] = Am[pos<true,0,1>::n4];
+      outm[pos<false,1,1>::n4] = Am[pos<true,1,1>::n4];
+      outm[pos<false,2,1>::n4] = Am[pos<true,2,1>::n4];
+      outm[pos<false,3,1>::n4] = Am[pos<true,3,1>::n4];
+      
+      outm[pos<false,0,2>::n4] = Am[pos<true,0,2>::n4];
+      outm[pos<false,1,2>::n4] = Am[pos<true,1,2>::n4];
+      outm[pos<false,2,2>::n4] = Am[pos<true,2,2>::n4];
+      outm[pos<false,3,2>::n4] = Am[pos<true,3,2>::n4];
+      
+      outm[pos<false,0,3>::n4] = Am[pos<true,0,3>::n4];
+      outm[pos<false,1,3>::n4] = Am[pos<true,1,3>::n4];
+      outm[pos<false,2,3>::n4] = Am[pos<true,2,3>::n4];
+      outm[pos<false,3,3>::n4] = Am[pos<true,3,3>::n4];
+      }
+      break;
+    
+    default:
+      ;
+    }
+  
+  }
+
+
+
+//! Immediate transpose of a dense matrix
+template<typename eT, typename TA>
+arma_hot
+inline
+void
+op_strans::apply_noalias(Mat<eT>& out, const TA& A)
+  {
+  arma_extra_debug_sigprint();
+  
+  const uword A_n_cols = A.n_cols;
+  const uword A_n_rows = A.n_rows;
+  
+  out.set_size(A_n_cols, A_n_rows);
+  
+  if( (TA::is_row) || (TA::is_col) || (A_n_cols == 1) || (A_n_rows == 1) )
+    {
+    arrayops::copy( out.memptr(), A.memptr(), A.n_elem );
+    }
+  else
+    {
+    if( (A_n_rows <= 4) && (A_n_rows == A_n_cols) )
+      {
+      op_strans::apply_noalias_tinysq(out, A);
+      }
+    else
+      {
+      for(uword k=0; k < A_n_cols; ++k)
+        {
+        uword i, j;
+        
+        const eT* colptr = A.colptr(k);
+        
+        for(i=0, j=1; j < A_n_rows; i+=2, j+=2)
+          {
+          const eT tmp_i = colptr[i];
+          const eT tmp_j = colptr[j];
+          
+          out.at(k, i) = tmp_i;
+          out.at(k, j) = tmp_j;
+          }
+        
+        if(i < A_n_rows)
+          {
+          out.at(k, i) = colptr[i];
+          }
+        }
+      }
+    }
+  }
+
+
+
+template<typename eT, typename TA>
+arma_hot
+inline
+void
+op_strans::apply(Mat<eT>& out, const TA& A)
+  {
+  arma_extra_debug_sigprint();
+  
+  if(&out != &A)
+    {
+    op_strans::apply_noalias(out, A);
+    }
+  else
+    {
+    const uword n_rows = A.n_rows;
+    const uword n_cols = A.n_cols;
+    
+    if(n_rows == n_cols)
+      {
+      arma_extra_debug_print("op_strans::apply(): doing in-place transpose of a square matrix");
+      
+      const uword N = n_rows;
+      
+      for(uword k=0; k < N; ++k)
+        {
+        eT* colptr = out.colptr(k);
+        
+        uword i,j;
+        
+        for(i=(k+1), j=(k+2); j < N; i+=2, j+=2)
+          {
+          std::swap(out.at(k,i), colptr[i]);
+          std::swap(out.at(k,j), colptr[j]);
+          }
+        
+        if(i < N)
+          {
+          std::swap(out.at(k,i), colptr[i]);
+          }
+        }
+      }
+    else
+      {
+      Mat<eT> tmp;
+      op_strans::apply_noalias(tmp, A);
+      
+      out.steal_mem(tmp);
+      }
+    }
+  }
+
+
+
+template<typename T1>
+arma_hot
+inline
+void
+op_strans::apply_proxy(Mat<typename T1::elem_type>& out, const T1& X)
+  {
+  arma_extra_debug_sigprint();
+  
+  typedef typename T1::elem_type eT;
+  
+  const Proxy<T1> P(X);
+  
+  // allow detection of in-place transpose
+  if( (is_Mat<typename Proxy<T1>::stored_type>::value == true) && (Proxy<T1>::fake_mat == false) )
+    {
+    const unwrap<typename Proxy<T1>::stored_type> tmp(P.Q);
+    
+    op_strans::apply(out, tmp.M);
+    }
+  else
+    {
+    const uword n_rows = P.get_n_rows();
+    const uword n_cols = P.get_n_cols();
+    
+    const bool is_alias = P.is_alias(out);
+    
+    if( (resolves_to_vector<T1>::value == true) && (Proxy<T1>::prefer_at_accessor == false) )
+      {
+      if(is_alias == false)
+        {
+        out.set_size(n_cols, n_rows);
+        
+        eT* out_mem = out.memptr();
+        
+        const uword n_elem = P.get_n_elem();
+        
+        typename Proxy<T1>::ea_type Pea = P.get_ea();
+        
+        uword i,j;
+        for(i=0, j=1; j < n_elem; i+=2, j+=2)
+          {
+          const eT tmp_i = Pea[i];
+          const eT tmp_j = Pea[j];
+          
+          out_mem[i] = tmp_i;
+          out_mem[j] = tmp_j;
+          }
+        
+        if(i < n_elem)
+          {
+          out_mem[i] = Pea[i];
+          }
+        }
+      else  // aliasing
+        {
+        Mat<eT> out2(n_cols, n_rows);
+        
+        eT* out_mem = out2.memptr();
+        
+        const uword n_elem = P.get_n_elem();
+        
+        typename Proxy<T1>::ea_type Pea = P.get_ea();
+        
+        uword i,j;
+        for(i=0, j=1; j < n_elem; i+=2, j+=2)
+          {
+          const eT tmp_i = Pea[i];
+          const eT tmp_j = Pea[j];
+          
+          out_mem[i] = tmp_i;
+          out_mem[j] = tmp_j;
+          }
+        
+        if(i < n_elem)
+          {
+          out_mem[i] = Pea[i];
+          }
+        
+        out.steal_mem(out2);
+        }
+      }
+    else   // general matrix transpose
+      {
+      if(is_alias == false)
+        {
+        out.set_size(n_cols, n_rows);
+        
+        for(uword k=0; k < n_cols; ++k)
+          {
+          uword i, j;
+          
+          for(i=0, j=1; j < n_rows; i+=2, j+=2)
+            {
+            const eT tmp_i = P.at(i,k);
+            const eT tmp_j = P.at(j,k);
+            
+            out.at(k,i) = tmp_i;
+            out.at(k,j) = tmp_j;
+            }
+          
+          if(i < n_rows)
+            {
+            out.at(k,i) = P.at(i,k);
+            }
+          }
+        }
+      else // aliasing
+        {
+        Mat<eT> out2(n_cols, n_rows);
+        
+        for(uword k=0; k < n_cols; ++k)
+          {
+          uword i, j;
+          
+          for(i=0, j=1; j < n_rows; i+=2, j+=2)
+            {
+            const eT tmp_i = P.at(i,k);
+            const eT tmp_j = P.at(j,k);
+            
+            out2.at(k,i) = tmp_i;
+            out2.at(k,j) = tmp_j;
+            }
+          
+          if(i < n_rows)
+            {
+            out2.at(k,i) = P.at(i,k);
+            }
+          }
+        
+        out.steal_mem(out2);
+        }
+      }
+    }
+  }
+
+
+
+template<typename T1>
+arma_hot
+inline
+void
+op_strans::apply(Mat<typename T1::elem_type>& out, const Op<T1,op_strans>& in)
+  {
+  arma_extra_debug_sigprint();
+  
+  op_strans::apply_proxy(out, in.m);
+  }
+
+
+
+//
+// op_strans2
+
+
+
+//! for tiny square matrices (size <= 4x4)
+template<typename eT, typename TA>
+arma_hot
+inline
+void
+op_strans2::apply_noalias_tinysq(Mat<eT>& out, const TA& A, const eT val)
+  {
+  const eT* Am   =   A.memptr();
+        eT* outm = out.memptr();
+  
+  switch(A.n_rows)
+    {
+    case 1:
+      {
+      outm[0] = val * Am[0];
+      }
+      break;
+      
+    case 2:
+      {
+      outm[pos<false,0,0>::n2] = val * Am[pos<true,0,0>::n2];
+      outm[pos<false,1,0>::n2] = val * Am[pos<true,1,0>::n2];
+      
+      outm[pos<false,0,1>::n2] = val * Am[pos<true,0,1>::n2];
+      outm[pos<false,1,1>::n2] = val * Am[pos<true,1,1>::n2];
+      }
+      break;
+    
+    case 3:
+      {
+      outm[pos<false,0,0>::n3] = val * Am[pos<true,0,0>::n3];
+      outm[pos<false,1,0>::n3] = val * Am[pos<true,1,0>::n3];
+      outm[pos<false,2,0>::n3] = val * Am[pos<true,2,0>::n3];
+      
+      outm[pos<false,0,1>::n3] = val * Am[pos<true,0,1>::n3];
+      outm[pos<false,1,1>::n3] = val * Am[pos<true,1,1>::n3];
+      outm[pos<false,2,1>::n3] = val * Am[pos<true,2,1>::n3];
+      
+      outm[pos<false,0,2>::n3] = val * Am[pos<true,0,2>::n3];
+      outm[pos<false,1,2>::n3] = val * Am[pos<true,1,2>::n3];
+      outm[pos<false,2,2>::n3] = val * Am[pos<true,2,2>::n3];
+      }
+      break;
+    
+    case 4:
+      {
+      outm[pos<false,0,0>::n4] = val * Am[pos<true,0,0>::n4];
+      outm[pos<false,1,0>::n4] = val * Am[pos<true,1,0>::n4];
+      outm[pos<false,2,0>::n4] = val * Am[pos<true,2,0>::n4];
+      outm[pos<false,3,0>::n4] = val * Am[pos<true,3,0>::n4];
+      
+      outm[pos<false,0,1>::n4] = val * Am[pos<true,0,1>::n4];
+      outm[pos<false,1,1>::n4] = val * Am[pos<true,1,1>::n4];
+      outm[pos<false,2,1>::n4] = val * Am[pos<true,2,1>::n4];
+      outm[pos<false,3,1>::n4] = val * Am[pos<true,3,1>::n4];
+      
+      outm[pos<false,0,2>::n4] = val * Am[pos<true,0,2>::n4];
+      outm[pos<false,1,2>::n4] = val * Am[pos<true,1,2>::n4];
+      outm[pos<false,2,2>::n4] = val * Am[pos<true,2,2>::n4];
+      outm[pos<false,3,2>::n4] = val * Am[pos<true,3,2>::n4];
+      
+      outm[pos<false,0,3>::n4] = val * Am[pos<true,0,3>::n4];
+      outm[pos<false,1,3>::n4] = val * Am[pos<true,1,3>::n4];
+      outm[pos<false,2,3>::n4] = val * Am[pos<true,2,3>::n4];
+      outm[pos<false,3,3>::n4] = val * Am[pos<true,3,3>::n4];
+      }
+      break;
+    
+    default:
+      ;
+    }
+  
+  }
+
+
+
+template<typename eT, typename TA>
+arma_hot
+inline
+void
+op_strans2::apply_noalias(Mat<eT>& out, const TA& A, const eT val)
+  {
+  arma_extra_debug_sigprint();
+  
+  const uword A_n_cols = A.n_cols;
+  const uword A_n_rows = A.n_rows;
+  
+  out.set_size(A_n_cols, A_n_rows);
+  
+  if( (TA::is_col) || (TA::is_row) || (A_n_cols == 1) || (A_n_rows == 1) )
+    {
+    const uword N = A.n_elem;
+    
+    const eT*   A_mem =   A.memptr();
+          eT* out_mem = out.memptr();
+    
+    uword i,j;
+    for(i=0, j=1; j < N; i+=2, j+=2)
+      {
+      const eT tmp_i = A_mem[i];
+      const eT tmp_j = A_mem[j];
+      
+      out_mem[i] = val * tmp_i;
+      out_mem[j] = val * tmp_j;
+      }
+    
+    if(i < N)
+      {
+      out_mem[i] = val * A_mem[i];
+      }
+    }
+  else
+    {
+    if( (A_n_rows <= 4) && (A_n_rows == A_n_cols) )
+      {
+      op_strans2::apply_noalias_tinysq(out, A, val);
+      }
+    else
+      {
+      for(uword k=0; k < A_n_cols; ++k)
+        {
+        uword i, j;
+        
+        const eT* colptr = A.colptr(k);
+        
+        for(i=0, j=1; j < A_n_rows; i+=2, j+=2)
+          {
+          const eT tmp_i = colptr[i];
+          const eT tmp_j = colptr[j];
+          
+          out.at(k, i) = val * tmp_i;
+          out.at(k, j) = val * tmp_j;
+          }
+        
+        if(i < A_n_rows)
+          {
+          out.at(k, i) = val * colptr[i];
+          }
+        }
+      }
+    }
+  }
+
+
+
+template<typename eT, typename TA>
+arma_hot
+inline
+void
+op_strans2::apply(Mat<eT>& out, const TA& A, const eT val)
+  {
+  arma_extra_debug_sigprint();
+  
+  if(&out != &A)
+    {
+    op_strans2::apply_noalias(out, A, val);
+    }
+  else
+    {
+    const uword n_rows = out.n_rows;
+    const uword n_cols = out.n_cols;
+    
+    if(n_rows == n_cols)
+      {
+      arma_extra_debug_print("op_strans2::apply(): doing in-place transpose of a square matrix");
+      
+      const uword N = n_rows;
+      
+      // TODO: do multiplication while swapping
+      
+      for(uword k=0; k < N; ++k)
+        {
+        eT* colptr = out.colptr(k);
+        
+        uword i,j;
+        
+        for(i=(k+1), j=(k+2); j < N; i+=2, j+=2)
+          {
+          std::swap(out.at(k,i), colptr[i]);
+          std::swap(out.at(k,j), colptr[j]);
+          }
+        
+        if(i < N)
+          {
+          std::swap(out.at(k,i), colptr[i]);
+          }
+        }
+      
+      arrayops::inplace_mul( out.memptr(), val, out.n_elem );
+      }
+    else
+      {
+      Mat<eT> tmp;
+      op_strans2::apply_noalias(tmp, A, val);
+      
+      out.steal_mem(tmp);
+      }
+    }
+  }
+
+
+
+template<typename T1>
+arma_hot
+inline
+void
+op_strans2::apply_proxy(Mat<typename T1::elem_type>& out, const T1& X, const typename T1::elem_type val)
+  {
+  arma_extra_debug_sigprint();
+  
+  typedef typename T1::elem_type eT;
+  
+  const Proxy<T1> P(X);
+  
+  // allow detection of in-place transpose
+  if( (is_Mat<typename Proxy<T1>::stored_type>::value == true) && (Proxy<T1>::fake_mat == false) )
+    {
+    const unwrap<typename Proxy<T1>::stored_type> tmp(P.Q);
+    
+    op_strans2::apply(out, tmp.M, val);
+    }
+  else
+    {
+    const uword n_rows = P.get_n_rows();
+    const uword n_cols = P.get_n_cols();
+    
+    const bool is_alias = P.is_alias(out);
+    
+    if( (resolves_to_vector<T1>::value == true) && (Proxy<T1>::prefer_at_accessor == false) )
+      {
+      if(is_alias == false)
+        {
+        out.set_size(n_cols, n_rows);
+        
+        eT* out_mem = out.memptr();
+        
+        const uword n_elem = P.get_n_elem();
+        
+        typename Proxy<T1>::ea_type Pea = P.get_ea();
+        
+        uword i,j;
+        for(i=0, j=1; j < n_elem; i+=2, j+=2)
+          {
+          const eT tmp_i = Pea[i];
+          const eT tmp_j = Pea[j];
+          
+          out_mem[i] = val * tmp_i;
+          out_mem[j] = val * tmp_j;
+          }
+        
+        if(i < n_elem)
+          {
+          out_mem[i] = val * Pea[i];
+          }
+        }
+      else  // aliasing
+        {
+        Mat<eT> out2(n_cols, n_rows);
+        
+        eT* out_mem = out2.memptr();
+        
+        const uword n_elem = P.get_n_elem();
+        
+        typename Proxy<T1>::ea_type Pea = P.get_ea();
+        
+        uword i,j;
+        for(i=0, j=1; j < n_elem; i+=2, j+=2)
+          {
+          const eT tmp_i = Pea[i];
+          const eT tmp_j = Pea[j];
+          
+          out_mem[i] = val * tmp_i;
+          out_mem[j] = val * tmp_j;
+          }
+        
+        if(i < n_elem)
+          {
+          out_mem[i] = val * Pea[i];
+          }
+        
+        out.steal_mem(out2);
+        }
+      }
+    else   // general matrix transpose
+      {
+      if(is_alias == false)
+        {
+        out.set_size(n_cols, n_rows);
+        
+        for(uword k=0; k < n_cols; ++k)
+          {
+          uword i, j;
+          
+          for(i=0, j=1; j < n_rows; i+=2, j+=2)
+            {
+            const eT tmp_i = P.at(i,k);
+            const eT tmp_j = P.at(j,k);
+            
+            out.at(k,i) = val * tmp_i;
+            out.at(k,j) = val * tmp_j;
+            }
+          
+          if(i < n_rows)
+            {
+            out.at(k,i) = val * P.at(i,k);
+            }
+          }
+        }
+      else // aliasing
+        {
+        Mat<eT> out2(n_cols, n_rows);
+        
+        for(uword k=0; k < n_cols; ++k)
+          {
+          uword i, j;
+          
+          for(i=0, j=1; j < n_rows; i+=2, j+=2)
+            {
+            const eT tmp_i = P.at(i,k);
+            const eT tmp_j = P.at(j,k);
+            
+            out2.at(k,i) = val * tmp_i;
+            out2.at(k,j) = val * tmp_j;
+            }
+          
+          if(i < n_rows)
+            {
+            out2.at(k,i) = val * P.at(i,k);
+            }
+          }
+        
+        out.steal_mem(out2);
+        }
+      }
+    }
+  }
+
+
+
+//! @}