Mercurial > hg > segmenter-vamp-plugin
diff armadillo-3.900.4/include/armadillo_bits/auxlib_meat.hpp @ 49:1ec0e2823891
Switch to using subrepo copies of qm-dsp, nnls-chroma, vamp-plugin-sdk; update Armadillo version; assume build without external BLAS/LAPACK
author | Chris Cannam |
---|---|
date | Thu, 13 Jun 2013 10:25:24 +0100 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/armadillo-3.900.4/include/armadillo_bits/auxlib_meat.hpp Thu Jun 13 10:25:24 2013 +0100 @@ -0,0 +1,3249 @@ +// Copyright (C) 2008-2013 NICTA (www.nicta.com.au) +// Copyright (C) 2008-2013 Conrad Sanderson +// Copyright (C) 2009 Edmund Highcock +// Copyright (C) 2011 James Sanders +// Copyright (C) 2011 Stanislav Funiak +// Copyright (C) 2012 Eric Jon Sundstrom +// Copyright (C) 2012 Michael McNeil Forbes +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +//! \addtogroup auxlib +//! @{ + + + +//! immediate matrix inverse +template<typename eT, typename T1> +inline +bool +auxlib::inv(Mat<eT>& out, const Base<eT,T1>& X, const bool slow) + { + arma_extra_debug_sigprint(); + + out = X.get_ref(); + + arma_debug_check( (out.is_square() == false), "inv(): given matrix is not square" ); + + bool status = false; + + const uword N = out.n_rows; + + if( (N <= 4) && (slow == false) ) + { + status = auxlib::inv_inplace_tinymat(out, N); + } + + if( (N > 4) || (status == false) ) + { + status = auxlib::inv_inplace_lapack(out); + } + + return status; + } + + + +template<typename eT> +inline +bool +auxlib::inv(Mat<eT>& out, const Mat<eT>& X, const bool slow) + { + arma_extra_debug_sigprint(); + + arma_debug_check( (X.is_square() == false), "inv(): given matrix is not square" ); + + bool status = false; + + const uword N = X.n_rows; + + if( (N <= 4) && (slow == false) ) + { + status = (&out != &X) ? auxlib::inv_noalias_tinymat(out, X, N) : auxlib::inv_inplace_tinymat(out, N); + } + + if( (N > 4) || (status == false) ) + { + out = X; + status = auxlib::inv_inplace_lapack(out); + } + + return status; + } + + + +template<typename eT> +inline +bool +auxlib::inv_noalias_tinymat(Mat<eT>& out, const Mat<eT>& X, const uword N) + { + arma_extra_debug_sigprint(); + + bool det_ok = true; + + out.set_size(N,N); + + switch(N) + { + case 1: + { + out[0] = eT(1) / X[0]; + }; + break; + + case 2: + { + const eT* Xm = X.memptr(); + + const eT a = Xm[pos<0,0>::n2]; + const eT b = Xm[pos<0,1>::n2]; + const eT c = Xm[pos<1,0>::n2]; + const eT d = Xm[pos<1,1>::n2]; + + const eT tmp_det = (a*d - b*c); + + if(tmp_det != eT(0)) + { + eT* outm = out.memptr(); + + outm[pos<0,0>::n2] = d / tmp_det; + outm[pos<0,1>::n2] = -b / tmp_det; + outm[pos<1,0>::n2] = -c / tmp_det; + outm[pos<1,1>::n2] = a / tmp_det; + } + else + { + det_ok = false; + } + }; + break; + + case 3: + { + const eT* X_col0 = X.colptr(0); + const eT a11 = X_col0[0]; + const eT a21 = X_col0[1]; + const eT a31 = X_col0[2]; + + const eT* X_col1 = X.colptr(1); + const eT a12 = X_col1[0]; + const eT a22 = X_col1[1]; + const eT a32 = X_col1[2]; + + const eT* X_col2 = X.colptr(2); + const eT a13 = X_col2[0]; + const eT a23 = X_col2[1]; + const eT a33 = X_col2[2]; + + const eT tmp_det = a11*(a33*a22 - a32*a23) - a21*(a33*a12-a32*a13) + a31*(a23*a12 - a22*a13); + + if(tmp_det != eT(0)) + { + eT* out_col0 = out.colptr(0); + out_col0[0] = (a33*a22 - a32*a23) / tmp_det; + out_col0[1] = -(a33*a21 - a31*a23) / tmp_det; + out_col0[2] = (a32*a21 - a31*a22) / tmp_det; + + eT* out_col1 = out.colptr(1); + out_col1[0] = -(a33*a12 - a32*a13) / tmp_det; + out_col1[1] = (a33*a11 - a31*a13) / tmp_det; + out_col1[2] = -(a32*a11 - a31*a12) / tmp_det; + + eT* out_col2 = out.colptr(2); + out_col2[0] = (a23*a12 - a22*a13) / tmp_det; + out_col2[1] = -(a23*a11 - a21*a13) / tmp_det; + out_col2[2] = (a22*a11 - a21*a12) / tmp_det; + } + else + { + det_ok = false; + } + }; + break; + + case 4: + { + const eT tmp_det = det(X); + + if(tmp_det != eT(0)) + { + const eT* Xm = X.memptr(); + eT* outm = out.memptr(); + + outm[pos<0,0>::n4] = ( Xm[pos<1,2>::n4]*Xm[pos<2,3>::n4]*Xm[pos<3,1>::n4] - Xm[pos<1,3>::n4]*Xm[pos<2,2>::n4]*Xm[pos<3,1>::n4] + Xm[pos<1,3>::n4]*Xm[pos<2,1>::n4]*Xm[pos<3,2>::n4] - Xm[pos<1,1>::n4]*Xm[pos<2,3>::n4]*Xm[pos<3,2>::n4] - Xm[pos<1,2>::n4]*Xm[pos<2,1>::n4]*Xm[pos<3,3>::n4] + Xm[pos<1,1>::n4]*Xm[pos<2,2>::n4]*Xm[pos<3,3>::n4] ) / tmp_det; + outm[pos<1,0>::n4] = ( Xm[pos<1,3>::n4]*Xm[pos<2,2>::n4]*Xm[pos<3,0>::n4] - Xm[pos<1,2>::n4]*Xm[pos<2,3>::n4]*Xm[pos<3,0>::n4] - Xm[pos<1,3>::n4]*Xm[pos<2,0>::n4]*Xm[pos<3,2>::n4] + Xm[pos<1,0>::n4]*Xm[pos<2,3>::n4]*Xm[pos<3,2>::n4] + Xm[pos<1,2>::n4]*Xm[pos<2,0>::n4]*Xm[pos<3,3>::n4] - Xm[pos<1,0>::n4]*Xm[pos<2,2>::n4]*Xm[pos<3,3>::n4] ) / tmp_det; + outm[pos<2,0>::n4] = ( Xm[pos<1,1>::n4]*Xm[pos<2,3>::n4]*Xm[pos<3,0>::n4] - Xm[pos<1,3>::n4]*Xm[pos<2,1>::n4]*Xm[pos<3,0>::n4] + Xm[pos<1,3>::n4]*Xm[pos<2,0>::n4]*Xm[pos<3,1>::n4] - Xm[pos<1,0>::n4]*Xm[pos<2,3>::n4]*Xm[pos<3,1>::n4] - Xm[pos<1,1>::n4]*Xm[pos<2,0>::n4]*Xm[pos<3,3>::n4] + Xm[pos<1,0>::n4]*Xm[pos<2,1>::n4]*Xm[pos<3,3>::n4] ) / tmp_det; + outm[pos<3,0>::n4] = ( Xm[pos<1,2>::n4]*Xm[pos<2,1>::n4]*Xm[pos<3,0>::n4] - Xm[pos<1,1>::n4]*Xm[pos<2,2>::n4]*Xm[pos<3,0>::n4] - Xm[pos<1,2>::n4]*Xm[pos<2,0>::n4]*Xm[pos<3,1>::n4] + Xm[pos<1,0>::n4]*Xm[pos<2,2>::n4]*Xm[pos<3,1>::n4] + Xm[pos<1,1>::n4]*Xm[pos<2,0>::n4]*Xm[pos<3,2>::n4] - Xm[pos<1,0>::n4]*Xm[pos<2,1>::n4]*Xm[pos<3,2>::n4] ) / tmp_det; + + outm[pos<0,1>::n4] = ( Xm[pos<0,3>::n4]*Xm[pos<2,2>::n4]*Xm[pos<3,1>::n4] - Xm[pos<0,2>::n4]*Xm[pos<2,3>::n4]*Xm[pos<3,1>::n4] - Xm[pos<0,3>::n4]*Xm[pos<2,1>::n4]*Xm[pos<3,2>::n4] + Xm[pos<0,1>::n4]*Xm[pos<2,3>::n4]*Xm[pos<3,2>::n4] + Xm[pos<0,2>::n4]*Xm[pos<2,1>::n4]*Xm[pos<3,3>::n4] - Xm[pos<0,1>::n4]*Xm[pos<2,2>::n4]*Xm[pos<3,3>::n4] ) / tmp_det; + outm[pos<1,1>::n4] = ( Xm[pos<0,2>::n4]*Xm[pos<2,3>::n4]*Xm[pos<3,0>::n4] - Xm[pos<0,3>::n4]*Xm[pos<2,2>::n4]*Xm[pos<3,0>::n4] + Xm[pos<0,3>::n4]*Xm[pos<2,0>::n4]*Xm[pos<3,2>::n4] - Xm[pos<0,0>::n4]*Xm[pos<2,3>::n4]*Xm[pos<3,2>::n4] - Xm[pos<0,2>::n4]*Xm[pos<2,0>::n4]*Xm[pos<3,3>::n4] + Xm[pos<0,0>::n4]*Xm[pos<2,2>::n4]*Xm[pos<3,3>::n4] ) / tmp_det; + outm[pos<2,1>::n4] = ( Xm[pos<0,3>::n4]*Xm[pos<2,1>::n4]*Xm[pos<3,0>::n4] - Xm[pos<0,1>::n4]*Xm[pos<2,3>::n4]*Xm[pos<3,0>::n4] - Xm[pos<0,3>::n4]*Xm[pos<2,0>::n4]*Xm[pos<3,1>::n4] + Xm[pos<0,0>::n4]*Xm[pos<2,3>::n4]*Xm[pos<3,1>::n4] + Xm[pos<0,1>::n4]*Xm[pos<2,0>::n4]*Xm[pos<3,3>::n4] - Xm[pos<0,0>::n4]*Xm[pos<2,1>::n4]*Xm[pos<3,3>::n4] ) / tmp_det; + outm[pos<3,1>::n4] = ( Xm[pos<0,1>::n4]*Xm[pos<2,2>::n4]*Xm[pos<3,0>::n4] - Xm[pos<0,2>::n4]*Xm[pos<2,1>::n4]*Xm[pos<3,0>::n4] + Xm[pos<0,2>::n4]*Xm[pos<2,0>::n4]*Xm[pos<3,1>::n4] - Xm[pos<0,0>::n4]*Xm[pos<2,2>::n4]*Xm[pos<3,1>::n4] - Xm[pos<0,1>::n4]*Xm[pos<2,0>::n4]*Xm[pos<3,2>::n4] + Xm[pos<0,0>::n4]*Xm[pos<2,1>::n4]*Xm[pos<3,2>::n4] ) / tmp_det; + + outm[pos<0,2>::n4] = ( Xm[pos<0,2>::n4]*Xm[pos<1,3>::n4]*Xm[pos<3,1>::n4] - Xm[pos<0,3>::n4]*Xm[pos<1,2>::n4]*Xm[pos<3,1>::n4] + Xm[pos<0,3>::n4]*Xm[pos<1,1>::n4]*Xm[pos<3,2>::n4] - Xm[pos<0,1>::n4]*Xm[pos<1,3>::n4]*Xm[pos<3,2>::n4] - Xm[pos<0,2>::n4]*Xm[pos<1,1>::n4]*Xm[pos<3,3>::n4] + Xm[pos<0,1>::n4]*Xm[pos<1,2>::n4]*Xm[pos<3,3>::n4] ) / tmp_det; + outm[pos<1,2>::n4] = ( Xm[pos<0,3>::n4]*Xm[pos<1,2>::n4]*Xm[pos<3,0>::n4] - Xm[pos<0,2>::n4]*Xm[pos<1,3>::n4]*Xm[pos<3,0>::n4] - Xm[pos<0,3>::n4]*Xm[pos<1,0>::n4]*Xm[pos<3,2>::n4] + Xm[pos<0,0>::n4]*Xm[pos<1,3>::n4]*Xm[pos<3,2>::n4] + Xm[pos<0,2>::n4]*Xm[pos<1,0>::n4]*Xm[pos<3,3>::n4] - Xm[pos<0,0>::n4]*Xm[pos<1,2>::n4]*Xm[pos<3,3>::n4] ) / tmp_det; + outm[pos<2,2>::n4] = ( Xm[pos<0,1>::n4]*Xm[pos<1,3>::n4]*Xm[pos<3,0>::n4] - Xm[pos<0,3>::n4]*Xm[pos<1,1>::n4]*Xm[pos<3,0>::n4] + Xm[pos<0,3>::n4]*Xm[pos<1,0>::n4]*Xm[pos<3,1>::n4] - Xm[pos<0,0>::n4]*Xm[pos<1,3>::n4]*Xm[pos<3,1>::n4] - Xm[pos<0,1>::n4]*Xm[pos<1,0>::n4]*Xm[pos<3,3>::n4] + Xm[pos<0,0>::n4]*Xm[pos<1,1>::n4]*Xm[pos<3,3>::n4] ) / tmp_det; + outm[pos<3,2>::n4] = ( Xm[pos<0,2>::n4]*Xm[pos<1,1>::n4]*Xm[pos<3,0>::n4] - Xm[pos<0,1>::n4]*Xm[pos<1,2>::n4]*Xm[pos<3,0>::n4] - Xm[pos<0,2>::n4]*Xm[pos<1,0>::n4]*Xm[pos<3,1>::n4] + Xm[pos<0,0>::n4]*Xm[pos<1,2>::n4]*Xm[pos<3,1>::n4] + Xm[pos<0,1>::n4]*Xm[pos<1,0>::n4]*Xm[pos<3,2>::n4] - Xm[pos<0,0>::n4]*Xm[pos<1,1>::n4]*Xm[pos<3,2>::n4] ) / tmp_det; + + outm[pos<0,3>::n4] = ( Xm[pos<0,3>::n4]*Xm[pos<1,2>::n4]*Xm[pos<2,1>::n4] - Xm[pos<0,2>::n4]*Xm[pos<1,3>::n4]*Xm[pos<2,1>::n4] - Xm[pos<0,3>::n4]*Xm[pos<1,1>::n4]*Xm[pos<2,2>::n4] + Xm[pos<0,1>::n4]*Xm[pos<1,3>::n4]*Xm[pos<2,2>::n4] + Xm[pos<0,2>::n4]*Xm[pos<1,1>::n4]*Xm[pos<2,3>::n4] - Xm[pos<0,1>::n4]*Xm[pos<1,2>::n4]*Xm[pos<2,3>::n4] ) / tmp_det; + outm[pos<1,3>::n4] = ( Xm[pos<0,2>::n4]*Xm[pos<1,3>::n4]*Xm[pos<2,0>::n4] - Xm[pos<0,3>::n4]*Xm[pos<1,2>::n4]*Xm[pos<2,0>::n4] + Xm[pos<0,3>::n4]*Xm[pos<1,0>::n4]*Xm[pos<2,2>::n4] - Xm[pos<0,0>::n4]*Xm[pos<1,3>::n4]*Xm[pos<2,2>::n4] - Xm[pos<0,2>::n4]*Xm[pos<1,0>::n4]*Xm[pos<2,3>::n4] + Xm[pos<0,0>::n4]*Xm[pos<1,2>::n4]*Xm[pos<2,3>::n4] ) / tmp_det; + outm[pos<2,3>::n4] = ( Xm[pos<0,3>::n4]*Xm[pos<1,1>::n4]*Xm[pos<2,0>::n4] - Xm[pos<0,1>::n4]*Xm[pos<1,3>::n4]*Xm[pos<2,0>::n4] - Xm[pos<0,3>::n4]*Xm[pos<1,0>::n4]*Xm[pos<2,1>::n4] + Xm[pos<0,0>::n4]*Xm[pos<1,3>::n4]*Xm[pos<2,1>::n4] + Xm[pos<0,1>::n4]*Xm[pos<1,0>::n4]*Xm[pos<2,3>::n4] - Xm[pos<0,0>::n4]*Xm[pos<1,1>::n4]*Xm[pos<2,3>::n4] ) / tmp_det; + outm[pos<3,3>::n4] = ( Xm[pos<0,1>::n4]*Xm[pos<1,2>::n4]*Xm[pos<2,0>::n4] - Xm[pos<0,2>::n4]*Xm[pos<1,1>::n4]*Xm[pos<2,0>::n4] + Xm[pos<0,2>::n4]*Xm[pos<1,0>::n4]*Xm[pos<2,1>::n4] - Xm[pos<0,0>::n4]*Xm[pos<1,2>::n4]*Xm[pos<2,1>::n4] - Xm[pos<0,1>::n4]*Xm[pos<1,0>::n4]*Xm[pos<2,2>::n4] + Xm[pos<0,0>::n4]*Xm[pos<1,1>::n4]*Xm[pos<2,2>::n4] ) / tmp_det; + } + else + { + det_ok = false; + } + }; + break; + + default: + ; + } + + return det_ok; + } + + + +template<typename eT> +inline +bool +auxlib::inv_inplace_tinymat(Mat<eT>& X, const uword N) + { + arma_extra_debug_sigprint(); + + bool det_ok = true; + + // for more info, see: + // http://www.dr-lex.34sp.com/random/matrix_inv.html + // http://www.cvl.iis.u-tokyo.ac.jp/~miyazaki/tech/teche23.html + // http://www.euclideanspace.com/maths/algebra/matrix/functions/inverse/fourD/index.htm + // http://www.geometrictools.com//LibFoundation/Mathematics/Wm4Matrix4.inl + + switch(N) + { + case 1: + { + X[0] = eT(1) / X[0]; + }; + break; + + case 2: + { + const eT a = X[pos<0,0>::n2]; + const eT b = X[pos<0,1>::n2]; + const eT c = X[pos<1,0>::n2]; + const eT d = X[pos<1,1>::n2]; + + const eT tmp_det = (a*d - b*c); + + if(tmp_det != eT(0)) + { + X[pos<0,0>::n2] = d / tmp_det; + X[pos<0,1>::n2] = -b / tmp_det; + X[pos<1,0>::n2] = -c / tmp_det; + X[pos<1,1>::n2] = a / tmp_det; + } + else + { + det_ok = false; + } + }; + break; + + case 3: + { + eT* X_col0 = X.colptr(0); + eT* X_col1 = X.colptr(1); + eT* X_col2 = X.colptr(2); + + const eT a11 = X_col0[0]; + const eT a21 = X_col0[1]; + const eT a31 = X_col0[2]; + + const eT a12 = X_col1[0]; + const eT a22 = X_col1[1]; + const eT a32 = X_col1[2]; + + const eT a13 = X_col2[0]; + const eT a23 = X_col2[1]; + const eT a33 = X_col2[2]; + + const eT tmp_det = a11*(a33*a22 - a32*a23) - a21*(a33*a12-a32*a13) + a31*(a23*a12 - a22*a13); + + if(tmp_det != eT(0)) + { + X_col0[0] = (a33*a22 - a32*a23) / tmp_det; + X_col0[1] = -(a33*a21 - a31*a23) / tmp_det; + X_col0[2] = (a32*a21 - a31*a22) / tmp_det; + + X_col1[0] = -(a33*a12 - a32*a13) / tmp_det; + X_col1[1] = (a33*a11 - a31*a13) / tmp_det; + X_col1[2] = -(a32*a11 - a31*a12) / tmp_det; + + X_col2[0] = (a23*a12 - a22*a13) / tmp_det; + X_col2[1] = -(a23*a11 - a21*a13) / tmp_det; + X_col2[2] = (a22*a11 - a21*a12) / tmp_det; + } + else + { + det_ok = false; + } + }; + break; + + case 4: + { + const eT tmp_det = det(X); + + if(tmp_det != eT(0)) + { + const Mat<eT> A(X); + + const eT* Am = A.memptr(); + eT* Xm = X.memptr(); + + Xm[pos<0,0>::n4] = ( Am[pos<1,2>::n4]*Am[pos<2,3>::n4]*Am[pos<3,1>::n4] - Am[pos<1,3>::n4]*Am[pos<2,2>::n4]*Am[pos<3,1>::n4] + Am[pos<1,3>::n4]*Am[pos<2,1>::n4]*Am[pos<3,2>::n4] - Am[pos<1,1>::n4]*Am[pos<2,3>::n4]*Am[pos<3,2>::n4] - Am[pos<1,2>::n4]*Am[pos<2,1>::n4]*Am[pos<3,3>::n4] + Am[pos<1,1>::n4]*Am[pos<2,2>::n4]*Am[pos<3,3>::n4] ) / tmp_det; + Xm[pos<1,0>::n4] = ( Am[pos<1,3>::n4]*Am[pos<2,2>::n4]*Am[pos<3,0>::n4] - Am[pos<1,2>::n4]*Am[pos<2,3>::n4]*Am[pos<3,0>::n4] - Am[pos<1,3>::n4]*Am[pos<2,0>::n4]*Am[pos<3,2>::n4] + Am[pos<1,0>::n4]*Am[pos<2,3>::n4]*Am[pos<3,2>::n4] + Am[pos<1,2>::n4]*Am[pos<2,0>::n4]*Am[pos<3,3>::n4] - Am[pos<1,0>::n4]*Am[pos<2,2>::n4]*Am[pos<3,3>::n4] ) / tmp_det; + Xm[pos<2,0>::n4] = ( Am[pos<1,1>::n4]*Am[pos<2,3>::n4]*Am[pos<3,0>::n4] - Am[pos<1,3>::n4]*Am[pos<2,1>::n4]*Am[pos<3,0>::n4] + Am[pos<1,3>::n4]*Am[pos<2,0>::n4]*Am[pos<3,1>::n4] - Am[pos<1,0>::n4]*Am[pos<2,3>::n4]*Am[pos<3,1>::n4] - Am[pos<1,1>::n4]*Am[pos<2,0>::n4]*Am[pos<3,3>::n4] + Am[pos<1,0>::n4]*Am[pos<2,1>::n4]*Am[pos<3,3>::n4] ) / tmp_det; + Xm[pos<3,0>::n4] = ( Am[pos<1,2>::n4]*Am[pos<2,1>::n4]*Am[pos<3,0>::n4] - Am[pos<1,1>::n4]*Am[pos<2,2>::n4]*Am[pos<3,0>::n4] - Am[pos<1,2>::n4]*Am[pos<2,0>::n4]*Am[pos<3,1>::n4] + Am[pos<1,0>::n4]*Am[pos<2,2>::n4]*Am[pos<3,1>::n4] + Am[pos<1,1>::n4]*Am[pos<2,0>::n4]*Am[pos<3,2>::n4] - Am[pos<1,0>::n4]*Am[pos<2,1>::n4]*Am[pos<3,2>::n4] ) / tmp_det; + + Xm[pos<0,1>::n4] = ( Am[pos<0,3>::n4]*Am[pos<2,2>::n4]*Am[pos<3,1>::n4] - Am[pos<0,2>::n4]*Am[pos<2,3>::n4]*Am[pos<3,1>::n4] - Am[pos<0,3>::n4]*Am[pos<2,1>::n4]*Am[pos<3,2>::n4] + Am[pos<0,1>::n4]*Am[pos<2,3>::n4]*Am[pos<3,2>::n4] + Am[pos<0,2>::n4]*Am[pos<2,1>::n4]*Am[pos<3,3>::n4] - Am[pos<0,1>::n4]*Am[pos<2,2>::n4]*Am[pos<3,3>::n4] ) / tmp_det; + Xm[pos<1,1>::n4] = ( Am[pos<0,2>::n4]*Am[pos<2,3>::n4]*Am[pos<3,0>::n4] - Am[pos<0,3>::n4]*Am[pos<2,2>::n4]*Am[pos<3,0>::n4] + Am[pos<0,3>::n4]*Am[pos<2,0>::n4]*Am[pos<3,2>::n4] - Am[pos<0,0>::n4]*Am[pos<2,3>::n4]*Am[pos<3,2>::n4] - Am[pos<0,2>::n4]*Am[pos<2,0>::n4]*Am[pos<3,3>::n4] + Am[pos<0,0>::n4]*Am[pos<2,2>::n4]*Am[pos<3,3>::n4] ) / tmp_det; + Xm[pos<2,1>::n4] = ( Am[pos<0,3>::n4]*Am[pos<2,1>::n4]*Am[pos<3,0>::n4] - Am[pos<0,1>::n4]*Am[pos<2,3>::n4]*Am[pos<3,0>::n4] - Am[pos<0,3>::n4]*Am[pos<2,0>::n4]*Am[pos<3,1>::n4] + Am[pos<0,0>::n4]*Am[pos<2,3>::n4]*Am[pos<3,1>::n4] + Am[pos<0,1>::n4]*Am[pos<2,0>::n4]*Am[pos<3,3>::n4] - Am[pos<0,0>::n4]*Am[pos<2,1>::n4]*Am[pos<3,3>::n4] ) / tmp_det; + Xm[pos<3,1>::n4] = ( Am[pos<0,1>::n4]*Am[pos<2,2>::n4]*Am[pos<3,0>::n4] - Am[pos<0,2>::n4]*Am[pos<2,1>::n4]*Am[pos<3,0>::n4] + Am[pos<0,2>::n4]*Am[pos<2,0>::n4]*Am[pos<3,1>::n4] - Am[pos<0,0>::n4]*Am[pos<2,2>::n4]*Am[pos<3,1>::n4] - Am[pos<0,1>::n4]*Am[pos<2,0>::n4]*Am[pos<3,2>::n4] + Am[pos<0,0>::n4]*Am[pos<2,1>::n4]*Am[pos<3,2>::n4] ) / tmp_det; + + Xm[pos<0,2>::n4] = ( Am[pos<0,2>::n4]*Am[pos<1,3>::n4]*Am[pos<3,1>::n4] - Am[pos<0,3>::n4]*Am[pos<1,2>::n4]*Am[pos<3,1>::n4] + Am[pos<0,3>::n4]*Am[pos<1,1>::n4]*Am[pos<3,2>::n4] - Am[pos<0,1>::n4]*Am[pos<1,3>::n4]*Am[pos<3,2>::n4] - Am[pos<0,2>::n4]*Am[pos<1,1>::n4]*Am[pos<3,3>::n4] + Am[pos<0,1>::n4]*Am[pos<1,2>::n4]*Am[pos<3,3>::n4] ) / tmp_det; + Xm[pos<1,2>::n4] = ( Am[pos<0,3>::n4]*Am[pos<1,2>::n4]*Am[pos<3,0>::n4] - Am[pos<0,2>::n4]*Am[pos<1,3>::n4]*Am[pos<3,0>::n4] - Am[pos<0,3>::n4]*Am[pos<1,0>::n4]*Am[pos<3,2>::n4] + Am[pos<0,0>::n4]*Am[pos<1,3>::n4]*Am[pos<3,2>::n4] + Am[pos<0,2>::n4]*Am[pos<1,0>::n4]*Am[pos<3,3>::n4] - Am[pos<0,0>::n4]*Am[pos<1,2>::n4]*Am[pos<3,3>::n4] ) / tmp_det; + Xm[pos<2,2>::n4] = ( Am[pos<0,1>::n4]*Am[pos<1,3>::n4]*Am[pos<3,0>::n4] - Am[pos<0,3>::n4]*Am[pos<1,1>::n4]*Am[pos<3,0>::n4] + Am[pos<0,3>::n4]*Am[pos<1,0>::n4]*Am[pos<3,1>::n4] - Am[pos<0,0>::n4]*Am[pos<1,3>::n4]*Am[pos<3,1>::n4] - Am[pos<0,1>::n4]*Am[pos<1,0>::n4]*Am[pos<3,3>::n4] + Am[pos<0,0>::n4]*Am[pos<1,1>::n4]*Am[pos<3,3>::n4] ) / tmp_det; + Xm[pos<3,2>::n4] = ( Am[pos<0,2>::n4]*Am[pos<1,1>::n4]*Am[pos<3,0>::n4] - Am[pos<0,1>::n4]*Am[pos<1,2>::n4]*Am[pos<3,0>::n4] - Am[pos<0,2>::n4]*Am[pos<1,0>::n4]*Am[pos<3,1>::n4] + Am[pos<0,0>::n4]*Am[pos<1,2>::n4]*Am[pos<3,1>::n4] + Am[pos<0,1>::n4]*Am[pos<1,0>::n4]*Am[pos<3,2>::n4] - Am[pos<0,0>::n4]*Am[pos<1,1>::n4]*Am[pos<3,2>::n4] ) / tmp_det; + + Xm[pos<0,3>::n4] = ( Am[pos<0,3>::n4]*Am[pos<1,2>::n4]*Am[pos<2,1>::n4] - Am[pos<0,2>::n4]*Am[pos<1,3>::n4]*Am[pos<2,1>::n4] - Am[pos<0,3>::n4]*Am[pos<1,1>::n4]*Am[pos<2,2>::n4] + Am[pos<0,1>::n4]*Am[pos<1,3>::n4]*Am[pos<2,2>::n4] + Am[pos<0,2>::n4]*Am[pos<1,1>::n4]*Am[pos<2,3>::n4] - Am[pos<0,1>::n4]*Am[pos<1,2>::n4]*Am[pos<2,3>::n4] ) / tmp_det; + Xm[pos<1,3>::n4] = ( Am[pos<0,2>::n4]*Am[pos<1,3>::n4]*Am[pos<2,0>::n4] - Am[pos<0,3>::n4]*Am[pos<1,2>::n4]*Am[pos<2,0>::n4] + Am[pos<0,3>::n4]*Am[pos<1,0>::n4]*Am[pos<2,2>::n4] - Am[pos<0,0>::n4]*Am[pos<1,3>::n4]*Am[pos<2,2>::n4] - Am[pos<0,2>::n4]*Am[pos<1,0>::n4]*Am[pos<2,3>::n4] + Am[pos<0,0>::n4]*Am[pos<1,2>::n4]*Am[pos<2,3>::n4] ) / tmp_det; + Xm[pos<2,3>::n4] = ( Am[pos<0,3>::n4]*Am[pos<1,1>::n4]*Am[pos<2,0>::n4] - Am[pos<0,1>::n4]*Am[pos<1,3>::n4]*Am[pos<2,0>::n4] - Am[pos<0,3>::n4]*Am[pos<1,0>::n4]*Am[pos<2,1>::n4] + Am[pos<0,0>::n4]*Am[pos<1,3>::n4]*Am[pos<2,1>::n4] + Am[pos<0,1>::n4]*Am[pos<1,0>::n4]*Am[pos<2,3>::n4] - Am[pos<0,0>::n4]*Am[pos<1,1>::n4]*Am[pos<2,3>::n4] ) / tmp_det; + Xm[pos<3,3>::n4] = ( Am[pos<0,1>::n4]*Am[pos<1,2>::n4]*Am[pos<2,0>::n4] - Am[pos<0,2>::n4]*Am[pos<1,1>::n4]*Am[pos<2,0>::n4] + Am[pos<0,2>::n4]*Am[pos<1,0>::n4]*Am[pos<2,1>::n4] - Am[pos<0,0>::n4]*Am[pos<1,2>::n4]*Am[pos<2,1>::n4] - Am[pos<0,1>::n4]*Am[pos<1,0>::n4]*Am[pos<2,2>::n4] + Am[pos<0,0>::n4]*Am[pos<1,1>::n4]*Am[pos<2,2>::n4] ) / tmp_det; + } + else + { + det_ok = false; + } + }; + break; + + default: + ; + } + + return det_ok; + } + + + +template<typename eT> +inline +bool +auxlib::inv_inplace_lapack(Mat<eT>& out) + { + arma_extra_debug_sigprint(); + + if(out.is_empty()) + { + return true; + } + + #if defined(ARMA_USE_ATLAS) + { + podarray<int> ipiv(out.n_rows); + + int info = atlas::clapack_getrf(atlas::CblasColMajor, out.n_rows, out.n_cols, out.memptr(), out.n_rows, ipiv.memptr()); + + if(info == 0) + { + info = atlas::clapack_getri(atlas::CblasColMajor, out.n_rows, out.memptr(), out.n_rows, ipiv.memptr()); + } + + return (info == 0); + } + #elif defined(ARMA_USE_LAPACK) + { + blas_int n_rows = out.n_rows; + blas_int n_cols = out.n_cols; + blas_int lwork = 0; + blas_int lwork_min = (std::max)(blas_int(1), n_rows); + blas_int info = 0; + + podarray<blas_int> ipiv(out.n_rows); + + eT work_query[2]; + blas_int lwork_query = -1; + + lapack::getri(&n_rows, out.memptr(), &n_rows, ipiv.memptr(), &work_query[0], &lwork_query, &info); + + if(info == 0) + { + const blas_int lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) ); + + lwork = (lwork_proposed > lwork_min) ? lwork_proposed : lwork_min; + } + else + { + return false; + } + + podarray<eT> work( static_cast<uword>(lwork) ); + + lapack::getrf(&n_rows, &n_cols, out.memptr(), &n_rows, ipiv.memptr(), &info); + + if(info == 0) + { + lapack::getri(&n_rows, out.memptr(), &n_rows, ipiv.memptr(), work.memptr(), &lwork, &info); + } + + return (info == 0); + } + #else + { + arma_stop("inv(): use of ATLAS or LAPACK needs to be enabled"); + return false; + } + #endif + } + + + +template<typename eT, typename T1> +inline +bool +auxlib::inv_tr(Mat<eT>& out, const Base<eT,T1>& X, const uword layout) + { + arma_extra_debug_sigprint(); + + out = X.get_ref(); + + arma_debug_check( (out.is_square() == false), "inv(): given matrix is not square" ); + + if(out.is_empty()) + { + return true; + } + + bool status; + + #if defined(ARMA_USE_LAPACK) + { + char uplo = (layout == 0) ? 'U' : 'L'; + char diag = 'N'; + blas_int n = blas_int(out.n_rows); + blas_int info = 0; + + lapack::trtri(&uplo, &diag, &n, out.memptr(), &n, &info); + + status = (info == 0); + } + #else + { + arma_ignore(layout); + arma_stop("inv(): use of LAPACK needs to be enabled"); + status = false; + } + #endif + + + if(status == true) + { + if(layout == 0) + { + // upper triangular + out = trimatu(out); + } + else + { + // lower triangular + out = trimatl(out); + } + } + + return status; + } + + + +template<typename eT, typename T1> +inline +bool +auxlib::inv_sym(Mat<eT>& out, const Base<eT,T1>& X, const uword layout) + { + arma_extra_debug_sigprint(); + + out = X.get_ref(); + + arma_debug_check( (out.is_square() == false), "inv(): given matrix is not square" ); + + if(out.is_empty()) + { + return true; + } + + bool status; + + #if defined(ARMA_USE_LAPACK) + { + char uplo = (layout == 0) ? 'U' : 'L'; + blas_int n = blas_int(out.n_rows); + blas_int lwork = 3 * (n*n); // TODO: use lwork = -1 to determine optimal size + blas_int info = 0; + + podarray<blas_int> ipiv; + ipiv.set_size(out.n_rows); + + podarray<eT> work; + work.set_size( uword(lwork) ); + + lapack::sytrf(&uplo, &n, out.memptr(), &n, ipiv.memptr(), work.memptr(), &lwork, &info); + + status = (info == 0); + + if(status == true) + { + lapack::sytri(&uplo, &n, out.memptr(), &n, ipiv.memptr(), work.memptr(), &info); + + out = (layout == 0) ? symmatu(out) : symmatl(out); + + status = (info == 0); + } + } + #else + { + arma_ignore(layout); + arma_stop("inv(): use of LAPACK needs to be enabled"); + status = false; + } + #endif + + return status; + } + + + +template<typename eT, typename T1> +inline +bool +auxlib::inv_sympd(Mat<eT>& out, const Base<eT,T1>& X, const uword layout) + { + arma_extra_debug_sigprint(); + + out = X.get_ref(); + + arma_debug_check( (out.is_square() == false), "inv(): given matrix is not square" ); + + if(out.is_empty()) + { + return true; + } + + bool status; + + #if defined(ARMA_USE_LAPACK) + { + char uplo = (layout == 0) ? 'U' : 'L'; + blas_int n = blas_int(out.n_rows); + blas_int info = 0; + + lapack::potrf(&uplo, &n, out.memptr(), &n, &info); + + status = (info == 0); + + if(status == true) + { + lapack::potri(&uplo, &n, out.memptr(), &n, &info); + + out = (layout == 0) ? symmatu(out) : symmatl(out); + + status = (info == 0); + } + } + #else + { + arma_ignore(layout); + arma_stop("inv(): use of LAPACK needs to be enabled"); + status = false; + } + #endif + + return status; + } + + + +template<typename eT, typename T1> +inline +eT +auxlib::det(const Base<eT,T1>& X, const bool slow) + { + const unwrap<T1> tmp(X.get_ref()); + const Mat<eT>& A = tmp.M; + + arma_debug_check( (A.is_square() == false), "det(): matrix is not square" ); + + const bool make_copy = (is_Mat<T1>::value == true) ? true : false; + + if(slow == false) + { + const uword N = A.n_rows; + + switch(N) + { + case 0: + case 1: + case 2: + return auxlib::det_tinymat(A, N); + break; + + case 3: + case 4: + { + const eT tmp_det = auxlib::det_tinymat(A, N); + return (tmp_det != eT(0)) ? tmp_det : auxlib::det_lapack(A, make_copy); + } + break; + + default: + return auxlib::det_lapack(A, make_copy); + } + } + + return auxlib::det_lapack(A, make_copy); + } + + + +template<typename eT> +inline +eT +auxlib::det_tinymat(const Mat<eT>& X, const uword N) + { + arma_extra_debug_sigprint(); + + switch(N) + { + case 0: + return eT(1); + break; + + case 1: + return X[0]; + break; + + case 2: + { + const eT* Xm = X.memptr(); + + return ( Xm[pos<0,0>::n2]*Xm[pos<1,1>::n2] - Xm[pos<0,1>::n2]*Xm[pos<1,0>::n2] ); + } + break; + + case 3: + { + // const double tmp1 = X.at(0,0) * X.at(1,1) * X.at(2,2); + // const double tmp2 = X.at(0,1) * X.at(1,2) * X.at(2,0); + // const double tmp3 = X.at(0,2) * X.at(1,0) * X.at(2,1); + // const double tmp4 = X.at(2,0) * X.at(1,1) * X.at(0,2); + // const double tmp5 = X.at(2,1) * X.at(1,2) * X.at(0,0); + // const double tmp6 = X.at(2,2) * X.at(1,0) * X.at(0,1); + // return (tmp1+tmp2+tmp3) - (tmp4+tmp5+tmp6); + + const eT* a_col0 = X.colptr(0); + const eT a11 = a_col0[0]; + const eT a21 = a_col0[1]; + const eT a31 = a_col0[2]; + + const eT* a_col1 = X.colptr(1); + const eT a12 = a_col1[0]; + const eT a22 = a_col1[1]; + const eT a32 = a_col1[2]; + + const eT* a_col2 = X.colptr(2); + const eT a13 = a_col2[0]; + const eT a23 = a_col2[1]; + const eT a33 = a_col2[2]; + + return ( a11*(a33*a22 - a32*a23) - a21*(a33*a12-a32*a13) + a31*(a23*a12 - a22*a13) ); + } + break; + + case 4: + { + const eT* Xm = X.memptr(); + + const eT val = \ + Xm[pos<0,3>::n4] * Xm[pos<1,2>::n4] * Xm[pos<2,1>::n4] * Xm[pos<3,0>::n4] \ + - Xm[pos<0,2>::n4] * Xm[pos<1,3>::n4] * Xm[pos<2,1>::n4] * Xm[pos<3,0>::n4] \ + - Xm[pos<0,3>::n4] * Xm[pos<1,1>::n4] * Xm[pos<2,2>::n4] * Xm[pos<3,0>::n4] \ + + Xm[pos<0,1>::n4] * Xm[pos<1,3>::n4] * Xm[pos<2,2>::n4] * Xm[pos<3,0>::n4] \ + + Xm[pos<0,2>::n4] * Xm[pos<1,1>::n4] * Xm[pos<2,3>::n4] * Xm[pos<3,0>::n4] \ + - Xm[pos<0,1>::n4] * Xm[pos<1,2>::n4] * Xm[pos<2,3>::n4] * Xm[pos<3,0>::n4] \ + - Xm[pos<0,3>::n4] * Xm[pos<1,2>::n4] * Xm[pos<2,0>::n4] * Xm[pos<3,1>::n4] \ + + Xm[pos<0,2>::n4] * Xm[pos<1,3>::n4] * Xm[pos<2,0>::n4] * Xm[pos<3,1>::n4] \ + + Xm[pos<0,3>::n4] * Xm[pos<1,0>::n4] * Xm[pos<2,2>::n4] * Xm[pos<3,1>::n4] \ + - Xm[pos<0,0>::n4] * Xm[pos<1,3>::n4] * Xm[pos<2,2>::n4] * Xm[pos<3,1>::n4] \ + - Xm[pos<0,2>::n4] * Xm[pos<1,0>::n4] * Xm[pos<2,3>::n4] * Xm[pos<3,1>::n4] \ + + Xm[pos<0,0>::n4] * Xm[pos<1,2>::n4] * Xm[pos<2,3>::n4] * Xm[pos<3,1>::n4] \ + + Xm[pos<0,3>::n4] * Xm[pos<1,1>::n4] * Xm[pos<2,0>::n4] * Xm[pos<3,2>::n4] \ + - Xm[pos<0,1>::n4] * Xm[pos<1,3>::n4] * Xm[pos<2,0>::n4] * Xm[pos<3,2>::n4] \ + - Xm[pos<0,3>::n4] * Xm[pos<1,0>::n4] * Xm[pos<2,1>::n4] * Xm[pos<3,2>::n4] \ + + Xm[pos<0,0>::n4] * Xm[pos<1,3>::n4] * Xm[pos<2,1>::n4] * Xm[pos<3,2>::n4] \ + + Xm[pos<0,1>::n4] * Xm[pos<1,0>::n4] * Xm[pos<2,3>::n4] * Xm[pos<3,2>::n4] \ + - Xm[pos<0,0>::n4] * Xm[pos<1,1>::n4] * Xm[pos<2,3>::n4] * Xm[pos<3,2>::n4] \ + - Xm[pos<0,2>::n4] * Xm[pos<1,1>::n4] * Xm[pos<2,0>::n4] * Xm[pos<3,3>::n4] \ + + Xm[pos<0,1>::n4] * Xm[pos<1,2>::n4] * Xm[pos<2,0>::n4] * Xm[pos<3,3>::n4] \ + + Xm[pos<0,2>::n4] * Xm[pos<1,0>::n4] * Xm[pos<2,1>::n4] * Xm[pos<3,3>::n4] \ + - Xm[pos<0,0>::n4] * Xm[pos<1,2>::n4] * Xm[pos<2,1>::n4] * Xm[pos<3,3>::n4] \ + - Xm[pos<0,1>::n4] * Xm[pos<1,0>::n4] * Xm[pos<2,2>::n4] * Xm[pos<3,3>::n4] \ + + Xm[pos<0,0>::n4] * Xm[pos<1,1>::n4] * Xm[pos<2,2>::n4] * Xm[pos<3,3>::n4] \ + ; + + return val; + } + break; + + default: + return eT(0); + ; + } + } + + + +//! immediate determinant of a matrix using ATLAS or LAPACK +template<typename eT> +inline +eT +auxlib::det_lapack(const Mat<eT>& X, const bool make_copy) + { + arma_extra_debug_sigprint(); + + Mat<eT> X_copy; + + if(make_copy == true) + { + X_copy = X; + } + + Mat<eT>& tmp = (make_copy == true) ? X_copy : const_cast< Mat<eT>& >(X); + + if(tmp.is_empty()) + { + return eT(1); + } + + + #if defined(ARMA_USE_ATLAS) + { + podarray<int> ipiv(tmp.n_rows); + + //const int info = + atlas::clapack_getrf(atlas::CblasColMajor, tmp.n_rows, tmp.n_cols, tmp.memptr(), tmp.n_rows, ipiv.memptr()); + + // on output tmp appears to be L+U_alt, where U_alt is U with the main diagonal set to zero + eT val = tmp.at(0,0); + for(uword i=1; i < tmp.n_rows; ++i) + { + val *= tmp.at(i,i); + } + + int sign = +1; + for(uword i=0; i < tmp.n_rows; ++i) + { + if( int(i) != ipiv.mem[i] ) // NOTE: no adjustment required, as the clapack version of getrf() assumes counting from 0 + { + sign *= -1; + } + } + + return ( (sign < 0) ? -val : val ); + } + #elif defined(ARMA_USE_LAPACK) + { + podarray<blas_int> ipiv(tmp.n_rows); + + blas_int info = 0; + blas_int n_rows = blas_int(tmp.n_rows); + blas_int n_cols = blas_int(tmp.n_cols); + + lapack::getrf(&n_rows, &n_cols, tmp.memptr(), &n_rows, ipiv.memptr(), &info); + + // on output tmp appears to be L+U_alt, where U_alt is U with the main diagonal set to zero + eT val = tmp.at(0,0); + for(uword i=1; i < tmp.n_rows; ++i) + { + val *= tmp.at(i,i); + } + + blas_int sign = +1; + for(uword i=0; i < tmp.n_rows; ++i) + { + if( blas_int(i) != (ipiv.mem[i] - 1) ) // NOTE: adjustment of -1 is required as Fortran counts from 1 + { + sign *= -1; + } + } + + return ( (sign < 0) ? -val : val ); + } + #else + { + arma_ignore(X); + arma_ignore(make_copy); + arma_ignore(tmp); + arma_stop("det(): use of ATLAS or LAPACK needs to be enabled"); + return eT(0); + } + #endif + } + + + +//! immediate log determinant of a matrix using ATLAS or LAPACK +template<typename eT, typename T1> +inline +bool +auxlib::log_det(eT& out_val, typename get_pod_type<eT>::result& out_sign, const Base<eT,T1>& X) + { + arma_extra_debug_sigprint(); + + typedef typename get_pod_type<eT>::result T; + + #if defined(ARMA_USE_ATLAS) + { + Mat<eT> tmp(X.get_ref()); + arma_debug_check( (tmp.is_square() == false), "log_det(): given matrix is not square" ); + + if(tmp.is_empty()) + { + out_val = eT(0); + out_sign = T(1); + return true; + } + + podarray<int> ipiv(tmp.n_rows); + + const int info = atlas::clapack_getrf(atlas::CblasColMajor, tmp.n_rows, tmp.n_cols, tmp.memptr(), tmp.n_rows, ipiv.memptr()); + + // on output tmp appears to be L+U_alt, where U_alt is U with the main diagonal set to zero + + sword sign = (is_complex<eT>::value == false) ? ( (access::tmp_real( tmp.at(0,0) ) < T(0)) ? -1 : +1 ) : +1; + eT val = (is_complex<eT>::value == false) ? std::log( (access::tmp_real( tmp.at(0,0) ) < T(0)) ? tmp.at(0,0)*T(-1) : tmp.at(0,0) ) : std::log( tmp.at(0,0) ); + + for(uword i=1; i < tmp.n_rows; ++i) + { + const eT x = tmp.at(i,i); + + sign *= (is_complex<eT>::value == false) ? ( (access::tmp_real(x) < T(0)) ? -1 : +1 ) : +1; + val += (is_complex<eT>::value == false) ? std::log( (access::tmp_real(x) < T(0)) ? x*T(-1) : x ) : std::log(x); + } + + for(uword i=0; i < tmp.n_rows; ++i) + { + if( int(i) != ipiv.mem[i] ) // NOTE: no adjustment required, as the clapack version of getrf() assumes counting from 0 + { + sign *= -1; + } + } + + out_val = val; + out_sign = T(sign); + + return (info == 0); + } + #elif defined(ARMA_USE_LAPACK) + { + Mat<eT> tmp(X.get_ref()); + arma_debug_check( (tmp.is_square() == false), "log_det(): given matrix is not square" ); + + if(tmp.is_empty()) + { + out_val = eT(0); + out_sign = T(1); + return true; + } + + podarray<blas_int> ipiv(tmp.n_rows); + + blas_int info = 0; + blas_int n_rows = blas_int(tmp.n_rows); + blas_int n_cols = blas_int(tmp.n_cols); + + lapack::getrf(&n_rows, &n_cols, tmp.memptr(), &n_rows, ipiv.memptr(), &info); + + // on output tmp appears to be L+U_alt, where U_alt is U with the main diagonal set to zero + + sword sign = (is_complex<eT>::value == false) ? ( (access::tmp_real( tmp.at(0,0) ) < T(0)) ? -1 : +1 ) : +1; + eT val = (is_complex<eT>::value == false) ? std::log( (access::tmp_real( tmp.at(0,0) ) < T(0)) ? tmp.at(0,0)*T(-1) : tmp.at(0,0) ) : std::log( tmp.at(0,0) ); + + for(uword i=1; i < tmp.n_rows; ++i) + { + const eT x = tmp.at(i,i); + + sign *= (is_complex<eT>::value == false) ? ( (access::tmp_real(x) < T(0)) ? -1 : +1 ) : +1; + val += (is_complex<eT>::value == false) ? std::log( (access::tmp_real(x) < T(0)) ? x*T(-1) : x ) : std::log(x); + } + + for(uword i=0; i < tmp.n_rows; ++i) + { + if( blas_int(i) != (ipiv.mem[i] - 1) ) // NOTE: adjustment of -1 is required as Fortran counts from 1 + { + sign *= -1; + } + } + + out_val = val; + out_sign = T(sign); + + return (info == 0); + } + #else + { + arma_ignore(X); + + out_val = eT(0); + out_sign = T(0); + + arma_stop("log_det(): use of ATLAS or LAPACK needs to be enabled"); + + return false; + } + #endif + } + + + +//! immediate LU decomposition of a matrix using ATLAS or LAPACK +template<typename eT, typename T1> +inline +bool +auxlib::lu(Mat<eT>& L, Mat<eT>& U, podarray<blas_int>& ipiv, const Base<eT,T1>& X) + { + arma_extra_debug_sigprint(); + + U = X.get_ref(); + + const uword U_n_rows = U.n_rows; + const uword U_n_cols = U.n_cols; + + if(U.is_empty()) + { + L.set_size(U_n_rows, 0); + U.set_size(0, U_n_cols); + ipiv.reset(); + return true; + } + + #if defined(ARMA_USE_ATLAS) || defined(ARMA_USE_LAPACK) + { + bool status; + + #if defined(ARMA_USE_ATLAS) + { + ipiv.set_size( (std::min)(U_n_rows, U_n_cols) ); + + int info = atlas::clapack_getrf(atlas::CblasColMajor, U_n_rows, U_n_cols, U.memptr(), U_n_rows, ipiv.memptr()); + + status = (info == 0); + } + #elif defined(ARMA_USE_LAPACK) + { + ipiv.set_size( (std::min)(U_n_rows, U_n_cols) ); + + blas_int info = 0; + + blas_int n_rows = U_n_rows; + blas_int n_cols = U_n_cols; + + + lapack::getrf(&n_rows, &n_cols, U.memptr(), &n_rows, ipiv.memptr(), &info); + + // take into account that Fortran counts from 1 + arrayops::inplace_minus(ipiv.memptr(), blas_int(1), ipiv.n_elem); + + status = (info == 0); + } + #endif + + L.copy_size(U); + + for(uword col=0; col < U_n_cols; ++col) + { + for(uword row=0; (row < col) && (row < U_n_rows); ++row) + { + L.at(row,col) = eT(0); + } + + if( L.in_range(col,col) == true ) + { + L.at(col,col) = eT(1); + } + + for(uword row = (col+1); row < U_n_rows; ++row) + { + L.at(row,col) = U.at(row,col); + U.at(row,col) = eT(0); + } + } + + return status; + } + #else + { + arma_stop("lu(): use of ATLAS or LAPACK needs to be enabled"); + + return false; + } + #endif + } + + + +template<typename eT, typename T1> +inline +bool +auxlib::lu(Mat<eT>& L, Mat<eT>& U, Mat<eT>& P, const Base<eT,T1>& X) + { + arma_extra_debug_sigprint(); + + podarray<blas_int> ipiv1; + const bool status = auxlib::lu(L, U, ipiv1, X); + + if(status == true) + { + if(U.is_empty()) + { + // L and U have been already set to the correct empty matrices + P.eye(L.n_rows, L.n_rows); + return true; + } + + const uword n = ipiv1.n_elem; + const uword P_rows = U.n_rows; + + podarray<blas_int> ipiv2(P_rows); + + const blas_int* ipiv1_mem = ipiv1.memptr(); + blas_int* ipiv2_mem = ipiv2.memptr(); + + for(uword i=0; i<P_rows; ++i) + { + ipiv2_mem[i] = blas_int(i); + } + + for(uword i=0; i<n; ++i) + { + const uword k = static_cast<uword>(ipiv1_mem[i]); + + if( ipiv2_mem[i] != ipiv2_mem[k] ) + { + std::swap( ipiv2_mem[i], ipiv2_mem[k] ); + } + } + + P.zeros(P_rows, P_rows); + + for(uword row=0; row<P_rows; ++row) + { + P.at(row, static_cast<uword>(ipiv2_mem[row])) = eT(1); + } + + if(L.n_cols > U.n_rows) + { + L.shed_cols(U.n_rows, L.n_cols-1); + } + + if(U.n_rows > L.n_cols) + { + U.shed_rows(L.n_cols, U.n_rows-1); + } + } + + return status; + } + + + +template<typename eT, typename T1> +inline +bool +auxlib::lu(Mat<eT>& L, Mat<eT>& U, const Base<eT,T1>& X) + { + arma_extra_debug_sigprint(); + + podarray<blas_int> ipiv1; + const bool status = auxlib::lu(L, U, ipiv1, X); + + if(status == true) + { + if(U.is_empty()) + { + // L and U have been already set to the correct empty matrices + return true; + } + + const uword n = ipiv1.n_elem; + const uword P_rows = U.n_rows; + + podarray<blas_int> ipiv2(P_rows); + + const blas_int* ipiv1_mem = ipiv1.memptr(); + blas_int* ipiv2_mem = ipiv2.memptr(); + + for(uword i=0; i<P_rows; ++i) + { + ipiv2_mem[i] = blas_int(i); + } + + for(uword i=0; i<n; ++i) + { + const uword k = static_cast<uword>(ipiv1_mem[i]); + + if( ipiv2_mem[i] != ipiv2_mem[k] ) + { + std::swap( ipiv2_mem[i], ipiv2_mem[k] ); + L.swap_rows( static_cast<uword>(ipiv2_mem[i]), static_cast<uword>(ipiv2_mem[k]) ); + } + } + + if(L.n_cols > U.n_rows) + { + L.shed_cols(U.n_rows, L.n_cols-1); + } + + if(U.n_rows > L.n_cols) + { + U.shed_rows(L.n_cols, U.n_rows-1); + } + } + + return status; + } + + + +//! immediate eigenvalues of a symmetric real matrix using LAPACK +template<typename eT, typename T1> +inline +bool +auxlib::eig_sym(Col<eT>& eigval, const Base<eT,T1>& X) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + Mat<eT> A(X.get_ref()); + + arma_debug_check( (A.is_square() == false), "eig_sym(): given matrix is not square"); + + if(A.is_empty()) + { + eigval.reset(); + return true; + } + + eigval.set_size(A.n_rows); + + char jobz = 'N'; + char uplo = 'U'; + + blas_int N = blas_int(A.n_rows); + blas_int lwork = 3 * ( (std::max)(blas_int(1), 3*N-1) ); + blas_int info = 0; + + podarray<eT> work( static_cast<uword>(lwork) ); + + lapack::syev(&jobz, &uplo, &N, A.memptr(), &N, eigval.memptr(), work.memptr(), &lwork, &info); + + return (info == 0); + } + #else + { + arma_ignore(eigval); + arma_ignore(X); + arma_stop("eig_sym(): use of LAPACK needs to be enabled"); + return false; + } + #endif + } + + + +//! immediate eigenvalues of a hermitian complex matrix using LAPACK +template<typename T, typename T1> +inline +bool +auxlib::eig_sym(Col<T>& eigval, const Base<std::complex<T>,T1>& X) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + typedef typename std::complex<T> eT; + + Mat<eT> A(X.get_ref()); + + arma_debug_check( (A.is_square() == false), "eig_sym(): given matrix is not square"); + + if(A.is_empty()) + { + eigval.reset(); + return true; + } + + eigval.set_size(A.n_rows); + + char jobz = 'N'; + char uplo = 'U'; + + blas_int N = blas_int(A.n_rows); + blas_int lwork = 3 * ( (std::max)(blas_int(1), 2*N-1) ); + blas_int info = 0; + + podarray<eT> work( static_cast<uword>(lwork) ); + podarray<T> rwork( static_cast<uword>( (std::max)(blas_int(1), 3*N-2) ) ); + + arma_extra_debug_print("lapack::heev()"); + lapack::heev(&jobz, &uplo, &N, A.memptr(), &N, eigval.memptr(), work.memptr(), &lwork, rwork.memptr(), &info); + + return (info == 0); + } + #else + { + arma_ignore(eigval); + arma_ignore(X); + arma_stop("eig_sym(): use of LAPACK needs to be enabled"); + return false; + } + #endif + } + + + +//! immediate eigenvalues and eigenvectors of a symmetric real matrix using LAPACK +template<typename eT, typename T1> +inline +bool +auxlib::eig_sym(Col<eT>& eigval, Mat<eT>& eigvec, const Base<eT,T1>& X) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + eigvec = X.get_ref(); + + arma_debug_check( (eigvec.is_square() == false), "eig_sym(): given matrix is not square" ); + + if(eigvec.is_empty()) + { + eigval.reset(); + eigvec.reset(); + return true; + } + + eigval.set_size(eigvec.n_rows); + + char jobz = 'V'; + char uplo = 'U'; + + blas_int N = blas_int(eigvec.n_rows); + blas_int lwork = 3 * ( (std::max)(blas_int(1), 3*N-1) ); + blas_int info = 0; + + podarray<eT> work( static_cast<uword>(lwork) ); + + lapack::syev(&jobz, &uplo, &N, eigvec.memptr(), &N, eigval.memptr(), work.memptr(), &lwork, &info); + + return (info == 0); + } + #else + { + arma_ignore(eigval); + arma_ignore(eigvec); + arma_ignore(X); + arma_stop("eig_sym(): use of LAPACK needs to be enabled"); + return false; + } + #endif + } + + + +//! immediate eigenvalues and eigenvectors of a hermitian complex matrix using LAPACK +template<typename T, typename T1> +inline +bool +auxlib::eig_sym(Col<T>& eigval, Mat< std::complex<T> >& eigvec, const Base<std::complex<T>,T1>& X) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + typedef typename std::complex<T> eT; + + eigvec = X.get_ref(); + + arma_debug_check( (eigvec.is_square() == false), "eig_sym(): given matrix is not square" ); + + if(eigvec.is_empty()) + { + eigval.reset(); + eigvec.reset(); + return true; + } + + eigval.set_size(eigvec.n_rows); + + char jobz = 'V'; + char uplo = 'U'; + + blas_int N = blas_int(eigvec.n_rows); + blas_int lwork = 3 * ( (std::max)(blas_int(1), 2*N-1) ); + blas_int info = 0; + + podarray<eT> work( static_cast<uword>(lwork) ); + podarray<T> rwork( static_cast<uword>((std::max)(blas_int(1), 3*N-2)) ); + + arma_extra_debug_print("lapack::heev()"); + lapack::heev(&jobz, &uplo, &N, eigvec.memptr(), &N, eigval.memptr(), work.memptr(), &lwork, rwork.memptr(), &info); + + return (info == 0); + } + #else + { + arma_ignore(eigval); + arma_ignore(eigvec); + arma_ignore(X); + arma_stop("eig_sym(): use of LAPACK needs to be enabled"); + return false; + } + #endif + } + + + +//! immediate eigenvalues and eigenvectors of a symmetric real matrix using LAPACK (divide and conquer algorithm) +template<typename eT, typename T1> +inline +bool +auxlib::eig_sym_dc(Col<eT>& eigval, Mat<eT>& eigvec, const Base<eT,T1>& X) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + eigvec = X.get_ref(); + + arma_debug_check( (eigvec.is_square() == false), "eig_sym(): given matrix is not square" ); + + if(eigvec.is_empty()) + { + eigval.reset(); + eigvec.reset(); + return true; + } + + eigval.set_size(eigvec.n_rows); + + char jobz = 'V'; + char uplo = 'U'; + + blas_int N = blas_int(eigvec.n_rows); + blas_int lwork = 3 * (1 + 6*N + 2*(N*N)); + blas_int liwork = 3 * (3 + 5*N + 2); + blas_int info = 0; + + podarray<eT> work( static_cast<uword>( lwork) ); + podarray<blas_int> iwork( static_cast<uword>(liwork) ); + + arma_extra_debug_print("lapack::syevd()"); + lapack::syevd(&jobz, &uplo, &N, eigvec.memptr(), &N, eigval.memptr(), work.memptr(), &lwork, iwork.memptr(), &liwork, &info); + + return (info == 0); + } + #else + { + arma_ignore(eigval); + arma_ignore(eigvec); + arma_ignore(X); + arma_stop("eig_sym(): use of LAPACK needs to be enabled"); + return false; + } + #endif + } + + + +//! immediate eigenvalues and eigenvectors of a hermitian complex matrix using LAPACK (divide and conquer algorithm) +template<typename T, typename T1> +inline +bool +auxlib::eig_sym_dc(Col<T>& eigval, Mat< std::complex<T> >& eigvec, const Base<std::complex<T>,T1>& X) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + typedef typename std::complex<T> eT; + + eigvec = X.get_ref(); + + arma_debug_check( (eigvec.is_square() == false), "eig_sym(): given matrix is not square" ); + + if(eigvec.is_empty()) + { + eigval.reset(); + eigvec.reset(); + return true; + } + + eigval.set_size(eigvec.n_rows); + + char jobz = 'V'; + char uplo = 'U'; + + blas_int N = blas_int(eigvec.n_rows); + blas_int lwork = 3 * (2*N + N*N); + blas_int lrwork = 3 * (1 + 5*N + 2*(N*N)); + blas_int liwork = 3 * (3 + 5*N); + blas_int info = 0; + + podarray<eT> work( static_cast<uword>(lwork) ); + podarray<T> rwork( static_cast<uword>(lrwork) ); + podarray<blas_int> iwork( static_cast<uword>(liwork) ); + + arma_extra_debug_print("lapack::heevd()"); + lapack::heevd(&jobz, &uplo, &N, eigvec.memptr(), &N, eigval.memptr(), work.memptr(), &lwork, rwork.memptr(), &lrwork, iwork.memptr(), &liwork, &info); + + return (info == 0); + } + #else + { + arma_ignore(eigval); + arma_ignore(eigvec); + arma_ignore(X); + arma_stop("eig_sym(): use of LAPACK needs to be enabled"); + return false; + } + #endif + } + + + +//! Eigenvalues and eigenvectors of a general square real matrix using LAPACK. +//! The argument 'side' specifies which eigenvectors should be calculated +//! (see code for mode details). +template<typename T, typename T1> +inline +bool +auxlib::eig_gen + ( + Col< std::complex<T> >& eigval, + Mat<T>& l_eigvec, + Mat<T>& r_eigvec, + const Base<T,T1>& X, + const char side + ) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + char jobvl; + char jobvr; + + switch(side) + { + case 'l': // left + jobvl = 'V'; + jobvr = 'N'; + break; + + case 'r': // right + jobvl = 'N'; + jobvr = 'V'; + break; + + case 'b': // both + jobvl = 'V'; + jobvr = 'V'; + break; + + case 'n': // neither + jobvl = 'N'; + jobvr = 'N'; + break; + + default: + arma_stop("eig_gen(): parameter 'side' is invalid"); + return false; + } + + Mat<T> A(X.get_ref()); + arma_debug_check( (A.is_square() == false), "eig_gen(): given matrix is not square" ); + + if(A.is_empty()) + { + eigval.reset(); + l_eigvec.reset(); + r_eigvec.reset(); + return true; + } + + const uword A_n_rows = A.n_rows; + + eigval.set_size(A_n_rows); + + l_eigvec.set_size(A_n_rows, A_n_rows); + r_eigvec.set_size(A_n_rows, A_n_rows); + + blas_int N = blas_int(A_n_rows); + blas_int lwork = 3 * ( (std::max)(blas_int(1), 4*N) ); + blas_int info = 0; + + podarray<T> work( static_cast<uword>(lwork) ); + + podarray<T> wr(A_n_rows); + podarray<T> wi(A_n_rows); + + arma_extra_debug_print("lapack::geev()"); + lapack::geev(&jobvl, &jobvr, &N, A.memptr(), &N, wr.memptr(), wi.memptr(), l_eigvec.memptr(), &N, r_eigvec.memptr(), &N, work.memptr(), &lwork, &info); + + eigval.set_size(A_n_rows); + for(uword i=0; i<A_n_rows; ++i) + { + eigval[i] = std::complex<T>(wr[i], wi[i]); + } + + return (info == 0); + } + #else + { + arma_ignore(eigval); + arma_ignore(l_eigvec); + arma_ignore(r_eigvec); + arma_ignore(X); + arma_ignore(side); + arma_stop("eig_gen(): use of LAPACK needs to be enabled"); + return false; + } + #endif + } + + + + + +//! Eigenvalues and eigenvectors of a general square complex matrix using LAPACK +//! The argument 'side' specifies which eigenvectors should be calculated +//! (see code for mode details). +template<typename T, typename T1> +inline +bool +auxlib::eig_gen + ( + Col< std::complex<T> >& eigval, + Mat< std::complex<T> >& l_eigvec, + Mat< std::complex<T> >& r_eigvec, + const Base< std::complex<T>, T1 >& X, + const char side + ) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + typedef typename std::complex<T> eT; + + char jobvl; + char jobvr; + + switch(side) + { + case 'l': // left + jobvl = 'V'; + jobvr = 'N'; + break; + + case 'r': // right + jobvl = 'N'; + jobvr = 'V'; + break; + + case 'b': // both + jobvl = 'V'; + jobvr = 'V'; + break; + + case 'n': // neither + jobvl = 'N'; + jobvr = 'N'; + break; + + default: + arma_stop("eig_gen(): parameter 'side' is invalid"); + return false; + } + + Mat<eT> A(X.get_ref()); + arma_debug_check( (A.is_square() == false), "eig_gen(): given matrix is not square" ); + + if(A.is_empty()) + { + eigval.reset(); + l_eigvec.reset(); + r_eigvec.reset(); + return true; + } + + const uword A_n_rows = A.n_rows; + + eigval.set_size(A_n_rows); + + l_eigvec.set_size(A_n_rows, A_n_rows); + r_eigvec.set_size(A_n_rows, A_n_rows); + + blas_int N = blas_int(A_n_rows); + blas_int lwork = 3 * ( (std::max)(blas_int(1), 2*N) ); + blas_int info = 0; + + podarray<eT> work( static_cast<uword>(lwork) ); + podarray<T> rwork( static_cast<uword>(2*N) ); + + arma_extra_debug_print("lapack::cx_geev()"); + lapack::cx_geev(&jobvl, &jobvr, &N, A.memptr(), &N, eigval.memptr(), l_eigvec.memptr(), &N, r_eigvec.memptr(), &N, work.memptr(), &lwork, rwork.memptr(), &info); + + return (info == 0); + } + #else + { + arma_ignore(eigval); + arma_ignore(l_eigvec); + arma_ignore(r_eigvec); + arma_ignore(X); + arma_ignore(side); + arma_stop("eig_gen(): use of LAPACK needs to be enabled"); + return false; + } + #endif + } + + + +template<typename eT, typename T1> +inline +bool +auxlib::chol(Mat<eT>& out, const Base<eT,T1>& X) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + out = X.get_ref(); + + arma_debug_check( (out.is_square() == false), "chol(): given matrix is not square" ); + + if(out.is_empty()) + { + return true; + } + + const uword out_n_rows = out.n_rows; + + char uplo = 'U'; + blas_int n = out_n_rows; + blas_int info = 0; + + lapack::potrf(&uplo, &n, out.memptr(), &n, &info); + + for(uword col=0; col<out_n_rows; ++col) + { + eT* colptr = out.colptr(col); + + for(uword row=(col+1); row < out_n_rows; ++row) + { + colptr[row] = eT(0); + } + } + + return (info == 0); + } + #else + { + arma_ignore(out); + arma_ignore(X); + + arma_stop("chol(): use of LAPACK needs to be enabled"); + return false; + } + #endif + } + + + +template<typename eT, typename T1> +inline +bool +auxlib::qr(Mat<eT>& Q, Mat<eT>& R, const Base<eT,T1>& X) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + R = X.get_ref(); + + const uword R_n_rows = R.n_rows; + const uword R_n_cols = R.n_cols; + + if(R.is_empty()) + { + Q.eye(R_n_rows, R_n_rows); + return true; + } + + blas_int m = static_cast<blas_int>(R_n_rows); + blas_int n = static_cast<blas_int>(R_n_cols); + blas_int lwork = 0; + blas_int lwork_min = (std::max)(blas_int(1), (std::max)(m,n)); // take into account requirements of geqrf() _and_ orgqr()/ungqr() + blas_int k = (std::min)(m,n); + blas_int info = 0; + + podarray<eT> tau( static_cast<uword>(k) ); + + eT work_query[2]; + blas_int lwork_query = -1; + + lapack::geqrf(&m, &n, R.memptr(), &m, tau.memptr(), &work_query[0], &lwork_query, &info); + + if(info == 0) + { + const blas_int lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) ); + + lwork = (lwork_proposed > lwork_min) ? lwork_proposed : lwork_min; + } + else + { + return false; + } + + podarray<eT> work( static_cast<uword>(lwork) ); + + lapack::geqrf(&m, &n, R.memptr(), &m, tau.memptr(), work.memptr(), &lwork, &info); + + Q.set_size(R_n_rows, R_n_rows); + + arrayops::copy( Q.memptr(), R.memptr(), (std::min)(Q.n_elem, R.n_elem) ); + + // + // construct R + + for(uword col=0; col < R_n_cols; ++col) + { + for(uword row=(col+1); row < R_n_rows; ++row) + { + R.at(row,col) = eT(0); + } + } + + + if( (is_float<eT>::value == true) || (is_double<eT>::value == true) ) + { + lapack::orgqr(&m, &m, &k, Q.memptr(), &m, tau.memptr(), work.memptr(), &lwork, &info); + } + else + if( (is_supported_complex_float<eT>::value == true) || (is_supported_complex_double<eT>::value == true) ) + { + lapack::ungqr(&m, &m, &k, Q.memptr(), &m, tau.memptr(), work.memptr(), &lwork, &info); + } + + return (info == 0); + } + #else + { + arma_ignore(Q); + arma_ignore(R); + arma_ignore(X); + arma_stop("qr(): use of LAPACK needs to be enabled"); + return false; + } + #endif + } + + + +template<typename eT, typename T1> +inline +bool +auxlib::qr_econ(Mat<eT>& Q, Mat<eT>& R, const Base<eT,T1>& X) + { + arma_extra_debug_sigprint(); + + // This function implements a memory-efficient QR for a non-square X that has dimensions m x n. + // This basically discards the basis for the null-space. + // + // if m <= n: (use standard routine) + // Q[m,m]*R[m,n] = X[m,n] + // geqrf Needs A[m,n]: Uses R + // orgqr Needs A[m,m]: Uses Q + // otherwise: (memory-efficient routine) + // Q[m,n]*R[n,n] = X[m,n] + // geqrf Needs A[m,n]: Uses Q + // geqrf Needs A[m,n]: Uses Q + + #if defined(ARMA_USE_LAPACK) + { + if(is_Mat<T1>::value == true) + { + const unwrap<T1> tmp(X.get_ref()); + const Mat<eT>& M = tmp.M; + + if(M.n_rows < M.n_cols) + { + return auxlib::qr(Q, R, X); + } + } + + Q = X.get_ref(); + + const uword Q_n_rows = Q.n_rows; + const uword Q_n_cols = Q.n_cols; + + if( Q_n_rows <= Q_n_cols ) + { + return auxlib::qr(Q, R, Q); + } + + if(Q.is_empty()) + { + Q.set_size(Q_n_rows, 0 ); + R.set_size(0, Q_n_cols); + return true; + } + + blas_int m = static_cast<blas_int>(Q_n_rows); + blas_int n = static_cast<blas_int>(Q_n_cols); + blas_int lwork = 0; + blas_int lwork_min = (std::max)(blas_int(1), (std::max)(m,n)); // take into account requirements of geqrf() _and_ orgqr()/ungqr() + blas_int k = (std::min)(m,n); + blas_int info = 0; + + podarray<eT> tau( static_cast<uword>(k) ); + + eT work_query[2]; + blas_int lwork_query = -1; + + lapack::geqrf(&m, &n, Q.memptr(), &m, tau.memptr(), &work_query[0], &lwork_query, &info); + + if(info == 0) + { + const blas_int lwork_proposed = static_cast<blas_int>( access::tmp_real(work_query[0]) ); + + lwork = (lwork_proposed > lwork_min) ? lwork_proposed : lwork_min; + } + else + { + return false; + } + + podarray<eT> work( static_cast<uword>(lwork) ); + + lapack::geqrf(&m, &n, Q.memptr(), &m, tau.memptr(), work.memptr(), &lwork, &info); + + // Q now has the elements on and above the diagonal of the array + // contain the min(M,N)-by-N upper trapezoidal matrix Q + // (Q is upper triangular if m >= n); + // the elements below the diagonal, with the array TAU, + // represent the orthogonal matrix Q as a product of min(m,n) elementary reflectors. + + R.set_size(Q_n_cols, Q_n_cols); + + // + // construct R + + for(uword col=0; col < Q_n_cols; ++col) + { + for(uword row=0; row <= col; ++row) + { + R.at(row,col) = Q.at(row,col); + } + + for(uword row=(col+1); row < Q_n_cols; ++row) + { + R.at(row,col) = eT(0); + } + } + + if( (is_float<eT>::value == true) || (is_double<eT>::value == true) ) + { + lapack::orgqr(&m, &n, &k, Q.memptr(), &m, tau.memptr(), work.memptr(), &lwork, &info); + } + else + if( (is_supported_complex_float<eT>::value == true) || (is_supported_complex_double<eT>::value == true) ) + { + lapack::ungqr(&m, &n, &k, Q.memptr(), &m, tau.memptr(), work.memptr(), &lwork, &info); + } + + return (info == 0); + } + #else + { + arma_ignore(Q); + arma_ignore(R); + arma_ignore(X); + arma_stop("qr_econ(): use of LAPACK needs to be enabled"); + return false; + } + #endif + } + + + +template<typename eT, typename T1> +inline +bool +auxlib::svd(Col<eT>& S, const Base<eT,T1>& X, uword& X_n_rows, uword& X_n_cols) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + Mat<eT> A(X.get_ref()); + + X_n_rows = A.n_rows; + X_n_cols = A.n_cols; + + if(A.is_empty()) + { + S.reset(); + return true; + } + + Mat<eT> U(1, 1); + Mat<eT> V(1, A.n_cols); + + char jobu = 'N'; + char jobvt = 'N'; + + blas_int m = A.n_rows; + blas_int n = A.n_cols; + blas_int min_mn = (std::min)(m,n); + blas_int lda = A.n_rows; + blas_int ldu = U.n_rows; + blas_int ldvt = V.n_rows; + blas_int lwork = 0; + blas_int lwork_min = (std::max)( blas_int(1), (std::max)( (3*min_mn + (std::max)(m,n)), 5*min_mn ) ); + blas_int info = 0; + + S.set_size( static_cast<uword>(min_mn) ); + + eT work_query[2]; + blas_int lwork_query = -1; + + lapack::gesvd<eT> + ( + &jobu, &jobvt, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, &work_query[0], &lwork_query, &info + ); + + if(info == 0) + { + const blas_int lwork_proposed = static_cast<blas_int>( work_query[0] ); + + lwork = (lwork_proposed > lwork_min) ? lwork_proposed : lwork_min; + + podarray<eT> work( static_cast<uword>(lwork) ); + + lapack::gesvd<eT> + ( + &jobu, &jobvt, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, work.memptr(), &lwork, &info + ); + } + + return (info == 0); + } + #else + { + arma_ignore(S); + arma_ignore(X); + arma_ignore(X_n_rows); + arma_ignore(X_n_cols); + arma_stop("svd(): use of LAPACK needs to be enabled"); + return false; + } + #endif + } + + + +template<typename T, typename T1> +inline +bool +auxlib::svd(Col<T>& S, const Base<std::complex<T>, T1>& X, uword& X_n_rows, uword& X_n_cols) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + typedef std::complex<T> eT; + + Mat<eT> A(X.get_ref()); + + X_n_rows = A.n_rows; + X_n_cols = A.n_cols; + + if(A.is_empty()) + { + S.reset(); + return true; + } + + Mat<eT> U(1, 1); + Mat<eT> V(1, A.n_cols); + + char jobu = 'N'; + char jobvt = 'N'; + + blas_int m = A.n_rows; + blas_int n = A.n_cols; + blas_int min_mn = (std::min)(m,n); + blas_int lda = A.n_rows; + blas_int ldu = U.n_rows; + blas_int ldvt = V.n_rows; + blas_int lwork = 3 * ( (std::max)(blas_int(1), 2*min_mn+(std::max)(m,n) ) ); + blas_int info = 0; + + S.set_size( static_cast<uword>(min_mn) ); + + podarray<eT> work( static_cast<uword>(lwork ) ); + podarray< T> rwork( static_cast<uword>(5*min_mn) ); + + // let gesvd_() calculate the optimum size of the workspace + blas_int lwork_tmp = -1; + + lapack::cx_gesvd<T> + ( + &jobu, &jobvt, + &m, &n, + A.memptr(), &lda, + S.memptr(), + U.memptr(), &ldu, + V.memptr(), &ldvt, + work.memptr(), &lwork_tmp, + rwork.memptr(), + &info + ); + + if(info == 0) + { + blas_int proposed_lwork = static_cast<blas_int>(real(work[0])); + if(proposed_lwork > lwork) + { + lwork = proposed_lwork; + work.set_size( static_cast<uword>(lwork) ); + } + + lapack::cx_gesvd<T> + ( + &jobu, &jobvt, + &m, &n, + A.memptr(), &lda, + S.memptr(), + U.memptr(), &ldu, + V.memptr(), &ldvt, + work.memptr(), &lwork, + rwork.memptr(), + &info + ); + } + + return (info == 0); + } + #else + { + arma_ignore(S); + arma_ignore(X); + arma_ignore(X_n_rows); + arma_ignore(X_n_cols); + + arma_stop("svd(): use of LAPACK needs to be enabled"); + return false; + } + #endif + } + + + +template<typename eT, typename T1> +inline +bool +auxlib::svd(Col<eT>& S, const Base<eT,T1>& X) + { + arma_extra_debug_sigprint(); + + uword junk; + return auxlib::svd(S, X, junk, junk); + } + + + +template<typename T, typename T1> +inline +bool +auxlib::svd(Col<T>& S, const Base<std::complex<T>, T1>& X) + { + arma_extra_debug_sigprint(); + + uword junk; + return auxlib::svd(S, X, junk, junk); + } + + + +template<typename eT, typename T1> +inline +bool +auxlib::svd(Mat<eT>& U, Col<eT>& S, Mat<eT>& V, const Base<eT,T1>& X) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + Mat<eT> A(X.get_ref()); + + if(A.is_empty()) + { + U.eye(A.n_rows, A.n_rows); + S.reset(); + V.eye(A.n_cols, A.n_cols); + return true; + } + + U.set_size(A.n_rows, A.n_rows); + V.set_size(A.n_cols, A.n_cols); + + char jobu = 'A'; + char jobvt = 'A'; + + blas_int m = blas_int(A.n_rows); + blas_int n = blas_int(A.n_cols); + blas_int min_mn = (std::min)(m,n); + blas_int lda = blas_int(A.n_rows); + blas_int ldu = blas_int(U.n_rows); + blas_int ldvt = blas_int(V.n_rows); + blas_int lwork_min = (std::max)( blas_int(1), (std::max)( (3*min_mn + (std::max)(m,n)), 5*min_mn ) ); + blas_int lwork = 0; + blas_int info = 0; + + S.set_size( static_cast<uword>(min_mn) ); + + // let gesvd_() calculate the optimum size of the workspace + eT work_query[2]; + blas_int lwork_query = -1; + + lapack::gesvd<eT> + ( + &jobu, &jobvt, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, &work_query[0], &lwork_query, &info + ); + + if(info == 0) + { + const blas_int lwork_proposed = static_cast<blas_int>( work_query[0] ); + + lwork = (lwork_proposed > lwork_min) ? lwork_proposed : lwork_min; + + podarray<eT> work( static_cast<uword>(lwork) ); + + lapack::gesvd<eT> + ( + &jobu, &jobvt, &m, &n, A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, work.memptr(), &lwork, &info + ); + + op_strans::apply(V,V); // op_strans will work out that an in-place transpose can be done + } + + return (info == 0); + } + #else + { + arma_ignore(U); + arma_ignore(S); + arma_ignore(V); + arma_ignore(X); + arma_stop("svd(): use of LAPACK needs to be enabled"); + return false; + } + #endif + } + + + +template<typename T, typename T1> +inline +bool +auxlib::svd(Mat< std::complex<T> >& U, Col<T>& S, Mat< std::complex<T> >& V, const Base< std::complex<T>, T1>& X) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + typedef std::complex<T> eT; + + Mat<eT> A(X.get_ref()); + + if(A.is_empty()) + { + U.eye(A.n_rows, A.n_rows); + S.reset(); + V.eye(A.n_cols, A.n_cols); + return true; + } + + U.set_size(A.n_rows, A.n_rows); + V.set_size(A.n_cols, A.n_cols); + + char jobu = 'A'; + char jobvt = 'A'; + + blas_int m = blas_int(A.n_rows); + blas_int n = blas_int(A.n_cols); + blas_int min_mn = (std::min)(m,n); + blas_int lda = blas_int(A.n_rows); + blas_int ldu = blas_int(U.n_rows); + blas_int ldvt = blas_int(V.n_rows); + blas_int lwork = 3 * ( (std::max)(blas_int(1), 2*min_mn + (std::max)(m,n) ) ); + blas_int info = 0; + + S.set_size( static_cast<uword>(min_mn) ); + + podarray<eT> work( static_cast<uword>(lwork ) ); + podarray<T> rwork( static_cast<uword>(5*min_mn) ); + + // let gesvd_() calculate the optimum size of the workspace + blas_int lwork_tmp = -1; + lapack::cx_gesvd<T> + ( + &jobu, &jobvt, + &m, &n, + A.memptr(), &lda, + S.memptr(), + U.memptr(), &ldu, + V.memptr(), &ldvt, + work.memptr(), &lwork_tmp, + rwork.memptr(), + &info + ); + + if(info == 0) + { + blas_int proposed_lwork = static_cast<blas_int>(real(work[0])); + + if(proposed_lwork > lwork) + { + lwork = proposed_lwork; + work.set_size( static_cast<uword>(lwork) ); + } + + lapack::cx_gesvd<T> + ( + &jobu, &jobvt, + &m, &n, + A.memptr(), &lda, + S.memptr(), + U.memptr(), &ldu, + V.memptr(), &ldvt, + work.memptr(), &lwork, + rwork.memptr(), + &info + ); + + op_htrans::apply(V,V); // op_htrans will work out that an in-place transpose can be done + } + + return (info == 0); + } + #else + { + arma_ignore(U); + arma_ignore(S); + arma_ignore(V); + arma_ignore(X); + arma_stop("svd(): use of LAPACK needs to be enabled"); + return false; + } + #endif + } + + + +template<typename eT, typename T1> +inline +bool +auxlib::svd_econ(Mat<eT>& U, Col<eT>& S, Mat<eT>& V, const Base<eT,T1>& X, const char mode) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + Mat<eT> A(X.get_ref()); + + blas_int m = blas_int(A.n_rows); + blas_int n = blas_int(A.n_cols); + blas_int min_mn = (std::min)(m,n); + blas_int lda = blas_int(A.n_rows); + + S.set_size( static_cast<uword>(min_mn) ); + + blas_int ldu = 0; + blas_int ldvt = 0; + + char jobu; + char jobvt; + + switch(mode) + { + case 'l': + jobu = 'S'; + jobvt = 'N'; + + ldu = m; + ldvt = 1; + + U.set_size( static_cast<uword>(ldu), static_cast<uword>(min_mn) ); + V.reset(); + + break; + + + case 'r': + jobu = 'N'; + jobvt = 'S'; + + ldu = 1; + ldvt = (std::min)(m,n); + + U.reset(); + V.set_size( static_cast<uword>(ldvt), static_cast<uword>(n) ); + + break; + + + case 'b': + jobu = 'S'; + jobvt = 'S'; + + ldu = m; + ldvt = (std::min)(m,n); + + U.set_size( static_cast<uword>(ldu), static_cast<uword>(min_mn) ); + V.set_size( static_cast<uword>(ldvt), static_cast<uword>(n ) ); + + break; + + + default: + U.reset(); + S.reset(); + V.reset(); + return false; + } + + + if(A.is_empty()) + { + U.eye(); + S.reset(); + V.eye(); + return true; + } + + + blas_int lwork = 3 * ( (std::max)(blas_int(1), (std::max)( (3*min_mn + (std::max)(m,n)), 5*min_mn ) ) ); + blas_int info = 0; + + + podarray<eT> work( static_cast<uword>(lwork) ); + + // let gesvd_() calculate the optimum size of the workspace + blas_int lwork_tmp = -1; + + lapack::gesvd<eT> + ( + &jobu, &jobvt, + &m, &n, + A.memptr(), &lda, + S.memptr(), + U.memptr(), &ldu, + V.memptr(), &ldvt, + work.memptr(), &lwork_tmp, + &info + ); + + if(info == 0) + { + blas_int proposed_lwork = static_cast<blas_int>(work[0]); + if(proposed_lwork > lwork) + { + lwork = proposed_lwork; + work.set_size( static_cast<uword>(lwork) ); + } + + lapack::gesvd<eT> + ( + &jobu, &jobvt, + &m, &n, + A.memptr(), &lda, + S.memptr(), + U.memptr(), &ldu, + V.memptr(), &ldvt, + work.memptr(), &lwork, + &info + ); + + op_strans::apply(V,V); // op_strans will work out that an in-place transpose can be done + } + + return (info == 0); + } + #else + { + arma_ignore(U); + arma_ignore(S); + arma_ignore(V); + arma_ignore(X); + arma_ignore(mode); + arma_stop("svd(): use of LAPACK needs to be enabled"); + return false; + } + #endif + } + + + +template<typename T, typename T1> +inline +bool +auxlib::svd_econ(Mat< std::complex<T> >& U, Col<T>& S, Mat< std::complex<T> >& V, const Base< std::complex<T>, T1>& X, const char mode) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + typedef std::complex<T> eT; + + Mat<eT> A(X.get_ref()); + + blas_int m = blas_int(A.n_rows); + blas_int n = blas_int(A.n_cols); + blas_int min_mn = (std::min)(m,n); + blas_int lda = blas_int(A.n_rows); + + S.set_size( static_cast<uword>(min_mn) ); + + blas_int ldu = 0; + blas_int ldvt = 0; + + char jobu; + char jobvt; + + switch(mode) + { + case 'l': + jobu = 'S'; + jobvt = 'N'; + + ldu = m; + ldvt = 1; + + U.set_size( static_cast<uword>(ldu), static_cast<uword>(min_mn) ); + V.reset(); + + break; + + + case 'r': + jobu = 'N'; + jobvt = 'S'; + + ldu = 1; + ldvt = (std::min)(m,n); + + U.reset(); + V.set_size( static_cast<uword>(ldvt), static_cast<uword>(n) ); + + break; + + + case 'b': + jobu = 'S'; + jobvt = 'S'; + + ldu = m; + ldvt = (std::min)(m,n); + + U.set_size( static_cast<uword>(ldu), static_cast<uword>(min_mn) ); + V.set_size( static_cast<uword>(ldvt), static_cast<uword>(n) ); + + break; + + + default: + U.reset(); + S.reset(); + V.reset(); + return false; + } + + + if(A.is_empty()) + { + U.eye(); + S.reset(); + V.eye(); + return true; + } + + + blas_int lwork = 3 * ( (std::max)(blas_int(1), (std::max)( (3*min_mn + (std::max)(m,n)), 5*min_mn ) ) ); + blas_int info = 0; + + + podarray<eT> work( static_cast<uword>(lwork ) ); + podarray<T> rwork( static_cast<uword>(5*min_mn) ); + + // let gesvd_() calculate the optimum size of the workspace + blas_int lwork_tmp = -1; + + lapack::cx_gesvd<T> + ( + &jobu, &jobvt, + &m, &n, + A.memptr(), &lda, + S.memptr(), + U.memptr(), &ldu, + V.memptr(), &ldvt, + work.memptr(), &lwork_tmp, + rwork.memptr(), + &info + ); + + if(info == 0) + { + blas_int proposed_lwork = static_cast<blas_int>(real(work[0])); + if(proposed_lwork > lwork) + { + lwork = proposed_lwork; + work.set_size( static_cast<uword>(lwork) ); + } + + lapack::cx_gesvd<T> + ( + &jobu, &jobvt, + &m, &n, + A.memptr(), &lda, + S.memptr(), + U.memptr(), &ldu, + V.memptr(), &ldvt, + work.memptr(), &lwork, + rwork.memptr(), + &info + ); + + op_htrans::apply(V,V); // op_strans will work out that an in-place transpose can be done + } + + return (info == 0); + } + #else + { + arma_ignore(U); + arma_ignore(S); + arma_ignore(V); + arma_ignore(X); + arma_ignore(mode); + arma_stop("svd(): use of LAPACK needs to be enabled"); + return false; + } + #endif + } + + + +template<typename eT, typename T1> +inline +bool +auxlib::svd_dc(Mat<eT>& U, Col<eT>& S, Mat<eT>& V, const Base<eT,T1>& X) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + Mat<eT> A(X.get_ref()); + + if(A.is_empty()) + { + U.eye(A.n_rows, A.n_rows); + S.reset(); + V.eye(A.n_cols, A.n_cols); + return true; + } + + U.set_size(A.n_rows, A.n_rows); + V.set_size(A.n_cols, A.n_cols); + + char jobz = 'A'; + + blas_int m = blas_int(A.n_rows); + blas_int n = blas_int(A.n_cols); + blas_int min_mn = (std::min)(m,n); + blas_int lda = blas_int(A.n_rows); + blas_int ldu = blas_int(U.n_rows); + blas_int ldvt = blas_int(V.n_rows); + blas_int lwork = 3 * ( 3*min_mn*min_mn + (std::max)( (std::max)(m,n), 4*min_mn*min_mn + 4*min_mn ) ); + blas_int info = 0; + + S.set_size( static_cast<uword>(min_mn) ); + + podarray<eT> work( static_cast<uword>(lwork ) ); + podarray<blas_int> iwork( static_cast<uword>(8*min_mn) ); + + lapack::gesdd<eT> + ( + &jobz, &m, &n, + A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, + work.memptr(), &lwork, iwork.memptr(), &info + ); + + op_strans::apply(V,V); // op_strans will work out that an in-place transpose can be done + + return (info == 0); + } + #else + { + arma_ignore(U); + arma_ignore(S); + arma_ignore(V); + arma_ignore(X); + arma_stop("svd(): use of LAPACK needs to be enabled"); + return false; + } + #endif + } + + + +template<typename T, typename T1> +inline +bool +auxlib::svd_dc(Mat< std::complex<T> >& U, Col<T>& S, Mat< std::complex<T> >& V, const Base< std::complex<T>, T1>& X) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + typedef std::complex<T> eT; + + Mat<eT> A(X.get_ref()); + + if(A.is_empty()) + { + U.eye(A.n_rows, A.n_rows); + S.reset(); + V.eye(A.n_cols, A.n_cols); + return true; + } + + U.set_size(A.n_rows, A.n_rows); + V.set_size(A.n_cols, A.n_cols); + + char jobz = 'A'; + + blas_int m = blas_int(A.n_rows); + blas_int n = blas_int(A.n_cols); + blas_int min_mn = (std::min)(m,n); + blas_int lda = blas_int(A.n_rows); + blas_int ldu = blas_int(U.n_rows); + blas_int ldvt = blas_int(V.n_rows); + blas_int lwork = 3 * (min_mn*min_mn + 2*min_mn + (std::max)(m,n)); + blas_int info = 0; + + S.set_size( static_cast<uword>(min_mn) ); + + podarray<eT> work( static_cast<uword>(lwork ) ); + podarray<T> rwork( static_cast<uword>(5*min_mn*min_mn + 7*min_mn) ); + podarray<blas_int> iwork( static_cast<uword>(8*min_mn ) ); + + lapack::cx_gesdd<T> + ( + &jobz, &m, &n, + A.memptr(), &lda, S.memptr(), U.memptr(), &ldu, V.memptr(), &ldvt, + work.memptr(), &lwork, rwork.memptr(), iwork.memptr(), &info + ); + + op_htrans::apply(V,V); // op_htrans will work out that an in-place transpose can be done + + return (info == 0); + } + #else + { + arma_ignore(U); + arma_ignore(S); + arma_ignore(V); + arma_ignore(X); + arma_stop("svd(): use of LAPACK needs to be enabled"); + return false; + } + #endif + } + + + +//! Solve a system of linear equations. +//! Assumes that A.n_rows = A.n_cols and B.n_rows = A.n_rows +template<typename eT, typename T1> +inline +bool +auxlib::solve(Mat<eT>& out, Mat<eT>& A, const Base<eT,T1>& X, const bool slow) + { + arma_extra_debug_sigprint(); + + bool status = false; + + const uword A_n_rows = A.n_rows; + + if( (A_n_rows <= 4) && (slow == false) ) + { + Mat<eT> A_inv; + + status = auxlib::inv_noalias_tinymat(A_inv, A, A_n_rows); + + if(status == true) + { + const unwrap_check<T1> Y( X.get_ref(), out ); + const Mat<eT>& B = Y.M; + + const uword B_n_rows = B.n_rows; + const uword B_n_cols = B.n_cols; + + arma_debug_check( (A_n_rows != B_n_rows), "solve(): number of rows in the given objects must be the same" ); + + if(A.is_empty() || B.is_empty()) + { + out.zeros(A.n_cols, B_n_cols); + return true; + } + + out.set_size(A_n_rows, B_n_cols); + + gemm_emul<false,false,false,false>::apply(out, A_inv, B); + + return true; + } + } + + if( (A_n_rows > 4) || (status == false) ) + { + out = X.get_ref(); + + const uword B_n_rows = out.n_rows; + const uword B_n_cols = out.n_cols; + + arma_debug_check( (A_n_rows != B_n_rows), "solve(): number of rows in the given objects must be the same" ); + + if(A.is_empty() || out.is_empty()) + { + out.zeros(A.n_cols, B_n_cols); + return true; + } + + #if defined(ARMA_USE_ATLAS) + { + podarray<int> ipiv(A_n_rows + 2); // +2 for paranoia: old versions of Atlas might be trashing memory + + int info = atlas::clapack_gesv<eT>(atlas::CblasColMajor, A_n_rows, B_n_cols, A.memptr(), A_n_rows, ipiv.memptr(), out.memptr(), A_n_rows); + + return (info == 0); + } + #elif defined(ARMA_USE_LAPACK) + { + blas_int n = blas_int(A_n_rows); // assuming A is square + blas_int lda = blas_int(A_n_rows); + blas_int ldb = blas_int(A_n_rows); + blas_int nrhs = blas_int(B_n_cols); + blas_int info = 0; + + podarray<blas_int> ipiv(A_n_rows + 2); // +2 for paranoia: some versions of Lapack might be trashing memory + + arma_extra_debug_print("lapack::gesv()"); + lapack::gesv<eT>(&n, &nrhs, A.memptr(), &lda, ipiv.memptr(), out.memptr(), &ldb, &info); + + arma_extra_debug_print("lapack::gesv() -- finished"); + + return (info == 0); + } + #else + { + arma_stop("solve(): use of ATLAS or LAPACK needs to be enabled"); + return false; + } + #endif + } + + return true; + } + + + +//! Solve an over-determined system. +//! Assumes that A.n_rows > A.n_cols and B.n_rows = A.n_rows +template<typename eT, typename T1> +inline +bool +auxlib::solve_od(Mat<eT>& out, Mat<eT>& A, const Base<eT,T1>& X) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + Mat<eT> tmp = X.get_ref(); + + const uword A_n_rows = A.n_rows; + const uword A_n_cols = A.n_cols; + + const uword B_n_rows = tmp.n_rows; + const uword B_n_cols = tmp.n_cols; + + arma_debug_check( (A_n_rows != B_n_rows), "solve(): number of rows in the given objects must be the same" ); + + out.set_size(A_n_cols, B_n_cols); + + if(A.is_empty() || tmp.is_empty()) + { + out.zeros(); + return true; + } + + char trans = 'N'; + + blas_int m = blas_int(A_n_rows); + blas_int n = blas_int(A_n_cols); + blas_int lda = blas_int(A_n_rows); + blas_int ldb = blas_int(A_n_rows); + blas_int nrhs = blas_int(B_n_cols); + blas_int lwork = 3 * ( (std::max)(blas_int(1), n + (std::max)(n, nrhs)) ); + blas_int info = 0; + + podarray<eT> work( static_cast<uword>(lwork) ); + + // NOTE: the dgels() function in the lapack library supplied by ATLAS 3.6 seems to have problems + arma_extra_debug_print("lapack::gels()"); + lapack::gels<eT>( &trans, &m, &n, &nrhs, A.memptr(), &lda, tmp.memptr(), &ldb, work.memptr(), &lwork, &info ); + + arma_extra_debug_print("lapack::gels() -- finished"); + + for(uword col=0; col<B_n_cols; ++col) + { + arrayops::copy( out.colptr(col), tmp.colptr(col), A_n_cols ); + } + + return (info == 0); + } + #else + { + arma_ignore(out); + arma_ignore(A); + arma_ignore(X); + arma_stop("solve(): use of LAPACK needs to be enabled"); + return false; + } + #endif + } + + + +//! Solve an under-determined system. +//! Assumes that A.n_rows < A.n_cols and B.n_rows = A.n_rows +template<typename eT, typename T1> +inline +bool +auxlib::solve_ud(Mat<eT>& out, Mat<eT>& A, const Base<eT,T1>& X) + { + arma_extra_debug_sigprint(); + + // TODO: this function provides the same results as Octave 3.4.2. + // TODO: however, these results are different than Matlab 7.12.0.635. + // TODO: figure out whether both Octave and Matlab are correct, or only one of them + + #if defined(ARMA_USE_LAPACK) + { + const unwrap<T1> Y( X.get_ref() ); + const Mat<eT>& B = Y.M; + + const uword A_n_rows = A.n_rows; + const uword A_n_cols = A.n_cols; + + const uword B_n_rows = B.n_rows; + const uword B_n_cols = B.n_cols; + + arma_debug_check( (A_n_rows != B_n_rows), "solve(): number of rows in the given objects must be the same" ); + + // B could be an alias of "out", hence we need to check whether B is empty before setting the size of "out" + if(A.is_empty() || B.is_empty()) + { + out.zeros(A_n_cols, B_n_cols); + return true; + } + + char trans = 'N'; + + blas_int m = blas_int(A_n_rows); + blas_int n = blas_int(A_n_cols); + blas_int lda = blas_int(A_n_rows); + blas_int ldb = blas_int(A_n_cols); + blas_int nrhs = blas_int(B_n_cols); + blas_int lwork = 3 * ( (std::max)(blas_int(1), m + (std::max)(m,nrhs)) ); + blas_int info = 0; + + Mat<eT> tmp(A_n_cols, B_n_cols); + tmp.zeros(); + + for(uword col=0; col<B_n_cols; ++col) + { + eT* tmp_colmem = tmp.colptr(col); + + arrayops::copy( tmp_colmem, B.colptr(col), B_n_rows ); + + for(uword row=B_n_rows; row<A_n_cols; ++row) + { + tmp_colmem[row] = eT(0); + } + } + + podarray<eT> work( static_cast<uword>(lwork) ); + + // NOTE: the dgels() function in the lapack library supplied by ATLAS 3.6 seems to have problems + arma_extra_debug_print("lapack::gels()"); + lapack::gels<eT>( &trans, &m, &n, &nrhs, A.memptr(), &lda, tmp.memptr(), &ldb, work.memptr(), &lwork, &info ); + + arma_extra_debug_print("lapack::gels() -- finished"); + + out.set_size(A_n_cols, B_n_cols); + + for(uword col=0; col<B_n_cols; ++col) + { + arrayops::copy( out.colptr(col), tmp.colptr(col), A_n_cols ); + } + + return (info == 0); + } + #else + { + arma_ignore(out); + arma_ignore(A); + arma_ignore(X); + arma_stop("solve(): use of LAPACK needs to be enabled"); + return false; + } + #endif + } + + + +// +// solve_tr + +template<typename eT> +inline +bool +auxlib::solve_tr(Mat<eT>& out, const Mat<eT>& A, const Mat<eT>& B, const uword layout) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + if(A.is_empty() || B.is_empty()) + { + out.zeros(A.n_cols, B.n_cols); + return true; + } + + out = B; + + char uplo = (layout == 0) ? 'U' : 'L'; + char trans = 'N'; + char diag = 'N'; + blas_int n = blas_int(A.n_rows); + blas_int nrhs = blas_int(B.n_cols); + blas_int info = 0; + + lapack::trtrs<eT>(&uplo, &trans, &diag, &n, &nrhs, A.memptr(), &n, out.memptr(), &n, &info); + + return (info == 0); + } + #else + { + arma_ignore(out); + arma_ignore(A); + arma_ignore(B); + arma_ignore(layout); + arma_stop("solve(): use of LAPACK needs to be enabled"); + return false; + } + #endif + } + + + +// +// Schur decomposition + +template<typename eT> +inline +bool +auxlib::schur_dec(Mat<eT>& Z, Mat<eT>& T, const Mat<eT>& A) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + arma_debug_check( (A.is_square() == false), "schur_dec(): given matrix is not square" ); + + if(A.is_empty()) + { + Z.reset(); + T.reset(); + return true; + } + + const uword A_n_rows = A.n_rows; + + Z.set_size(A_n_rows, A_n_rows); + T = A; + + char jobvs = 'V'; // get Schur vectors (Z) + char sort = 'N'; // do not sort eigenvalues/vectors + blas_int* select = 0; // pointer to sorting function + blas_int n = blas_int(A_n_rows); + blas_int sdim = 0; // output for sorting + blas_int lwork = 3 * ( (std::max)(blas_int(1), 3*n) ); + blas_int info = 0; + + podarray<eT> work( static_cast<uword>(lwork) ); + podarray<blas_int> bwork(A_n_rows); + + podarray<eT> wr(A_n_rows); // output for eigenvalues + podarray<eT> wi(A_n_rows); // output for eigenvalues + + lapack::gees(&jobvs, &sort, select, &n, T.memptr(), &n, &sdim, wr.memptr(), wi.memptr(), Z.memptr(), &n, work.memptr(), &lwork, bwork.memptr(), &info); + + return (info == 0); + } + #else + { + arma_ignore(Z); + arma_ignore(T); + arma_ignore(A); + + arma_stop("schur_dec(): use of LAPACK needs to be enabled"); + return false; + } + #endif + } + + + +template<typename cT> +inline +bool +auxlib::schur_dec(Mat<std::complex<cT> >& Z, Mat<std::complex<cT> >& T, const Mat<std::complex<cT> >& A) + { + arma_extra_debug_sigprint(); + + #if defined(ARMA_USE_LAPACK) + { + arma_debug_check( (A.is_square() == false), "schur_dec(): matrix A is not square" ); + + if(A.is_empty()) + { + Z.reset(); + T.reset(); + return true; + } + + typedef std::complex<cT> eT; + + const uword A_n_rows = A.n_rows; + + Z.set_size(A_n_rows, A_n_rows); + T = A; + + char jobvs = 'V'; // get Schur vectors (Z) + char sort = 'N'; // do not sort eigenvalues/vectors + blas_int* select = 0; // pointer to sorting function + blas_int n = blas_int(A_n_rows); + blas_int sdim = 0; // output for sorting + blas_int lwork = 3 * ( (std::max)(blas_int(1), 2*n) ); + blas_int info = 0; + + podarray<eT> work( static_cast<uword>(lwork) ); + podarray<blas_int> bwork(A_n_rows); + + podarray<eT> w(A_n_rows); // output for eigenvalues + podarray<cT> rwork(A_n_rows); + + lapack::cx_gees(&jobvs, &sort, select, &n, T.memptr(), &n, &sdim, w.memptr(), Z.memptr(), &n, work.memptr(), &lwork, rwork.memptr(), bwork.memptr(), &info); + + return (info == 0); + } + #else + { + arma_ignore(Z); + arma_ignore(T); + arma_ignore(A); + + arma_stop("schur_dec(): use of LAPACK needs to be enabled"); + return false; + } + #endif + } + + + +// +// syl (solution of the Sylvester equation AX + XB = C) + +template<typename eT> +inline +bool +auxlib::syl(Mat<eT>& X, const Mat<eT>& A, const Mat<eT>& B, const Mat<eT>& C) + { + arma_extra_debug_sigprint(); + + arma_debug_check + ( + (A.is_square() == false) || (B.is_square() == false), + "syl(): given matrix is not square" + ); + + arma_debug_check + ( + (C.n_rows != A.n_rows) || (C.n_cols != B.n_cols), + "syl(): matrices are not conformant" + ); + + if(A.is_empty() || B.is_empty() || C.is_empty()) + { + X.reset(); + return true; + } + + #if defined(ARMA_USE_LAPACK) + { + Mat<eT> Z1, Z2, T1, T2; + + const bool status_sd1 = auxlib::schur_dec(Z1, T1, A); + const bool status_sd2 = auxlib::schur_dec(Z2, T2, B); + + if( (status_sd1 == false) || (status_sd2 == false) ) + { + return false; + } + + char trana = 'N'; + char tranb = 'N'; + blas_int isgn = +1; + blas_int m = blas_int(T1.n_rows); + blas_int n = blas_int(T2.n_cols); + + eT scale = eT(0); + blas_int info = 0; + + Mat<eT> Y = trans(Z1) * C * Z2; + + lapack::trsyl<eT>(&trana, &tranb, &isgn, &m, &n, T1.memptr(), &m, T2.memptr(), &n, Y.memptr(), &m, &scale, &info); + + //Y /= scale; + Y /= (-scale); + + X = Z1 * Y * trans(Z2); + + return (info >= 0); + } + #else + { + arma_stop("syl(): use of LAPACK needs to be enabled"); + return false; + } + #endif + } + + + +// +// lyap (solution of the continuous Lyapunov equation AX + XA^H + Q = 0) + +template<typename eT> +inline +bool +auxlib::lyap(Mat<eT>& X, const Mat<eT>& A, const Mat<eT>& Q) + { + arma_extra_debug_sigprint(); + + arma_debug_check( (A.is_square() == false), "lyap(): matrix A is not square"); + arma_debug_check( (Q.is_square() == false), "lyap(): matrix Q is not square"); + arma_debug_check( (A.n_rows != Q.n_rows), "lyap(): matrices A and Q have different dimensions"); + + Mat<eT> htransA; + op_htrans::apply_noalias(htransA, A); + + const Mat<eT> mQ = -Q; + + return auxlib::syl(X, A, htransA, mQ); + } + + + +// +// dlyap (solution of the discrete Lyapunov equation AXA^H - X + Q = 0) + +template<typename eT> +inline +bool +auxlib::dlyap(Mat<eT>& X, const Mat<eT>& A, const Mat<eT>& Q) + { + arma_extra_debug_sigprint(); + + arma_debug_check( (A.is_square() == false), "dlyap(): matrix A is not square"); + arma_debug_check( (Q.is_square() == false), "dlyap(): matrix Q is not square"); + arma_debug_check( (A.n_rows != Q.n_rows), "dlyap(): matrices A and Q have different dimensions"); + + const Col<eT> vecQ = reshape(Q, Q.n_elem, 1); + + const Mat<eT> M = eye< Mat<eT> >(Q.n_elem, Q.n_elem) - kron(conj(A), A); + + Col<eT> vecX; + + const bool status = solve(vecX, M, vecQ); + + if(status == true) + { + X = reshape(vecX, Q.n_rows, Q.n_cols); + return true; + } + else + { + X.reset(); + return false; + } + } + + + +//! @}