auxlib_meat.hpp

Go to the documentation of this file.
00001 // Copyright (C) 2009 NICTA and the authors listed below
00002 // 
00003 // Authors:
00004 // - Conrad Sanderson (conradsand at ieee dot org)
00005 // - Edmund Highcock (edmund dot highcock at merton dot ox dot ac dot uk)
00006 // 
00007 // This file is part of the Armadillo C++ library.
00008 // It is provided without any warranty of fitness
00009 // for any purpose. You can redistribute this file
00010 // and/or modify it under the terms of the GNU
00011 // Lesser General Public License (LGPL) as published
00012 // by the Free Software Foundation, either version 3
00013 // of the License or (at your option) any later version.
00014 // (see http://www.opensource.org/licenses for more info)
00015 
00016 
00017 //! \addtogroup auxlib
00018 //! @{
00019 
00020 
00021 //! immediate matrix inverse
00022 template<typename eT>
00023 inline
00024 void
00025 auxlib::inv_noalias(Mat<eT>& out, const Mat<eT>& X)
00026   {
00027   arma_extra_debug_sigprint();
00028   
00029   switch(X.n_rows)
00030     {
00031     case 1:
00032       {
00033       out.set_size(1,1);
00034       out[0] = eT(1) / X[0];
00035       };
00036       break;
00037       
00038     case 2:
00039       {
00040       out.set_size(2,2);
00041       
00042       const eT a = X.at(0,0);
00043       const eT b = X.at(0,1);
00044       const eT c = X.at(1,0);
00045       const eT d = X.at(1,1);
00046       
00047       const eT k = eT(1) / (a*d - b*c);
00048       
00049       out.at(0,0) =  d*k;
00050       out.at(0,1) = -b*k;
00051       out.at(1,0) = -c*k;
00052       out.at(1,1) =  a*k;
00053       };
00054       break;
00055     
00056     case 3:
00057       {
00058       out.set_size(3,3);
00059       
00060       const eT* X_col0 = X.colptr(0);
00061       const eT a11 = X_col0[0];
00062       const eT a21 = X_col0[1];
00063       const eT a31 = X_col0[2];
00064       
00065       const eT* X_col1 = X.colptr(1);
00066       const eT a12 = X_col1[0];
00067       const eT a22 = X_col1[1];
00068       const eT a32 = X_col1[2];
00069       
00070       const eT* X_col2 = X.colptr(2);
00071       const eT a13 = X_col2[0];
00072       const eT a23 = X_col2[1];
00073       const eT a33 = X_col2[2];
00074       
00075       const eT k = eT(1) / ( a11*(a33*a22 - a32*a23) - a21*(a33*a12-a32*a13) + a31*(a23*a12 - a22*a13) );
00076       
00077       
00078       eT* out_col0 = out.colptr(0);
00079       out_col0[0] =  (a33*a22 - a32*a23) * k;
00080       out_col0[1] = -(a33*a21 - a31*a23) * k;
00081       out_col0[2] =  (a32*a21 - a31*a22) * k;
00082       
00083       eT* out_col1 = out.colptr(1);
00084       out_col1[0] = -(a33*a12 - a32*a13) * k;
00085       out_col1[1] =  (a33*a11 - a31*a13) * k;
00086       out_col1[2] = -(a32*a11 - a31*a12) * k;
00087       
00088       eT* out_col2 = out.colptr(2);
00089       out_col2[0] =  (a23*a12 - a22*a13) * k;
00090       out_col2[1] = -(a23*a11 - a21*a13) * k;
00091       out_col2[2] =  (a22*a11 - a21*a12) * k;
00092       };
00093       break;
00094       
00095     case 4:
00096       {
00097       out.set_size(4,4);
00098       
00099       out.at(0,0) = X.at(1,2)*X.at(2,3)*X.at(3,1) - X.at(1,3)*X.at(2,2)*X.at(3,1) + X.at(1,3)*X.at(2,1)*X.at(3,2) - X.at(1,1)*X.at(2,3)*X.at(3,2) - X.at(1,2)*X.at(2,1)*X.at(3,3) + X.at(1,1)*X.at(2,2)*X.at(3,3);
00100       out.at(1,0) = X.at(1,3)*X.at(2,2)*X.at(3,0) - X.at(1,2)*X.at(2,3)*X.at(3,0) - X.at(1,3)*X.at(2,0)*X.at(3,2) + X.at(1,0)*X.at(2,3)*X.at(3,2) + X.at(1,2)*X.at(2,0)*X.at(3,3) - X.at(1,0)*X.at(2,2)*X.at(3,3);
00101       out.at(2,0) = X.at(1,1)*X.at(2,3)*X.at(3,0) - X.at(1,3)*X.at(2,1)*X.at(3,0) + X.at(1,3)*X.at(2,0)*X.at(3,1) - X.at(1,0)*X.at(2,3)*X.at(3,1) - X.at(1,1)*X.at(2,0)*X.at(3,3) + X.at(1,0)*X.at(2,1)*X.at(3,3);
00102       out.at(3,0) = X.at(1,2)*X.at(2,1)*X.at(3,0) - X.at(1,1)*X.at(2,2)*X.at(3,0) - X.at(1,2)*X.at(2,0)*X.at(3,1) + X.at(1,0)*X.at(2,2)*X.at(3,1) + X.at(1,1)*X.at(2,0)*X.at(3,2) - X.at(1,0)*X.at(2,1)*X.at(3,2);
00103       
00104       out.at(0,1) = X.at(0,3)*X.at(2,2)*X.at(3,1) - X.at(0,2)*X.at(2,3)*X.at(3,1) - X.at(0,3)*X.at(2,1)*X.at(3,2) + X.at(0,1)*X.at(2,3)*X.at(3,2) + X.at(0,2)*X.at(2,1)*X.at(3,3) - X.at(0,1)*X.at(2,2)*X.at(3,3);
00105       out.at(1,1) = X.at(0,2)*X.at(2,3)*X.at(3,0) - X.at(0,3)*X.at(2,2)*X.at(3,0) + X.at(0,3)*X.at(2,0)*X.at(3,2) - X.at(0,0)*X.at(2,3)*X.at(3,2) - X.at(0,2)*X.at(2,0)*X.at(3,3) + X.at(0,0)*X.at(2,2)*X.at(3,3);
00106       out.at(2,1) = X.at(0,3)*X.at(2,1)*X.at(3,0) - X.at(0,1)*X.at(2,3)*X.at(3,0) - X.at(0,3)*X.at(2,0)*X.at(3,1) + X.at(0,0)*X.at(2,3)*X.at(3,1) + X.at(0,1)*X.at(2,0)*X.at(3,3) - X.at(0,0)*X.at(2,1)*X.at(3,3);
00107       out.at(3,1) = X.at(0,1)*X.at(2,2)*X.at(3,0) - X.at(0,2)*X.at(2,1)*X.at(3,0) + X.at(0,2)*X.at(2,0)*X.at(3,1) - X.at(0,0)*X.at(2,2)*X.at(3,1) - X.at(0,1)*X.at(2,0)*X.at(3,2) + X.at(0,0)*X.at(2,1)*X.at(3,2);
00108       
00109       out.at(0,2) = X.at(0,2)*X.at(1,3)*X.at(3,1) - X.at(0,3)*X.at(1,2)*X.at(3,1) + X.at(0,3)*X.at(1,1)*X.at(3,2) - X.at(0,1)*X.at(1,3)*X.at(3,2) - X.at(0,2)*X.at(1,1)*X.at(3,3) + X.at(0,1)*X.at(1,2)*X.at(3,3);
00110       out.at(1,2) = X.at(0,3)*X.at(1,2)*X.at(3,0) - X.at(0,2)*X.at(1,3)*X.at(3,0) - X.at(0,3)*X.at(1,0)*X.at(3,2) + X.at(0,0)*X.at(1,3)*X.at(3,2) + X.at(0,2)*X.at(1,0)*X.at(3,3) - X.at(0,0)*X.at(1,2)*X.at(3,3);
00111       out.at(2,2) = X.at(0,1)*X.at(1,3)*X.at(3,0) - X.at(0,3)*X.at(1,1)*X.at(3,0) + X.at(0,3)*X.at(1,0)*X.at(3,1) - X.at(0,0)*X.at(1,3)*X.at(3,1) - X.at(0,1)*X.at(1,0)*X.at(3,3) + X.at(0,0)*X.at(1,1)*X.at(3,3);
00112       out.at(3,2) = X.at(0,2)*X.at(1,1)*X.at(3,0) - X.at(0,1)*X.at(1,2)*X.at(3,0) - X.at(0,2)*X.at(1,0)*X.at(3,1) + X.at(0,0)*X.at(1,2)*X.at(3,1) + X.at(0,1)*X.at(1,0)*X.at(3,2) - X.at(0,0)*X.at(1,1)*X.at(3,2);
00113       
00114       out.at(0,3) = X.at(0,3)*X.at(1,2)*X.at(2,1) - X.at(0,2)*X.at(1,3)*X.at(2,1) - X.at(0,3)*X.at(1,1)*X.at(2,2) + X.at(0,1)*X.at(1,3)*X.at(2,2) + X.at(0,2)*X.at(1,1)*X.at(2,3) - X.at(0,1)*X.at(1,2)*X.at(2,3);
00115       out.at(1,3) = X.at(0,2)*X.at(1,3)*X.at(2,0) - X.at(0,3)*X.at(1,2)*X.at(2,0) + X.at(0,3)*X.at(1,0)*X.at(2,2) - X.at(0,0)*X.at(1,3)*X.at(2,2) - X.at(0,2)*X.at(1,0)*X.at(2,3) + X.at(0,0)*X.at(1,2)*X.at(2,3);
00116       out.at(2,3) = X.at(0,3)*X.at(1,1)*X.at(2,0) - X.at(0,1)*X.at(1,3)*X.at(2,0) - X.at(0,3)*X.at(1,0)*X.at(2,1) + X.at(0,0)*X.at(1,3)*X.at(2,1) + X.at(0,1)*X.at(1,0)*X.at(2,3) - X.at(0,0)*X.at(1,1)*X.at(2,3);
00117       out.at(3,3) = X.at(0,1)*X.at(1,2)*X.at(2,0) - X.at(0,2)*X.at(1,1)*X.at(2,0) + X.at(0,2)*X.at(1,0)*X.at(2,1) - X.at(0,0)*X.at(1,2)*X.at(2,1) - X.at(0,1)*X.at(1,0)*X.at(2,2) + X.at(0,0)*X.at(1,1)*X.at(2,2);      
00118       
00119       out /= det(X);
00120       };
00121       break;
00122       
00123     default:
00124       {
00125       #if defined(ARMA_USE_ATLAS)
00126         {
00127         out = X;
00128         podarray<int> ipiv(out.n_rows);
00129         
00130         int info = atlas::clapack_getrf(atlas::CblasColMajor, out.n_rows, out.n_cols, out.memptr(), out.n_rows, ipiv.memptr());
00131         
00132         if(info == 0)
00133           {
00134           info = atlas::clapack_getri(atlas::CblasColMajor, out.n_rows, out.memptr(), out.n_rows, ipiv.memptr());
00135           }
00136         
00137         arma_check( (info > 0), "auxlib::inv_noalias(): matrix appears to be singular" );
00138         }
00139       #elif defined(ARMA_USE_LAPACK)
00140         {
00141         out = X;
00142         
00143         int n_rows = out.n_rows;
00144         int n_cols = out.n_cols;
00145         int info   = 0;
00146         
00147         podarray<int> ipiv(out.n_rows);
00148         
00149         // 84 was empirically found -- it is the maximum value suggested by LAPACK (as provided by ATLAS v3.6)
00150         // based on tests with various matrix types on 32-bit and 64-bit machines
00151         //
00152         // the "work" array is deliberately long so that a secondary (time-consuming)
00153         // memory allocation is avoided, if possible
00154         
00155         int work_len = (std::max)(1, n_rows*84);
00156         podarray<eT> work(work_len);
00157         
00158         lapack::getrf_(&n_rows, &n_cols, out.memptr(), &n_rows, ipiv.memptr(), &info);
00159         
00160         if(info == 0)
00161           {
00162           // query for optimum size of work_len
00163           
00164           int work_len_tmp = -1;
00165           lapack::getri_(&n_rows, out.memptr(), &n_rows, ipiv.memptr(), work.memptr(), &work_len_tmp, &info);
00166           
00167           if(info == 0)
00168             {
00169             int proposed_work_len = static_cast<int>(auxlib::tmp_real(work[0]));
00170             
00171             // if necessary, allocate more memory
00172             if(work_len < proposed_work_len)
00173               {
00174               work_len = proposed_work_len;
00175               work.set_size(work_len);
00176               }
00177             }
00178           
00179           lapack::getri_(&n_rows, out.memptr(), &n_rows, ipiv.memptr(), work.memptr(), &work_len, &info);
00180           }
00181         
00182         arma_check( (info > 0), "auxlib::inv_noalias(): matrix appears to be singular" );
00183         }
00184       #else
00185         {
00186         arma_stop("auxlib::inv_noalias(): need ATLAS or LAPACK library");
00187         }
00188       #endif
00189       };
00190     }
00191   }
00192   
00193   
00194   
00195 //! immediate inplace matrix inverse
00196 template<typename eT>
00197 inline
00198 void
00199 auxlib::inv_inplace(Mat<eT>& X)
00200   {
00201   arma_extra_debug_sigprint();
00202   
00203   // for more info, see:
00204   // http://www.dr-lex.34sp.com/random/matrix_inv.html
00205   // http://www.cvl.iis.u-tokyo.ac.jp/~miyazaki/tech/teche23.html
00206   // http://www.euclideanspace.com/maths/algebra/matrix/functions/inverse/fourD/index.htm
00207   // http://www.geometrictools.com//LibFoundation/Mathematics/Wm4Matrix4.inl
00208   
00209   switch(X.n_rows)
00210     {
00211     case 1:
00212       {
00213       X[0] = eT(1) / X[0];
00214       };
00215       break;
00216       
00217     case 2:
00218       {
00219       const eT a = X.at(0,0);
00220       const eT b = X.at(0,1);
00221       const eT c = X.at(1,0);
00222       const eT d = X.at(1,1);
00223       
00224       const eT k = eT(1) / (a*d - b*c);
00225       
00226       X.at(0,0) =  d*k;
00227       X.at(0,1) = -b*k;
00228       X.at(1,0) = -c*k;
00229       X.at(1,1) =  a*k;
00230       };
00231       break;
00232     
00233     case 3:
00234       {
00235       eT* X_col0 = X.colptr(0);
00236       eT* X_col1 = X.colptr(1);
00237       eT* X_col2 = X.colptr(2);
00238       
00239       const eT a11 = X_col0[0];
00240       const eT a21 = X_col0[1];
00241       const eT a31 = X_col0[2];
00242       
00243       const eT a12 = X_col1[0];
00244       const eT a22 = X_col1[1];
00245       const eT a32 = X_col1[2];
00246       
00247       const eT a13 = X_col2[0];
00248       const eT a23 = X_col2[1];
00249       const eT a33 = X_col2[2];
00250       
00251       const eT k = eT(1) / ( a11*(a33*a22 - a32*a23) - a21*(a33*a12-a32*a13) + a31*(a23*a12 - a22*a13) );
00252       
00253       X_col0[0] =  (a33*a22 - a32*a23) * k;
00254       X_col0[1] = -(a33*a21 - a31*a23) * k;
00255       X_col0[2] =  (a32*a21 - a31*a22) * k;
00256       
00257       X_col1[0] = -(a33*a12 - a32*a13) * k;
00258       X_col1[1] =  (a33*a11 - a31*a13) * k;
00259       X_col1[2] = -(a32*a11 - a31*a12) * k;
00260       
00261       X_col2[0] =  (a23*a12 - a22*a13) * k;
00262       X_col2[1] = -(a23*a11 - a21*a13) * k;
00263       X_col2[2] =  (a22*a11 - a21*a12) * k;
00264       };
00265       break;
00266       
00267     case 4:
00268       {
00269       const Mat<eT> A(X);
00270       
00271       X.at(0,0) = A.at(1,2)*A.at(2,3)*A.at(3,1) - A.at(1,3)*A.at(2,2)*A.at(3,1) + A.at(1,3)*A.at(2,1)*A.at(3,2) - A.at(1,1)*A.at(2,3)*A.at(3,2) - A.at(1,2)*A.at(2,1)*A.at(3,3) + A.at(1,1)*A.at(2,2)*A.at(3,3);
00272       X.at(1,0) = A.at(1,3)*A.at(2,2)*A.at(3,0) - A.at(1,2)*A.at(2,3)*A.at(3,0) - A.at(1,3)*A.at(2,0)*A.at(3,2) + A.at(1,0)*A.at(2,3)*A.at(3,2) + A.at(1,2)*A.at(2,0)*A.at(3,3) - A.at(1,0)*A.at(2,2)*A.at(3,3);
00273       X.at(2,0) = A.at(1,1)*A.at(2,3)*A.at(3,0) - A.at(1,3)*A.at(2,1)*A.at(3,0) + A.at(1,3)*A.at(2,0)*A.at(3,1) - A.at(1,0)*A.at(2,3)*A.at(3,1) - A.at(1,1)*A.at(2,0)*A.at(3,3) + A.at(1,0)*A.at(2,1)*A.at(3,3);
00274       X.at(3,0) = A.at(1,2)*A.at(2,1)*A.at(3,0) - A.at(1,1)*A.at(2,2)*A.at(3,0) - A.at(1,2)*A.at(2,0)*A.at(3,1) + A.at(1,0)*A.at(2,2)*A.at(3,1) + A.at(1,1)*A.at(2,0)*A.at(3,2) - A.at(1,0)*A.at(2,1)*A.at(3,2);
00275       
00276       X.at(0,1) = A.at(0,3)*A.at(2,2)*A.at(3,1) - A.at(0,2)*A.at(2,3)*A.at(3,1) - A.at(0,3)*A.at(2,1)*A.at(3,2) + A.at(0,1)*A.at(2,3)*A.at(3,2) + A.at(0,2)*A.at(2,1)*A.at(3,3) - A.at(0,1)*A.at(2,2)*A.at(3,3);
00277       X.at(1,1) = A.at(0,2)*A.at(2,3)*A.at(3,0) - A.at(0,3)*A.at(2,2)*A.at(3,0) + A.at(0,3)*A.at(2,0)*A.at(3,2) - A.at(0,0)*A.at(2,3)*A.at(3,2) - A.at(0,2)*A.at(2,0)*A.at(3,3) + A.at(0,0)*A.at(2,2)*A.at(3,3);
00278       X.at(2,1) = A.at(0,3)*A.at(2,1)*A.at(3,0) - A.at(0,1)*A.at(2,3)*A.at(3,0) - A.at(0,3)*A.at(2,0)*A.at(3,1) + A.at(0,0)*A.at(2,3)*A.at(3,1) + A.at(0,1)*A.at(2,0)*A.at(3,3) - A.at(0,0)*A.at(2,1)*A.at(3,3);
00279       X.at(3,1) = A.at(0,1)*A.at(2,2)*A.at(3,0) - A.at(0,2)*A.at(2,1)*A.at(3,0) + A.at(0,2)*A.at(2,0)*A.at(3,1) - A.at(0,0)*A.at(2,2)*A.at(3,1) - A.at(0,1)*A.at(2,0)*A.at(3,2) + A.at(0,0)*A.at(2,1)*A.at(3,2);
00280       
00281       X.at(0,2) = A.at(0,2)*A.at(1,3)*A.at(3,1) - A.at(0,3)*A.at(1,2)*A.at(3,1) + A.at(0,3)*A.at(1,1)*A.at(3,2) - A.at(0,1)*A.at(1,3)*A.at(3,2) - A.at(0,2)*A.at(1,1)*A.at(3,3) + A.at(0,1)*A.at(1,2)*A.at(3,3);
00282       X.at(1,2) = A.at(0,3)*A.at(1,2)*A.at(3,0) - A.at(0,2)*A.at(1,3)*A.at(3,0) - A.at(0,3)*A.at(1,0)*A.at(3,2) + A.at(0,0)*A.at(1,3)*A.at(3,2) + A.at(0,2)*A.at(1,0)*A.at(3,3) - A.at(0,0)*A.at(1,2)*A.at(3,3);
00283       X.at(2,2) = A.at(0,1)*A.at(1,3)*A.at(3,0) - A.at(0,3)*A.at(1,1)*A.at(3,0) + A.at(0,3)*A.at(1,0)*A.at(3,1) - A.at(0,0)*A.at(1,3)*A.at(3,1) - A.at(0,1)*A.at(1,0)*A.at(3,3) + A.at(0,0)*A.at(1,1)*A.at(3,3);
00284       X.at(3,2) = A.at(0,2)*A.at(1,1)*A.at(3,0) - A.at(0,1)*A.at(1,2)*A.at(3,0) - A.at(0,2)*A.at(1,0)*A.at(3,1) + A.at(0,0)*A.at(1,2)*A.at(3,1) + A.at(0,1)*A.at(1,0)*A.at(3,2) - A.at(0,0)*A.at(1,1)*A.at(3,2);
00285       
00286       X.at(0,3) = A.at(0,3)*A.at(1,2)*A.at(2,1) - A.at(0,2)*A.at(1,3)*A.at(2,1) - A.at(0,3)*A.at(1,1)*A.at(2,2) + A.at(0,1)*A.at(1,3)*A.at(2,2) + A.at(0,2)*A.at(1,1)*A.at(2,3) - A.at(0,1)*A.at(1,2)*A.at(2,3);
00287       X.at(1,3) = A.at(0,2)*A.at(1,3)*A.at(2,0) - A.at(0,3)*A.at(1,2)*A.at(2,0) + A.at(0,3)*A.at(1,0)*A.at(2,2) - A.at(0,0)*A.at(1,3)*A.at(2,2) - A.at(0,2)*A.at(1,0)*A.at(2,3) + A.at(0,0)*A.at(1,2)*A.at(2,3);
00288       X.at(2,3) = A.at(0,3)*A.at(1,1)*A.at(2,0) - A.at(0,1)*A.at(1,3)*A.at(2,0) - A.at(0,3)*A.at(1,0)*A.at(2,1) + A.at(0,0)*A.at(1,3)*A.at(2,1) + A.at(0,1)*A.at(1,0)*A.at(2,3) - A.at(0,0)*A.at(1,1)*A.at(2,3);
00289       X.at(3,3) = A.at(0,1)*A.at(1,2)*A.at(2,0) - A.at(0,2)*A.at(1,1)*A.at(2,0) + A.at(0,2)*A.at(1,0)*A.at(2,1) - A.at(0,0)*A.at(1,2)*A.at(2,1) - A.at(0,1)*A.at(1,0)*A.at(2,2) + A.at(0,0)*A.at(1,1)*A.at(2,2);      
00290       
00291       X /= det(A);
00292       };
00293       break;
00294       
00295     default:
00296       {
00297       #if defined(ARMA_USE_ATLAS)
00298         {
00299         Mat<eT>& out = X;
00300         podarray<int> ipiv(out.n_rows);
00301         
00302         int info = atlas::clapack_getrf(atlas::CblasColMajor, out.n_rows, out.n_cols, out.memptr(), out.n_rows, ipiv.memptr());
00303         
00304         if(info == 0)
00305           {
00306           info = atlas::clapack_getri(atlas::CblasColMajor, out.n_rows, out.memptr(), out.n_rows, ipiv.memptr());
00307           }
00308         
00309         arma_check( (info > 0), "auxlib::inv_inplace(): matrix appears to be singular" );
00310         }
00311       #elif defined(ARMA_USE_LAPACK)
00312         {
00313         Mat<eT>& out = X;
00314         
00315         int n_rows = out.n_rows;
00316         int n_cols = out.n_cols;
00317         int info   = 0;
00318         
00319         podarray<int> ipiv(out.n_rows);
00320         
00321         // 84 was empirically found -- it is the maximum value suggested by LAPACK (as provided by ATLAS v3.6)
00322         // based on tests with various matrix types on 32-bit and 64-bit machines
00323         //
00324         // the "work" array is deliberately long so that a secondary (time-consuming)
00325         // memory allocation is avoided, if possible
00326         
00327         int work_len = (std::max)(1, n_rows*84);
00328         podarray<eT> work(work_len);
00329         
00330         lapack::getrf_(&n_rows, &n_cols, out.memptr(), &n_rows, ipiv.memptr(), &info);
00331         
00332         if(info == 0)
00333           {
00334           // query for optimum size of work_len
00335           
00336           int work_len_tmp = -1;
00337           lapack::getri_(&n_rows, out.memptr(), &n_rows, ipiv.memptr(), work.memptr(), &work_len_tmp, &info);
00338           
00339           if(info == 0)
00340             {
00341             int proposed_work_len = static_cast<int>(auxlib::tmp_real(work[0]));
00342             
00343             // if necessary, allocate more memory
00344             if(work_len < proposed_work_len)
00345               {
00346               work_len = proposed_work_len;
00347               work.set_size(work_len);
00348               }
00349             }
00350           
00351           lapack::getri_(&n_rows, out.memptr(), &n_rows, ipiv.memptr(), work.memptr(), &work_len, &info);
00352           }
00353         
00354         arma_check( (info > 0), "auxlib::inv_noalias(): matrix appears to be singular" );
00355         }
00356       #else
00357         {
00358         arma_stop("auxlib::inv_inplace(): need ATLAS or LAPACK");
00359         }
00360       #endif
00361       }
00362     
00363     }
00364   
00365   }
00366 
00367 
00368 //! immediate determinant of a matrix using ATLAS or LAPACK
00369 template<typename eT>
00370 inline
00371 eT
00372 auxlib::det(const Mat<eT>& X)
00373   {
00374   arma_extra_debug_sigprint();
00375   
00376   switch(X.n_rows)
00377     {
00378     case 0:
00379       return 0.0;
00380     
00381     case 1:
00382       return X[0];
00383     
00384     case 2:
00385       return (X.at(0,0)*X.at(1,1) - X.at(0,1)*X.at(1,0));
00386     
00387     case 3:
00388       {
00389       const eT* a_col0 = X.colptr(0);
00390       const eT  a11    = a_col0[0];
00391       const eT  a21    = a_col0[1];
00392       const eT  a31    = a_col0[2];
00393       
00394       const eT* a_col1 = X.colptr(1);
00395       const eT  a12    = a_col1[0];
00396       const eT  a22    = a_col1[1];
00397       const eT  a32    = a_col1[2];
00398       
00399       const eT* a_col2 = X.colptr(2);
00400       const eT  a13    = a_col2[0];
00401       const eT  a23    = a_col2[1];
00402       const eT  a33    = a_col2[2];
00403       
00404       return ( a11*(a33*a22 - a32*a23) - a21*(a33*a12-a32*a13) + a31*(a23*a12 - a22*a13) );
00405       
00406       // const double tmp1 = X.at(0,0) * X.at(1,1) * X.at(2,2);
00407       // const double tmp2 = X.at(0,1) * X.at(1,2) * X.at(2,0);
00408       // const double tmp3 = X.at(0,2) * X.at(1,0) * X.at(2,1);
00409       // const double tmp4 = X.at(2,0) * X.at(1,1) * X.at(0,2);
00410       // const double tmp5 = X.at(2,1) * X.at(1,2) * X.at(0,0);
00411       // const double tmp6 = X.at(2,2) * X.at(1,0) * X.at(0,1);
00412       // return (tmp1+tmp2+tmp3) - (tmp4+tmp5+tmp6);
00413       }
00414       
00415     case 4:
00416       {
00417       const eT val = \
00418           X.at(0,3) * X.at(1,2) * X.at(2,1) * X.at(3,0) \
00419         - X.at(0,2) * X.at(1,3) * X.at(2,1) * X.at(3,0) \
00420         - X.at(0,3) * X.at(1,1) * X.at(2,2) * X.at(3,0) \
00421         + X.at(0,1) * X.at(1,3) * X.at(2,2) * X.at(3,0) \
00422         + X.at(0,2) * X.at(1,1) * X.at(2,3) * X.at(3,0) \
00423         - X.at(0,1) * X.at(1,2) * X.at(2,3) * X.at(3,0) \
00424         - X.at(0,3) * X.at(1,2) * X.at(2,0) * X.at(3,1) \
00425         + X.at(0,2) * X.at(1,3) * X.at(2,0) * X.at(3,1) \
00426         + X.at(0,3) * X.at(1,0) * X.at(2,2) * X.at(3,1) \
00427         - X.at(0,0) * X.at(1,3) * X.at(2,2) * X.at(3,1) \
00428         - X.at(0,2) * X.at(1,0) * X.at(2,3) * X.at(3,1) \
00429         + X.at(0,0) * X.at(1,2) * X.at(2,3) * X.at(3,1) \
00430         + X.at(0,3) * X.at(1,1) * X.at(2,0) * X.at(3,2) \
00431         - X.at(0,1) * X.at(1,3) * X.at(2,0) * X.at(3,2) \
00432         - X.at(0,3) * X.at(1,0) * X.at(2,1) * X.at(3,2) \
00433         + X.at(0,0) * X.at(1,3) * X.at(2,1) * X.at(3,2) \
00434         + X.at(0,1) * X.at(1,0) * X.at(2,3) * X.at(3,2) \
00435         - X.at(0,0) * X.at(1,1) * X.at(2,3) * X.at(3,2) \
00436         - X.at(0,2) * X.at(1,1) * X.at(2,0) * X.at(3,3) \
00437         + X.at(0,1) * X.at(1,2) * X.at(2,0) * X.at(3,3) \
00438         + X.at(0,2) * X.at(1,0) * X.at(2,1) * X.at(3,3) \
00439         - X.at(0,0) * X.at(1,2) * X.at(2,1) * X.at(3,3) \
00440         - X.at(0,1) * X.at(1,0) * X.at(2,2) * X.at(3,3) \
00441         + X.at(0,0) * X.at(1,1) * X.at(2,2) * X.at(3,3) \
00442         ;
00443       
00444       return val;
00445       }
00446       
00447     default:  
00448       {
00449       #if defined(ARMA_USE_ATLAS)
00450         {
00451         Mat<eT> tmp = X;
00452         podarray<int> ipiv(tmp.n_rows);
00453         
00454         atlas::clapack_getrf(atlas::CblasColMajor, tmp.n_rows, tmp.n_cols, tmp.memptr(), tmp.n_rows, ipiv.memptr());
00455         
00456         // on output tmp appears to be L+U_alt, where U_alt is U with the main diagonal set to zero
00457         eT val = tmp.at(0,0);
00458         for(u32 i=1; i < tmp.n_rows; ++i)
00459           {
00460           val *= tmp.at(i,i);
00461           }
00462       
00463         int sign = +1;
00464         for(u32 i=0; i < tmp.n_rows; ++i)
00465           {
00466           if( int(i) != ipiv.mem[i] )  // NOTE: no adjustment required, as the clapack version of getrf() assumes counting from 0
00467             {
00468             sign *= -1;
00469             }
00470           }
00471         
00472         return val * eT(sign);
00473         }
00474       #elif defined(ARMA_USE_LAPACK)
00475         {
00476         Mat<eT> tmp = X;
00477         podarray<int> ipiv(tmp.n_rows);
00478         
00479         int info = 0;
00480         int n_rows = int(tmp.n_rows);
00481         int n_cols = int(tmp.n_cols);
00482         
00483         lapack::getrf_(&n_rows, &n_cols, tmp.memptr(), &n_rows, ipiv.memptr(), &info);
00484         
00485         // on output tmp appears to be L+U_alt, where U_alt is U with the main diagonal set to zero
00486         eT val = tmp.at(0,0);
00487         for(u32 i=1; i < tmp.n_rows; ++i)
00488           {
00489           val *= tmp.at(i,i);
00490           }
00491       
00492         int sign = +1;
00493         for(u32 i=0; i < tmp.n_rows; ++i)
00494           {
00495           if( int(i) != (ipiv.mem[i] - 1) )  // NOTE: adjustment of -1 is required as Fortran counts from 1
00496             {
00497             sign *= -1;
00498             }
00499           }
00500         
00501         return val * eT(sign);
00502         }
00503       #else
00504         {
00505         arma_stop("auxlib::det(): need ATLAS or LAPACK library");
00506         return eT(0);
00507         }
00508       #endif
00509       }
00510     }
00511   }
00512 
00513 
00514 
00515 //! immediate LU decomposition of a matrix using ATLAS or LAPACK
00516 template<typename eT>
00517 inline
00518 void
00519 auxlib::lu(Mat<eT>& L, Mat<eT>& U, podarray<int>& ipiv, const Mat<eT>& X)
00520   {
00521   arma_extra_debug_sigprint();
00522   
00523   U = X;
00524   
00525   #if defined(ARMA_USE_ATLAS) || defined(ARMA_USE_LAPACK)
00526     {
00527     
00528     #if defined(ARMA_USE_ATLAS)
00529       {
00530       ipiv.set_size(U.n_rows);
00531     
00532       //int info = 
00533       atlas::clapack_getrf(atlas::CblasColMajor, U.n_rows, U.n_cols, U.memptr(), U.n_rows, ipiv.memptr());
00534       }
00535     #elif defined(ARMA_USE_LAPACK)
00536       {
00537       ipiv.set_size(U.n_rows);
00538       int info = 0;
00539       
00540       int n_rows = U.n_rows;
00541       int n_cols = U.n_cols;
00542       
00543       lapack::getrf_(&n_rows, &n_cols, U.memptr(), &n_rows, ipiv.memptr(), &info);
00544       
00545       // take into account that Fortran counts from 1
00546       for(u32 i=0; i<U.n_rows; ++i)
00547         {
00548         ipiv[i] -= 1;
00549         }
00550       
00551       }
00552     #endif
00553     
00554     
00555     L.set_size(U.n_rows, U.n_rows);
00556     
00557     for(u32 col=0; col<L.n_cols; ++col)
00558       {
00559       
00560       for(u32 row=0; row<col; ++row)
00561         {
00562         L.at(row,col) = eT(0);
00563         }
00564       
00565       L.at(col,col) = eT(1);
00566       
00567       for(u32 row=col+1; row<L.n_rows; ++row)
00568         {
00569         L.at(row,col) = U.at(row,col);
00570         U.at(row,col) = eT(0);
00571         }
00572       
00573       }
00574     }
00575   #else
00576     {
00577     arma_stop("auxlib::lu(): need ATLAS or LAPACK library");
00578     }
00579   #endif
00580   
00581   }
00582 
00583 
00584 
00585 template<typename eT>
00586 inline
00587 void
00588 auxlib::lu(Mat<eT>& L, Mat<eT>& U, Mat<eT>& P, const Mat<eT>& X)
00589   {
00590   arma_extra_debug_sigprint();
00591   
00592   podarray<int> ipiv;
00593   auxlib::lu(L, U, ipiv, X);
00594   
00595   const u32 n = ipiv.n_elem;
00596 
00597   Mat<u32> P_tmp(n,n);
00598   Mat<u32> ident = eye< Mat<u32> >(n,n);
00599   
00600   for(u32 i=n; i>0; --i)
00601     {
00602     const u32 j = i-1;
00603     const u32 k = ipiv[j];
00604     
00605     ident.swap_rows(j,k);
00606     
00607     if(i == n)
00608       {
00609       P_tmp = ident;
00610       }
00611     else
00612       {
00613       P_tmp *= ident;
00614       }
00615     
00616     ident.swap_rows(j,k);
00617     }
00618   
00619   P = conv_to< Mat<eT> >::from(P_tmp);
00620   }
00621 
00622 
00623 
00624 template<typename eT>
00625 inline
00626 void
00627 auxlib::lu(Mat<eT>& L, Mat<eT>& U, const Mat<eT>& X)
00628   {
00629   arma_extra_debug_sigprint();
00630   
00631   podarray<int> ipiv;
00632   auxlib::lu(L, U, ipiv, X);
00633   }
00634   
00635 
00636 
00637 //! immediate eigenvalues of a symmetric real matrix using LAPACK
00638 template<typename eT>
00639 inline
00640 void
00641 auxlib::eig_sym(Col<eT>& eigval, const Mat<eT>& A_orig)
00642   {
00643   arma_extra_debug_sigprint();
00644   
00645   #if defined(ARMA_USE_LAPACK)
00646     {
00647     const unwrap_check<Mat<eT> > tmp(A_orig, eigval);
00648     const Mat<eT>& A = tmp.M;
00649     
00650     arma_debug_check( (A.n_rows != A.n_cols), "auxlib::eig_sym(): given matrix is not square");
00651     
00652     // rudimentary "better-than-nothing" test for symmetry
00653     //arma_debug_check( (A.at(A.n_rows-1, 0) != A.at(0, A.n_cols-1)), "auxlib::eig(): given matrix is not symmetric" );
00654 
00655     char jobz  = 'N';
00656     char uplo  = 'U';
00657     
00658     int n_rows = A.n_rows;
00659     int lwork  = (std::max)(1,3*n_rows-1);
00660     
00661     eigval.set_size(n_rows);
00662     podarray<eT> work(lwork);
00663   
00664     Mat<eT> A_copy = A;
00665     int info;
00666 
00667     arma_extra_debug_print("lapack::syev_()");
00668     lapack::syev_(&jobz, &uplo, &n_rows, A_copy.memptr(), &n_rows, eigval.memptr(), work.memptr(), &lwork, &info);
00669     }
00670   #else
00671     {
00672     arma_stop("auxlib::eig_sym(): need LAPACK library");
00673     }
00674   #endif
00675   }
00676 
00677 
00678 
00679 //! immediate eigenvalues of a hermitian complex matrix using LAPACK
00680 template<typename T> 
00681 inline
00682 void
00683 auxlib::eig_sym(Col<T>& eigval, const Mat< std::complex<T> >& A)
00684   {
00685   arma_extra_debug_sigprint();
00686 
00687   typedef typename std::complex<T> eT;
00688   
00689   #if defined(ARMA_USE_LAPACK)
00690     {
00691     arma_debug_check( (A.n_rows != A.n_cols), "auxlib::eig_sym(): given matrix is not hermitian");
00692 
00693     char jobz  = 'N'; 
00694     char uplo  = 'U';
00695 
00696     int n_rows = A.n_rows;
00697     int lda    = A.n_rows;
00698     int lwork  = (std::max)(1, 2*n_rows - 1);  // TODO: automatically find best size of lwork
00699 
00700     eigval.set_size(n_rows);
00701 
00702     podarray<eT> work(lwork);
00703     podarray<T>  rwork( (std::max)(1, 3*n_rows - 2) );
00704   
00705     Mat<eT> A_copy = A;
00706     int info;
00707   
00708     arma_extra_debug_print("lapack::heev_()");
00709     lapack::heev_(&jobz, &uplo, &n_rows, A_copy.memptr(), &lda, eigval.memptr(), work.memptr(), &lwork, rwork.memptr(), &info);
00710     }
00711   #else
00712     {
00713     arma_stop("auxlib::eig_sym(): need LAPACK library");
00714     }
00715   #endif
00716   }
00717 
00718 
00719 
00720 //! immediate eigenvalues and eigenvectors of a symmetric real matrix using LAPACK
00721 template<typename eT>
00722 inline
00723 void
00724 auxlib::eig_sym(Col<eT>& eigval, Mat<eT>& eigvec, const Mat<eT>& A_orig)
00725   {
00726   arma_extra_debug_sigprint();
00727   
00728   // TODO: check for aliasing
00729   
00730   #if defined(ARMA_USE_LAPACK)
00731     {
00732     const unwrap_check< Mat<eT> > tmp1(A_orig, eigval);
00733     const Mat<eT>& A_tmp = tmp1.M;
00734     
00735     const unwrap_check< Mat<eT> > tmp2(A_tmp, eigvec);
00736     const Mat<eT>& A = tmp2.M;
00737     
00738     arma_debug_check( (A.n_rows != A.n_cols), "auxlib::eig_sym(): given matrix is not square" );
00739     
00740     // rudimentary "better-than-nothing" test for symmetry
00741     //arma_debug_check( (A.at(A.n_rows-1, 0) != A.at(0, A.n_cols-1)), "auxlib::eig(): given matrix is not symmetric" );
00742     
00743     
00744     char jobz  = 'V';
00745     char uplo  = 'U';
00746     
00747     int n_rows = A.n_rows;
00748     int lwork  = (std::max)(1, 3*n_rows-1);
00749     
00750     eigval.set_size(n_rows);
00751     podarray<eT> work(lwork);
00752   
00753     eigvec = A;
00754     int info;
00755     
00756     arma_extra_debug_print("lapack::syev_()");
00757     lapack::syev_(&jobz, &uplo, &n_rows, eigvec.memptr(), &n_rows, eigval.memptr(), work.memptr(), &lwork, &info);
00758     }
00759   #else
00760     {
00761     arma_stop("auxlib::eig_sym(): need LAPACK library");
00762     }
00763   #endif
00764   
00765   }
00766 
00767 
00768 
00769 //! immediate eigenvalues and eigenvectors of a hermitian complex matrix using LAPACK
00770 template<typename T>
00771 inline
00772 void
00773 auxlib::eig_sym(Col<T>& eigval, Mat< std::complex<T> >& eigvec, const Mat< std::complex<T> >& A_orig) 
00774   {
00775   arma_extra_debug_sigprint();
00776   
00777   typedef typename std::complex<T> eT;
00778 
00779   #if defined(ARMA_USE_LAPACK)
00780     {
00781     const unwrap_check< Mat<eT> > tmp(A_orig, eigvec);
00782     const Mat<eT>& A = tmp.M;
00783     
00784     arma_debug_check( (A.n_rows != A.n_cols), "auxlib::eig_sym(): given matrix is not hermitian" );
00785     
00786     char jobz  = 'V';
00787     char uplo  = 'U';
00788 
00789     int n_rows = A.n_rows;
00790     int lda    = A.n_rows;
00791     int lwork  = (std::max)(1, 2*n_rows - 1);  // TODO: automatically find best size of lwork
00792     
00793     eigval.set_size(n_rows);
00794 
00795     podarray<eT> work(lwork);
00796     podarray<T>  rwork( (std::max)(1, 3*n_rows - 2) );
00797   
00798     eigvec = A;
00799     int info;
00800   
00801     arma_extra_debug_print("lapack::heev_()");
00802     lapack::heev_(&jobz, &uplo, &n_rows, eigvec.memptr(), &lda, eigval.memptr(), work.memptr(), &lwork, rwork.memptr(), &info);
00803     }
00804   #else
00805     {
00806     arma_stop("auxlib::eig_sym(): need LAPACK library");
00807     }
00808   #endif
00809   
00810   }
00811 
00812 
00813 
00814 //! Eigenvalues and eigenvectors of a general square real matrix using LAPACK.
00815 //! The argument 'side' specifies which eigenvectors should be calculated
00816 //! (see code for mode details).
00817 template<typename T>
00818 inline
00819 void
00820 auxlib::eig_gen
00821   (
00822   Col< std::complex<T> >& eigval,
00823   Mat<T>& l_eigvec,
00824   Mat<T>& r_eigvec,
00825   const Mat<T>& A, 
00826   const char side
00827   )
00828   {
00829   arma_extra_debug_sigprint();
00830 
00831   // TODO: check for aliasing
00832   
00833   #if defined(ARMA_USE_LAPACK)
00834     {
00835     arma_debug_check( (A.n_rows != A.n_cols), "auxlib::eig_gen(): given matrix is not square" );
00836     
00837     char jobvl;
00838     char jobvr;
00839 
00840     switch(side)
00841       {
00842       case 'l':  // left
00843         jobvl = 'V';
00844         jobvr = 'N';
00845         break;
00846         
00847       case 'r':  // right
00848         jobvl = 'N';
00849         jobvr = 'V';
00850         break;
00851         
00852       case 'b':  // both
00853         jobvl = 'V';
00854         jobvr = 'V';
00855         break;
00856         
00857       case 'n':  // neither
00858         jobvl = 'N';
00859         jobvr = 'N';
00860         break;
00861 
00862       default:
00863         arma_stop("auxlib::eig_gen(): parameter 'side' is invalid");
00864       }
00865 
00866        
00867     int n_rows = A.n_rows;
00868     int lda    = A.n_rows;
00869     int lwork  = (std::max)(1, 4*n_rows);  // TODO: automatically find best size of lwork
00870     
00871     eigval.set_size(n_rows);
00872     l_eigvec.set_size(n_rows, n_rows);
00873     r_eigvec.set_size(n_rows, n_rows);
00874     
00875     podarray<T> work(lwork);
00876     podarray<T> rwork( (std::max)(1, 3*n_rows) );
00877     
00878     podarray<T> wr(n_rows);
00879     podarray<T> wi(n_rows);
00880     
00881     Mat<T> A_copy = A;
00882     int info;
00883     
00884     arma_extra_debug_print("lapack::cx_geev_()");
00885     lapack::geev_(&jobvl, &jobvr, &n_rows, A_copy.memptr(), &lda, wr.memptr(), wi.memptr(), l_eigvec.memptr(), &n_rows, r_eigvec.memptr(), &n_rows, work.memptr(), &lwork, &info);
00886     
00887     
00888     eigval.set_size(n_rows);
00889     for(u32 i=0; i<u32(n_rows); ++i)
00890       {
00891       eigval[i] = std::complex<T>(wr[i], wi[i]);
00892       }
00893     
00894     }
00895   #else
00896     {
00897     arma_stop("auxlib::eig_gen(): need LAPACK library");
00898     }
00899   #endif
00900   
00901   }
00902 
00903 
00904 
00905 
00906 
00907 //! Eigenvalues and eigenvectors of a general square complex matrix using LAPACK
00908 //! The argument 'side' specifies which eigenvectors should be calculated
00909 //! (see code for mode details).
00910 template<typename T>
00911 inline
00912 void
00913 auxlib::eig_gen
00914   (
00915   Col< std::complex<T> >& eigval,
00916   Mat< std::complex<T> >& l_eigvec, 
00917   Mat< std::complex<T> >& r_eigvec, 
00918   const Mat< std::complex<T> >& A, 
00919   const char side
00920   )
00921   {
00922   arma_extra_debug_sigprint();
00923 
00924   // TODO: check for aliasing
00925   
00926   typedef typename std::complex<T> eT;
00927 
00928   #if defined(ARMA_USE_LAPACK)
00929     {
00930     arma_debug_check( (A.n_rows != A.n_cols), "auxlib::eig_gen(): given matrix is not square" );
00931     
00932     char jobvl;
00933     char jobvr;
00934 
00935     switch(side)
00936       {
00937       case 'l':  // left
00938         jobvl = 'V';
00939         jobvr = 'N';
00940         break;
00941         
00942       case 'r':  // right
00943         jobvl = 'N';
00944         jobvr = 'V';
00945         break;
00946         
00947       case 'b':  // both
00948         jobvl = 'V';
00949         jobvr = 'V';
00950         break;
00951         
00952       case 'n':  // neither
00953         jobvl = 'N';
00954         jobvr = 'N';
00955         break;
00956 
00957       default:
00958         arma_stop("auxlib::eig_gen(): parameter 'side' is invalid");
00959       }
00960     
00961        
00962     int n_rows = A.n_rows;
00963     int lda    = A.n_rows;
00964     int lwork  = (std::max)(1, 4*n_rows);  // TODO: automatically find best size of lwork
00965     
00966     eigval.set_size(n_rows);
00967     l_eigvec.set_size(n_rows, n_rows);
00968     r_eigvec.set_size(n_rows, n_rows);
00969     
00970     podarray<eT> work(lwork);
00971     podarray<T>  rwork( (std::max)(1, 3*n_rows) );  // was 2,3
00972     
00973     Mat<eT> A_copy = A;
00974     int info;
00975     
00976     arma_extra_debug_print("lapack::cx_geev_()");
00977     lapack::cx_geev_(&jobvl, &jobvr, &n_rows, A_copy.memptr(), &lda, eigval.memptr(), l_eigvec.memptr(), &n_rows, r_eigvec.memptr(), &n_rows, work.memptr(), &lwork, rwork.memptr(), &info);
00978     }
00979   #else
00980     {
00981     arma_stop("auxlib::eig_gen(): need LAPACK library");
00982     }
00983   #endif
00984   
00985   }
00986 
00987 
00988 
00989 template<typename eT> 
00990 inline
00991 bool
00992 auxlib::chol(Mat<eT>& out, const Mat<eT>& X)
00993   {
00994   arma_extra_debug_sigprint();
00995   
00996   #if defined(ARMA_USE_LAPACK)
00997     {
00998     char uplo = 'U';
00999     int  n    = X.n_rows;
01000     int  lda  = X.n_rows;
01001     int  info;
01002     
01003     out = X;
01004     lapack::potrf_(&uplo, &n, out.memptr(), &lda, &info);
01005     
01006     for(u32 col=0; col<X.n_rows; ++col)
01007       {
01008       eT* colptr = out.colptr(col);
01009       for(u32 row=col+1; row<X.n_rows; ++row)
01010         {
01011         colptr[row] = eT(0);
01012         }
01013       }
01014     
01015     return (info == 0);
01016     }
01017   #else
01018     {
01019     arma_stop("auxlib::chol(): need LAPACK library");
01020     return false;
01021     }
01022   #endif
01023   }
01024 
01025 
01026 
01027 template<typename eT>
01028 inline
01029 bool 
01030 auxlib::qr(Mat<eT>& Q, Mat<eT>& R, const Mat<eT>& X)
01031   {
01032   arma_extra_debug_sigprint();
01033   
01034   #if defined(ARMA_USE_LAPACK)
01035     {
01036     int m            = static_cast<int>(X.n_rows);
01037     int n            = static_cast<int>(X.n_cols);
01038     int work_len     = (std::max)(1,n);
01039     int work_len_tmp;
01040     int k            = (std::min)(m,n);
01041     int info;
01042     
01043     podarray<eT> tau(k);
01044     podarray<eT> work(work_len);
01045     
01046     R = X;
01047     
01048     // query for the optimum value of work_len
01049     work_len_tmp = -1;
01050     lapack::geqrf_(&m, &n, R.memptr(), &m, tau.memptr(), work.memptr(), &work_len_tmp, &info);
01051     
01052     if(info == 0)
01053       {
01054       work_len = static_cast<int>(auxlib::tmp_real(work[0]));
01055       work.set_size(work_len);
01056       }
01057     
01058     lapack::geqrf_(&m, &n, R.memptr(), &m, tau.memptr(), work.memptr(), &work_len, &info);
01059     
01060     Q.set_size(X.n_rows, X.n_rows);
01061     
01062           eT* Q_mem = Q.memptr();
01063     const eT* R_mem = R.mem;
01064     
01065     const u32 n_elem_copy = (std::min)(Q.n_elem, R.n_elem);
01066     for(u32 i=0; i < n_elem_copy; ++i)
01067       {
01068       Q_mem[i] = R_mem[i];
01069       }
01070     
01071     
01072     // construct R
01073     for(u32 row=0; row < R.n_rows; ++row)
01074       {
01075       const u32 n_elem_tmp = (std::min)(row, R.n_cols);
01076       for(u32 col=0; col < n_elem_tmp; ++col)
01077         {
01078         R.at(row,col) = eT(0);
01079         }
01080       }
01081       
01082     
01083     if( (is_float<eT>::value == true) || (is_double<eT>::value == true) )
01084       {
01085       // query for the optimum value of work_len
01086       work_len_tmp = -1;
01087       lapack::orgqr_(&m, &m, &k, Q.memptr(), &m, tau.memptr(), work.memptr(), &work_len_tmp, &info);
01088       
01089       if(info == 0)
01090         {
01091         work_len = static_cast<int>(auxlib::tmp_real(work[0]));
01092         work.set_size(work_len);
01093         }
01094       
01095       lapack::orgqr_(&m, &m, &k, Q.memptr(), &m, tau.memptr(), work.memptr(), &work_len, &info);
01096       }
01097     else
01098     if( (is_supported_complex_float<eT>::value == true) || (is_supported_complex_double<eT>::value == true) )
01099       {
01100       // query for the optimum value of work_len
01101       work_len_tmp = -1;
01102       lapack::ungqr_(&m, &m, &k, Q.memptr(), &m, tau.memptr(), work.memptr(), &work_len_tmp, &info);
01103       
01104       if(info == 0)
01105         {
01106         work_len = static_cast<int>(auxlib::tmp_real(work[0]));
01107         work.set_size(work_len);
01108         }
01109       
01110       lapack::ungqr_(&m, &m, &k, Q.memptr(), &m, tau.memptr(), work.memptr(), &work_len, &info);
01111       }
01112     
01113     return (info == 0);
01114     }
01115   #else
01116     {
01117     arma_stop("auxlib::qr(): need LAPACK library");
01118     return false;
01119     }
01120   #endif
01121   }
01122 
01123 
01124 
01125 template<typename eT> 
01126 inline
01127 bool
01128 auxlib::svd(Col<eT>& S, const Mat<eT>& X)
01129   {
01130   arma_extra_debug_sigprint();
01131   
01132   #if defined(ARMA_USE_LAPACK)
01133     {
01134     Mat<eT> A = X;
01135     
01136     char jobu  = 'N';
01137     char jobvt = 'N';
01138     
01139     int  m     = A.n_rows;
01140     int  n     = A.n_cols;
01141     int  lda   = A.n_rows;
01142     int  ldu   = A.n_rows;
01143     int  ldvt  = A.n_cols;
01144     int  lwork = 2;
01145     int  info;
01146     
01147     Mat<eT> U(1,1);
01148     Mat<eT> V(1,1);
01149     
01150     S.set_size( (std::min)(m, n) );
01151     
01152     podarray<eT> work(lwork);
01153   
01154   
01155     // let gesvd_() calculate the optimum size of the workspace
01156     int lwork_tmp = -1;
01157     
01158     lapack::gesvd_<eT>
01159       (
01160       &jobu, &jobvt,
01161       &m,&n,
01162       A.memptr(), &lda,
01163       S.memptr(),
01164       U.memptr(), &ldu,
01165       V.memptr(), &ldvt,
01166       work.memptr(), &lwork_tmp,
01167       &info
01168       );
01169     
01170     if(info == 0)
01171       {
01172       lwork = static_cast<int>(work[0]);
01173       work.set_size(lwork);
01174       
01175       lapack::gesvd_<eT>
01176         (
01177         &jobu, &jobvt,
01178         &m, &n,
01179         A.memptr(), &lda,
01180         S.memptr(),
01181         U.memptr(), &ldu,
01182         V.memptr(), &ldvt,
01183         work.memptr(), &lwork,
01184         &info
01185         );
01186       
01187       return (info == 0);
01188       }
01189     else
01190       {
01191       return false;
01192       }
01193     }
01194   #else
01195     {
01196     arma_stop("auxlib::svd(): need LAPACK library");
01197     return false;
01198     }
01199   #endif
01200   }
01201 
01202 
01203   
01204 template<typename T>
01205 inline
01206 bool
01207 auxlib::svd(Col<T>& S, const Mat< std::complex<T> >& X)
01208   {
01209   arma_extra_debug_sigprint();
01210   
01211   typedef std::complex<T> eT;
01212   
01213   #if defined(ARMA_USE_LAPACK)
01214     {
01215     Mat<eT> A = X;
01216     
01217     char jobu  = 'N';
01218     char jobvt = 'N';
01219     
01220     int  m     = A.n_rows;
01221     int  n     = A.n_cols;
01222     int  lda   = A.n_rows;
01223     int  ldu   = A.n_rows;
01224     int  ldvt  = A.n_cols;
01225     int  lwork = 2 * (std::min)(m,n) + (std::max)(m,n);
01226     int  info;
01227     
01228     Mat<eT> U(1,1);
01229     Mat<eT> V(1,1);
01230     
01231     S.set_size( (std::min)(m,n) );
01232     
01233     podarray<eT> work(lwork);
01234     podarray<T>  rwork( 5*(std::min)(m,n) );
01235   
01236     // let gesvd_() calculate the optimum size of the workspace
01237     int lwork_tmp = -1;
01238     
01239     lapack::cx_gesvd_<T>
01240       (
01241       &jobu, &jobvt,
01242       &m, &n,
01243       A.memptr(), &lda,
01244       S.memptr(),
01245       U.memptr(), &ldu,
01246       V.memptr(), &ldvt,
01247       work.memptr(), &lwork_tmp,
01248       rwork.memptr(),
01249       &info
01250       );
01251     
01252     if(info == 0)
01253       {
01254       int proposed_lwork = static_cast<int>(real(work[0]));
01255       if(proposed_lwork > lwork)
01256         {
01257         lwork = proposed_lwork;
01258         work.set_size(lwork);
01259         }
01260       
01261       lapack::cx_gesvd_<T>
01262         (
01263         &jobu, &jobvt,
01264         &m, &n,
01265         A.memptr(), &lda,
01266         S.memptr(),
01267         U.memptr(), &ldu,
01268         V.memptr(), &ldvt,
01269         work.memptr(), &lwork,
01270         rwork.memptr(),
01271         &info
01272         );
01273       
01274       return (info == 0);
01275       }
01276     else
01277       {
01278       return false;
01279       }
01280     }
01281   #else
01282     {
01283     arma_stop("auxlib::svd(): need LAPACK library");
01284     return false;
01285     }
01286   #endif
01287   }
01288 
01289 
01290 
01291 template<typename eT>
01292 inline
01293 bool
01294 auxlib::svd(Mat<eT>& U, Col<eT>& S, Mat<eT>& V, const Mat<eT>& X)
01295   {
01296   arma_extra_debug_sigprint();
01297   
01298   #if defined(ARMA_USE_LAPACK)
01299     {
01300     Mat<eT> A = X;
01301     
01302     char jobu  = 'A';
01303     char jobvt = 'A';
01304     
01305     int  m     = A.n_rows;
01306     int  n     = A.n_cols;
01307     int  lda   = A.n_rows;
01308     int  ldu   = A.n_rows;
01309     int  ldvt  = A.n_cols;
01310     int  lwork = 2;
01311     int  info;
01312     
01313     U.set_size(m,m);
01314     V.set_size(n,n);
01315     
01316     S.set_size( (std::min)(m,n) );
01317     podarray<eT> work(lwork);
01318   
01319     // let gesvd_() calculate the optimum size of the workspace
01320     int lwork_tmp = -1;
01321     
01322     lapack::gesvd_<eT>
01323       (
01324       &jobu, &jobvt,
01325       &m, &n,
01326       A.memptr(), &lda,
01327       S.memptr(),
01328       U.memptr(), &ldu,
01329       V.memptr(), &ldvt,
01330       work.memptr(), &lwork_tmp,
01331       &info
01332       );
01333     
01334     if(info == 0)
01335       {
01336       lwork = static_cast<int>(work[0]);
01337       work.set_size(lwork);
01338       
01339       lapack::gesvd_<eT>
01340         (
01341         &jobu, &jobvt,
01342         &m, &n,
01343         A.memptr(), &lda,
01344         S.memptr(),
01345         U.memptr(), &ldu,
01346         V.memptr(), &ldvt,
01347         work.memptr(), &lwork,
01348         &info
01349         );
01350       
01351       op_trans::apply(V,V);  // op_trans will work out that an in-place transpose can be done
01352       
01353       return (info == 0);
01354       }
01355     else
01356       {
01357       return false;
01358       }
01359     }
01360   #else
01361     {
01362     arma_stop("auxlib::svd(): need LAPACK library");
01363     return false;
01364     }
01365   #endif
01366   }
01367 
01368 
01369 
01370 template<typename T>
01371 inline
01372 bool
01373 auxlib::svd(Mat< std::complex<T> >& U, Col<T> &S, Mat< std::complex<T> >& V, const Mat< std::complex<T> >& X)
01374   {
01375   arma_extra_debug_sigprint();
01376   
01377   typedef std::complex<T> eT;
01378   
01379   #if defined(ARMA_USE_LAPACK)
01380     {
01381     Mat<eT> A = X;
01382     
01383     char jobu  = 'A';
01384     char jobvt = 'A';
01385     
01386     int  m     = A.n_rows;
01387     int  n     = A.n_cols;
01388     int  lda   = A.n_rows;
01389     int  ldu   = A.n_rows;
01390     int  ldvt  = A.n_cols;
01391     int  lwork = 2;
01392     int  info;
01393     
01394     U.set_size(m,m);
01395     V.set_size(n,n);
01396     
01397     S.set_size( (std::min)(m,n) );
01398     
01399     podarray<eT> work(lwork);
01400     podarray<T>  rwork( 5*(std::min)(m,n) );
01401   
01402     // let gesvd_() calculate the optimum size of the workspace
01403     int lwork_tmp = -1;
01404     lapack::cx_gesvd_<T>
01405      (
01406      &jobu, &jobvt,
01407      &m, &n,
01408      A.memptr(), &lda,
01409      S.memptr(),
01410      U.memptr(), &ldu,
01411      V.memptr(), &ldvt,
01412      work.memptr(), &lwork_tmp,
01413      rwork.memptr(),
01414      &info
01415      );
01416     
01417     if(info == 0)
01418       {
01419       lwork = static_cast<int>(real(work[0]));
01420       work.set_size(lwork);
01421       
01422       lapack::cx_gesvd_<T>
01423         (
01424         &jobu, &jobvt,
01425         &m, &n,
01426         A.memptr(), &lda,
01427         S.memptr(),
01428         U.memptr(), &ldu,
01429         V.memptr(), &ldvt,
01430         work.memptr(), &lwork,
01431         rwork.memptr(),
01432         &info
01433         );
01434       
01435       op_htrans::apply(V,V);  // op_htrans will work out that an in-place transpose can be done
01436       
01437       for(u32 i=0; i<A.n_cols; ++i)
01438         {
01439         V.at(i,i) = std::conj( V.at(i,i) );
01440         }
01441       
01442       return (info == 0);
01443       }
01444     else
01445       {
01446       return false;
01447       }
01448     }
01449   #else
01450     {
01451     arma_stop("auxlib::svd(): need LAPACK library");
01452     return false;
01453     }
01454   #endif
01455   
01456   }
01457 
01458 
01459 //! Solve a system of linear equations
01460 //! Assumes that A.n_rows = A.n_cols
01461 //! and B.n_rows = A.n_rows
01462 template<typename eT>
01463 inline
01464 bool
01465 auxlib::solve(Mat<eT>& out, const Mat<eT>& A, const Mat<eT>& B)
01466   {
01467   arma_extra_debug_sigprint();
01468   
01469   #if defined(ARMA_USE_LAPACK)
01470     {
01471     int n    = A.n_rows;
01472     int lda  = A.n_rows;
01473     int ldb  = A.n_rows;
01474     int nrhs = B.n_cols;
01475     int info;
01476     
01477     podarray<int> ipiv(n);
01478     
01479     out = B;
01480     Mat<eT> A_copy = A;
01481   
01482     lapack::gesv_<eT>(&n, &nrhs, A_copy.memptr(), &lda, ipiv.memptr(), out.memptr(), &ldb, &info);
01483   
01484     return (info == 0);
01485     }
01486   #else
01487     {
01488     arma_stop("auxlib::solve(): need LAPACK library");
01489     return false;
01490     }
01491   #endif
01492   }
01493 
01494 
01495                
01496 //! Solve an over-determined system.
01497 //! Assumes that A.n_rows > A.n_cols
01498 //! and B.n_rows = A.n_rows
01499 template<typename eT>
01500 inline
01501 bool
01502 auxlib::solve_od(Mat<eT>& out, const Mat<eT>& A, const Mat<eT>& B)
01503   {
01504   arma_extra_debug_sigprint();
01505   
01506   #if defined(ARMA_USE_LAPACK)
01507     {
01508     char trans = 'N';
01509     
01510     int  m     = A.n_rows;
01511     int  n     = A.n_cols;
01512     int  lda   = A.n_rows;
01513     int  ldb   = A.n_rows;
01514     int  nrhs  = B.n_cols;
01515     int  lwork = n + (std::max)(n, nrhs);
01516     int  info;
01517     
01518     Mat<eT> A_copy = A;
01519     Mat<eT> tmp    = B;
01520     
01521     
01522     podarray<eT> work(lwork);
01523     
01524     arma_extra_debug_print("lapack::gels_()");
01525     
01526     // NOTE:
01527     // the dgels() function in the lapack library supplied by ATLAS 3.6
01528     // seems to have problems
01529     
01530     lapack::gels_<eT>
01531       (
01532       &trans, &m, &n, &nrhs,
01533       A_copy.memptr(), &lda,
01534       tmp.memptr(), &ldb,
01535       work.memptr(), &lwork,
01536       &info
01537       );
01538     
01539     arma_extra_debug_print("lapack::gels_() -- finished");
01540     
01541     out.set_size(A.n_cols, B.n_cols);
01542     
01543     for(u32 col=0; col<B.n_cols; ++col)
01544       {
01545       syslib::copy_elem( out.colptr(col), tmp.colptr(col), A.n_cols );
01546       }
01547     
01548     return (info == 0);
01549     }
01550   #else
01551     {
01552     arma_stop("auxlib::solve_od(): need LAPACK library");
01553     return false;
01554     }
01555   #endif
01556   }
01557 
01558 
01559 
01560 //! Solve an under-determined system.
01561 //! Assumes that A.n_rows < A.n_cols
01562 //! and B.n_rows = A.n_rows
01563 template<typename eT>
01564 inline
01565 bool
01566 auxlib::solve_ud(Mat<eT>& out, const Mat<eT>& A, const Mat<eT>& B)
01567   {
01568   arma_extra_debug_sigprint();
01569   
01570   #if defined(ARMA_USE_LAPACK)
01571     {
01572     char trans = 'N';
01573     
01574     int  m     = A.n_rows;
01575     int  n     = A.n_cols;
01576     int  lda   = A.n_rows;
01577     int  ldb   = A.n_cols;
01578     int  nrhs  = B.n_cols;
01579     int  lwork = m + (std::max)(m,nrhs);
01580     int  info;
01581     
01582     
01583     Mat<eT> A_copy = A;
01584     
01585     Mat<eT> tmp;
01586     tmp.zeros(A.n_cols, B.n_cols);
01587     
01588     for(u32 col=0; col<B.n_cols; ++col)
01589       {
01590       eT* tmp_colmem = tmp.colptr(col);
01591       
01592       syslib::copy_elem( tmp_colmem, B.colptr(col), B.n_rows );
01593       
01594       for(u32 row=B.n_rows; row<A.n_cols; ++row)
01595         {
01596         tmp_colmem[row] = eT(0);
01597         }
01598       }
01599     
01600     podarray<eT> work(lwork);
01601     
01602     arma_extra_debug_print("lapack::gels_()");
01603     
01604     // NOTE:
01605     // the dgels() function in the lapack library supplied by ATLAS 3.6
01606     // seems to have problems
01607     
01608     lapack::gels_<eT>
01609       (
01610       &trans, &m, &n, &nrhs,
01611       A_copy.memptr(), &lda,
01612       tmp.memptr(), &ldb,
01613       work.memptr(), &lwork,
01614       &info
01615       );
01616     
01617     arma_extra_debug_print("lapack::gels_() -- finished");
01618     
01619     out.set_size(A.n_cols, B.n_cols);
01620     
01621     for(u32 col=0; col<B.n_cols; ++col)
01622       {
01623       syslib::copy_elem( out.colptr(col), tmp.colptr(col), A.n_cols );
01624       }
01625   
01626     return (info == 0);
01627     }
01628   #else
01629     {
01630     arma_stop("auxlib::solve_ud(): need LAPACK library");
01631     return false;
01632     }
01633   #endif
01634   }
01635 
01636 
01637 
01638 //! @}