auxlib_meat.hpp

Go to the documentation of this file.
00001 // Copyright (C) 2010 NICTA and the authors listed below
00002 // http://nicta.com.au
00003 // 
00004 // Authors:
00005 // - Conrad Sanderson (conradsand at ieee dot org)
00006 // - Edmund Highcock (edmund dot highcock at merton dot ox dot ac dot uk)
00007 // 
00008 // This file is part of the Armadillo C++ library.
00009 // It is provided without any warranty of fitness
00010 // for any purpose. You can redistribute this file
00011 // and/or modify it under the terms of the GNU
00012 // Lesser General Public License (LGPL) as published
00013 // by the Free Software Foundation, either version 3
00014 // of the License or (at your option) any later version.
00015 // (see http://www.opensource.org/licenses for more info)
00016 
00017 
00018 //! \addtogroup auxlib
00019 //! @{
00020 
00021 
00022 //! immediate matrix inverse
00023 template<typename eT>
00024 inline
00025 bool
00026 auxlib::inv_noalias(Mat<eT>& out, const Mat<eT>& X)
00027   {
00028   arma_extra_debug_sigprint();
00029   
00030   switch(X.n_rows)
00031     {
00032     case 1:
00033       {
00034       out.set_size(1,1);
00035       out[0] = eT(1) / X[0];
00036       };
00037       break;
00038       
00039     case 2:
00040       {
00041       out.set_size(2,2);
00042       
00043       const eT a = X.at(0,0);
00044       const eT b = X.at(0,1);
00045       const eT c = X.at(1,0);
00046       const eT d = X.at(1,1);
00047       
00048       const eT k = eT(1) / (a*d - b*c);
00049       
00050       out.at(0,0) =  d*k;
00051       out.at(0,1) = -b*k;
00052       out.at(1,0) = -c*k;
00053       out.at(1,1) =  a*k;
00054       };
00055       break;
00056     
00057     case 3:
00058       {
00059       out.set_size(3,3);
00060       
00061       const eT* X_col0 = X.colptr(0);
00062       const eT a11 = X_col0[0];
00063       const eT a21 = X_col0[1];
00064       const eT a31 = X_col0[2];
00065       
00066       const eT* X_col1 = X.colptr(1);
00067       const eT a12 = X_col1[0];
00068       const eT a22 = X_col1[1];
00069       const eT a32 = X_col1[2];
00070       
00071       const eT* X_col2 = X.colptr(2);
00072       const eT a13 = X_col2[0];
00073       const eT a23 = X_col2[1];
00074       const eT a33 = X_col2[2];
00075       
00076       const eT k = eT(1) / ( a11*(a33*a22 - a32*a23) - a21*(a33*a12-a32*a13) + a31*(a23*a12 - a22*a13) );
00077       
00078       
00079       eT* out_col0 = out.colptr(0);
00080       out_col0[0] =  (a33*a22 - a32*a23) * k;
00081       out_col0[1] = -(a33*a21 - a31*a23) * k;
00082       out_col0[2] =  (a32*a21 - a31*a22) * k;
00083       
00084       eT* out_col1 = out.colptr(1);
00085       out_col1[0] = -(a33*a12 - a32*a13) * k;
00086       out_col1[1] =  (a33*a11 - a31*a13) * k;
00087       out_col1[2] = -(a32*a11 - a31*a12) * k;
00088       
00089       eT* out_col2 = out.colptr(2);
00090       out_col2[0] =  (a23*a12 - a22*a13) * k;
00091       out_col2[1] = -(a23*a11 - a21*a13) * k;
00092       out_col2[2] =  (a22*a11 - a21*a12) * k;
00093       };
00094       break;
00095       
00096     case 4:
00097       {
00098       out.set_size(4,4);
00099       
00100       out.at(0,0) = X.at(1,2)*X.at(2,3)*X.at(3,1) - X.at(1,3)*X.at(2,2)*X.at(3,1) + X.at(1,3)*X.at(2,1)*X.at(3,2) - X.at(1,1)*X.at(2,3)*X.at(3,2) - X.at(1,2)*X.at(2,1)*X.at(3,3) + X.at(1,1)*X.at(2,2)*X.at(3,3);
00101       out.at(1,0) = X.at(1,3)*X.at(2,2)*X.at(3,0) - X.at(1,2)*X.at(2,3)*X.at(3,0) - X.at(1,3)*X.at(2,0)*X.at(3,2) + X.at(1,0)*X.at(2,3)*X.at(3,2) + X.at(1,2)*X.at(2,0)*X.at(3,3) - X.at(1,0)*X.at(2,2)*X.at(3,3);
00102       out.at(2,0) = X.at(1,1)*X.at(2,3)*X.at(3,0) - X.at(1,3)*X.at(2,1)*X.at(3,0) + X.at(1,3)*X.at(2,0)*X.at(3,1) - X.at(1,0)*X.at(2,3)*X.at(3,1) - X.at(1,1)*X.at(2,0)*X.at(3,3) + X.at(1,0)*X.at(2,1)*X.at(3,3);
00103       out.at(3,0) = X.at(1,2)*X.at(2,1)*X.at(3,0) - X.at(1,1)*X.at(2,2)*X.at(3,0) - X.at(1,2)*X.at(2,0)*X.at(3,1) + X.at(1,0)*X.at(2,2)*X.at(3,1) + X.at(1,1)*X.at(2,0)*X.at(3,2) - X.at(1,0)*X.at(2,1)*X.at(3,2);
00104       
00105       out.at(0,1) = X.at(0,3)*X.at(2,2)*X.at(3,1) - X.at(0,2)*X.at(2,3)*X.at(3,1) - X.at(0,3)*X.at(2,1)*X.at(3,2) + X.at(0,1)*X.at(2,3)*X.at(3,2) + X.at(0,2)*X.at(2,1)*X.at(3,3) - X.at(0,1)*X.at(2,2)*X.at(3,3);
00106       out.at(1,1) = X.at(0,2)*X.at(2,3)*X.at(3,0) - X.at(0,3)*X.at(2,2)*X.at(3,0) + X.at(0,3)*X.at(2,0)*X.at(3,2) - X.at(0,0)*X.at(2,3)*X.at(3,2) - X.at(0,2)*X.at(2,0)*X.at(3,3) + X.at(0,0)*X.at(2,2)*X.at(3,3);
00107       out.at(2,1) = X.at(0,3)*X.at(2,1)*X.at(3,0) - X.at(0,1)*X.at(2,3)*X.at(3,0) - X.at(0,3)*X.at(2,0)*X.at(3,1) + X.at(0,0)*X.at(2,3)*X.at(3,1) + X.at(0,1)*X.at(2,0)*X.at(3,3) - X.at(0,0)*X.at(2,1)*X.at(3,3);
00108       out.at(3,1) = X.at(0,1)*X.at(2,2)*X.at(3,0) - X.at(0,2)*X.at(2,1)*X.at(3,0) + X.at(0,2)*X.at(2,0)*X.at(3,1) - X.at(0,0)*X.at(2,2)*X.at(3,1) - X.at(0,1)*X.at(2,0)*X.at(3,2) + X.at(0,0)*X.at(2,1)*X.at(3,2);
00109       
00110       out.at(0,2) = X.at(0,2)*X.at(1,3)*X.at(3,1) - X.at(0,3)*X.at(1,2)*X.at(3,1) + X.at(0,3)*X.at(1,1)*X.at(3,2) - X.at(0,1)*X.at(1,3)*X.at(3,2) - X.at(0,2)*X.at(1,1)*X.at(3,3) + X.at(0,1)*X.at(1,2)*X.at(3,3);
00111       out.at(1,2) = X.at(0,3)*X.at(1,2)*X.at(3,0) - X.at(0,2)*X.at(1,3)*X.at(3,0) - X.at(0,3)*X.at(1,0)*X.at(3,2) + X.at(0,0)*X.at(1,3)*X.at(3,2) + X.at(0,2)*X.at(1,0)*X.at(3,3) - X.at(0,0)*X.at(1,2)*X.at(3,3);
00112       out.at(2,2) = X.at(0,1)*X.at(1,3)*X.at(3,0) - X.at(0,3)*X.at(1,1)*X.at(3,0) + X.at(0,3)*X.at(1,0)*X.at(3,1) - X.at(0,0)*X.at(1,3)*X.at(3,1) - X.at(0,1)*X.at(1,0)*X.at(3,3) + X.at(0,0)*X.at(1,1)*X.at(3,3);
00113       out.at(3,2) = X.at(0,2)*X.at(1,1)*X.at(3,0) - X.at(0,1)*X.at(1,2)*X.at(3,0) - X.at(0,2)*X.at(1,0)*X.at(3,1) + X.at(0,0)*X.at(1,2)*X.at(3,1) + X.at(0,1)*X.at(1,0)*X.at(3,2) - X.at(0,0)*X.at(1,1)*X.at(3,2);
00114       
00115       out.at(0,3) = X.at(0,3)*X.at(1,2)*X.at(2,1) - X.at(0,2)*X.at(1,3)*X.at(2,1) - X.at(0,3)*X.at(1,1)*X.at(2,2) + X.at(0,1)*X.at(1,3)*X.at(2,2) + X.at(0,2)*X.at(1,1)*X.at(2,3) - X.at(0,1)*X.at(1,2)*X.at(2,3);
00116       out.at(1,3) = X.at(0,2)*X.at(1,3)*X.at(2,0) - X.at(0,3)*X.at(1,2)*X.at(2,0) + X.at(0,3)*X.at(1,0)*X.at(2,2) - X.at(0,0)*X.at(1,3)*X.at(2,2) - X.at(0,2)*X.at(1,0)*X.at(2,3) + X.at(0,0)*X.at(1,2)*X.at(2,3);
00117       out.at(2,3) = X.at(0,3)*X.at(1,1)*X.at(2,0) - X.at(0,1)*X.at(1,3)*X.at(2,0) - X.at(0,3)*X.at(1,0)*X.at(2,1) + X.at(0,0)*X.at(1,3)*X.at(2,1) + X.at(0,1)*X.at(1,0)*X.at(2,3) - X.at(0,0)*X.at(1,1)*X.at(2,3);
00118       out.at(3,3) = X.at(0,1)*X.at(1,2)*X.at(2,0) - X.at(0,2)*X.at(1,1)*X.at(2,0) + X.at(0,2)*X.at(1,0)*X.at(2,1) - X.at(0,0)*X.at(1,2)*X.at(2,1) - X.at(0,1)*X.at(1,0)*X.at(2,2) + X.at(0,0)*X.at(1,1)*X.at(2,2);      
00119       
00120       out /= det(X);
00121       };
00122       break;
00123       
00124     default:
00125       {
00126       #if defined(ARMA_USE_ATLAS)
00127         {
00128         out = X;
00129         podarray<int> ipiv(out.n_rows);
00130         
00131         int info = atlas::clapack_getrf(atlas::CblasColMajor, out.n_rows, out.n_cols, out.memptr(), out.n_rows, ipiv.memptr());
00132         
00133         if(info == 0)
00134           {
00135           info = atlas::clapack_getri(atlas::CblasColMajor, out.n_rows, out.memptr(), out.n_rows, ipiv.memptr());
00136           }
00137         
00138         return (info == 0);
00139         }
00140       #elif defined(ARMA_USE_LAPACK)
00141         {
00142         out = X;
00143         
00144         int n_rows = out.n_rows;
00145         int n_cols = out.n_cols;
00146         int info   = 0;
00147         
00148         podarray<int> ipiv(out.n_rows);
00149         
00150         // 84 was empirically found -- it is the maximum value suggested by LAPACK (as provided by ATLAS v3.6)
00151         // based on tests with various matrix types on 32-bit and 64-bit machines
00152         //
00153         // the "work" array is deliberately long so that a secondary (time-consuming)
00154         // memory allocation is avoided, if possible
00155         
00156         int work_len = (std::max)(1, n_rows*84);
00157         podarray<eT> work(work_len);
00158         
00159         lapack::getrf_(&n_rows, &n_cols, out.memptr(), &n_rows, ipiv.memptr(), &info);
00160         
00161         if(info == 0)
00162           {
00163           // query for optimum size of work_len
00164           
00165           int work_len_tmp = -1;
00166           lapack::getri_(&n_rows, out.memptr(), &n_rows, ipiv.memptr(), work.memptr(), &work_len_tmp, &info);
00167           
00168           if(info == 0)
00169             {
00170             int proposed_work_len = static_cast<int>(access::tmp_real(work[0]));
00171             
00172             // if necessary, allocate more memory
00173             if(work_len < proposed_work_len)
00174               {
00175               work_len = proposed_work_len;
00176               work.set_size(work_len);
00177               }
00178             }
00179           
00180           lapack::getri_(&n_rows, out.memptr(), &n_rows, ipiv.memptr(), work.memptr(), &work_len, &info);
00181           }
00182         
00183         return (info == 0);
00184         }
00185       #else
00186         {
00187         arma_stop("inv(): need ATLAS or LAPACK");
00188         }
00189       #endif
00190       };
00191     }
00192     
00193   return true;
00194   }
00195   
00196   
00197   
00198 //! immediate inplace matrix inverse
00199 template<typename eT>
00200 inline
00201 bool
00202 auxlib::inv_inplace(Mat<eT>& X)
00203   {
00204   arma_extra_debug_sigprint();
00205   
00206   // for more info, see:
00207   // http://www.dr-lex.34sp.com/random/matrix_inv.html
00208   // http://www.cvl.iis.u-tokyo.ac.jp/~miyazaki/tech/teche23.html
00209   // http://www.euclideanspace.com/maths/algebra/matrix/functions/inverse/fourD/index.htm
00210   // http://www.geometrictools.com//LibFoundation/Mathematics/Wm4Matrix4.inl
00211   
00212   switch(X.n_rows)
00213     {
00214     case 1:
00215       {
00216       X[0] = eT(1) / X[0];
00217       };
00218       break;
00219       
00220     case 2:
00221       {
00222       const eT a = X.at(0,0);
00223       const eT b = X.at(0,1);
00224       const eT c = X.at(1,0);
00225       const eT d = X.at(1,1);
00226       
00227       const eT k = eT(1) / (a*d - b*c);
00228       
00229       X.at(0,0) =  d*k;
00230       X.at(0,1) = -b*k;
00231       X.at(1,0) = -c*k;
00232       X.at(1,1) =  a*k;
00233       };
00234       break;
00235     
00236     case 3:
00237       {
00238       eT* X_col0 = X.colptr(0);
00239       eT* X_col1 = X.colptr(1);
00240       eT* X_col2 = X.colptr(2);
00241       
00242       const eT a11 = X_col0[0];
00243       const eT a21 = X_col0[1];
00244       const eT a31 = X_col0[2];
00245       
00246       const eT a12 = X_col1[0];
00247       const eT a22 = X_col1[1];
00248       const eT a32 = X_col1[2];
00249       
00250       const eT a13 = X_col2[0];
00251       const eT a23 = X_col2[1];
00252       const eT a33 = X_col2[2];
00253       
00254       const eT k = eT(1) / ( a11*(a33*a22 - a32*a23) - a21*(a33*a12-a32*a13) + a31*(a23*a12 - a22*a13) );
00255       
00256       X_col0[0] =  (a33*a22 - a32*a23) * k;
00257       X_col0[1] = -(a33*a21 - a31*a23) * k;
00258       X_col0[2] =  (a32*a21 - a31*a22) * k;
00259       
00260       X_col1[0] = -(a33*a12 - a32*a13) * k;
00261       X_col1[1] =  (a33*a11 - a31*a13) * k;
00262       X_col1[2] = -(a32*a11 - a31*a12) * k;
00263       
00264       X_col2[0] =  (a23*a12 - a22*a13) * k;
00265       X_col2[1] = -(a23*a11 - a21*a13) * k;
00266       X_col2[2] =  (a22*a11 - a21*a12) * k;
00267       };
00268       break;
00269       
00270     case 4:
00271       {
00272       const Mat<eT> A(X);
00273       
00274       X.at(0,0) = A.at(1,2)*A.at(2,3)*A.at(3,1) - A.at(1,3)*A.at(2,2)*A.at(3,1) + A.at(1,3)*A.at(2,1)*A.at(3,2) - A.at(1,1)*A.at(2,3)*A.at(3,2) - A.at(1,2)*A.at(2,1)*A.at(3,3) + A.at(1,1)*A.at(2,2)*A.at(3,3);
00275       X.at(1,0) = A.at(1,3)*A.at(2,2)*A.at(3,0) - A.at(1,2)*A.at(2,3)*A.at(3,0) - A.at(1,3)*A.at(2,0)*A.at(3,2) + A.at(1,0)*A.at(2,3)*A.at(3,2) + A.at(1,2)*A.at(2,0)*A.at(3,3) - A.at(1,0)*A.at(2,2)*A.at(3,3);
00276       X.at(2,0) = A.at(1,1)*A.at(2,3)*A.at(3,0) - A.at(1,3)*A.at(2,1)*A.at(3,0) + A.at(1,3)*A.at(2,0)*A.at(3,1) - A.at(1,0)*A.at(2,3)*A.at(3,1) - A.at(1,1)*A.at(2,0)*A.at(3,3) + A.at(1,0)*A.at(2,1)*A.at(3,3);
00277       X.at(3,0) = A.at(1,2)*A.at(2,1)*A.at(3,0) - A.at(1,1)*A.at(2,2)*A.at(3,0) - A.at(1,2)*A.at(2,0)*A.at(3,1) + A.at(1,0)*A.at(2,2)*A.at(3,1) + A.at(1,1)*A.at(2,0)*A.at(3,2) - A.at(1,0)*A.at(2,1)*A.at(3,2);
00278       
00279       X.at(0,1) = A.at(0,3)*A.at(2,2)*A.at(3,1) - A.at(0,2)*A.at(2,3)*A.at(3,1) - A.at(0,3)*A.at(2,1)*A.at(3,2) + A.at(0,1)*A.at(2,3)*A.at(3,2) + A.at(0,2)*A.at(2,1)*A.at(3,3) - A.at(0,1)*A.at(2,2)*A.at(3,3);
00280       X.at(1,1) = A.at(0,2)*A.at(2,3)*A.at(3,0) - A.at(0,3)*A.at(2,2)*A.at(3,0) + A.at(0,3)*A.at(2,0)*A.at(3,2) - A.at(0,0)*A.at(2,3)*A.at(3,2) - A.at(0,2)*A.at(2,0)*A.at(3,3) + A.at(0,0)*A.at(2,2)*A.at(3,3);
00281       X.at(2,1) = A.at(0,3)*A.at(2,1)*A.at(3,0) - A.at(0,1)*A.at(2,3)*A.at(3,0) - A.at(0,3)*A.at(2,0)*A.at(3,1) + A.at(0,0)*A.at(2,3)*A.at(3,1) + A.at(0,1)*A.at(2,0)*A.at(3,3) - A.at(0,0)*A.at(2,1)*A.at(3,3);
00282       X.at(3,1) = A.at(0,1)*A.at(2,2)*A.at(3,0) - A.at(0,2)*A.at(2,1)*A.at(3,0) + A.at(0,2)*A.at(2,0)*A.at(3,1) - A.at(0,0)*A.at(2,2)*A.at(3,1) - A.at(0,1)*A.at(2,0)*A.at(3,2) + A.at(0,0)*A.at(2,1)*A.at(3,2);
00283       
00284       X.at(0,2) = A.at(0,2)*A.at(1,3)*A.at(3,1) - A.at(0,3)*A.at(1,2)*A.at(3,1) + A.at(0,3)*A.at(1,1)*A.at(3,2) - A.at(0,1)*A.at(1,3)*A.at(3,2) - A.at(0,2)*A.at(1,1)*A.at(3,3) + A.at(0,1)*A.at(1,2)*A.at(3,3);
00285       X.at(1,2) = A.at(0,3)*A.at(1,2)*A.at(3,0) - A.at(0,2)*A.at(1,3)*A.at(3,0) - A.at(0,3)*A.at(1,0)*A.at(3,2) + A.at(0,0)*A.at(1,3)*A.at(3,2) + A.at(0,2)*A.at(1,0)*A.at(3,3) - A.at(0,0)*A.at(1,2)*A.at(3,3);
00286       X.at(2,2) = A.at(0,1)*A.at(1,3)*A.at(3,0) - A.at(0,3)*A.at(1,1)*A.at(3,0) + A.at(0,3)*A.at(1,0)*A.at(3,1) - A.at(0,0)*A.at(1,3)*A.at(3,1) - A.at(0,1)*A.at(1,0)*A.at(3,3) + A.at(0,0)*A.at(1,1)*A.at(3,3);
00287       X.at(3,2) = A.at(0,2)*A.at(1,1)*A.at(3,0) - A.at(0,1)*A.at(1,2)*A.at(3,0) - A.at(0,2)*A.at(1,0)*A.at(3,1) + A.at(0,0)*A.at(1,2)*A.at(3,1) + A.at(0,1)*A.at(1,0)*A.at(3,2) - A.at(0,0)*A.at(1,1)*A.at(3,2);
00288       
00289       X.at(0,3) = A.at(0,3)*A.at(1,2)*A.at(2,1) - A.at(0,2)*A.at(1,3)*A.at(2,1) - A.at(0,3)*A.at(1,1)*A.at(2,2) + A.at(0,1)*A.at(1,3)*A.at(2,2) + A.at(0,2)*A.at(1,1)*A.at(2,3) - A.at(0,1)*A.at(1,2)*A.at(2,3);
00290       X.at(1,3) = A.at(0,2)*A.at(1,3)*A.at(2,0) - A.at(0,3)*A.at(1,2)*A.at(2,0) + A.at(0,3)*A.at(1,0)*A.at(2,2) - A.at(0,0)*A.at(1,3)*A.at(2,2) - A.at(0,2)*A.at(1,0)*A.at(2,3) + A.at(0,0)*A.at(1,2)*A.at(2,3);
00291       X.at(2,3) = A.at(0,3)*A.at(1,1)*A.at(2,0) - A.at(0,1)*A.at(1,3)*A.at(2,0) - A.at(0,3)*A.at(1,0)*A.at(2,1) + A.at(0,0)*A.at(1,3)*A.at(2,1) + A.at(0,1)*A.at(1,0)*A.at(2,3) - A.at(0,0)*A.at(1,1)*A.at(2,3);
00292       X.at(3,3) = A.at(0,1)*A.at(1,2)*A.at(2,0) - A.at(0,2)*A.at(1,1)*A.at(2,0) + A.at(0,2)*A.at(1,0)*A.at(2,1) - A.at(0,0)*A.at(1,2)*A.at(2,1) - A.at(0,1)*A.at(1,0)*A.at(2,2) + A.at(0,0)*A.at(1,1)*A.at(2,2);      
00293       
00294       X /= det(A);
00295       };
00296       break;
00297       
00298     default:
00299       {
00300       #if defined(ARMA_USE_ATLAS)
00301         {
00302         Mat<eT>& out = X;
00303         podarray<int> ipiv(out.n_rows);
00304         
00305         int info = atlas::clapack_getrf(atlas::CblasColMajor, out.n_rows, out.n_cols, out.memptr(), out.n_rows, ipiv.memptr());
00306         
00307         if(info == 0)
00308           {
00309           info = atlas::clapack_getri(atlas::CblasColMajor, out.n_rows, out.memptr(), out.n_rows, ipiv.memptr());
00310           }
00311         
00312         return (info == 0);
00313         }
00314       #elif defined(ARMA_USE_LAPACK)
00315         {
00316         Mat<eT>& out = X;
00317         
00318         int n_rows = out.n_rows;
00319         int n_cols = out.n_cols;
00320         int info   = 0;
00321         
00322         podarray<int> ipiv(out.n_rows);
00323         
00324         // 84 was empirically found -- it is the maximum value suggested by LAPACK (as provided by ATLAS v3.6)
00325         // based on tests with various matrix types on 32-bit and 64-bit machines
00326         //
00327         // the "work" array is deliberately long so that a secondary (time-consuming)
00328         // memory allocation is avoided, if possible
00329         
00330         int work_len = (std::max)(1, n_rows*84);
00331         podarray<eT> work(work_len);
00332         
00333         lapack::getrf_(&n_rows, &n_cols, out.memptr(), &n_rows, ipiv.memptr(), &info);
00334         
00335         if(info == 0)
00336           {
00337           // query for optimum size of work_len
00338           
00339           int work_len_tmp = -1;
00340           lapack::getri_(&n_rows, out.memptr(), &n_rows, ipiv.memptr(), work.memptr(), &work_len_tmp, &info);
00341           
00342           if(info == 0)
00343             {
00344             int proposed_work_len = static_cast<int>(access::tmp_real(work[0]));
00345             
00346             // if necessary, allocate more memory
00347             if(work_len < proposed_work_len)
00348               {
00349               work_len = proposed_work_len;
00350               work.set_size(work_len);
00351               }
00352             }
00353           
00354           lapack::getri_(&n_rows, out.memptr(), &n_rows, ipiv.memptr(), work.memptr(), &work_len, &info);
00355           }
00356         
00357         return (info == 0);
00358         }
00359       #else
00360         {
00361         arma_stop("inv(): need ATLAS or LAPACK");
00362         }
00363       #endif
00364       }
00365     
00366     }
00367   
00368   return true;
00369   }
00370 
00371 
00372 //! immediate determinant of a matrix using ATLAS or LAPACK
00373 template<typename eT>
00374 inline
00375 eT
00376 auxlib::det(const Mat<eT>& X)
00377   {
00378   arma_extra_debug_sigprint();
00379   
00380   switch(X.n_rows)
00381     {
00382     case 0:
00383       return eT(0);
00384     
00385     case 1:
00386       return X[0];
00387     
00388     case 2:
00389       return (X.at(0,0)*X.at(1,1) - X.at(0,1)*X.at(1,0));
00390     
00391     case 3:
00392       {
00393       const eT* a_col0 = X.colptr(0);
00394       const eT  a11    = a_col0[0];
00395       const eT  a21    = a_col0[1];
00396       const eT  a31    = a_col0[2];
00397       
00398       const eT* a_col1 = X.colptr(1);
00399       const eT  a12    = a_col1[0];
00400       const eT  a22    = a_col1[1];
00401       const eT  a32    = a_col1[2];
00402       
00403       const eT* a_col2 = X.colptr(2);
00404       const eT  a13    = a_col2[0];
00405       const eT  a23    = a_col2[1];
00406       const eT  a33    = a_col2[2];
00407       
00408       return ( a11*(a33*a22 - a32*a23) - a21*(a33*a12-a32*a13) + a31*(a23*a12 - a22*a13) );
00409       
00410       // const double tmp1 = X.at(0,0) * X.at(1,1) * X.at(2,2);
00411       // const double tmp2 = X.at(0,1) * X.at(1,2) * X.at(2,0);
00412       // const double tmp3 = X.at(0,2) * X.at(1,0) * X.at(2,1);
00413       // const double tmp4 = X.at(2,0) * X.at(1,1) * X.at(0,2);
00414       // const double tmp5 = X.at(2,1) * X.at(1,2) * X.at(0,0);
00415       // const double tmp6 = X.at(2,2) * X.at(1,0) * X.at(0,1);
00416       // return (tmp1+tmp2+tmp3) - (tmp4+tmp5+tmp6);
00417       }
00418       
00419     case 4:
00420       {
00421       const eT val = \
00422           X.at(0,3) * X.at(1,2) * X.at(2,1) * X.at(3,0) \
00423         - X.at(0,2) * X.at(1,3) * X.at(2,1) * X.at(3,0) \
00424         - X.at(0,3) * X.at(1,1) * X.at(2,2) * X.at(3,0) \
00425         + X.at(0,1) * X.at(1,3) * X.at(2,2) * X.at(3,0) \
00426         + X.at(0,2) * X.at(1,1) * X.at(2,3) * X.at(3,0) \
00427         - X.at(0,1) * X.at(1,2) * X.at(2,3) * X.at(3,0) \
00428         - X.at(0,3) * X.at(1,2) * X.at(2,0) * X.at(3,1) \
00429         + X.at(0,2) * X.at(1,3) * X.at(2,0) * X.at(3,1) \
00430         + X.at(0,3) * X.at(1,0) * X.at(2,2) * X.at(3,1) \
00431         - X.at(0,0) * X.at(1,3) * X.at(2,2) * X.at(3,1) \
00432         - X.at(0,2) * X.at(1,0) * X.at(2,3) * X.at(3,1) \
00433         + X.at(0,0) * X.at(1,2) * X.at(2,3) * X.at(3,1) \
00434         + X.at(0,3) * X.at(1,1) * X.at(2,0) * X.at(3,2) \
00435         - X.at(0,1) * X.at(1,3) * X.at(2,0) * X.at(3,2) \
00436         - X.at(0,3) * X.at(1,0) * X.at(2,1) * X.at(3,2) \
00437         + X.at(0,0) * X.at(1,3) * X.at(2,1) * X.at(3,2) \
00438         + X.at(0,1) * X.at(1,0) * X.at(2,3) * X.at(3,2) \
00439         - X.at(0,0) * X.at(1,1) * X.at(2,3) * X.at(3,2) \
00440         - X.at(0,2) * X.at(1,1) * X.at(2,0) * X.at(3,3) \
00441         + X.at(0,1) * X.at(1,2) * X.at(2,0) * X.at(3,3) \
00442         + X.at(0,2) * X.at(1,0) * X.at(2,1) * X.at(3,3) \
00443         - X.at(0,0) * X.at(1,2) * X.at(2,1) * X.at(3,3) \
00444         - X.at(0,1) * X.at(1,0) * X.at(2,2) * X.at(3,3) \
00445         + X.at(0,0) * X.at(1,1) * X.at(2,2) * X.at(3,3) \
00446         ;
00447       
00448       return val;
00449       }
00450       
00451     default:  
00452       {
00453       #if defined(ARMA_USE_ATLAS)
00454         {
00455         Mat<eT> tmp = X;
00456         podarray<int> ipiv(tmp.n_rows);
00457         
00458         atlas::clapack_getrf(atlas::CblasColMajor, tmp.n_rows, tmp.n_cols, tmp.memptr(), tmp.n_rows, ipiv.memptr());
00459         
00460         // on output tmp appears to be L+U_alt, where U_alt is U with the main diagonal set to zero
00461         eT val = tmp.at(0,0);
00462         for(u32 i=1; i < tmp.n_rows; ++i)
00463           {
00464           val *= tmp.at(i,i);
00465           }
00466         
00467         int sign = +1;
00468         for(u32 i=0; i < tmp.n_rows; ++i)
00469           {
00470           if( int(i) != ipiv.mem[i] )  // NOTE: no adjustment required, as the clapack version of getrf() assumes counting from 0
00471             {
00472             sign *= -1;
00473             }
00474           }
00475         
00476         return ( (sign < 0) ? -val : val );
00477         }
00478       #elif defined(ARMA_USE_LAPACK)
00479         {
00480         Mat<eT> tmp = X;
00481         podarray<int> ipiv(tmp.n_rows);
00482         
00483         int info   = 0;
00484         int n_rows = int(tmp.n_rows);
00485         int n_cols = int(tmp.n_cols);
00486         
00487         lapack::getrf_(&n_rows, &n_cols, tmp.memptr(), &n_rows, ipiv.memptr(), &info);
00488         
00489         // on output tmp appears to be L+U_alt, where U_alt is U with the main diagonal set to zero
00490         eT val = tmp.at(0,0);
00491         for(u32 i=1; i < tmp.n_rows; ++i)
00492           {
00493           val *= tmp.at(i,i);
00494           }
00495         
00496         int sign = +1;
00497         for(u32 i=0; i < tmp.n_rows; ++i)
00498           {
00499           if( int(i) != (ipiv.mem[i] - 1) )  // NOTE: adjustment of -1 is required as Fortran counts from 1
00500             {
00501             sign *= -1;
00502             }
00503           }
00504         
00505         return ( (sign < 0) ? -val : val );
00506         }
00507       #else
00508         {
00509         arma_stop("det(): need ATLAS or LAPACK");
00510         return eT(0);
00511         }
00512       #endif
00513       }
00514     }
00515   }
00516 
00517 
00518 
00519 //! immediate log determinant of a matrix using ATLAS or LAPACK
00520 template<typename eT>
00521 inline
00522 void
00523 auxlib::log_det(eT& out_val, typename get_pod_type<eT>::result& out_sign, const Mat<eT>& X)
00524   {
00525   arma_extra_debug_sigprint();
00526   
00527   typedef typename get_pod_type<eT>::result T;
00528   
00529   #if defined(ARMA_USE_ATLAS)
00530     {
00531     Mat<eT> tmp = X;
00532     podarray<int> ipiv(tmp.n_rows);
00533     
00534     atlas::clapack_getrf(atlas::CblasColMajor, tmp.n_rows, tmp.n_cols, tmp.memptr(), tmp.n_rows, ipiv.memptr());
00535     
00536     // on output tmp appears to be L+U_alt, where U_alt is U with the main diagonal set to zero
00537     
00538     s32 sign = (is_complex<eT>::value == false) ? ( (access::tmp_real( tmp.at(0,0) ) < T(0)) ? -1 : +1 ) : +1;
00539     eT   val = (is_complex<eT>::value == false) ? std::log( (access::tmp_real( tmp.at(0,0) ) < T(0)) ? tmp.at(0,0)*T(-1) : tmp.at(0,0) ) : std::log( tmp.at(0,0) );
00540     
00541     for(u32 i=1; i < tmp.n_rows; ++i)
00542       {
00543       const eT x = tmp.at(i,i);
00544       
00545       sign *= (is_complex<eT>::value == false) ? ( (access::tmp_real(x) < T(0)) ? -1 : +1 ) : +1;
00546       val  += (is_complex<eT>::value == false) ? std::log( (access::tmp_real(x) < T(0)) ? x*T(-1) : x ) : std::log(x);
00547       }
00548     
00549     for(u32 i=0; i < tmp.n_rows; ++i)
00550       {
00551       if( int(i) != ipiv.mem[i] )  // NOTE: no adjustment required, as the clapack version of getrf() assumes counting from 0
00552         {
00553         sign *= -1;
00554         }
00555       }
00556     
00557     out_val  = val;
00558     out_sign = T(sign);
00559     }
00560   #elif defined(ARMA_USE_LAPACK)
00561     {
00562     Mat<eT> tmp = X;
00563     podarray<int> ipiv(tmp.n_rows);
00564     
00565     int info   = 0;
00566     int n_rows = int(tmp.n_rows);
00567     int n_cols = int(tmp.n_cols);
00568     
00569     lapack::getrf_(&n_rows, &n_cols, tmp.memptr(), &n_rows, ipiv.memptr(), &info);
00570     
00571     // on output tmp appears to be L+U_alt, where U_alt is U with the main diagonal set to zero
00572     
00573     s32 sign = (is_complex<eT>::value == false) ? ( (access::tmp_real( tmp.at(0,0) ) < T(0)) ? -1 : +1 ) : +1;
00574     eT   val = (is_complex<eT>::value == false) ? std::log( (access::tmp_real( tmp.at(0,0) ) < T(0)) ? tmp.at(0,0)*T(-1) : tmp.at(0,0) ) : std::log( tmp.at(0,0) );
00575     
00576     for(u32 i=1; i < tmp.n_rows; ++i)
00577       {
00578       const eT x = tmp.at(i,i);
00579       
00580       sign *= (is_complex<eT>::value == false) ? ( (access::tmp_real(x) < T(0)) ? -1 : +1 ) : +1;
00581       val  += (is_complex<eT>::value == false) ? std::log( (access::tmp_real(x) < T(0)) ? x*T(-1) : x ) : std::log(x);
00582       }
00583     
00584     for(u32 i=0; i < tmp.n_rows; ++i)
00585       {
00586       if( int(i) != (ipiv.mem[i] - 1) )  // NOTE: adjustment of -1 is required as Fortran counts from 1
00587         {
00588         sign *= -1;
00589         }
00590       }
00591     
00592     out_val  = val;
00593     out_sign = T(sign);
00594     }
00595   #else
00596     {
00597     arma_stop("log_det(): need ATLAS or LAPACK");
00598     
00599     out_val  = eT(0);
00600     out_sign =  T(0);
00601     }
00602   #endif
00603   }
00604 
00605 
00606 
00607 //! immediate LU decomposition of a matrix using ATLAS or LAPACK
00608 template<typename eT>
00609 inline
00610 void
00611 auxlib::lu(Mat<eT>& L, Mat<eT>& U, podarray<int>& ipiv, const Mat<eT>& X)
00612   {
00613   arma_extra_debug_sigprint();
00614   
00615   U = X;
00616   
00617   #if defined(ARMA_USE_ATLAS) || defined(ARMA_USE_LAPACK)
00618     {
00619     
00620     #if defined(ARMA_USE_ATLAS)
00621       {
00622       ipiv.set_size(U.n_rows);
00623     
00624       //int info = 
00625       atlas::clapack_getrf(atlas::CblasColMajor, U.n_rows, U.n_cols, U.memptr(), U.n_rows, ipiv.memptr());
00626       }
00627     #elif defined(ARMA_USE_LAPACK)
00628       {
00629       ipiv.set_size(U.n_rows);
00630       int info = 0;
00631       
00632       int n_rows = U.n_rows;
00633       int n_cols = U.n_cols;
00634       
00635       lapack::getrf_(&n_rows, &n_cols, U.memptr(), &n_rows, ipiv.memptr(), &info);
00636       
00637       // take into account that Fortran counts from 1
00638       for(u32 i=0; i<U.n_rows; ++i)
00639         {
00640         ipiv[i] -= 1;
00641         }
00642       
00643       }
00644     #endif
00645     
00646     
00647     L.set_size(U.n_rows, U.n_rows);
00648     
00649     for(u32 col=0; col<L.n_cols; ++col)
00650       {
00651       
00652       for(u32 row=0; row<col; ++row)
00653         {
00654         L.at(row,col) = eT(0);
00655         }
00656       
00657       L.at(col,col) = eT(1);
00658       
00659       for(u32 row=col+1; row<L.n_rows; ++row)
00660         {
00661         L.at(row,col) = U.at(row,col);
00662         U.at(row,col) = eT(0);
00663         }
00664       
00665       }
00666     }
00667   #else
00668     {
00669     arma_stop("lu(): need ATLAS or LAPACK");
00670     }
00671   #endif
00672   
00673   }
00674 
00675 
00676 
00677 template<typename eT>
00678 inline
00679 void
00680 auxlib::lu(Mat<eT>& L, Mat<eT>& U, Mat<eT>& P, const Mat<eT>& X)
00681   {
00682   arma_extra_debug_sigprint();
00683   
00684   podarray<int> ipiv;
00685   auxlib::lu(L, U, ipiv, X);
00686   
00687   const u32 n = ipiv.n_elem;
00688 
00689   Mat<u32> P_tmp(n,n);
00690   Mat<u32> ident = eye< Mat<u32> >(n,n);
00691   
00692   for(u32 i=n; i>0; --i)
00693     {
00694     const u32 j = i-1;
00695     const u32 k = ipiv[j];
00696     
00697     ident.swap_rows(j,k);
00698     
00699     if(i == n)
00700       {
00701       P_tmp = ident;
00702       }
00703     else
00704       {
00705       P_tmp *= ident;
00706       }
00707     
00708     ident.swap_rows(j,k);
00709     }
00710   
00711   P = conv_to< Mat<eT> >::from(P_tmp);
00712   }
00713 
00714 
00715 
00716 template<typename eT>
00717 inline
00718 void
00719 auxlib::lu(Mat<eT>& L, Mat<eT>& U, const Mat<eT>& X)
00720   {
00721   arma_extra_debug_sigprint();
00722   
00723   podarray<int> ipiv;
00724   auxlib::lu(L, U, ipiv, X);
00725   }
00726   
00727 
00728 
00729 //! immediate eigenvalues of a symmetric real matrix using LAPACK
00730 template<typename eT>
00731 inline
00732 void
00733 auxlib::eig_sym(Col<eT>& eigval, const Mat<eT>& A_orig)
00734   {
00735   arma_extra_debug_sigprint();
00736   
00737   #if defined(ARMA_USE_LAPACK)
00738     {
00739     const unwrap_check<Mat<eT> > tmp(A_orig, eigval);
00740     const Mat<eT>& A = tmp.M;
00741     
00742     arma_debug_check( (A.n_rows != A.n_cols), "eig_sym(): given matrix is not square");
00743     
00744     // rudimentary "better-than-nothing" test for symmetry
00745     //arma_debug_check( (A.at(A.n_rows-1, 0) != A.at(0, A.n_cols-1)), "auxlib::eig(): given matrix is not symmetric" );
00746 
00747     char jobz  = 'N';
00748     char uplo  = 'U';
00749     
00750     int n_rows = A.n_rows;
00751     int lwork  = (std::max)(1,3*n_rows-1);
00752     
00753     eigval.set_size(n_rows);
00754     podarray<eT> work(lwork);
00755   
00756     Mat<eT> A_copy = A;
00757     int info;
00758 
00759     arma_extra_debug_print("lapack::syev_()");
00760     lapack::syev_(&jobz, &uplo, &n_rows, A_copy.memptr(), &n_rows, eigval.memptr(), work.memptr(), &lwork, &info);
00761     }
00762   #else
00763     {
00764     arma_stop("eig_sym(): need LAPACK");
00765     }
00766   #endif
00767   }
00768 
00769 
00770 
00771 //! immediate eigenvalues of a hermitian complex matrix using LAPACK
00772 template<typename T> 
00773 inline
00774 void
00775 auxlib::eig_sym(Col<T>& eigval, const Mat< std::complex<T> >& A)
00776   {
00777   arma_extra_debug_sigprint();
00778 
00779   typedef typename std::complex<T> eT;
00780   
00781   #if defined(ARMA_USE_LAPACK)
00782     {
00783     arma_debug_check( (A.n_rows != A.n_cols), "eig_sym(): given matrix is not hermitian");
00784 
00785     char jobz  = 'N'; 
00786     char uplo  = 'U';
00787 
00788     int n_rows = A.n_rows;
00789     int lda    = A.n_rows;
00790     int lwork  = (std::max)(1, 2*n_rows - 1);  // TODO: automatically find best size of lwork
00791 
00792     eigval.set_size(n_rows);
00793 
00794     podarray<eT> work(lwork);
00795     podarray<T>  rwork( (std::max)(1, 3*n_rows - 2) );
00796   
00797     Mat<eT> A_copy = A;
00798     int info;
00799   
00800     arma_extra_debug_print("lapack::heev_()");
00801     lapack::heev_(&jobz, &uplo, &n_rows, A_copy.memptr(), &lda, eigval.memptr(), work.memptr(), &lwork, rwork.memptr(), &info);
00802     }
00803   #else
00804     {
00805     arma_stop("eig_sym(): need LAPACK");
00806     }
00807   #endif
00808   }
00809 
00810 
00811 
00812 //! immediate eigenvalues and eigenvectors of a symmetric real matrix using LAPACK
00813 template<typename eT>
00814 inline
00815 void
00816 auxlib::eig_sym(Col<eT>& eigval, Mat<eT>& eigvec, const Mat<eT>& A_orig)
00817   {
00818   arma_extra_debug_sigprint();
00819   
00820   // TODO: check for aliasing
00821   
00822   #if defined(ARMA_USE_LAPACK)
00823     {
00824     const unwrap_check< Mat<eT> > tmp1(A_orig, eigval);
00825     const Mat<eT>& A_tmp = tmp1.M;
00826     
00827     const unwrap_check< Mat<eT> > tmp2(A_tmp, eigvec);
00828     const Mat<eT>& A = tmp2.M;
00829     
00830     arma_debug_check( (A.n_rows != A.n_cols), "eig_sym(): given matrix is not square" );
00831     
00832     // rudimentary "better-than-nothing" test for symmetry
00833     //arma_debug_check( (A.at(A.n_rows-1, 0) != A.at(0, A.n_cols-1)), "auxlib::eig(): given matrix is not symmetric" );
00834     
00835     
00836     char jobz  = 'V';
00837     char uplo  = 'U';
00838     
00839     int n_rows = A.n_rows;
00840     int lwork  = (std::max)(1, 3*n_rows-1);
00841     
00842     eigval.set_size(n_rows);
00843     podarray<eT> work(lwork);
00844   
00845     eigvec = A;
00846     int info;
00847     
00848     arma_extra_debug_print("lapack::syev_()");
00849     lapack::syev_(&jobz, &uplo, &n_rows, eigvec.memptr(), &n_rows, eigval.memptr(), work.memptr(), &lwork, &info);
00850     }
00851   #else
00852     {
00853     arma_stop("eig_sym(): need LAPACK");
00854     }
00855   #endif
00856   
00857   }
00858 
00859 
00860 
00861 //! immediate eigenvalues and eigenvectors of a hermitian complex matrix using LAPACK
00862 template<typename T>
00863 inline
00864 void
00865 auxlib::eig_sym(Col<T>& eigval, Mat< std::complex<T> >& eigvec, const Mat< std::complex<T> >& A_orig) 
00866   {
00867   arma_extra_debug_sigprint();
00868   
00869   typedef typename std::complex<T> eT;
00870 
00871   #if defined(ARMA_USE_LAPACK)
00872     {
00873     const unwrap_check< Mat<eT> > tmp(A_orig, eigvec);
00874     const Mat<eT>& A = tmp.M;
00875     
00876     arma_debug_check( (A.n_rows != A.n_cols), "eig_sym(): given matrix is not hermitian" );
00877     
00878     char jobz  = 'V';
00879     char uplo  = 'U';
00880 
00881     int n_rows = A.n_rows;
00882     int lda    = A.n_rows;
00883     int lwork  = (std::max)(1, 2*n_rows - 1);  // TODO: automatically find best size of lwork
00884     
00885     eigval.set_size(n_rows);
00886 
00887     podarray<eT> work(lwork);
00888     podarray<T>  rwork( (std::max)(1, 3*n_rows - 2) );
00889   
00890     eigvec = A;
00891     int info;
00892   
00893     arma_extra_debug_print("lapack::heev_()");
00894     lapack::heev_(&jobz, &uplo, &n_rows, eigvec.memptr(), &lda, eigval.memptr(), work.memptr(), &lwork, rwork.memptr(), &info);
00895     }
00896   #else
00897     {
00898     arma_stop("eig_sym(): need LAPACK");
00899     }
00900   #endif
00901   
00902   }
00903 
00904 
00905 
00906 //! Eigenvalues and eigenvectors of a general square real matrix using LAPACK.
00907 //! The argument 'side' specifies which eigenvectors should be calculated
00908 //! (see code for mode details).
00909 template<typename T>
00910 inline
00911 void
00912 auxlib::eig_gen
00913   (
00914   Col< std::complex<T> >& eigval,
00915   Mat<T>& l_eigvec,
00916   Mat<T>& r_eigvec,
00917   const Mat<T>& A, 
00918   const char side
00919   )
00920   {
00921   arma_extra_debug_sigprint();
00922 
00923   // TODO: check for aliasing
00924   
00925   #if defined(ARMA_USE_LAPACK)
00926     {
00927     arma_debug_check( (A.n_rows != A.n_cols), "eig_gen(): given matrix is not square" );
00928     
00929     char jobvl;
00930     char jobvr;
00931 
00932     switch(side)
00933       {
00934       case 'l':  // left
00935         jobvl = 'V';
00936         jobvr = 'N';
00937         break;
00938         
00939       case 'r':  // right
00940         jobvl = 'N';
00941         jobvr = 'V';
00942         break;
00943         
00944       case 'b':  // both
00945         jobvl = 'V';
00946         jobvr = 'V';
00947         break;
00948         
00949       case 'n':  // neither
00950         jobvl = 'N';
00951         jobvr = 'N';
00952         break;
00953 
00954       default:
00955         arma_stop("eig_gen(): parameter 'side' is invalid");
00956       }
00957 
00958        
00959     int n_rows = A.n_rows;
00960     int lda    = A.n_rows;
00961     int lwork  = (std::max)(1, 4*n_rows);  // TODO: automatically find best size of lwork
00962     
00963     eigval.set_size(n_rows);
00964     l_eigvec.set_size(n_rows, n_rows);
00965     r_eigvec.set_size(n_rows, n_rows);
00966     
00967     podarray<T> work(lwork);
00968     podarray<T> rwork( (std::max)(1, 3*n_rows) );
00969     
00970     podarray<T> wr(n_rows);
00971     podarray<T> wi(n_rows);
00972     
00973     Mat<T> A_copy = A;
00974     int info;
00975     
00976     arma_extra_debug_print("lapack::cx_geev_()");
00977     lapack::geev_(&jobvl, &jobvr, &n_rows, A_copy.memptr(), &lda, wr.memptr(), wi.memptr(), l_eigvec.memptr(), &n_rows, r_eigvec.memptr(), &n_rows, work.memptr(), &lwork, &info);
00978     
00979     
00980     eigval.set_size(n_rows);
00981     for(u32 i=0; i<u32(n_rows); ++i)
00982       {
00983       eigval[i] = std::complex<T>(wr[i], wi[i]);
00984       }
00985     
00986     }
00987   #else
00988     {
00989     arma_stop("eig_gen(): need LAPACK");
00990     }
00991   #endif
00992   
00993   }
00994 
00995 
00996 
00997 
00998 
00999 //! Eigenvalues and eigenvectors of a general square complex matrix using LAPACK
01000 //! The argument 'side' specifies which eigenvectors should be calculated
01001 //! (see code for mode details).
01002 template<typename T>
01003 inline
01004 void
01005 auxlib::eig_gen
01006   (
01007   Col< std::complex<T> >& eigval,
01008   Mat< std::complex<T> >& l_eigvec, 
01009   Mat< std::complex<T> >& r_eigvec, 
01010   const Mat< std::complex<T> >& A, 
01011   const char side
01012   )
01013   {
01014   arma_extra_debug_sigprint();
01015 
01016   // TODO: check for aliasing
01017   
01018   typedef typename std::complex<T> eT;
01019 
01020   #if defined(ARMA_USE_LAPACK)
01021     {
01022     arma_debug_check( (A.n_rows != A.n_cols), "eig_gen(): given matrix is not square" );
01023     
01024     char jobvl;
01025     char jobvr;
01026 
01027     switch(side)
01028       {
01029       case 'l':  // left
01030         jobvl = 'V';
01031         jobvr = 'N';
01032         break;
01033         
01034       case 'r':  // right
01035         jobvl = 'N';
01036         jobvr = 'V';
01037         break;
01038         
01039       case 'b':  // both
01040         jobvl = 'V';
01041         jobvr = 'V';
01042         break;
01043         
01044       case 'n':  // neither
01045         jobvl = 'N';
01046         jobvr = 'N';
01047         break;
01048 
01049       default:
01050         arma_stop("eig_gen(): parameter 'side' is invalid");
01051       }
01052     
01053        
01054     int n_rows = A.n_rows;
01055     int lda    = A.n_rows;
01056     int lwork  = (std::max)(1, 4*n_rows);  // TODO: automatically find best size of lwork
01057     
01058     eigval.set_size(n_rows);
01059     l_eigvec.set_size(n_rows, n_rows);
01060     r_eigvec.set_size(n_rows, n_rows);
01061     
01062     podarray<eT> work(lwork);
01063     podarray<T>  rwork( (std::max)(1, 3*n_rows) );  // was 2,3
01064     
01065     Mat<eT> A_copy = A;
01066     int info;
01067     
01068     arma_extra_debug_print("lapack::cx_geev_()");
01069     lapack::cx_geev_(&jobvl, &jobvr, &n_rows, A_copy.memptr(), &lda, eigval.memptr(), l_eigvec.memptr(), &n_rows, r_eigvec.memptr(), &n_rows, work.memptr(), &lwork, rwork.memptr(), &info);
01070     }
01071   #else
01072     {
01073     arma_stop("eig_gen(): need LAPACK");
01074     }
01075   #endif
01076   
01077   }
01078 
01079 
01080 
01081 template<typename eT> 
01082 inline
01083 bool
01084 auxlib::chol(Mat<eT>& out, const Mat<eT>& X)
01085   {
01086   arma_extra_debug_sigprint();
01087   
01088   #if defined(ARMA_USE_LAPACK)
01089     {
01090     char uplo = 'U';
01091     int  n    = X.n_rows;
01092     int  lda  = X.n_rows;
01093     int  info;
01094     
01095     out = X;
01096     lapack::potrf_(&uplo, &n, out.memptr(), &lda, &info);
01097     
01098     for(u32 col=0; col<X.n_rows; ++col)
01099       {
01100       eT* colptr = out.colptr(col);
01101       for(u32 row=col+1; row<X.n_rows; ++row)
01102         {
01103         colptr[row] = eT(0);
01104         }
01105       }
01106     
01107     return (info == 0);
01108     }
01109   #else
01110     {
01111     arma_stop("chol(): need LAPACK");
01112     return false;
01113     }
01114   #endif
01115   }
01116 
01117 
01118 
01119 template<typename eT>
01120 inline
01121 bool 
01122 auxlib::qr(Mat<eT>& Q, Mat<eT>& R, const Mat<eT>& X)
01123   {
01124   arma_extra_debug_sigprint();
01125   
01126   #if defined(ARMA_USE_LAPACK)
01127     {
01128     int m            = static_cast<int>(X.n_rows);
01129     int n            = static_cast<int>(X.n_cols);
01130     int work_len     = (std::max)(1,n);
01131     int work_len_tmp;
01132     int k            = (std::min)(m,n);
01133     int info;
01134     
01135     podarray<eT> tau(k);
01136     podarray<eT> work(work_len);
01137     
01138     R = X;
01139     
01140     // query for the optimum value of work_len
01141     work_len_tmp = -1;
01142     lapack::geqrf_(&m, &n, R.memptr(), &m, tau.memptr(), work.memptr(), &work_len_tmp, &info);
01143     
01144     if(info == 0)
01145       {
01146       work_len = static_cast<int>(access::tmp_real(work[0]));
01147       work.set_size(work_len);
01148       }
01149     
01150     lapack::geqrf_(&m, &n, R.memptr(), &m, tau.memptr(), work.memptr(), &work_len, &info);
01151     
01152     Q.set_size(X.n_rows, X.n_rows);
01153     
01154           eT* Q_mem = Q.memptr();
01155     const eT* R_mem = R.mem;
01156     
01157     const u32 n_elem_copy = (std::min)(Q.n_elem, R.n_elem);
01158     for(u32 i=0; i < n_elem_copy; ++i)
01159       {
01160       Q_mem[i] = R_mem[i];
01161       }
01162     
01163     
01164     // construct R
01165     for(u32 row=0; row < R.n_rows; ++row)
01166       {
01167       const u32 n_elem_tmp = (std::min)(row, R.n_cols);
01168       for(u32 col=0; col < n_elem_tmp; ++col)
01169         {
01170         R.at(row,col) = eT(0);
01171         }
01172       }
01173       
01174     
01175     if( (is_float<eT>::value == true) || (is_double<eT>::value == true) )
01176       {
01177       // query for the optimum value of work_len
01178       work_len_tmp = -1;
01179       lapack::orgqr_(&m, &m, &k, Q.memptr(), &m, tau.memptr(), work.memptr(), &work_len_tmp, &info);
01180       
01181       if(info == 0)
01182         {
01183         work_len = static_cast<int>(access::tmp_real(work[0]));
01184         work.set_size(work_len);
01185         }
01186       
01187       lapack::orgqr_(&m, &m, &k, Q.memptr(), &m, tau.memptr(), work.memptr(), &work_len, &info);
01188       }
01189     else
01190     if( (is_supported_complex_float<eT>::value == true) || (is_supported_complex_double<eT>::value == true) )
01191       {
01192       // query for the optimum value of work_len
01193       work_len_tmp = -1;
01194       lapack::ungqr_(&m, &m, &k, Q.memptr(), &m, tau.memptr(), work.memptr(), &work_len_tmp, &info);
01195       
01196       if(info == 0)
01197         {
01198         work_len = static_cast<int>(access::tmp_real(work[0]));
01199         work.set_size(work_len);
01200         }
01201       
01202       lapack::ungqr_(&m, &m, &k, Q.memptr(), &m, tau.memptr(), work.memptr(), &work_len, &info);
01203       }
01204     
01205     return (info == 0);
01206     }
01207   #else
01208     {
01209     arma_stop("qr(): need LAPACK");
01210     return false;
01211     }
01212   #endif
01213   }
01214 
01215 
01216 
01217 template<typename eT> 
01218 inline
01219 bool
01220 auxlib::svd(Col<eT>& S, const Mat<eT>& X)
01221   {
01222   arma_extra_debug_sigprint();
01223   
01224   #if defined(ARMA_USE_LAPACK)
01225     {
01226     Mat<eT> A = X;
01227     
01228     Mat<eT> U(1, 1);
01229     Mat<eT> V(1, A.n_cols);
01230     
01231     char jobu  = 'N';
01232     char jobvt = 'N';
01233     
01234     int  m     = A.n_rows;
01235     int  n     = A.n_cols;
01236     int  lda   = A.n_rows;
01237     int  ldu   = U.n_rows;
01238     int  ldvt  = V.n_rows;
01239     int  lwork = 2 * (std::max)(1, (std::max)( (3*(std::min)(m,n) + (std::max)(m,n)), 5*(std::min)(m,n) ) );
01240     int  info;
01241     
01242     S.set_size( (std::min)(m, n) );
01243     
01244     podarray<eT> work(lwork);
01245   
01246   
01247     // let gesvd_() calculate the optimum size of the workspace
01248     int lwork_tmp = -1;
01249     
01250     lapack::gesvd_<eT>
01251       (
01252       &jobu, &jobvt,
01253       &m,&n,
01254       A.memptr(), &lda,
01255       S.memptr(),
01256       U.memptr(), &ldu,
01257       V.memptr(), &ldvt,
01258       work.memptr(), &lwork_tmp,
01259       &info
01260       );
01261     
01262     if(info == 0)
01263       {
01264       int proposed_lwork = static_cast<int>(work[0]);
01265       
01266       if(proposed_lwork > lwork)
01267         {
01268         lwork = proposed_lwork;
01269         work.set_size(lwork);
01270         }
01271       
01272       lapack::gesvd_<eT>
01273         (
01274         &jobu, &jobvt,
01275         &m, &n,
01276         A.memptr(), &lda,
01277         S.memptr(),
01278         U.memptr(), &ldu,
01279         V.memptr(), &ldvt,
01280         work.memptr(), &lwork,
01281         &info
01282         );
01283       }
01284     
01285     return (info == 0);
01286     }
01287   #else
01288     {
01289     arma_stop("svd(): need LAPACK");
01290     return false;
01291     }
01292   #endif
01293   }
01294 
01295 
01296   
01297 template<typename T>
01298 inline
01299 bool
01300 auxlib::svd(Col<T>& S, const Mat< std::complex<T> >& X)
01301   {
01302   arma_extra_debug_sigprint();
01303   
01304   typedef std::complex<T> eT;
01305   
01306   #if defined(ARMA_USE_LAPACK)
01307     {
01308     Mat<eT> A = X;
01309     
01310     Mat<eT> U(1, 1);
01311     Mat<eT> V(1, A.n_cols);
01312     
01313     char jobu  = 'N';
01314     char jobvt = 'N';
01315     
01316     int  m     = A.n_rows;
01317     int  n     = A.n_cols;
01318     int  lda   = A.n_rows;
01319     int  ldu   = U.n_rows;
01320     int  ldvt  = V.n_rows;
01321     int  lwork = 2 * (std::max)(1, 2*(std::min)(m,n)+(std::max)(m,n) );
01322     int  info;
01323     
01324     S.set_size( (std::min)(m,n) );
01325     
01326     podarray<eT> work(lwork);
01327     podarray<T>  rwork( 5*(std::min)(m,n) );
01328   
01329     // let gesvd_() calculate the optimum size of the workspace
01330     int lwork_tmp = -1;
01331     
01332     lapack::cx_gesvd_<T>
01333       (
01334       &jobu, &jobvt,
01335       &m, &n,
01336       A.memptr(), &lda,
01337       S.memptr(),
01338       U.memptr(), &ldu,
01339       V.memptr(), &ldvt,
01340       work.memptr(), &lwork_tmp,
01341       rwork.memptr(),
01342       &info
01343       );
01344     
01345     if(info == 0)
01346       {
01347       int proposed_lwork = static_cast<int>(real(work[0]));
01348       if(proposed_lwork > lwork)
01349         {
01350         lwork = proposed_lwork;
01351         work.set_size(lwork);
01352         }
01353       
01354       lapack::cx_gesvd_<T>
01355         (
01356         &jobu, &jobvt,
01357         &m, &n,
01358         A.memptr(), &lda,
01359         S.memptr(),
01360         U.memptr(), &ldu,
01361         V.memptr(), &ldvt,
01362         work.memptr(), &lwork,
01363         rwork.memptr(),
01364         &info
01365         );
01366       }
01367         
01368     return (info == 0);
01369     }
01370   #else
01371     {
01372     arma_stop("svd(): need LAPACK");
01373     return false;
01374     }
01375   #endif
01376   }
01377 
01378 
01379 
01380 template<typename eT>
01381 inline
01382 bool
01383 auxlib::svd(Mat<eT>& U, Col<eT>& S, Mat<eT>& V, const Mat<eT>& X)
01384   {
01385   arma_extra_debug_sigprint();
01386   
01387   #if defined(ARMA_USE_LAPACK)
01388     {
01389     Mat<eT> A = X;
01390     
01391     U.set_size(A.n_rows, A.n_rows);
01392     V.set_size(A.n_cols, A.n_cols);
01393     
01394     char jobu  = 'A';
01395     char jobvt = 'A';
01396     
01397     int  m     = A.n_rows;
01398     int  n     = A.n_cols;
01399     int  lda   = A.n_rows;
01400     int  ldu   = U.n_rows;
01401     int  ldvt  = V.n_rows;
01402     int  lwork = 2 * (std::max)(1, (std::max)( (3*(std::min)(m,n) + (std::max)(m,n)), 5*(std::min)(m,n) ) );
01403     int  info;
01404     
01405     
01406     S.set_size( (std::min)(m,n) );
01407     podarray<eT> work(lwork);
01408   
01409     // let gesvd_() calculate the optimum size of the workspace
01410     int lwork_tmp = -1;
01411     
01412     lapack::gesvd_<eT>
01413       (
01414       &jobu, &jobvt,
01415       &m, &n,
01416       A.memptr(), &lda,
01417       S.memptr(),
01418       U.memptr(), &ldu,
01419       V.memptr(), &ldvt,
01420       work.memptr(), &lwork_tmp,
01421       &info
01422       );
01423     
01424     if(info == 0)
01425       {
01426       int proposed_lwork = static_cast<int>(work[0]);
01427       if(proposed_lwork > lwork)
01428         {
01429         lwork = proposed_lwork;
01430         work.set_size(lwork);
01431         }
01432       
01433       lapack::gesvd_<eT>
01434         (
01435         &jobu, &jobvt,
01436         &m, &n,
01437         A.memptr(), &lda,
01438         S.memptr(),
01439         U.memptr(), &ldu,
01440         V.memptr(), &ldvt,
01441         work.memptr(), &lwork,
01442         &info
01443         );
01444       
01445       op_trans::apply(V,V);  // op_trans will work out that an in-place transpose can be done
01446       }
01447     
01448     return (info == 0);
01449     }
01450   #else
01451     {
01452     arma_stop("svd(): need LAPACK");
01453     return false;
01454     }
01455   #endif
01456   }
01457 
01458 
01459 
01460 template<typename T>
01461 inline
01462 bool
01463 auxlib::svd(Mat< std::complex<T> >& U, Col<T>& S, Mat< std::complex<T> >& V, const Mat< std::complex<T> >& X)
01464   {
01465   arma_extra_debug_sigprint();
01466   
01467   typedef std::complex<T> eT;
01468   
01469   #if defined(ARMA_USE_LAPACK)
01470     {
01471     Mat<eT> A = X;
01472     
01473     U.set_size(A.n_rows, A.n_rows);
01474     V.set_size(A.n_cols, A.n_cols);
01475     
01476     char jobu  = 'A';
01477     char jobvt = 'A';
01478     
01479     int  m     = A.n_rows;
01480     int  n     = A.n_cols;
01481     int  lda   = A.n_rows;
01482     int  ldu   = U.n_rows;
01483     int  ldvt  = V.n_rows;
01484     int  lwork = 2 * (std::max)(1, 2*(std::min)(m,n)+(std::max)(m,n) );
01485     int  info;
01486     
01487     S.set_size( (std::min)(m,n) );
01488     
01489     podarray<eT> work(lwork);
01490     podarray<T>  rwork( 5*(std::min)(m,n) );
01491   
01492     // let gesvd_() calculate the optimum size of the workspace
01493     int lwork_tmp = -1;
01494     lapack::cx_gesvd_<T>
01495      (
01496      &jobu, &jobvt,
01497      &m, &n,
01498      A.memptr(), &lda,
01499      S.memptr(),
01500      U.memptr(), &ldu,
01501      V.memptr(), &ldvt,
01502      work.memptr(), &lwork_tmp,
01503      rwork.memptr(),
01504      &info
01505      );
01506     
01507     if(info == 0)
01508       {
01509       int proposed_lwork = static_cast<int>(real(work[0]));
01510       if(proposed_lwork > lwork)
01511         {
01512         lwork = proposed_lwork;
01513         work.set_size(lwork);
01514         }
01515       
01516       lapack::cx_gesvd_<T>
01517         (
01518         &jobu, &jobvt,
01519         &m, &n,
01520         A.memptr(), &lda,
01521         S.memptr(),
01522         U.memptr(), &ldu,
01523         V.memptr(), &ldvt,
01524         work.memptr(), &lwork,
01525         rwork.memptr(),
01526         &info
01527         );
01528       
01529       op_htrans::apply(V,V);  // op_htrans will work out that an in-place transpose can be done
01530       }
01531     
01532     return (info == 0);
01533     }
01534   #else
01535     {
01536     arma_stop("svd(): need LAPACK");
01537     return false;
01538     }
01539   #endif
01540   
01541   }
01542 
01543 
01544 //! Solve a system of linear equations
01545 //! Assumes that A.n_rows = A.n_cols
01546 //! and B.n_rows = A.n_rows
01547 template<typename eT>
01548 inline
01549 bool
01550 auxlib::solve(Mat<eT>& out, const Mat<eT>& A, const Mat<eT>& B)
01551   {
01552   arma_extra_debug_sigprint();
01553   
01554   #if defined(ARMA_USE_LAPACK)
01555     {
01556     int n    = A.n_rows;
01557     int lda  = A.n_rows;
01558     int ldb  = A.n_rows;
01559     int nrhs = B.n_cols;
01560     int info;
01561     
01562     podarray<int> ipiv(n);
01563     
01564     out = B;
01565     Mat<eT> A_copy = A;
01566   
01567     lapack::gesv_<eT>(&n, &nrhs, A_copy.memptr(), &lda, ipiv.memptr(), out.memptr(), &ldb, &info);
01568   
01569     return (info == 0);
01570     }
01571   #else
01572     {
01573     arma_stop("solve(): need LAPACK");
01574     return false;
01575     }
01576   #endif
01577   }
01578 
01579 
01580                
01581 //! Solve an over-determined system.
01582 //! Assumes that A.n_rows > A.n_cols
01583 //! and B.n_rows = A.n_rows
01584 template<typename eT>
01585 inline
01586 bool
01587 auxlib::solve_od(Mat<eT>& out, const Mat<eT>& A, const Mat<eT>& B)
01588   {
01589   arma_extra_debug_sigprint();
01590   
01591   #if defined(ARMA_USE_LAPACK)
01592     {
01593     char trans = 'N';
01594     
01595     int  m     = A.n_rows;
01596     int  n     = A.n_cols;
01597     int  lda   = A.n_rows;
01598     int  ldb   = A.n_rows;
01599     int  nrhs  = B.n_cols;
01600     int  lwork = n + (std::max)(n, nrhs);
01601     int  info;
01602     
01603     Mat<eT> A_copy = A;
01604     Mat<eT> tmp    = B;
01605     
01606     
01607     podarray<eT> work(lwork);
01608     
01609     arma_extra_debug_print("lapack::gels_()");
01610     
01611     // NOTE:
01612     // the dgels() function in the lapack library supplied by ATLAS 3.6
01613     // seems to have problems
01614     
01615     lapack::gels_<eT>
01616       (
01617       &trans, &m, &n, &nrhs,
01618       A_copy.memptr(), &lda,
01619       tmp.memptr(), &ldb,
01620       work.memptr(), &lwork,
01621       &info
01622       );
01623     
01624     arma_extra_debug_print("lapack::gels_() -- finished");
01625     
01626     out.set_size(A.n_cols, B.n_cols);
01627     
01628     for(u32 col=0; col<B.n_cols; ++col)
01629       {
01630       syslib::copy_elem( out.colptr(col), tmp.colptr(col), A.n_cols );
01631       }
01632     
01633     return (info == 0);
01634     }
01635   #else
01636     {
01637     arma_stop("auxlib::solve_od(): need LAPACK");
01638     return false;
01639     }
01640   #endif
01641   }
01642 
01643 
01644 
01645 //! Solve an under-determined system.
01646 //! Assumes that A.n_rows < A.n_cols
01647 //! and B.n_rows = A.n_rows
01648 template<typename eT>
01649 inline
01650 bool
01651 auxlib::solve_ud(Mat<eT>& out, const Mat<eT>& A, const Mat<eT>& B)
01652   {
01653   arma_extra_debug_sigprint();
01654   
01655   #if defined(ARMA_USE_LAPACK)
01656     {
01657     char trans = 'N';
01658     
01659     int  m     = A.n_rows;
01660     int  n     = A.n_cols;
01661     int  lda   = A.n_rows;
01662     int  ldb   = A.n_cols;
01663     int  nrhs  = B.n_cols;
01664     int  lwork = m + (std::max)(m,nrhs);
01665     int  info;
01666     
01667     
01668     Mat<eT> A_copy = A;
01669     
01670     Mat<eT> tmp;
01671     tmp.zeros(A.n_cols, B.n_cols);
01672     
01673     for(u32 col=0; col<B.n_cols; ++col)
01674       {
01675       eT* tmp_colmem = tmp.colptr(col);
01676       
01677       syslib::copy_elem( tmp_colmem, B.colptr(col), B.n_rows );
01678       
01679       for(u32 row=B.n_rows; row<A.n_cols; ++row)
01680         {
01681         tmp_colmem[row] = eT(0);
01682         }
01683       }
01684     
01685     podarray<eT> work(lwork);
01686     
01687     arma_extra_debug_print("lapack::gels_()");
01688     
01689     // NOTE:
01690     // the dgels() function in the lapack library supplied by ATLAS 3.6
01691     // seems to have problems
01692     
01693     lapack::gels_<eT>
01694       (
01695       &trans, &m, &n, &nrhs,
01696       A_copy.memptr(), &lda,
01697       tmp.memptr(), &ldb,
01698       work.memptr(), &lwork,
01699       &info
01700       );
01701     
01702     arma_extra_debug_print("lapack::gels_() -- finished");
01703     
01704     out.set_size(A.n_cols, B.n_cols);
01705     
01706     for(u32 col=0; col<B.n_cols; ++col)
01707       {
01708       syslib::copy_elem( out.colptr(col), tmp.colptr(col), A.n_cols );
01709       }
01710   
01711     return (info == 0);
01712     }
01713   #else
01714     {
01715     arma_stop("auxlib::solve_ud(): need LAPACK");
01716     return false;
01717     }
01718   #endif
01719   }
01720 
01721 
01722 
01723 //! @}