glue_plus_meat.hpp

Go to the documentation of this file.
00001 // Copyright (C) 2009 NICTA
00002 // 
00003 // Authors:
00004 // - Conrad Sanderson (conradsand at ieee dot org)
00005 // 
00006 // This file is part of the Armadillo C++ library.
00007 // It is provided without any warranty of fitness
00008 // for any purpose. You can redistribute this file
00009 // and/or modify it under the terms of the GNU
00010 // Lesser General Public License (LGPL) as published
00011 // by the Free Software Foundation, either version 3
00012 // of the License or (at your option) any later version.
00013 // (see http://www.opensource.org/licenses for more info)
00014 
00015 
00016 //! \addtogroup glue_plus
00017 //! @{
00018 
00019 
00020 
00021 template<typename T1, typename T2>
00022 inline
00023 void
00024 glue_plus::apply(Mat<typename T1::elem_type>& out, const Glue<T1,T2,glue_plus>& X)
00025   {
00026   arma_extra_debug_sigprint();
00027   
00028   typedef typename T1::elem_type eT;
00029   
00030   const u32 N_mat = 1 + depth_lhs< glue_plus, Glue<T1,T2,glue_plus> >::num;
00031   arma_extra_debug_print( arma_boost::format("N_mat = %d") % N_mat );
00032 
00033   if(N_mat == 2)
00034     {
00035     if(is_glue_times<T1>::value == true)
00036       {
00037       out = X.B;
00038       glue_plus::apply_inplace(out, X.A);
00039       }
00040     else
00041     if(is_glue_times<T2>::value == true)
00042       {
00043       out = X.A;
00044       glue_plus::apply_inplace(out, X.B);
00045       }
00046     else
00047       {
00048       if(is_Mat<T1>::value == false)
00049         {
00050         out = X.A;
00051         glue_plus::apply_inplace(out, X.B);
00052         }
00053       else
00054       if(is_Mat<T2>::value == false)
00055         {
00056         out = X.B;
00057         glue_plus::apply_inplace(out, X.A);
00058         }
00059       else
00060         {
00061         // even though the calls to unwrap in this block are logically unnecessary,
00062         // the compiler ignores the above if statements and overzealously checks
00063         // whether the code in this block can be used during all instances of
00064         // template expansion
00065         
00066         const unwrap<T1> tmp1(X.A);
00067         const unwrap<T2> tmp2(X.B);
00068         
00069         glue_plus::apply(out, tmp1.M, tmp2.M);
00070         }
00071       }
00072     }
00073   else
00074     {
00075     const Mat<eT>* ptrs[N_mat];
00076     bool            del[N_mat];
00077 
00078     mat_ptrs<glue_plus, Glue<T1,T2,glue_plus> >::get_ptrs(ptrs, del, X);
00079 
00080     for(u32 i=0; i<N_mat; ++i)  arma_extra_debug_print( arma_boost::format("ptrs[%d] = %x") % i % ptrs[i] );
00081     for(u32 i=0; i<N_mat; ++i)  arma_extra_debug_print( arma_boost::format(" del[%d] = %d") % i %  del[i] );
00082 
00083     const u32 n_rows = ptrs[0]->n_rows;
00084     const u32 n_cols = ptrs[0]->n_cols;
00085   
00086     const Mat<eT>& tmp_mat = *(ptrs[0]);
00087     
00088     for(u32 i=1; i<N_mat; ++i)
00089       {
00090       arma_debug_assert_same_size(tmp_mat, *(ptrs[i]), "matrix addition");
00091       }
00092   
00093   
00094     // no aliasing problem
00095     out.set_size(n_rows, n_cols);
00096     
00097     const u32 n_elem = ptrs[0]->n_elem;
00098     
00099     for(u32 j=0; j<n_elem; ++j)
00100       {
00101       eT acc = ptrs[0]->mem[j];
00102     
00103       for(u32 i=1; i < N_mat; ++i)
00104         {
00105         acc += ptrs[i]->mem[j];
00106         }
00107     
00108       out[j] = acc;
00109       }
00110     
00111     
00112     for(u32 i=0; i<N_mat; ++i)
00113       {
00114       if(del[i] == true)
00115         {
00116         arma_extra_debug_print( arma_boost::format("delete mat_ptr[%d]") % i );
00117         delete ptrs[i];
00118         }
00119       }
00120     }
00121   }
00122 
00123 
00124 
00125 template<typename T1>
00126 inline
00127 void
00128 glue_plus::apply_inplace(Mat<typename T1::elem_type>& out, const T1& X)
00129   {
00130   arma_extra_debug_sigprint();
00131   
00132   typedef typename T1::elem_type eT;
00133   
00134   const unwrap<T1>   tmp(X);
00135   const Mat<eT>& B = tmp.M;
00136   
00137   arma_debug_assert_same_size(out, B, "matrix addition");
00138   
00139   
00140         eT* out_mem = out.memptr();
00141   const eT* B_mem   = B.mem;
00142   
00143   const u32 n_elem  = B.n_elem;
00144   
00145   for(u32 i=0; i<n_elem; ++i)
00146     {
00147     out_mem[i] += B_mem[i];
00148     }
00149   
00150   }
00151 
00152 
00153 
00154 //! matrix addition with different element types
00155 template<typename eT1, typename eT2>
00156 inline
00157 void
00158 glue_plus::apply_mixed(Mat<typename promote_type<eT1,eT2>::result>& out, const Mat<eT1>& X, const Mat<eT2>& Y)
00159   {
00160   arma_extra_debug_sigprint();
00161   
00162   typedef typename promote_type<eT1,eT2>::result out_eT;
00163   
00164   arma_debug_assert_same_size(X,Y, "matrix addition");  
00165   
00166   //out.set_size(X.n_rows, X.n_cols);
00167   out.copy_size(X);
00168   
00169         out_eT* out_mem = out.memptr();
00170   const eT1*    X_mem   = X.mem;
00171   const eT2*    Y_mem   = Y.mem;
00172   
00173   const u32 n_elem = out.n_elem;
00174   
00175   for(u32 i=0; i<n_elem; ++i)
00176     {
00177     out_mem[i] = upgrade_val<eT1,eT2>::apply(X_mem[i]) + upgrade_val<eT1,eT2>::apply(Y_mem[i]);
00178     }
00179   }
00180 
00181 
00182 
00183 template<typename eT>
00184 inline
00185 void
00186 glue_plus::apply(Mat<eT>& out, const Mat<eT>& A, const Mat<eT>& B)
00187   {
00188   arma_extra_debug_sigprint();
00189   
00190   arma_debug_assert_same_size(A, B, "matrix addition");
00191   
00192   // no aliasing problem
00193   //out.set_size(A.n_rows, A.n_cols);
00194   out.copy_size(A);
00195   
00196         eT* out_mem = out.memptr();
00197   const eT* A_mem   = A.mem;
00198   const eT* B_mem   = B.mem;
00199     
00200   const u32 n_elem  = out.n_elem;
00201   
00202   for(u32 i=0; i<n_elem; ++i)
00203     {
00204     out_mem[i] = A_mem[i] + B_mem[i];
00205     }
00206     
00207   }
00208 
00209 
00210 
00211 template<typename eT>
00212 inline
00213 void
00214 glue_plus::apply(Mat<eT>& out, const Mat<eT>& A, const Mat<eT>& B, const Mat<eT>& C)
00215   {
00216   arma_extra_debug_sigprint();
00217   
00218   arma_debug_assert_same_size(A, B, "matrix addition");
00219   arma_debug_assert_same_size(A, C, "matrix addition");
00220   
00221   // no aliasing problem
00222   //out.set_size(A.n_rows, A.n_cols);
00223   out.copy_size(A);
00224     
00225         eT* out_mem = out.memptr();
00226   const eT* A_mem   = A.mem;
00227   const eT* B_mem   = B.mem;
00228   const eT* C_mem   = C.mem;
00229   
00230   const u32 n_elem  = A.n_elem;
00231   
00232   for(u32 i=0; i<n_elem; ++i)
00233     {
00234     out_mem[i] = A_mem[i] + B_mem[i] + C_mem[i];
00235     }
00236     
00237   }
00238 
00239 
00240 
00241 #if defined(ARMA_GOOD_COMPILER)
00242 
00243 
00244 
00245 template<typename eT>
00246 inline
00247 void
00248 glue_plus::apply(Mat<eT>& out, const Glue<Mat<eT>,Mat<eT>,glue_plus>& X)
00249   {
00250   glue_plus::apply(out, X.A, X.B);
00251   }
00252 
00253 
00254 
00255 template<typename eT>
00256 inline
00257 void
00258 glue_plus::apply(Mat<eT>& out, const Glue< Glue<Mat<eT>,Mat<eT>,glue_plus>, Mat<eT>, glue_plus>& X)
00259   {
00260   glue_plus::apply(out, X.A.A, X.A.B, X.B);
00261   }
00262 
00263 
00264 
00265 // possible aliasing cases:
00266 // Q = Q + Q.row(0)  -> no problem  (aliasing has no effect or incompatible matrix dimensions).
00267 //                      however, the only time the above will work is when Q has the same dimensions as Q.row(0),
00268 //                      meaning that doing this addition operation is pretty silly
00269 // Q = Q + R.row(0)  -> no problem
00270 // Q = R + Q.row(0)  -> output Q is set to size of R, which may destroy input Q
00271 //
00272 // strategy:
00273 // if the matrix from the second argument is an alias of the output matrix,
00274 // make a proper matrix out of the second argument
00275 
00276 template<typename eT>
00277 inline
00278 void
00279 glue_plus::apply(Mat<eT>& out, const Glue<Mat<eT>, subview<eT>, glue_plus>& X)
00280   {
00281   arma_extra_debug_sigprint();
00282   
00283   const Mat<eT>& orig_A = X.A;
00284   const Mat<eT>& orig_B = X.B.m;
00285   
00286   if( &out != &orig_B )
00287     {
00288     //const u32 sub_B_n_rows = X.B.n_rows;
00289     //const u32 sub_B_n_cols = X.B.n_cols;
00290     
00291     arma_debug_assert_same_size(X.A, X.B, "matrix addition");
00292       
00293     
00294     //out.set_size(orig_A.n_rows, orig_A.n_cols);
00295     out.copy_size(orig_A);
00296     
00297     for(u32 col = 0; col<orig_A.n_cols; ++col)
00298       {
00299       const u32 B_col_mod = X.B.aux_col1 + col;
00300       
00301       for(u32 row = 0; row<orig_A.n_rows; ++row)
00302         {
00303         const u32 B_row_mod = X.B.aux_row1 + row;
00304         
00305         out.at(row,col) =  orig_A.at(row, col) + orig_B.at(B_row_mod, B_col_mod);
00306         }
00307       
00308       }
00309     
00310     }
00311   else
00312     {
00313     const Mat<eT> processed_B(X.B);  // create a matrix out of subview
00314     glue_plus::apply(out, orig_A, processed_B);
00315     }
00316      
00317   }
00318 
00319 
00320 // possible aliasing cases:
00321 // Q = Q.row(0) + Q  -> no problem (aliasing has no effect or incompatible matrix dimensions)
00322 // Q = Q.row(0) + R  -> problem (output Q is set to size of Q.row(0) which may destroy input Q)
00323 // Q = R.row(0) + Q  -> problem (output Q is set to size of R.row(0) which may destroy input Q)
00324 
00325 template<typename eT>
00326 inline
00327 void
00328 glue_plus::apply(Mat<eT>& out, const Glue< subview<eT>, Mat<eT>, glue_plus>& X)
00329   {
00330   arma_extra_debug_sigprint();
00331   
00332   const Mat<eT>& orig_A = X.A.m;
00333   
00334   const unwrap_check< Mat<eT> > tmp(X.B, out);
00335   const Mat<eT>& orig_B = tmp.M;
00336   
00337   if( &out != &orig_A )
00338     {
00339     const u32 sub_A_n_rows = X.A.n_rows;
00340     const u32 sub_A_n_cols = X.A.n_cols;
00341     
00342     arma_debug_assert_same_size(X.A, X.B, "matrix addition");
00343       
00344     out.set_size(sub_A_n_rows, sub_A_n_cols);
00345     
00346     for(u32 col = 0; col<sub_A_n_cols; ++col)
00347       {
00348       const u32 A_col_mod = X.A.aux_col1 + col;
00349       
00350       for(u32 row = 0; row<sub_A_n_rows; ++row)
00351         {
00352         const u32 A_row_mod = X.A.aux_row1 + row;
00353         
00354         out.at(row,col) =  orig_A.at(A_row_mod, A_col_mod) + orig_B.at(row, col);
00355         }
00356       
00357       }
00358     }
00359   else
00360     {
00361     const Mat<eT> processed_A(X.A);
00362     glue_plus::apply(out, processed_A, orig_B);
00363     }
00364   
00365   
00366   }
00367 
00368 
00369 // possible aliasing cases:
00370 // Q = Q.row(0) + Q.row(0)  -> input Q is destroyed unless Q.row(0) has the same size as Q 
00371 // Q = Q.row(0) + R.row(0)  -> input Q is destroyed unless Q.row(0) has the same size as Q 
00372 // Q = R.row(0) + Q.row(0)  -> input Q is destroyed
00373 
00374 template<typename eT>
00375 inline
00376 void
00377 glue_plus::apply(Mat<eT>& out, const Glue< subview<eT>, subview<eT>, glue_plus>& X)
00378   {
00379   arma_extra_debug_sigprint();
00380   
00381   const Mat<eT>& orig_A = X.A.m;
00382   const Mat<eT>& orig_B = X.B.m;
00383   
00384   if( (&out != &orig_A) && (&out != &orig_B) )
00385     {
00386     const u32 sub_A_n_rows = X.A.n_rows;
00387     const u32 sub_A_n_cols = X.A.n_cols;
00388     
00389     //const u32 sub_B_n_rows = X.B.n_rows;
00390     //const u32 sub_B_n_cols = X.B.n_cols;
00391     
00392     arma_debug_assert_same_size(X.A, X.B, "matrix addition");
00393       
00394     out.set_size(sub_A_n_rows, sub_A_n_cols);
00395     
00396     for(u32 col = 0; col<sub_A_n_cols; ++col)
00397       {
00398       const u32 A_col_mod = X.A.aux_col1 + col;
00399       const u32 B_col_mod = X.B.aux_col1 + col;
00400       
00401       for(u32 row = 0; row<sub_A_n_rows; ++row)
00402         {
00403         const u32 A_row_mod = X.A.aux_row1 + row;
00404         const u32 B_row_mod = X.B.aux_row1 + row;
00405         
00406         out.at(row,col) =  orig_A.at(A_row_mod, A_col_mod) + orig_B.at(B_row_mod, B_col_mod);
00407         }
00408       
00409       }
00410     }
00411   else
00412     {
00413     const Mat<eT> processed_A(X.A);
00414     const Mat<eT> processed_B(X.B);
00415     
00416     glue_plus::apply(out, processed_A, processed_B);
00417     }
00418   }
00419 
00420 
00421 
00422 template<typename T1>
00423 inline
00424 void
00425 glue_plus::apply_inplace(Mat<typename T1::elem_type>& out, const Op<T1, op_square>& X)
00426   {
00427   arma_extra_debug_sigprint();
00428   
00429   typedef typename T1::elem_type eT;
00430   
00431   const unwrap<T1> tmp(X.m);
00432   const Mat<eT>& B = tmp.M;
00433   
00434   arma_debug_assert_same_size(out, B, "matrix addition");
00435     
00436         eT* out_mem = out.memptr();
00437   const eT* B_mem   = B.mem;
00438   
00439   const u32 n_elem  = out.n_elem;
00440   
00441   for(u32 i=0; i<n_elem; ++i)
00442     {
00443     const eT tmp_val = B_mem[i];
00444     out_mem[i] += tmp_val*tmp_val;
00445     }
00446   }
00447 
00448 
00449 
00450 template<typename T1>
00451 inline
00452 void
00453 glue_plus::apply_inplace(Mat<typename T1::elem_type>& out, const Op<T1, op_scalar_times>& X)
00454   {
00455   arma_extra_debug_sigprint();
00456   
00457   typedef typename T1::elem_type eT;
00458   
00459   const unwrap<T1> tmp(X.m);
00460   const Mat<eT>& B = tmp.M;
00461   
00462   arma_debug_assert_same_size(out, B, "matrix addition");
00463   
00464         eT* out_mem = out.memptr();
00465   const eT* B_mem   = B.mem;
00466   
00467   const eT  k       = X.aux;
00468   const u32 n_elem  = out.n_elem;
00469   
00470   for(u32 i=0; i<n_elem; ++i)
00471     {
00472     out_mem[i] += k * B_mem[i];
00473     }
00474   }
00475 
00476 
00477 
00478 template<typename T1>
00479 inline
00480 void
00481 glue_plus::apply_inplace(Mat<typename T1::elem_type>& out, const Op<T1, op_scalar_div_pre>& X)
00482   {
00483   arma_extra_debug_sigprint();
00484   
00485   typedef typename T1::elem_type eT;
00486   
00487   const unwrap<T1>   tmp(X.m);
00488   const Mat<eT>& A = tmp.M;
00489   
00490   arma_debug_assert_same_size(out, A, "matrix addition");
00491   
00492         eT* out_mem = out.memptr();
00493   const eT* A_mem   = A.mem;
00494   
00495   const eT  k       = X.aux;
00496   const u32 n_elem  = out.n_elem;
00497   
00498   for(u32 i=0; i<n_elem; ++i)
00499     {
00500     out_mem[i] += k / A_mem[i];
00501     }
00502   }
00503 
00504 
00505 
00506 template<typename T1>
00507 inline
00508 void
00509 glue_plus::apply_inplace(Mat<typename T1::elem_type>& out, const Op<T1, op_scalar_div_post>& X)
00510   {
00511   arma_extra_debug_sigprint();
00512   
00513   typedef typename T1::elem_type eT;
00514   
00515   const unwrap<T1>   tmp(X.m);
00516   const Mat<eT>& A = tmp.M;
00517   
00518   arma_debug_assert_same_size(out, A, "matrix addition");
00519   
00520         eT* out_mem = out.memptr();
00521   const eT* A_mem   = A.mem;
00522   
00523   const eT  k       = X.aux;
00524   const u32 n_elem  = out.n_elem;
00525   
00526   for(u32 i=0; i<n_elem; ++i)
00527     {
00528     out_mem[i] += A_mem[i] / k;
00529     }
00530   }
00531 
00532 
00533 
00534 template<typename T1, typename T2>
00535 inline
00536 void
00537 glue_plus::apply_inplace(Mat<typename T1::elem_type>& out, const Glue<T1, T2, glue_plus>& X)
00538   {
00539   arma_extra_debug_sigprint();
00540   
00541   out = X + out;
00542   }
00543 
00544 
00545 
00546 template<typename T1, typename T2>
00547 inline
00548 void
00549 glue_plus::apply_inplace(Mat<typename T1::elem_type>& out, const Glue<Op<T1, op_scalar_times>, Op<T2, op_scalar_times>, glue_plus>& X)
00550   {
00551   arma_extra_debug_sigprint();
00552   
00553   typedef typename T1::elem_type eT;
00554   
00555   const unwrap<T1> tmp1(X.A.m);
00556   const unwrap<T2> tmp2(X.B.m);
00557   
00558   const Mat<eT>& A = tmp1.M;
00559   const Mat<eT>& B = tmp2.M;
00560   
00561   arma_debug_assert_same_size(out, A, "matrix addition");
00562   arma_debug_assert_same_size(  A, B, "matrix addition");
00563   
00564         eT* out_mem = out.memptr();
00565   const eT* A_mem   = A.mem;
00566   const eT* B_mem   = B.mem;
00567   
00568   const eT  k1      = X.A.aux;
00569   const eT  k2      = X.B.aux;
00570   
00571   const u32 n_elem  = out.n_elem;
00572   
00573   for(u32 i=0; i<n_elem; ++i)
00574     {
00575     out_mem[i] += k1*A_mem[i] + k2*B_mem[i];
00576     }
00577   }
00578 
00579 
00580 
00581 template<typename T1, typename T2, typename T3>
00582 inline
00583 void
00584 glue_plus::apply_inplace(Mat<typename T1::elem_type>& out, const Glue< Glue< Op<T1, op_scalar_times>, Op<T2, op_scalar_times>, glue_plus>, Op<T3, op_scalar_times>, glue_plus>& X)
00585   {
00586   arma_extra_debug_sigprint();
00587   
00588   typedef typename T1::elem_type eT;
00589   
00590   const unwrap<T1> tmp1(X.A.A.m);
00591   const unwrap<T2> tmp2(X.A.B.m);
00592   const unwrap<T3> tmp3(X.B.m);
00593   
00594   const Mat<eT>& A = tmp1.M;
00595   const Mat<eT>& B = tmp2.M;
00596   const Mat<eT>& C = tmp3.M;
00597   
00598   arma_debug_assert_same_size(out, A, "matrix addition");
00599   arma_debug_assert_same_size(  A, B, "matrix addition");
00600   arma_debug_assert_same_size(  B, C, "matrix addition");
00601   
00602         eT* out_mem = out.memptr();
00603   const eT* A_mem   = A.mem;
00604   const eT* B_mem   = B.mem;
00605   const eT* C_mem   = C.mem;
00606   
00607   const eT  k1      = X.A.A.aux;
00608   const eT  k2      = X.A.B.aux;
00609   const eT  k3      = X.B.aux;
00610   
00611   const u32 n_elem  = out.n_elem;
00612   
00613   for(u32 i=0; i<n_elem; ++i)
00614     {
00615     out_mem[i] += k1*A_mem[i] + k2*B_mem[i] + k3*C_mem[i];
00616     }
00617   }
00618 
00619 
00620 
00621 template<typename T1, typename T2>
00622 inline
00623 void
00624 glue_plus::apply_inplace(Mat<typename T1::elem_type>& out, const Glue<T1, T2, glue_times>& X)
00625   {
00626   arma_extra_debug_sigprint();
00627   
00628   typedef typename T1::elem_type eT;
00629   
00630   const unwrap_check<T1> tmp1(X.A, out);
00631   const unwrap_check<T2> tmp2(X.B, out);
00632   
00633   const Mat<eT>& A = tmp1.M;
00634   const Mat<eT>& B = tmp2.M;
00635   
00636   arma_debug_assert_mul_size(A, B, "matrix multiplication");
00637   arma_debug_assert_same_size(out.n_rows, out.n_cols, A.n_rows, B.n_cols, "matrix addition");
00638   
00639   gemm<false,false,false,true>::apply(out, A, B, eT(1), eT(1));
00640   }
00641 
00642 
00643 
00644 template<typename T1, typename T2>
00645 inline
00646 void
00647 glue_plus::apply_inplace(Mat<typename T1::elem_type>& out, const Glue<Op<T1, op_trans>, T2, glue_times>& X)
00648   {
00649   arma_extra_debug_sigprint();
00650   
00651   typedef typename T1::elem_type eT;
00652   
00653   const unwrap_check<T1> tmp1(X.A.m, out);
00654   const unwrap_check<T2> tmp2(X.B,   out);
00655   
00656   const Mat<eT>& A = tmp1.M;
00657   const Mat<eT>& B = tmp2.M;
00658   
00659   // we need to remember that A deliberately hasn't been transposed
00660   
00661   arma_debug_assert_mul_size(A.n_cols, A.n_rows, B.n_rows, B.n_cols, "matrix multiplication");
00662   arma_debug_assert_same_size(out.n_rows, out.n_cols, A.n_cols, B.n_cols, "matrix addition");
00663   
00664   gemm<true,false,false,true>::apply(out, A, B, eT(1), eT(1));
00665   }
00666 
00667 
00668 
00669 template<typename T1, typename T2>
00670 inline
00671 void
00672 glue_plus::apply_inplace(Mat<typename T1::elem_type>& out, const Glue<T1, Op<T2, op_trans>, glue_times>& X)
00673   {
00674   arma_extra_debug_sigprint();
00675   
00676   typedef typename T1::elem_type eT;
00677   
00678   const unwrap_check<T1> tmp1(X.A,   out);
00679   const unwrap_check<T2> tmp2(X.B.m, out);
00680   
00681   const Mat<eT>& A = tmp1.M;
00682   const Mat<eT>& B = tmp2.M;
00683   
00684   // we need to remember that B deliberately hasn't been transposed
00685   
00686   arma_debug_assert_mul_size(A.n_rows, A.n_cols, B.n_cols, B.n_rows, "matrix multiplication");
00687   arma_debug_assert_same_size(out.n_rows, out.n_cols, A.n_rows, B.n_rows, "matrix addition");
00688   
00689   gemm<false,true,false,true>::apply(out, A, B, eT(1), eT(1));
00690   }
00691 
00692 
00693 
00694 template<typename T1, typename T2>
00695 inline
00696 void
00697 glue_plus::apply_inplace(Mat<typename T1::elem_type>& out, const Glue<Op<T1, op_trans>, Op<T2, op_trans>, glue_times>& X)
00698   {
00699   arma_extra_debug_sigprint();
00700   
00701   typedef typename T1::elem_type eT;
00702   
00703   const unwrap_check<T1> tmp1(X.A.m, out);
00704   const unwrap_check<T2> tmp2(X.B.m, out);
00705   
00706   const Mat<eT>& A = tmp1.M;
00707   const Mat<eT>& B = tmp2.M;
00708   
00709   // we need to remember that A and B deliberately haven't been transposed
00710   
00711   arma_debug_assert_mul_size(A.n_cols, A.n_rows, B.n_cols, B.n_rows, "matrix multiplication");
00712   arma_debug_assert_same_size(out.n_rows, out.n_cols, A.n_cols, B.n_rows, "matrix addition");
00713   
00714   gemm<true,true,false,true>::apply(out, A, B, eT(1), eT(1));
00715   }
00716 
00717 
00718 
00719 template<typename T1, typename T2>
00720 inline
00721 void
00722 glue_plus::apply_inplace(Mat<typename T1::elem_type>& out, const Glue<T1, T2, glue_times_vec>& X)
00723   {
00724   arma_extra_debug_sigprint();
00725   
00726   typedef typename T1::elem_type eT;
00727   
00728   const unwrap_check<T1> tmp1(X.A, out);
00729   const unwrap_check<T2> tmp2(X.B, out);
00730   
00731   const Mat<eT>& A = tmp1.M;
00732   const Mat<eT>& B = tmp2.M;
00733   
00734   arma_debug_assert_mul_size(A, B, "vector multiplication");
00735   arma_debug_assert_same_size(out.n_rows, out.n_cols, A.n_rows, B.n_cols, "matrix addition");
00736   
00737   if(A.is_vec() && B.is_vec())
00738     {
00739     if(A.n_cols == 1)  // column * row  (i.e. outer product)
00740       {
00741       glue_times_vec::mul_col_row_inplace_add(out, A.mem, B.mem);
00742       }
00743     else   // row * column  (i.e. inner product)
00744       {
00745       out[0] += op_dot::direct_dot(A.n_elem, A.mem, B.mem);
00746       }
00747     }
00748   else  // one of the arguments is a matrix
00749     {
00750     if(A.is_vec() == false)  // A is a matrix, B is a vector
00751       {
00752       gemv<false,false,true>::apply(out.memptr(), A, B.mem, eT(1), eT(1));
00753       }
00754     else  // A is a vector, B is a matrix
00755       {
00756       gemm<false,false,false,true>::apply(out, A, B, eT(1), eT(1));
00757       }
00758     }
00759   }
00760 
00761 
00762 
00763 template<typename T1, typename T2>
00764 inline
00765 void
00766 glue_plus::apply_inplace(Mat<typename T1::elem_type>& out, const Glue<Op<T1, op_trans>, T2, glue_times_vec>& X)
00767   {
00768   arma_extra_debug_sigprint();
00769   
00770   typedef typename T1::elem_type eT;
00771   
00772   const unwrap_check<T1> tmp1(X.A.m, out);
00773   const unwrap_check<T2> tmp2(X.B,   out);
00774   
00775   const Mat<eT>& A = tmp1.M;
00776   const Mat<eT>& B = tmp2.M;
00777   
00778   // we need to remember that A deliberately hasn't been transposed
00779   
00780   arma_debug_assert_mul_size(A.n_cols, A.n_rows, B.n_rows, B.n_cols, "vector multiplication");
00781   
00782   arma_debug_assert_same_size(out.n_rows, out.n_cols, A.n_cols, B.n_cols, "matrix addition");
00783   
00784   
00785   if(A.is_vec() && B.is_vec())
00786     {
00787     if(A.n_rows == 1)  // trans(row) * row, equivalent to column * row  (i.e. outer product)
00788       {
00789       glue_times_vec::mul_col_row_inplace_add(out, A.mem, B.mem);
00790       }
00791     else   // trans(column) * column, equivalent to row * column  (i.e. inner product)
00792       {
00793       out[0] += op_dot::direct_dot(A.n_elem, A.mem, B.mem);
00794       }
00795     }
00796   else  // one of the arguments is a matrix
00797     {
00798     if(A.is_vec() == false)  // A is a matrix, B is a vector
00799       {
00800       gemv<true,false,true>::apply(out.memptr(), A, B.mem, eT(1), eT(1));
00801       }
00802     else  // A is a vector, B is a matrix
00803       {
00804       gemm<true,false,false,true>::apply(out, A, B, eT(1), eT(1));
00805       }
00806     }
00807   }
00808 
00809 
00810 
00811 //
00812 //
00813 //
00814 
00815 
00816 
00817 template<typename T1, typename T2>
00818 inline
00819 void
00820 glue_plus::apply
00821   (
00822   Mat<typename T1::elem_type>& out,
00823   const Glue< Glue< T1, Col<typename T1::elem_type>, glue_times_vec>, T2, glue_plus>& in
00824   )
00825   {
00826   arma_extra_debug_sigprint();
00827   
00828   typedef typename T1::elem_type eT;
00829   
00830   const unwrap<T1> tmp1(in.A.A);
00831   const unwrap<T2> tmp2(in.B);
00832   
00833   const Mat<eT>& A = tmp1.M;
00834   const Col<eT>& B = in.A.B;
00835   const Mat<eT>& C = tmp2.M;
00836   
00837   arma_debug_assert_mul_size(A, B, "matrix multiplication");
00838   arma_debug_assert_same_size(A.n_rows, B.n_cols, C.n_rows, C.n_cols, "matrix addition");
00839   
00840   if( (&out != &A) && (&out != &B) )
00841     {
00842     out = C;
00843     gemv<false,false,true>::apply(out.memptr(), A, B.mem, eT(1), eT(1));
00844     }
00845   else
00846     {
00847     const unwrap_check< Mat<eT> > tmpA(A,out);
00848     const unwrap_check< Col<eT> > tmpB(B,out);
00849     
00850     const Mat<eT>& A_safe = tmpA.M;
00851     const Col<eT>& B_safe = tmpB.M;
00852     
00853     out = C;
00854     gemv<false,false,true>::apply(out.memptr(), A_safe, B_safe.mem, eT(1), eT(1));
00855     }
00856   
00857   }
00858 
00859 
00860 
00861 template<typename T1, typename T2>
00862 inline
00863 void
00864 glue_plus::apply
00865   (
00866   Mat<typename T1::elem_type>& out,
00867   const Glue< Glue< Row<typename T1::elem_type>, T1, glue_times_vec>, T2, glue_plus>& in
00868   )
00869   {
00870   arma_extra_debug_sigprint();
00871   
00872   typedef typename T1::elem_type eT;
00873   
00874   const unwrap<T1> tmp1(in.A.B);
00875   const unwrap<T2> tmp2(in.B);
00876   
00877   const Row<eT>& A = in.A.A;
00878   const Mat<eT>& B = tmp1.M;
00879   const Mat<eT>& C = tmp2.M;
00880   
00881   arma_debug_assert_mul_size(A, B, "matrix multiplication");
00882   arma_debug_assert_same_size(A.n_rows, B.n_cols, C.n_rows, C.n_cols, "matrix addition");
00883   
00884   if( (&out != &A) && (&out != &B) )
00885     {
00886     out = C;
00887     gemv<true,false,true>::apply(out.memptr(), B, A.mem, eT(1), eT(1));
00888     }
00889   else
00890     {
00891     const unwrap_check< Mat<eT> > tmpA(A,out);
00892     const unwrap_check< Mat<eT> > tmpB(B,out);
00893     
00894     const Mat<eT>& A_safe = tmpA.M;
00895     const Mat<eT>& B_safe = tmpB.M;
00896     
00897     out = C;
00898     gemv<true,false,true>::apply(out.memptr(), B_safe, A_safe.mem, eT(1), eT(1));
00899     }
00900   
00901   }
00902 
00903 
00904 
00905 template<typename T1, typename T2>
00906 inline
00907 void
00908 glue_plus::apply
00909   (
00910   Mat<typename T1::elem_type>& out,
00911   const Glue<Op<T1, op_scalar_times>, Op<T2, op_scalar_times>, glue_plus>& in
00912   )
00913   {
00914   arma_extra_debug_sigprint();
00915   
00916   typedef typename T1::elem_type eT;
00917   
00918   const unwrap<T1> tmp1(in.A.m);
00919   const unwrap<T2> tmp2(in.B.m);
00920   
00921   const Mat<eT>& A = tmp1.M;
00922   const Mat<eT>& B = tmp2.M;
00923   
00924   arma_debug_assert_same_size(A, B, "matrix addition");
00925   
00926   //out.set_size(A.n_rows, A.n_cols);
00927   out.copy_size(A);
00928   
00929   const eT k1 = in.A.aux;
00930   const eT k2 = in.B.aux;
00931   
00932         eT* out_mem = out.memptr();
00933   const eT* A_mem   = A.mem;
00934   const eT* B_mem   = B.mem;
00935   
00936   const u32 local_n_elem = A.n_elem;
00937   
00938   for(u32 i=0; i<local_n_elem; ++i)
00939     {
00940     out_mem[i] = k1*A_mem[i] + k2*B_mem[i];
00941     }
00942   
00943   }
00944 
00945 
00946 
00947 template<typename T1, typename T2, typename T3>
00948 inline
00949 void
00950 glue_plus::apply
00951   (
00952   Mat<typename T1::elem_type>& out,
00953   const Glue< Glue<Op<T1, op_scalar_times>, Op<T2, op_scalar_times>, glue_plus>, Op<T3, op_scalar_times>, glue_plus>& in
00954   )
00955   {
00956   arma_extra_debug_sigprint();
00957   
00958   typedef typename T1::elem_type eT;
00959   
00960   const unwrap<T1> tmp1(in.A.A.m);
00961   const unwrap<T2> tmp2(in.A.B.m);
00962   const unwrap<T3> tmp3(in.B.m);
00963   
00964   const Mat<eT>& A = tmp1.M;
00965   const Mat<eT>& B = tmp2.M;
00966   const Mat<eT>& C = tmp3.M;
00967   
00968   arma_debug_assert_same_size(A, B, "matrix addition");
00969   arma_debug_assert_same_size(B, C, "matrix addition");
00970   
00971   //out.set_size(A.n_rows, A.n_cols);
00972   out.copy_size(A);
00973   
00974   const eT k1 = in.A.A.aux;
00975   const eT k2 = in.A.B.aux;
00976   const eT k3 = in.B.aux;
00977   
00978         eT* out_mem = out.memptr();
00979   const eT* A_mem   = A.mem;
00980   const eT* B_mem   = B.mem;
00981   const eT* C_mem   = C.mem;
00982   
00983   const u32 local_n_elem = A.n_elem;
00984   
00985   for(u32 i=0; i<local_n_elem; ++i)
00986     {
00987     out_mem[i] = k1*A_mem[i] + k2*B_mem[i] + k3*C_mem[i];
00988     }
00989   
00990   }
00991 
00992 
00993 
00994 template<typename T1, typename T2>
00995 inline
00996 void
00997 glue_plus::apply
00998   (
00999   Mat<typename T1::elem_type>& out,
01000   const Glue<Op<T1, op_scalar_div_pre>, Op<T2, op_scalar_div_pre>, glue_plus>& in
01001   )
01002   {
01003   arma_extra_debug_sigprint();
01004   
01005   typedef typename T1::elem_type eT;
01006   
01007   const unwrap<T1> tmp1(in.A.m);
01008   const unwrap<T2> tmp2(in.B.m);
01009   
01010   const Mat<eT>& A = tmp1.M;
01011   const Mat<eT>& B = tmp2.M;
01012   
01013   arma_debug_assert_same_size(A, B, "matrix addition");
01014   
01015   //out.set_size(A.n_rows, A.n_cols);
01016   out.copy_size(A);
01017   
01018   const eT k1 = in.A.aux;
01019   const eT k2 = in.B.aux;
01020   
01021         eT* out_mem = out.memptr();
01022   const eT* A_mem   = A.mem;
01023   const eT* B_mem   = B.mem;
01024   
01025   const u32 local_n_elem = A.n_elem;
01026   
01027   for(u32 i=0; i<local_n_elem; ++i)
01028     {
01029     out_mem[i] = k1/A_mem[i] + k2/B_mem[i];
01030     }
01031   
01032   }
01033 
01034 
01035 
01036 template<typename T1, typename T2, typename T3>
01037 inline
01038 void
01039 glue_plus::apply
01040   (
01041   Mat<typename T1::elem_type>& out,
01042   const Glue< Glue<Op<T1, op_scalar_div_pre>, Op<T2, op_scalar_div_pre>, glue_plus>, Op<T3, op_scalar_div_pre>, glue_plus>& in
01043   )
01044   {
01045   arma_extra_debug_sigprint();
01046   
01047   typedef typename T1::elem_type eT;
01048   
01049   const unwrap<T1> tmp1(in.A.A.m);
01050   const unwrap<T2> tmp2(in.A.B.m);
01051   const unwrap<T3> tmp3(in.B.m);
01052   
01053   const Mat<eT>& A = tmp1.M;
01054   const Mat<eT>& B = tmp2.M;
01055   const Mat<eT>& C = tmp3.M;
01056   
01057   arma_debug_assert_same_size(A, B, "matrix addition");
01058   arma_debug_assert_same_size(B, C, "matrix addition");
01059   
01060   //out.set_size(A.n_rows, A.n_cols);
01061   out.copy_size(A);
01062   
01063   const eT k1 = in.A.A.aux;
01064   const eT k2 = in.A.B.aux;
01065   const eT k3 = in.B.aux;
01066   
01067         eT* out_mem = out.memptr();
01068   const eT* A_mem   = A.mem;
01069   const eT* B_mem   = B.mem;
01070   const eT* C_mem   = C.mem;
01071   
01072   const u32 local_n_elem = A.n_elem;
01073   
01074   for(u32 i=0; i<local_n_elem; ++i)
01075     {
01076     out_mem[i] = k1/A_mem[i] + k2/B_mem[i] + k3/C_mem[i];
01077     }
01078   
01079   }
01080 
01081 
01082 
01083 template<typename T1, typename T2>
01084 inline
01085 void
01086 glue_plus::apply
01087   (
01088   Mat<typename T1::elem_type>& out,
01089   const Glue<Op<T1, op_scalar_div_post>, Op<T2, op_scalar_div_post>, glue_plus>& in
01090   )
01091   {
01092   arma_extra_debug_sigprint();
01093   
01094   typedef typename T1::elem_type eT;
01095   
01096   const unwrap<T1> tmp1(in.A.m);
01097   const unwrap<T2> tmp2(in.B.m);
01098   
01099   const Mat<eT>& A = tmp1.M;
01100   const Mat<eT>& B = tmp2.M;
01101   
01102   arma_debug_assert_same_size(A, B, "matrix addition");
01103   
01104   //out.set_size(A.n_rows, A.n_cols);
01105   out.copy_size(A);
01106   
01107   const eT k1 = in.A.aux;
01108   const eT k2 = in.B.aux;
01109   
01110         eT* out_mem = out.memptr();
01111   const eT* A_mem   = A.mem;
01112   const eT* B_mem   = B.mem;
01113   
01114   const u32 local_n_elem = A.n_elem;
01115   
01116   for(u32 i=0; i<local_n_elem; ++i)
01117     {
01118     out_mem[i] = A_mem[i]/k1 + B_mem[i]/k2;
01119     }
01120   
01121   }
01122 
01123 
01124 
01125 template<typename T1, typename T2, typename T3>
01126 inline
01127 void
01128 glue_plus::apply
01129   (
01130   Mat<typename T1::elem_type>& out,
01131   const Glue< Glue<Op<T1, op_scalar_div_post>, Op<T2, op_scalar_div_post>, glue_plus>, Op<T3, op_scalar_div_post>, glue_plus>& in
01132   )
01133   {
01134   arma_extra_debug_sigprint();
01135   
01136   typedef typename T1::elem_type eT;
01137   
01138   const unwrap<T1> tmp1(in.A.A.m);
01139   const unwrap<T2> tmp2(in.A.B.m);
01140   const unwrap<T3> tmp3(in.B.m);
01141   
01142   const Mat<eT>& A = tmp1.M;
01143   const Mat<eT>& B = tmp2.M;
01144   const Mat<eT>& C = tmp3.M;
01145   
01146   arma_debug_assert_same_size(A, B, "matrix addition");
01147   arma_debug_assert_same_size(B, C, "matrix addition");
01148   
01149   //out.set_size(A.n_rows, A.n_cols);
01150   out.copy_size(A);
01151   
01152   const eT k1 = in.A.A.aux;
01153   const eT k2 = in.A.B.aux;
01154   const eT k3 = in.B.aux;
01155   
01156         eT* out_mem = out.memptr();
01157   const eT* A_mem   = A.mem;
01158   const eT* B_mem   = B.mem;
01159   const eT* C_mem   = C.mem;
01160   
01161   const u32 local_n_elem = A.n_elem;
01162   
01163   for(u32 i=0; i<local_n_elem; ++i)
01164     {
01165     out_mem[i] = A_mem[i]/k1 + B_mem[i]/k2 + C_mem[i]/k3;
01166     }
01167   
01168   }
01169 
01170 
01171 
01172 #endif
01173 
01174 
01175 
01176 //
01177 // glue_plus_diag
01178 
01179 
01180 template<typename T1, typename T2>
01181 inline
01182 void
01183 glue_plus_diag::apply(Mat<typename T1::elem_type>& out, const T1& A_orig, const Op<T2,op_diagmat>& B_orig)
01184   {
01185   arma_extra_debug_sigprint();
01186   
01187   isnt_same_type<typename T1::elem_type, typename T2::elem_type>::check();
01188   
01189   typedef typename T1::elem_type eT;
01190   
01191   const unwrap<T1> tmp1(A_orig);
01192   const unwrap<T2> tmp2(B_orig.m);
01193   
01194   const Mat<eT>& A = tmp1.M;  
01195   const Mat<eT>& B = tmp2.M;
01196   
01197   arma_debug_check( !B.is_square(), "glue_plus_diag::apply(): matrices must be square" );
01198   arma_debug_assert_same_size(A, B, "matrix addition");
01199 
01200   
01201   // no aliasing problem
01202   //out.set_size(A.n_rows, A.n_cols);
01203   out.copy_size(A);
01204   
01205   for(u32 col=0; col<A.n_cols; ++col)
01206     {
01207     for(u32 row=0; row<A.n_rows; ++row)
01208       {
01209       if(col != row)
01210         {
01211         out.at(row,col) = A.at(row,col);
01212         }
01213       else
01214         {
01215         out.at(row,col) = A.at(row,col) + B.at(row,col);
01216         }
01217       }
01218     }
01219   
01220   }
01221 
01222 
01223 
01224 template<typename T1, typename T2>
01225 inline
01226 void
01227 glue_plus_diag::apply(Mat<typename T1::elem_type>& out, const Op<T1,op_diagmat>& A_orig, const Op<T2,op_diagmat>& B_orig)
01228   {
01229   arma_extra_debug_sigprint();
01230   
01231   isnt_same_type<typename T1::elem_type, typename T2::elem_type>::check();
01232   
01233   const unwrap<T1> tmp1(A_orig.m);
01234   const unwrap<T2> tmp2(B_orig.m);
01235   
01236   typedef typename T1::elem_type eT;
01237   
01238   const Mat<eT>& A = tmp1.M;
01239   const Mat<eT>& B = tmp2.M;
01240   
01241   arma_debug_check( !A.is_square(), "glue_plus_diag::apply(): matrices must be square" );
01242   arma_debug_assert_same_size(A, B, "matrix addition");
01243   
01244   
01245   if( (&out != &A) && (&out != &B) )
01246     {
01247     out.zeros(A.n_rows, A.n_cols);
01248     
01249     for(u32 i=0; i<A.n_rows; ++i)
01250       {
01251       out.at(i,i) = A.at(i,i) + B.at(i,i);
01252       }
01253     }
01254   else
01255     {
01256     //out.set_size(A.n_rows, A.n_cols);
01257     out.copy_size(A);
01258   
01259     for(u32 col=0; col<A.n_cols; ++col)
01260       {
01261       for(u32 row=0; row<A.n_rows; ++row)
01262         {
01263         if(col != row)
01264           {
01265           out.at(row,col) = 0.0;
01266           }
01267         else
01268           {
01269           out.at(row,col) = A.at(row,col) + B.at(row,col);
01270           }
01271         }
01272       }
01273     }
01274   
01275   }
01276 
01277 
01278 
01279 template<typename T1, typename T2>
01280 inline
01281 void
01282 glue_plus_diag::apply(Mat<typename T1::elem_type>& out, const Glue<T1, Op<T2,op_diagmat>, glue_plus_diag>& X)
01283   {
01284   glue_plus_diag::apply(out, X.A, X.B);
01285   }
01286 
01287 
01288 
01289 template<typename T1, typename T2>
01290 inline
01291 void
01292 glue_plus_diag::apply(Mat<typename T1::elem_type>& out, const Glue<Op<T1,op_diagmat>, T2, glue_plus_diag>& X)
01293   {
01294   glue_plus_diag::apply(out, X.B, X.A);  // NOTE: arguments are swapped
01295   }
01296 
01297 
01298 
01299 template<typename T1, typename T2>
01300 inline
01301 void
01302 glue_plus_diag::apply(Mat<typename T1::elem_type>& out, const Glue<Op<T1,op_diagmat>, Op<T2,op_diagmat>, glue_plus_diag>& X)
01303   {
01304   glue_plus_diag::apply(out, X.A, X.B);
01305   }
01306 
01307 
01308 
01309 //! @}