CoeffBasedProduct.h
Go to the documentation of this file.
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
5 // Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
6 //
7 // Eigen is free software; you can redistribute it and/or
8 // modify it under the terms of the GNU Lesser General Public
9 // License as published by the Free Software Foundation; either
10 // version 3 of the License, or (at your option) any later version.
11 //
12 // Alternatively, you can redistribute it and/or
13 // modify it under the terms of the GNU General Public License as
14 // published by the Free Software Foundation; either version 2 of
15 // the License, or (at your option) any later version.
16 //
17 // Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
18 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
19 // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
20 // GNU General Public License for more details.
21 //
22 // You should have received a copy of the GNU Lesser General Public
23 // License and a copy of the GNU General Public License along with
24 // Eigen. If not, see <http://www.gnu.org/licenses/>.
25 
26 #ifndef EIGEN_COEFFBASED_PRODUCT_H
27 #define EIGEN_COEFFBASED_PRODUCT_H
28 
29 namespace Eigen {
30 
31 namespace internal {
32 
33 /*********************************************************************************
34 * Coefficient based product implementation.
35 * It is designed for the following use cases:
36 * - small fixed sizes
37 * - lazy products
38 *********************************************************************************/
39 
40 /* Since the all the dimensions of the product are small, here we can rely
41  * on the generic Assign mechanism to evaluate the product per coeff (or packet).
42  *
43  * Note that here the inner-loops should always be unrolled.
44  */
45 
46 template<int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
47 struct product_coeff_impl;
48 
49 template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
50 struct product_packet_impl;
51 
52 template<typename LhsNested, typename RhsNested, int NestingFlags>
53 struct traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> >
54 {
55  typedef MatrixXpr XprKind;
56  typedef typename remove_all<LhsNested>::type _LhsNested;
57  typedef typename remove_all<RhsNested>::type _RhsNested;
58  typedef typename scalar_product_traits<typename _LhsNested::Scalar, typename _RhsNested::Scalar>::ReturnType Scalar;
59  typedef typename promote_storage_type<typename traits<_LhsNested>::StorageKind,
60  typename traits<_RhsNested>::StorageKind>::ret StorageKind;
61  typedef typename promote_index_type<typename traits<_LhsNested>::Index,
62  typename traits<_RhsNested>::Index>::type Index;
63 
64  enum {
65  LhsCoeffReadCost = _LhsNested::CoeffReadCost,
66  RhsCoeffReadCost = _RhsNested::CoeffReadCost,
67  LhsFlags = _LhsNested::Flags,
68  RhsFlags = _RhsNested::Flags,
69 
70  RowsAtCompileTime = _LhsNested::RowsAtCompileTime,
71  ColsAtCompileTime = _RhsNested::ColsAtCompileTime,
72  InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(_LhsNested::ColsAtCompileTime, _RhsNested::RowsAtCompileTime),
73 
74  MaxRowsAtCompileTime = _LhsNested::MaxRowsAtCompileTime,
75  MaxColsAtCompileTime = _RhsNested::MaxColsAtCompileTime,
76 
77  LhsRowMajor = LhsFlags & RowMajorBit,
78  RhsRowMajor = RhsFlags & RowMajorBit,
79 
80  SameType = is_same<typename _LhsNested::Scalar,typename _RhsNested::Scalar>::value,
81 
82  CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit)
83  && (ColsAtCompileTime == Dynamic
84  || ( (ColsAtCompileTime % packet_traits<Scalar>::size) == 0
85  && (RhsFlags&AlignedBit)
86  )
87  ),
88 
89  CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit)
90  && (RowsAtCompileTime == Dynamic
91  || ( (RowsAtCompileTime % packet_traits<Scalar>::size) == 0
92  && (LhsFlags&AlignedBit)
93  )
94  ),
95 
96  EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
97  : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
98  : (RhsRowMajor && !CanVectorizeLhs),
99 
100  Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
101  | (EvalToRowMajor ? RowMajorBit : 0)
102  | NestingFlags
103  | (LhsFlags & RhsFlags & AlignedBit)
104  // TODO enable vectorization for mixed types
105  | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0),
106 
107  CoeffReadCost = InnerSize == Dynamic ? Dynamic
108  : InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
109  + (InnerSize - 1) * NumTraits<Scalar>::AddCost,
110 
111  /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside
112  * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner
113  * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect
114  * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI.
115  */
116  CanVectorizeInner = SameType
117  && LhsRowMajor
118  && (!RhsRowMajor)
119  && (LhsFlags & RhsFlags & ActualPacketAccessBit)
120  && (LhsFlags & RhsFlags & AlignedBit)
121  && (InnerSize % packet_traits<Scalar>::size == 0)
122  };
123 };
124 
125 } // end namespace internal
126 
127 template<typename LhsNested, typename RhsNested, int NestingFlags>
129  : internal::no_assignment_operator,
130  public MatrixBase<CoeffBasedProduct<LhsNested, RhsNested, NestingFlags> >
131 {
132  public:
133 
136  typedef typename Base::PlainObject PlainObject;
137 
138  private:
139 
140  typedef typename internal::traits<CoeffBasedProduct>::_LhsNested _LhsNested;
141  typedef typename internal::traits<CoeffBasedProduct>::_RhsNested _RhsNested;
142 
143  enum {
144  PacketSize = internal::packet_traits<Scalar>::size,
145  InnerSize = internal::traits<CoeffBasedProduct>::InnerSize,
146  Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
147  CanVectorizeInner = internal::traits<CoeffBasedProduct>::CanVectorizeInner
148  };
149 
150  typedef internal::product_coeff_impl<CanVectorizeInner ? InnerVectorizedTraversal : DefaultTraversal,
151  Unroll ? InnerSize-1 : Dynamic,
152  _LhsNested, _RhsNested, Scalar> ScalarCoeffImpl;
153 
154  typedef CoeffBasedProduct<LhsNested,RhsNested,NestByRefBit> LazyCoeffBasedProductType;
155 
156  public:
157 
159  : Base(), m_lhs(other.m_lhs), m_rhs(other.m_rhs)
160  {}
161 
162  template<typename Lhs, typename Rhs>
163  inline CoeffBasedProduct(const Lhs& lhs, const Rhs& rhs)
164  : m_lhs(lhs), m_rhs(rhs)
165  {
166  // we don't allow taking products of matrices of different real types, as that wouldn't be vectorizable.
167  // We still allow to mix T and complex<T>.
168  EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
169  YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
170  eigen_assert(lhs.cols() == rhs.rows()
171  && "invalid matrix product"
172  && "if you wanted a coeff-wise or a dot product use the respective explicit functions");
173  }
174 
175  EIGEN_STRONG_INLINE Index rows() const { return m_lhs.rows(); }
176  EIGEN_STRONG_INLINE Index cols() const { return m_rhs.cols(); }
177 
179  {
180  Scalar res;
181  ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res);
182  return res;
183  }
184 
185  /* Allow index-based non-packet access. It is impossible though to allow index-based packed access,
186  * which is why we don't set the LinearAccessBit.
187  */
188  EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
189  {
190  Scalar res;
191  const Index row = RowsAtCompileTime == 1 ? 0 : index;
192  const Index col = RowsAtCompileTime == 1 ? index : 0;
193  ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res);
194  return res;
195  }
196 
197  template<int LoadMode>
199  {
200  PacketScalar res;
201  internal::product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor,
202  Unroll ? InnerSize-1 : Dynamic,
203  _LhsNested, _RhsNested, PacketScalar, LoadMode>
204  ::run(row, col, m_lhs, m_rhs, res);
205  return res;
206  }
207 
208  // Implicit conversion to the nested type (trigger the evaluation of the product)
209  EIGEN_STRONG_INLINE operator const PlainObject& () const
210  {
211  m_result.lazyAssign(*this);
212  return m_result;
213  }
214 
215  const _LhsNested& lhs() const { return m_lhs; }
216  const _RhsNested& rhs() const { return m_rhs; }
217 
219  { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); }
220 
221  template<int DiagonalIndex>
223  { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); }
224 
226  { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this).diagonal(index); }
227 
228  protected:
229  typename internal::add_const_on_value_type<LhsNested>::type m_lhs;
230  typename internal::add_const_on_value_type<RhsNested>::type m_rhs;
231 
233 };
234 
235 namespace internal {
236 
237 // here we need to overload the nested rule for products
238 // such that the nested type is a const reference to a plain matrix
239 template<typename Lhs, typename Rhs, int N, typename PlainObject>
240 struct nested<CoeffBasedProduct<Lhs,Rhs,EvalBeforeNestingBit|EvalBeforeAssigningBit>, N, PlainObject>
241 {
242  typedef PlainObject const& type;
243 };
244 
245 /***************************************************************************
246 * Normal product .coeff() implementation (with meta-unrolling)
247 ***************************************************************************/
248 
249 /**************************************
250 *** Scalar path - no vectorization ***
251 **************************************/
252 
253 template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
254 struct product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
255 {
256  typedef typename Lhs::Index Index;
257  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
258  {
259  product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res);
260  res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col);
261  }
262 };
263 
264 template<typename Lhs, typename Rhs, typename RetScalar>
265 struct product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar>
266 {
267  typedef typename Lhs::Index Index;
268  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
269  {
270  res = lhs.coeff(row, 0) * rhs.coeff(0, col);
271  }
272 };
273 
274 template<typename Lhs, typename Rhs, typename RetScalar>
275 struct product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar>
276 {
277  typedef typename Lhs::Index Index;
278  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar& res)
279  {
280  eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
281  res = lhs.coeff(row, 0) * rhs.coeff(0, col);
282  for(Index i = 1; i < lhs.cols(); ++i)
283  res += lhs.coeff(row, i) * rhs.coeff(i, col);
284  }
285 };
286 
287 /*******************************************
288 *** Scalar path with inner vectorization ***
289 *******************************************/
290 
291 template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet>
292 struct product_coeff_vectorized_unroller
293 {
294  typedef typename Lhs::Index Index;
295  enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
296  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
297  {
298  product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
299  pres = padd(pres, pmul( lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template packet<Aligned>(UnrollingIndex, col) ));
300  }
301 };
302 
303 template<typename Lhs, typename Rhs, typename Packet>
304 struct product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet>
305 {
306  typedef typename Lhs::Index Index;
307  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
308  {
309  pres = pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col));
310  }
311 };
312 
313 template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
314 struct product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
315 {
316  typedef typename Lhs::PacketScalar Packet;
317  typedef typename Lhs::Index Index;
318  enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
319  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
320  {
321  Packet pres;
322  product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
323  product_coeff_impl<DefaultTraversal,UnrollingIndex,Lhs,Rhs,RetScalar>::run(row, col, lhs, rhs, res);
324  res = predux(pres);
325  }
326 };
327 
328 template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int RhsCols = Rhs::ColsAtCompileTime>
329 struct product_coeff_vectorized_dyn_selector
330 {
331  typedef typename Lhs::Index Index;
332  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
333  {
334  res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum();
335  }
336 };
337 
338 // NOTE the 3 following specializations are because taking .col(0) on a vector is a bit slower
339 // NOTE maybe they are now useless since we have a specialization for Block<Matrix>
340 template<typename Lhs, typename Rhs, int RhsCols>
341 struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
342 {
343  typedef typename Lhs::Index Index;
344  static EIGEN_STRONG_INLINE void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
345  {
346  res = lhs.transpose().cwiseProduct(rhs.col(col)).sum();
347  }
348 };
349 
350 template<typename Lhs, typename Rhs, int LhsRows>
351 struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
352 {
353  typedef typename Lhs::Index Index;
354  static EIGEN_STRONG_INLINE void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
355  {
356  res = lhs.row(row).transpose().cwiseProduct(rhs).sum();
357  }
358 };
359 
360 template<typename Lhs, typename Rhs>
361 struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
362 {
363  typedef typename Lhs::Index Index;
364  static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
365  {
366  res = lhs.transpose().cwiseProduct(rhs).sum();
367  }
368 };
369 
370 template<typename Lhs, typename Rhs, typename RetScalar>
371 struct product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar>
372 {
373  typedef typename Lhs::Index Index;
374  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
375  {
376  product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, res);
377  }
378 };
379 
380 /*******************
381 *** Packet path ***
382 *******************/
383 
384 template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
385 struct product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
386 {
387  typedef typename Lhs::Index Index;
388  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
389  {
390  product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
391  res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res);
392  }
393 };
394 
395 template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
396 struct product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
397 {
398  typedef typename Lhs::Index Index;
399  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
400  {
401  product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
402  res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res);
403  }
404 };
405 
406 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
407 struct product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
408 {
409  typedef typename Lhs::Index Index;
410  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
411  {
412  res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
413  }
414 };
415 
416 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
417 struct product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
418 {
419  typedef typename Lhs::Index Index;
420  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
421  {
422  res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
423  }
424 };
425 
426 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
427 struct product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
428 {
429  typedef typename Lhs::Index Index;
430  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
431  {
432  eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
433  res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
434  for(Index i = 1; i < lhs.cols(); ++i)
435  res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res);
436  }
437 };
438 
439 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
440 struct product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
441 {
442  typedef typename Lhs::Index Index;
443  static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
444  {
445  eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
446  res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
447  for(Index i = 1; i < lhs.cols(); ++i)
448  res = pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
449  }
450 };
451 
452 } // end namespace internal
453 
454 } // end namespace Eigen
455 
456 #endif // EIGEN_COEFFBASED_PRODUCT_H