25 #ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_H
26 #define EIGEN_SELFADJOINT_MATRIX_MATRIX_H
33 template<
typename Scalar,
typename Index,
int Pack1,
int Pack2,
int StorageOrder>
36 template<
int BlockRows>
inline
37 void pack(Scalar* blockA,
const const_blas_data_mapper<Scalar,Index,StorageOrder>& lhs, Index cols, Index i, Index& count)
40 for(Index k=0; k<i; k++)
41 for(Index w=0; w<BlockRows; w++)
42 blockA[count++] = lhs(i+w,k);
45 for(Index k=i; k<i+BlockRows; k++)
47 for(Index w=0; w<h; w++)
48 blockA[count++] =
conj(lhs(k, i+w));
50 blockA[count++] =
real(lhs(k,k));
52 for(Index w=h+1; w<BlockRows; w++)
53 blockA[count++] = lhs(i+w, k);
57 for(Index k=i+BlockRows; k<cols; k++)
58 for(Index w=0; w<BlockRows; w++)
59 blockA[count++] =
conj(lhs(k, i+w));
61 void operator()(Scalar* blockA,
const Scalar* _lhs, Index lhsStride, Index cols, Index rows)
63 const_blas_data_mapper<Scalar,Index,StorageOrder> lhs(_lhs,lhsStride);
65 Index peeled_mc = (rows/Pack1)*Pack1;
66 for(Index i=0; i<peeled_mc; i+=Pack1)
68 pack<Pack1>(blockA, lhs, cols, i, count);
71 if(rows-peeled_mc>=Pack2)
73 pack<Pack2>(blockA, lhs, cols, peeled_mc, count);
78 for(Index i=peeled_mc; i<rows; i++)
80 for(Index k=0; k<i; k++)
81 blockA[count++] = lhs(i, k);
83 blockA[count++] =
real(lhs(i, i));
85 for(Index k=i+1; k<cols; k++)
86 blockA[count++] =
conj(lhs(k, i));
91 template<
typename Scalar,
typename Index,
int nr,
int StorageOrder>
94 enum { PacketSize = packet_traits<Scalar>::size };
95 void operator()(Scalar* blockB,
const Scalar* _rhs, Index rhsStride, Index rows, Index cols, Index k2)
97 Index end_k = k2 + rows;
99 const_blas_data_mapper<Scalar,Index,StorageOrder> rhs(_rhs,rhsStride);
100 Index packet_cols = (cols/nr)*nr;
103 for(Index j2=0; j2<k2; j2+=nr)
105 for(Index k=k2; k<end_k; k++)
107 blockB[count+0] = rhs(k,j2+0);
108 blockB[count+1] = rhs(k,j2+1);
111 blockB[count+2] = rhs(k,j2+2);
112 blockB[count+3] = rhs(k,j2+3);
119 for(Index j2=k2; j2<(std::min)(k2+rows,packet_cols); j2+=nr)
123 for(Index k=k2; k<j2; k++)
125 blockB[count+0] =
conj(rhs(j2+0,k));
126 blockB[count+1] =
conj(rhs(j2+1,k));
129 blockB[count+2] =
conj(rhs(j2+2,k));
130 blockB[count+3] =
conj(rhs(j2+3,k));
136 for(Index k=j2; k<j2+nr; k++)
139 for (Index w=0 ; w<h; ++w)
140 blockB[count+w] = rhs(k,j2+w);
142 blockB[count+h] =
real(rhs(k,k));
145 for (Index w=h+1 ; w<nr; ++w)
146 blockB[count+w] =
conj(rhs(j2+w,k));
151 for(Index k=j2+nr; k<end_k; k++)
153 blockB[count+0] = rhs(k,j2+0);
154 blockB[count+1] = rhs(k,j2+1);
157 blockB[count+2] = rhs(k,j2+2);
158 blockB[count+3] = rhs(k,j2+3);
165 for(Index j2=k2+rows; j2<packet_cols; j2+=nr)
167 for(Index k=k2; k<end_k; k++)
169 blockB[count+0] =
conj(rhs(j2+0,k));
170 blockB[count+1] =
conj(rhs(j2+1,k));
173 blockB[count+2] =
conj(rhs(j2+2,k));
174 blockB[count+3] =
conj(rhs(j2+3,k));
181 for(Index j2=packet_cols; j2<cols; ++j2)
184 Index half = (std::min)(end_k,j2);
185 for(Index k=k2; k<half; k++)
187 blockB[count] =
conj(rhs(j2,k));
191 if(half==j2 && half<k2+rows)
193 blockB[count] =
real(rhs(j2,j2));
200 for(Index k=half+1; k<k2+rows; k++)
202 blockB[count] = rhs(k,j2);
212 template <
typename Scalar,
typename Index,
213 int LhsStorageOrder,
bool LhsSelfAdjoint,
bool ConjugateLhs,
214 int RhsStorageOrder,
bool RhsSelfAdjoint,
bool ConjugateRhs,
216 struct product_selfadjoint_matrix;
218 template <
typename Scalar,
typename Index,
219 int LhsStorageOrder,
bool LhsSelfAdjoint,
bool ConjugateLhs,
220 int RhsStorageOrder,
bool RhsSelfAdjoint,
bool ConjugateRhs>
221 struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,ConjugateLhs, RhsStorageOrder,RhsSelfAdjoint,ConjugateRhs,
RowMajor>
225 Index rows, Index cols,
226 const Scalar* lhs, Index lhsStride,
227 const Scalar* rhs, Index rhsStride,
228 Scalar* res, Index resStride,
231 product_selfadjoint_matrix<Scalar, Index,
237 ::run(cols, rows, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha);
241 template <
typename Scalar,
typename Index,
242 int LhsStorageOrder,
bool ConjugateLhs,
243 int RhsStorageOrder,
bool ConjugateRhs>
244 struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,
ColMajor>
248 Index rows, Index cols,
249 const Scalar* _lhs, Index lhsStride,
250 const Scalar* _rhs, Index rhsStride,
251 Scalar* res, Index resStride,
256 const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
257 const_blas_data_mapper<Scalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
259 typedef gebp_traits<Scalar,Scalar> Traits;
264 computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
266 kc = (std::min)(kc,mc);
268 std::size_t sizeW = kc*Traits::WorkSpaceFactor;
269 std::size_t sizeB = sizeW + kc*cols;
272 Scalar* blockB = allocatedBlockB + sizeW;
274 gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
275 symm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
276 gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
277 gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder==RowMajor?ColMajor:RowMajor, true> pack_lhs_transposed;
279 for(Index k2=0; k2<size; k2+=kc)
281 const Index actual_kc = (std::min)(k2+kc,size)-k2;
286 pack_rhs(blockB, &rhs(k2,0), rhsStride, actual_kc, cols);
292 for(Index i2=0; i2<k2; i2+=mc)
294 const Index actual_mc = (std::min)(i2+mc,k2)-i2;
296 pack_lhs_transposed(blockA, &lhs(k2, i2), lhsStride, actual_kc, actual_mc);
298 gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
302 const Index actual_mc = (std::min)(k2+kc,size)-k2;
304 pack_lhs(blockA, &lhs(k2,k2), lhsStride, actual_kc, actual_mc);
306 gebp_kernel(res+k2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
309 for(Index i2=k2+kc; i2<size; i2+=mc)
311 const Index actual_mc = (std::min)(i2+mc,size)-i2;
312 gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder,false>()
313 (blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);
315 gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
322 template <
typename Scalar,
typename Index,
323 int LhsStorageOrder,
bool ConjugateLhs,
324 int RhsStorageOrder,
bool ConjugateRhs>
325 struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,
ColMajor>
329 Index rows, Index cols,
330 const Scalar* _lhs, Index lhsStride,
331 const Scalar* _rhs, Index rhsStride,
332 Scalar* res, Index resStride,
337 const_blas_data_mapper<Scalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
339 typedef gebp_traits<Scalar,Scalar> Traits;
344 computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
345 std::size_t sizeW = kc*Traits::WorkSpaceFactor;
346 std::size_t sizeB = sizeW + kc*cols;
349 Scalar* blockB = allocatedBlockB + sizeW;
351 gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
352 gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
353 symm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
355 for(Index k2=0; k2<size; k2+=kc)
357 const Index actual_kc = (std::min)(k2+kc,size)-k2;
359 pack_rhs(blockB, _rhs, rhsStride, actual_kc, cols, k2);
362 for(Index i2=0; i2<rows; i2+=mc)
364 const Index actual_mc = (std::min)(i2+mc,rows)-i2;
365 pack_lhs(blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);
367 gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);
380 template<
typename Lhs,
int LhsMode,
typename Rhs,
int RhsMode>
381 struct traits<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false> >
382 :
traits<ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>, Lhs, Rhs> >
386 template<
typename Lhs,
int LhsMode,
typename Rhs,
int RhsMode>
387 struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>
388 :
public ProductBase<SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>, Lhs, Rhs >
392 SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) :
Base(lhs,rhs) {}
401 template<
typename Dest>
void scaleAndAddTo(Dest& dst,
Scalar alpha)
const
403 eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());
405 typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
406 typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);
408 Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(m_lhs)
409 * RhsBlasTraits::extractScalarFactor(m_rhs);
411 internal::product_selfadjoint_matrix<
Scalar,
Index,
416 internal::traits<Rhs>::Flags &RowMajorBit) ?
RowMajor :
ColMajor, RhsIsSelfAdjoint,
420 lhs.rows(), rhs.cols(),
421 &lhs.coeffRef(0,0), lhs.outerStride(),
422 &rhs.coeffRef(0,0), rhs.outerStride(),
423 &dst.coeffRef(0,0), dst.outerStride(),
431 #endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_H