25 #ifndef EIGEN_PARALLELIZER_H
26 #define EIGEN_PARALLELIZER_H
// OpenMP-only path. The enclosing function and the matching #else/#endif are
// not visible in this excerpt.
45 #ifdef EIGEN_HAS_OPENMP
// Store the value reported by omp_get_max_threads() through `v`, i.e. the
// upper bound OpenMP would use for a new parallel region that has no
// num_threads clause.
49 *v = omp_get_max_threads();
// Bookkeeping record templated on the integer type `Index`. The parallel loop
// later in this file allocates one record per thread and fills rhs_start /
// rhs_length with that thread's column offset and column count.
// Member declarations and the struct's braces are not visible in this excerpt.
76 template<
typename Index>
struct GemmParallelInfo
// Default state: sync = -1, users = 0, and an empty column slice
// (rhs_start = 0, rhs_length = 0).
// NOTE(review): the meaning of `sync` and `users` is not shown here; -1
// presumably means "nothing synchronised yet" - confirm against the kernel
// that consumes this record.
78 GemmParallelInfo() : sync(-1), users(0), rhs_start(0), rhs_length(0) {}
// Splits a rows x cols job across OpenMP threads and invokes `func` once per
// slice, falling back to a single func(0,rows, 0,cols) call when parallel
// execution is not wanted.
// The function's name/signature line is not visible in this excerpt; the body
// uses the names func, rows, cols and transpose.
//   Condition - compile-time switch; when false the job always runs as a
//               single call.
//   Functor   - type of `func`; must provide initParallelSession() and be
//               callable as func(a, b, c, d) and func(a, b, c, d, info).
//   Index     - integer type used for all sizes and offsets.
87 template<
bool Condition,
typename Functor,
typename Index>
// Selected when OpenMP is unavailable or EIGEN_USE_BLAS is defined. The body
// of this branch and its #else/#endif are not visible here; the code below
// calls OpenMP functions, so it presumably sits in the #else branch.
92 #if !(defined (EIGEN_HAS_OPENMP)) || defined (EIGEN_USE_BLAS)
// Run as one call if parallelism is disabled at compile time, or if the
// current OpenMP team already has more than one thread (we are already inside
// a parallel region, so do not nest another one).
109 if((!Condition) || (omp_get_num_threads()>1))
110 return func(0,rows, 0,cols);
// Dimension that drives the thread count: cols when `transpose` is set,
// rows otherwise.
112 Index size = transpose ? cols : rows;
// Allow at most one thread per 32 units of `size`, and never fewer than one.
116 Index max_threads = std::max<Index>(1,size / 32);
// Use the smaller of nbThreads() (defined elsewhere; presumably the
// configured thread limit) and the size-based cap.
119 Index threads = std::min<Index>(
nbThreads(), max_threads);
// Single-call fallback. NOTE(review): the condition guarding this return is
// not visible in this excerpt - presumably it checks for threads == 1.
122 return func(0,rows, 0,cols);
// Give the functor a chance to set up before the parallel loop starts.
124 func.initParallelSession();
// NOTE(review): the guard on this swap is not visible - presumably it is only
// applied when `transpose` is set, so that the slicing below always works on
// the same orientation. Confirm.
127 std::swap(rows,cols);
// Per-thread slice sizes: an even share of the columns rounded down to a
// multiple of 4, and an even share of the rows rounded down to a multiple
// of 8. The last thread absorbs the remainder (see below).
129 Index blockCols = (cols / threads) & ~Index(0x3);
130 Index blockRows = (rows / threads) & ~Index(0x7);
// One info record per thread, indexed by loop iteration.
// NOTE(review): raw new[]; the matching delete[] is not visible in this
// excerpt - confirm the array is released after the loop.
132 GemmParallelInfo<Index>* info =
new GemmParallelInfo<Index>[threads];
// schedule(static,1) hands out iterations round-robin in chunks of one; with
// `threads` iterations and a requested team of `threads`, each iteration is
// meant to land on its own thread.
134 #pragma omp parallel for schedule(static,1) num_threads(threads)
135 for(Index i=0; i<threads; ++i)
// Row slice for iteration i; the final iteration takes every remaining row.
137 Index r0 = i*blockRows;
138 Index actualBlockRows = (i+1==threads) ? rows-r0 : blockRows;
// Column slice for iteration i; the final iteration takes every remaining
// column.
140 Index c0 = i*blockCols;
141 Index actualBlockCols = (i+1==threads) ? cols-c0 : blockCols;
// Publish this iteration's column slice in its info record.
143 info[i].rhs_start = c0;
144 info[i].rhs_length = actualBlockCols;
// Two alternative invocations; the selector between them is not visible here
// (presumably if(transpose) / else). The first passes the full column range
// followed by this iteration's row slice; the second passes the row slice
// followed by the full column range. Both receive the whole `info` array.
147 func(0, cols, r0, actualBlockRows, info);
149 func(r0, actualBlockRows, 0,cols, info);
160 #endif // EIGEN_PARALLELIZER_H