Parallelizer.h
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// Eigen is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3 of the License, or (at your option) any later version.
//
// Alternatively, you can redistribute it and/or
// modify it under the terms of the GNU General Public License as
// published by the Free Software Foundation; either version 2 of
// the License, or (at your option) any later version.
//
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License and a copy of the GNU General Public License along with
// Eigen. If not, see <http://www.gnu.org/licenses/>.

#ifndef EIGEN_PARALLELIZER_H
#define EIGEN_PARALLELIZER_H

namespace Eigen {

namespace internal {

/** \internal */
inline void manage_multi_threading(Action action, int* v)
{
  static EIGEN_UNUSED int m_maxThreads = -1;

  if(action==SetAction)
  {
    eigen_internal_assert(v!=0);
    m_maxThreads = *v;
  }
  else if(action==GetAction)
  {
    eigen_internal_assert(v!=0);
    #ifdef EIGEN_HAS_OPENMP
    if(m_maxThreads>0)
      *v = m_maxThreads;
    else
      *v = omp_get_max_threads();
    #else
    *v = 1;
    #endif
  }
  else
  {
    eigen_internal_assert(false);
  }
}

/** \returns the max number of threads reserved for Eigen
  * \sa setNbThreads */
inline int nbThreads()
{
  int ret;
  manage_multi_threading(GetAction, &ret);
  return ret;
}

/** Sets the max number of threads reserved for Eigen
  * \sa nbThreads */
inline void setNbThreads(int v)
{
  manage_multi_threading(SetAction, &v);
}
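
// Usage sketch (illustrative only; the numbers are assumptions, and the
// behaviour assumes an OpenMP build where EIGEN_HAS_OPENMP is defined):
//
//   setNbThreads(4);      // reserve at most 4 threads for Eigen's products
//   int n = nbThreads();  // == 4 here; omp_get_max_threads() if never set,
//                         // and always 1 when EIGEN_HAS_OPENMP is not defined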

template<typename Index> struct GemmParallelInfo
{
  GemmParallelInfo() : sync(-1), users(0), rhs_start(0), rhs_length(0) {}

  // Shared synchronization state; read and written by the product kernel
  // itself, not by the code in this file.
  int volatile sync;
  int volatile users;

  // Column range of the rhs panel assigned to the thread owning this slot
  // (set by parallelize_gemm below).
  Index rhs_start;
  Index rhs_length;
};

template<bool Condition, typename Functor, typename Index>
void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpose)
{
  // TODO when EIGEN_USE_BLAS is defined,
  // we should still enable OMP for other scalar types
#if !(defined (EIGEN_HAS_OPENMP)) || defined (EIGEN_USE_BLAS)
  // FIXME the transpose variable is only needed to properly split
  // the matrix product when multithreading is enabled. This is a temporary
  // fix to support row-major destination matrices. This whole
  // parallelizer mechanism has to be redesigned anyway.
  EIGEN_UNUSED_VARIABLE(transpose);
  func(0,rows, 0,cols);
#else

  // Dynamically check whether we should enable or disable OpenMP.
  // The conditions are:
  // - the max number of threads we can create is greater than 1
  // - we are not already inside a parallel region
  // - the sizes are large enough

  // 1- are we already in a parallel session?
  // FIXME omp_get_num_threads()>1 only works for openmp, what if the user does not use openmp?
  if((!Condition) || (omp_get_num_threads()>1))
    return func(0,rows, 0,cols);

  Index size = transpose ? cols : rows;

  // 2- compute the maximal number of threads from the size of the product:
  // FIXME this has to be fine tuned
  Index max_threads = std::max<Index>(1,size / 32);

  // 3- compute the number of threads we are going to use
  Index threads = std::min<Index>(nbThreads(), max_threads);
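  // Illustration of the two steps above (hypothetical numbers): for
  // size == 256, max_threads == max(1, 256/32) == 8, so with
  // nbThreads() == 4 the product runs on min(4, 8) == 4 threads.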

  if(threads==1)
    return func(0,rows, 0,cols);

  func.initParallelSession();

  if(transpose)
    std::swap(rows,cols);

  // Round the per-thread block sizes down to multiples of 4 columns and
  // 8 rows; the last thread picks up the remainder below.
  Index blockCols = (cols / threads) & ~Index(0x3);
  Index blockRows = (rows / threads) & ~Index(0x7);

  GemmParallelInfo<Index>* info = new GemmParallelInfo<Index>[threads];

  #pragma omp parallel for schedule(static,1) num_threads(threads)
  for(Index i=0; i<threads; ++i)
  {
    Index r0 = i*blockRows;
    Index actualBlockRows = (i+1==threads) ? rows-r0 : blockRows;

    Index c0 = i*blockCols;
    Index actualBlockCols = (i+1==threads) ? cols-c0 : blockCols;

    info[i].rhs_start = c0;
    info[i].rhs_length = actualBlockCols;

    if(transpose)
      func(0, cols, r0, actualBlockRows, info);
    else
      func(r0, actualBlockRows, 0,cols, info);
  }

  delete[] info;
#endif
}
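
// A rough sketch of the functor interface parallelize_gemm relies on above;
// the struct name below is hypothetical, and the real functors are provided
// by the GEMM implementation, not by this file:
//
//   struct ExampleGemmFunctor {
//     void initParallelSession() const;
//     void operator()(Index rowStart, Index rowCount,
//                     Index colStart, Index colCount,
//                     GemmParallelInfo<Index>* info = 0) const;
//   };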

} // end namespace internal

} // end namespace Eigen

#endif // EIGEN_PARALLELIZER_H