Manicore
Library to implement schemes on n-dimensional manifolds.
parallel_for.hpp
#ifndef PARALLEL_FOR
#define PARALLEL_FOR

#include <thread>
#include <forward_list>
#include <algorithm>   // std::for_each
#include <functional>  // std::function, std::mem_fn
#include <utility>     // std::pair
#include <vector>
#include <Eigen/Sparse>

namespace Manicore
{

  /// Function to distribute elements (considered as jobs) over threads. It returns a pair of vectors indicating the start and end indices for each thread.
  static std::pair<std::vector<int>, std::vector<int>>
    distributeLoad(size_t nb_elements, unsigned nb_threads)
  {
    // Vectors of start and end indices
    std::vector<int> start(nb_threads);
    std::vector<int> end(nb_threads);

    // Compute the batch size and the remainder
    unsigned batch_size = nb_elements / nb_threads;
    unsigned batch_remainder = nb_elements % nb_threads;

    // Distribute the remainder over the threads to get the start and end indices for each thread
    for (unsigned i = 0; i < nb_threads; ++i) {
      if (i < batch_remainder) {
        start[i] = i * batch_size + i;
        end[i] = start[i] + batch_size + 1;
      } else {
        start[i] = i * batch_size + batch_remainder;
        end[i] = start[i] + batch_size;
      }
    }

    return std::make_pair(start, end);
  }
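  // Example: with nb_elements = 10 and nb_threads = 3, batch_size = 3 and
  // batch_remainder = 1, giving start = {0, 4, 7} and end = {4, 7, 10};
  // the first thread absorbs the extra element.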

  /// Generic function to execute threaded processes.
  static inline void parallel_for(unsigned nb_elements,
                                  std::function<void(size_t start, size_t end)> functor,
                                  bool use_threads = true)
  {
    unsigned nb_threads_hint = std::thread::hardware_concurrency();
    unsigned nb_threads = nb_threads_hint == 0 ? 8 : nb_threads_hint;

    // Generate the start and end indices
    auto [start, end] = distributeLoad(nb_elements, nb_threads);

    std::vector<std::thread> my_threads(nb_threads);

    if (use_threads) {
      // Multithread execution
      for (unsigned i = 0; i < nb_threads; ++i) {
        my_threads[i] = std::thread(functor, start[i], end[i]);
      }
    } else {
      // Single thread execution (for easy debugging)
      for (unsigned i = 0; i < nb_threads; ++i) {
        functor(start[i], end[i]);
      }
    }

    // Wait for the threads to finish their tasks
    if (use_threads) {
      std::for_each(my_threads.begin(), my_threads.end(), std::mem_fn(&std::thread::join));
    }
  }
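  // Usage sketch: run a loop body over [0, nb_elements) in parallel. The name
  // nb_cells below is only an illustrative placeholder for the number of jobs.
  //   parallel_for(nb_cells, [&](size_t start, size_t end) {
  //       for (size_t i = start; i < end; i++) {
  //         // process element i
  //       }
  //     });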

  /// Function to assemble a global sparse matrix from a procedure that computes local contributions.
  template<typename FType>
  Eigen::SparseMatrix<double> parallel_assembly(
      size_t nb_elements,
      std::pair<size_t, size_t> systemSize,
      FType localAssembly,
      bool use_threads = true)
  {
    std::forward_list<Eigen::Triplet<double>> triplets;
    if (use_threads) {
      // Select the number of threads
      unsigned nb_threads_hint = std::thread::hardware_concurrency();
      unsigned nb_threads = nb_threads_hint == 0 ? 8 : nb_threads_hint;
      // Generate the start and end indices
      auto [start, end] = distributeLoad(nb_elements, nb_threads);
      // Create one list of triplets per thread
      std::vector<std::forward_list<Eigen::Triplet<double>>> tripletsVect(nb_threads);

      // Assign a task to each thread
      std::vector<std::thread> my_threads(nb_threads);
      for (unsigned i = 0; i < nb_threads; ++i) {
        my_threads[i] = std::thread(localAssembly, start[i], end[i], &tripletsVect[i]);
      }
      // Wait for the threads to finish their tasks
      std::for_each(my_threads.begin(), my_threads.end(), std::mem_fn(&std::thread::join));
      // Join the triplets from each thread
      for (unsigned i = 0; i < nb_threads; ++i) {
        triplets.splice_after(triplets.cbefore_begin(), tripletsVect[i]);
      }
    } else { // Single thread
      localAssembly(0, nb_elements, &triplets);
    }
    // Construct the system
    Eigen::SparseMatrix<double> system(systemSize.first, systemSize.second);
    system.setFromTriplets(triplets.cbegin(), triplets.cend());
    return system;
  }
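  // Usage sketch: the local assembler below is illustrative only; it must accept
  // (size_t start, size_t end, std::forward_list<Eigen::Triplet<double>>*), matching
  // the arguments passed at thread launch above. nb_elements and nb_dofs are
  // placeholders for the number of jobs and the matrix dimensions.
  //   auto local_assembly = [&](size_t start, size_t end,
  //                             std::forward_list<Eigen::Triplet<double>> * triplets) {
  //       for (size_t iT = start; iT < end; iT++) {
  //         triplets->emplace_front(iT, iT, 1.); // hypothetical local contribution
  //       }
  //     };
  //   Eigen::SparseMatrix<double> A = parallel_assembly(nb_elements, {nb_dofs, nb_dofs}, local_assembly);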

} // end of namespace
#endif