LLVM OpenMP* Runtime Library
kmp_taskdeps.cpp
1 /*
2  * kmp_taskdeps.cpp
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 //#define KMP_SUPPORT_GRAPH_OUTPUT 1
14 
15 #include "kmp.h"
16 #include "kmp_io.h"
17 #include "kmp_wait_release.h"
18 #include "kmp_taskdeps.h"
19 #if OMPT_SUPPORT
20 #include "ompt-specific.h"
21 #endif
22 
23 // TODO: Improve memory allocation? keep a list of pre-allocated structures?
24 // allocate in blocks? re-use finished list entries?
25 // TODO: don't use atomic ref counters for stack-allocated nodes.
26 // TODO: find an alternative to atomic refs for heap-allocated nodes?
27 // TODO: Finish graph output support
28 // TODO: kmp_lock_t seems a tad too big (and heavyweight) for this. Check other
29 // runtime locks
30 // TODO: Any ITT support needed?
31 
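// Overview: this file implements task-dependence tracking. Each task with
// depend clauses gets a kmp_depnode_t; a per-task dephash maps dependence
// addresses to the depnodes that last wrote or read them, so new tasks can be
// linked as successors of their predecessors. The public entry points below
// are __kmpc_omp_task_with_deps(), __kmpc_omp_wait_deps() and
// __kmpc_omp_taskwait_deps_51().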
32 #ifdef KMP_SUPPORT_GRAPH_OUTPUT
33 static std::atomic<kmp_int32> kmp_node_id_seed = 0;
34 #endif
35 
36 static void __kmp_init_node(kmp_depnode_t *node) {
37  node->dn.successors = NULL;
38  node->dn.task = NULL; // will point to the right task
39  // once dependences have been processed
40  for (int i = 0; i < MAX_MTX_DEPS; ++i)
41  node->dn.mtx_locks[i] = NULL;
42  node->dn.mtx_num_locks = 0;
43  __kmp_init_lock(&node->dn.lock);
44  KMP_ATOMIC_ST_RLX(&node->dn.nrefs, 1); // init creates the first reference
45 #ifdef KMP_SUPPORT_GRAPH_OUTPUT
46  node->dn.id = KMP_ATOMIC_INC(&kmp_node_id_seed);
47 #endif
48 #if USE_ITT_BUILD && USE_ITT_NOTIFY
49  __itt_sync_create(node, "OMP task dep node", NULL, 0);
50 #endif
51 }
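// Reference-counting convention: __kmp_init_node() creates the first
// reference (nrefs = 1); __kmp_node_ref() below adds one, and
// __kmp_node_deref() (see kmp_taskdeps.h) drops one, releasing the node once
// the last reference is gone.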
52 
53 static inline kmp_depnode_t *__kmp_node_ref(kmp_depnode_t *node) {
54  KMP_ATOMIC_INC(&node->dn.nrefs);
55  return node;
56 }
57 
58 enum { KMP_DEPHASH_OTHER_SIZE = 97, KMP_DEPHASH_MASTER_SIZE = 997 };
59 
60 size_t sizes[] = {997, 2003, 4001, 8191, 16001, 32003, 64007, 131071, 270029};
61 const size_t MAX_GEN = 8;
62 
63 static inline size_t __kmp_dephash_hash(kmp_intptr_t addr, size_t hsize) {
64  // TODO alternate to try: set = (((Addr64)(addrUsefulBits * 9.618)) %
65  // m_num_sets );
66  return ((addr >> 6) ^ (addr >> 2)) % hsize;
67 }
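// The hash mixes two right-shifted copies of the address, discarding the low
// alignment bits that carry little information, before taking the result
// modulo the table size.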
68 
69 static kmp_dephash_t *__kmp_dephash_extend(kmp_info_t *thread,
70  kmp_dephash_t *current_dephash) {
71  kmp_dephash_t *h;
72 
73  size_t gen = current_dephash->generation + 1;
74  if (gen >= MAX_GEN)
75  return current_dephash;
76  size_t new_size = sizes[gen];
77 
78  size_t size_to_allocate =
79  new_size * sizeof(kmp_dephash_entry_t *) + sizeof(kmp_dephash_t);
80 
81 #if USE_FAST_MEMORY
82  h = (kmp_dephash_t *)__kmp_fast_allocate(thread, size_to_allocate);
83 #else
84  h = (kmp_dephash_t *)__kmp_thread_malloc(thread, size_to_allocate);
85 #endif
86 
87  h->size = new_size;
88  h->nelements = current_dephash->nelements;
89  h->buckets = (kmp_dephash_entry **)(h + 1);
90  h->generation = gen;
91  h->nconflicts = 0;
92  h->last_all = current_dephash->last_all;
93 
94  // make sure buckets are properly initialized
95  for (size_t i = 0; i < new_size; i++) {
96  h->buckets[i] = NULL;
97  }
98 
99  // insert existing elements in the new table
100  for (size_t i = 0; i < current_dephash->size; i++) {
101  kmp_dephash_entry_t *next, *entry;
102  for (entry = current_dephash->buckets[i]; entry; entry = next) {
103  next = entry->next_in_bucket;
104  // Compute the new hash using the new size, and insert the entry in
105  // the new bucket.
106  size_t new_bucket = __kmp_dephash_hash(entry->addr, h->size);
107  entry->next_in_bucket = h->buckets[new_bucket];
108  if (entry->next_in_bucket) {
109  h->nconflicts++;
110  }
111  h->buckets[new_bucket] = entry;
112  }
113  }
114 
115  // Free old hash table
116 #if USE_FAST_MEMORY
117  __kmp_fast_free(thread, current_dephash);
118 #else
119  __kmp_thread_free(thread, current_dephash);
120 #endif
121 
122  return h;
123 }
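// Resizing policy: __kmp_dephash_find() calls this when the number of bucket
// conflicts reaches the table size. Each call advances to the next entry of
// sizes[] (up to MAX_GEN generations), rehashes every existing entry into the
// larger bucket array and frees the old table; the entries themselves are
// reused, only the bucket array grows.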
124 
125 static kmp_dephash_t *__kmp_dephash_create(kmp_info_t *thread,
126  kmp_taskdata_t *current_task) {
127  kmp_dephash_t *h;
128 
129  size_t h_size;
130 
131  if (current_task->td_flags.tasktype == TASK_IMPLICIT)
132  h_size = KMP_DEPHASH_MASTER_SIZE;
133  else
134  h_size = KMP_DEPHASH_OTHER_SIZE;
135 
136  size_t size = h_size * sizeof(kmp_dephash_entry_t *) + sizeof(kmp_dephash_t);
137 
138 #if USE_FAST_MEMORY
139  h = (kmp_dephash_t *)__kmp_fast_allocate(thread, size);
140 #else
141  h = (kmp_dephash_t *)__kmp_thread_malloc(thread, size);
142 #endif
143  h->size = h_size;
144 
145  h->generation = 0;
146  h->nelements = 0;
147  h->nconflicts = 0;
148  h->buckets = (kmp_dephash_entry **)(h + 1);
149  h->last_all = NULL;
150 
151  for (size_t i = 0; i < h_size; i++)
152  h->buckets[i] = 0;
153 
154  return h;
155 }
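// Implicit (initial/master) tasks are expected to accumulate many more
// dependences than explicit tasks, so their table starts at
// KMP_DEPHASH_MASTER_SIZE (997) rather than KMP_DEPHASH_OTHER_SIZE (97).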
156 
157 static kmp_dephash_entry *__kmp_dephash_find(kmp_info_t *thread,
158  kmp_dephash_t **hash,
159  kmp_intptr_t addr) {
160  kmp_dephash_t *h = *hash;
161  if (h->nelements != 0 && h->nconflicts / h->size >= 1) {
162  *hash = __kmp_dephash_extend(thread, h);
163  h = *hash;
164  }
165  size_t bucket = __kmp_dephash_hash(addr, h->size);
166 
167  kmp_dephash_entry_t *entry;
168  for (entry = h->buckets[bucket]; entry; entry = entry->next_in_bucket)
169  if (entry->addr == addr)
170  break;
171 
172  if (entry == NULL) {
173 // create entry. This is only done by one thread so no locking required
174 #if USE_FAST_MEMORY
175  entry = (kmp_dephash_entry_t *)__kmp_fast_allocate(
176  thread, sizeof(kmp_dephash_entry_t));
177 #else
178  entry = (kmp_dephash_entry_t *)__kmp_thread_malloc(
179  thread, sizeof(kmp_dephash_entry_t));
180 #endif
181  entry->addr = addr;
182  if (!h->last_all) // no predecessor task with omp_all_memory dependence
183  entry->last_out = NULL;
184  else // else link the omp_all_memory depnode to the new entry
185  entry->last_out = __kmp_node_ref(h->last_all);
186  entry->last_set = NULL;
187  entry->prev_set = NULL;
188  entry->last_flag = 0;
189  entry->mtx_lock = NULL;
190  entry->next_in_bucket = h->buckets[bucket];
191  h->buckets[bucket] = entry;
192  h->nelements++;
193  if (entry->next_in_bucket)
194  h->nconflicts++;
195  }
196  return entry;
197 }
198 
199 static kmp_depnode_list_t *__kmp_add_node(kmp_info_t *thread,
200  kmp_depnode_list_t *list,
201  kmp_depnode_t *node) {
202  kmp_depnode_list_t *new_head;
203 
204 #if USE_FAST_MEMORY
205  new_head = (kmp_depnode_list_t *)__kmp_fast_allocate(
206  thread, sizeof(kmp_depnode_list_t));
207 #else
208  new_head = (kmp_depnode_list_t *)__kmp_thread_malloc(
209  thread, sizeof(kmp_depnode_list_t));
210 #endif
211 
212  new_head->node = __kmp_node_ref(node);
213  new_head->next = list;
214 
215  return new_head;
216 }
217 
218 static inline void __kmp_track_dependence(kmp_int32 gtid, kmp_depnode_t *source,
219  kmp_depnode_t *sink,
220  kmp_task_t *sink_task) {
221 #if OMPX_TASKGRAPH
222  kmp_taskdata_t *task_source = KMP_TASK_TO_TASKDATA(source->dn.task);
223  kmp_taskdata_t *task_sink = KMP_TASK_TO_TASKDATA(sink_task);
224  if (source->dn.task && sink_task) {
225  // A dependence between two tasks where one is within the TDG and the
226  // other is not is unsupported
227  KMP_ASSERT(task_source->is_taskgraph == task_sink->is_taskgraph);
228  }
229  if (task_sink->is_taskgraph &&
230  __kmp_tdg_is_recording(task_sink->tdg->tdg_status)) {
231  kmp_node_info_t *source_info =
232  &task_sink->tdg->record_map[task_source->td_task_id];
233  bool exists = false;
234  for (int i = 0; i < source_info->nsuccessors; i++) {
235  if (source_info->successors[i] == task_sink->td_task_id) {
236  exists = true;
237  break;
238  }
239  }
240  if (!exists) {
241  if (source_info->nsuccessors >= source_info->successors_size) {
242  source_info->successors_size = 2 * source_info->successors_size;
243  kmp_int32 *old_succ_ids = source_info->successors;
244  kmp_int32 *new_succ_ids = (kmp_int32 *)__kmp_allocate(
245  source_info->successors_size * sizeof(kmp_int32));
246  source_info->successors = new_succ_ids;
247  __kmp_free(old_succ_ids);
248  }
249 
250  source_info->successors[source_info->nsuccessors] = task_sink->td_task_id;
251  source_info->nsuccessors++;
252 
253  kmp_node_info_t *sink_info =
254  &(task_sink->tdg->record_map[task_sink->td_task_id]);
255  sink_info->npredecessors++;
256  }
257  }
258 #endif
259 #ifdef KMP_SUPPORT_GRAPH_OUTPUT
260  kmp_taskdata_t *task_source = KMP_TASK_TO_TASKDATA(source->dn.task);
261  // do not use sink->dn.task as that is only filled after the dependences
262  // are already processed!
263  kmp_taskdata_t *task_sink = KMP_TASK_TO_TASKDATA(sink_task);
264 
265  __kmp_printf("%d(%s) -> %d(%s)\n", source->dn.id,
266  task_source->td_ident->psource, sink->dn.id,
267  task_sink->td_ident->psource);
268 #endif
269 #if OMPT_SUPPORT && OMPT_OPTIONAL
270  /* OMPT tracks dependences between tasks (a=source, b=sink) in which
271  task a blocks the execution of task b, reported via
272  ompt_callback_task_dependence */
273  if (ompt_enabled.ompt_callback_task_dependence) {
274  kmp_taskdata_t *task_source = KMP_TASK_TO_TASKDATA(source->dn.task);
275  ompt_data_t *sink_data;
276  if (sink_task)
277  sink_data = &(KMP_TASK_TO_TASKDATA(sink_task)->ompt_task_info.task_data);
278  else
279  sink_data = &__kmp_threads[gtid]->th.ompt_thread_info.task_data;
280 
281  ompt_callbacks.ompt_callback(ompt_callback_task_dependence)(
282  &(task_source->ompt_task_info.task_data), sink_data);
283  }
284 #endif /* OMPT_SUPPORT && OMPT_OPTIONAL */
285 }
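// __kmp_track_dependence() is purely observational: it records the
// source -> sink edge for TDG recording (OMPX_TASKGRAPH), optional graph
// output, and the OMPT task-dependence callback. The actual successor links
// are created by the __kmp_depnode_link_successor() overloads below.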
286 
287 kmp_base_depnode_t *__kmpc_task_get_depnode(kmp_task_t *task) {
288  kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(task);
289  return td->td_depnode ? &(td->td_depnode->dn) : NULL;
290 }
291 
292 kmp_depnode_list_t *__kmpc_task_get_successors(kmp_task_t *task) {
293  kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(task);
294  return td->td_depnode->dn.successors;
295 }
296 
297 static inline kmp_int32
298 __kmp_depnode_link_successor(kmp_int32 gtid, kmp_info_t *thread,
299  kmp_task_t *task, kmp_depnode_t *node,
300  kmp_depnode_list_t *plist) {
301  if (!plist)
302  return 0;
303  kmp_int32 npredecessors = 0;
304  // link node as successor of list elements
305  for (kmp_depnode_list_t *p = plist; p; p = p->next) {
306  kmp_depnode_t *dep = p->node;
307 #if OMPX_TASKGRAPH
308  kmp_tdg_status tdg_status = KMP_TDG_NONE;
309  if (task) {
310  kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(task);
311  if (td->is_taskgraph)
312  tdg_status = KMP_TASK_TO_TASKDATA(task)->tdg->tdg_status;
313  if (__kmp_tdg_is_recording(tdg_status))
314  __kmp_track_dependence(gtid, dep, node, task);
315  }
316 #endif
317  if (dep->dn.task) {
318  KMP_ACQUIRE_DEPNODE(gtid, dep);
319  if (dep->dn.task) {
320  if (!dep->dn.successors || dep->dn.successors->node != node) {
321 #if OMPX_TASKGRAPH
322  if (!(__kmp_tdg_is_recording(tdg_status)) && task)
323 #endif
324  __kmp_track_dependence(gtid, dep, node, task);
325  dep->dn.successors = __kmp_add_node(thread, dep->dn.successors, node);
326  KA_TRACE(40, ("__kmp_process_deps: T#%d adding dependence from %p to "
327  "%p\n",
328  gtid, KMP_TASK_TO_TASKDATA(dep->dn.task),
329  KMP_TASK_TO_TASKDATA(task)));
330  npredecessors++;
331  }
332  }
333  KMP_RELEASE_DEPNODE(gtid, dep);
334  }
335  }
336  return npredecessors;
337 }
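// The list overload above links 'node' as a successor of every depnode in
// 'plist'. dep->dn.task is re-checked after acquiring the depnode lock
// because a predecessor that has already finished releasing its dependences
// clears dn.task; in that case no edge (and no predecessor count) is needed.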
338 
339 // Add the edge 'sink' -> 'source' in the task dependency graph
340 static inline kmp_int32 __kmp_depnode_link_successor(kmp_int32 gtid,
341  kmp_info_t *thread,
342  kmp_task_t *task,
343  kmp_depnode_t *source,
344  kmp_depnode_t *sink) {
345  if (!sink)
346  return 0;
347  kmp_int32 npredecessors = 0;
348 #if OMPX_TASKGRAPH
349  kmp_tdg_status tdg_status = KMP_TDG_NONE;
350  kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(task);
351  if (task) {
352  if (td->is_taskgraph)
353  tdg_status = KMP_TASK_TO_TASKDATA(task)->tdg->tdg_status;
354  if (__kmp_tdg_is_recording(tdg_status) && sink->dn.task)
355  __kmp_track_dependence(gtid, sink, source, task);
356  }
357 #endif
358  if (sink->dn.task) {
359  // synchronously add source to sink's list of successors
360  KMP_ACQUIRE_DEPNODE(gtid, sink);
361  if (sink->dn.task) {
362  if (!sink->dn.successors || sink->dn.successors->node != source) {
363 #if OMPX_TASKGRAPH
364  if (!(__kmp_tdg_is_recording(tdg_status)) && task)
365 #endif
366  __kmp_track_dependence(gtid, sink, source, task);
367  sink->dn.successors = __kmp_add_node(thread, sink->dn.successors, source);
368  KA_TRACE(40, ("__kmp_process_deps: T#%d adding dependence from %p to "
369  "%p\n",
370  gtid, KMP_TASK_TO_TASKDATA(sink->dn.task),
371  KMP_TASK_TO_TASKDATA(task)));
372 #if OMPX_TASKGRAPH
373  if (__kmp_tdg_is_recording(tdg_status)) {
374  kmp_taskdata_t *tdd = KMP_TASK_TO_TASKDATA(sink->dn.task);
375  if (tdd->is_taskgraph) {
376  if (tdd->td_flags.onced)
377  // decrement npredecessors if sink->dn.task belongs to a taskgraph
378  // and
379  // 1) the task is reset to its initial state (by kmp_free_task) or
380  // 2) the task is complete but not yet reset
381  npredecessors--;
382  }
383  }
384 #endif
385  npredecessors++;
386  }
387  }
388  KMP_RELEASE_DEPNODE(gtid, sink);
389  }
390  return npredecessors;
391 }
392 
393 static inline kmp_int32
394 __kmp_process_dep_all(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t *h,
395  bool dep_barrier, kmp_task_t *task) {
396  KA_TRACE(30, ("__kmp_process_dep_all: T#%d processing dep_all, "
397  "dep_barrier = %d\n",
398  gtid, dep_barrier));
399  kmp_info_t *thread = __kmp_threads[gtid];
400  kmp_int32 npredecessors = 0;
401 
402  // process previous omp_all_memory node if any
403  npredecessors +=
404  __kmp_depnode_link_successor(gtid, thread, task, node, h->last_all);
405  __kmp_node_deref(thread, h->last_all);
406  if (!dep_barrier) {
407  h->last_all = __kmp_node_ref(node);
408  } else {
409  // if this is a sync point in the serial sequence, then the previous
410  // outputs are guaranteed to be completed after the execution of this
411  // task so the previous output nodes can be cleared.
412  h->last_all = NULL;
413  }
414 
415  // process all regular dependences
416  for (size_t i = 0; i < h->size; i++) {
417  kmp_dephash_entry_t *info = h->buckets[i];
418  if (!info) // skip empty slots in dephash
419  continue;
420  for (; info; info = info->next_in_bucket) {
421  // for each entry the omp_all_memory works as OUT dependence
422  kmp_depnode_t *last_out = info->last_out;
423  kmp_depnode_list_t *last_set = info->last_set;
424  kmp_depnode_list_t *prev_set = info->prev_set;
425  if (last_set) {
426  npredecessors +=
427  __kmp_depnode_link_successor(gtid, thread, task, node, last_set);
428  __kmp_depnode_list_free(thread, last_set);
429  __kmp_depnode_list_free(thread, prev_set);
430  info->last_set = NULL;
431  info->prev_set = NULL;
432  info->last_flag = 0; // no sets in this dephash entry
433  } else {
434  npredecessors +=
435  __kmp_depnode_link_successor(gtid, thread, task, node, last_out);
436  }
437  __kmp_node_deref(thread, last_out);
438  if (!dep_barrier) {
439  info->last_out = __kmp_node_ref(node);
440  } else {
441  info->last_out = NULL;
442  }
443  }
444  }
445  KA_TRACE(30, ("__kmp_process_dep_all: T#%d found %d predecessors\n", gtid,
446  npredecessors));
447  return npredecessors;
448 }
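// An omp_all_memory dependence acts as an OUT dependence both against the
// previous omp_all_memory node (h->last_all) and against every address that
// already has an entry in the dephash, which is why the function walks all
// buckets.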
449 
450 template <bool filter>
451 static inline kmp_int32
452 __kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t **hash,
453  bool dep_barrier, kmp_int32 ndeps,
454  kmp_depend_info_t *dep_list, kmp_task_t *task) {
455  KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d processing %d dependences : "
456  "dep_barrier = %d\n",
457  filter, gtid, ndeps, dep_barrier));
458 
459  kmp_info_t *thread = __kmp_threads[gtid];
460  kmp_int32 npredecessors = 0;
461  for (kmp_int32 i = 0; i < ndeps; i++) {
462  const kmp_depend_info_t *dep = &dep_list[i];
463 
464  if (filter && dep->base_addr == 0)
465  continue; // skip filtered entries
466 
467  kmp_dephash_entry_t *info =
468  __kmp_dephash_find(thread, hash, dep->base_addr);
469  kmp_depnode_t *last_out = info->last_out;
470  kmp_depnode_list_t *last_set = info->last_set;
471  kmp_depnode_list_t *prev_set = info->prev_set;
472 
473  if (dep->flags.out) { // out or inout --> clean lists if any
474  if (last_set) {
475  npredecessors +=
476  __kmp_depnode_link_successor(gtid, thread, task, node, last_set);
477  __kmp_depnode_list_free(thread, last_set);
478  __kmp_depnode_list_free(thread, prev_set);
479  info->last_set = NULL;
480  info->prev_set = NULL;
481  info->last_flag = 0; // no sets in this dephash entry
482  } else {
483  npredecessors +=
484  __kmp_depnode_link_successor(gtid, thread, task, node, last_out);
485  }
486  __kmp_node_deref(thread, last_out);
487  if (!dep_barrier) {
488  info->last_out = __kmp_node_ref(node);
489  } else {
490  // if this is a sync point in the serial sequence, then the previous
491  // outputs are guaranteed to be completed after the execution of this
492  // task so the previous output nodes can be cleared.
493  info->last_out = NULL;
494  }
495  } else { // either IN or MTX or SET
496  if (info->last_flag == 0 || info->last_flag == dep->flag) {
497  // last_set either didn't exist or was of the same dep kind
498  // link node as successor of the last_out if any
499  npredecessors +=
500  __kmp_depnode_link_successor(gtid, thread, task, node, last_out);
501  // link node as successor of all nodes in the prev_set if any
502  npredecessors +=
503  __kmp_depnode_link_successor(gtid, thread, task, node, prev_set);
504  if (dep_barrier) {
505  // clean last_out and prev_set if any; don't touch last_set
506  __kmp_node_deref(thread, last_out);
507  info->last_out = NULL;
508  __kmp_depnode_list_free(thread, prev_set);
509  info->prev_set = NULL;
510  }
511  } else { // last_set is of different dep kind, make it prev_set
512  // link node as successor of all nodes in the last_set
513  npredecessors +=
514  __kmp_depnode_link_successor(gtid, thread, task, node, last_set);
515  // clean last_out if any
516  __kmp_node_deref(thread, last_out);
517  info->last_out = NULL;
518  // clean prev_set if any
519  __kmp_depnode_list_free(thread, prev_set);
520  if (!dep_barrier) {
521  // move last_set to prev_set, new last_set will be allocated
522  info->prev_set = last_set;
523  } else {
524  info->prev_set = NULL;
525  info->last_flag = 0;
526  }
527  info->last_set = NULL;
528  }
529  // for dep_barrier last_flag value should remain:
530  // 0 if last_set is empty, unchanged otherwise
531  if (!dep_barrier) {
532  info->last_flag = dep->flag; // store dep kind of the last_set
533  info->last_set = __kmp_add_node(thread, info->last_set, node);
534  }
535  // check if we are processing MTX dependency
536  if (dep->flag == KMP_DEP_MTX) {
537  if (info->mtx_lock == NULL) {
538  info->mtx_lock = (kmp_lock_t *)__kmp_allocate(sizeof(kmp_lock_t));
539  __kmp_init_lock(info->mtx_lock);
540  }
541  KMP_DEBUG_ASSERT(node->dn.mtx_num_locks < MAX_MTX_DEPS);
542  kmp_int32 m;
543  // Save lock in node's array
544  for (m = 0; m < MAX_MTX_DEPS; ++m) {
545  // sort pointers in decreasing order to avoid potential livelock
546  if (node->dn.mtx_locks[m] < info->mtx_lock) {
547  KMP_DEBUG_ASSERT(!node->dn.mtx_locks[node->dn.mtx_num_locks]);
548  for (int n = node->dn.mtx_num_locks; n > m; --n) {
549  // shift right all lesser non-NULL pointers
550  KMP_DEBUG_ASSERT(node->dn.mtx_locks[n - 1] != NULL);
551  node->dn.mtx_locks[n] = node->dn.mtx_locks[n - 1];
552  }
553  node->dn.mtx_locks[m] = info->mtx_lock;
554  break;
555  }
556  }
557  KMP_DEBUG_ASSERT(m < MAX_MTX_DEPS); // must break from loop
558  node->dn.mtx_num_locks++;
559  }
560  }
561  }
562  KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d found %d predecessors\n", filter,
563  gtid, npredecessors));
564  return npredecessors;
565 }
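// Summary of the per-address state machine above: OUT/INOUT dependences
// serialize against everything recorded for the address and become the new
// last_out; IN, SET (inoutset) and MTX (mutexinoutset) dependences of the
// same kind are grouped into last_set/prev_set so that members of one group
// may run concurrently while different groups stay ordered. MTX dependences
// additionally collect their per-address locks into the node, sorted in
// decreasing pointer order so every mutexinoutset task acquires them in the
// same order.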
566 
567 #define NO_DEP_BARRIER (false)
568 #define DEP_BARRIER (true)
569 
570 // returns true if the task has any outstanding dependence
571 static bool __kmp_check_deps(kmp_int32 gtid, kmp_depnode_t *node,
572  kmp_task_t *task, kmp_dephash_t **hash,
573  bool dep_barrier, kmp_int32 ndeps,
574  kmp_depend_info_t *dep_list,
575  kmp_int32 ndeps_noalias,
576  kmp_depend_info_t *noalias_dep_list) {
577  int i, n_mtxs = 0, dep_all = 0;
578 #if KMP_DEBUG
579  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
580 #endif
581  KA_TRACE(20, ("__kmp_check_deps: T#%d checking dependences for task %p : %d "
582  "possibly aliased dependences, %d non-aliased dependences : "
583  "dep_barrier=%d .\n",
584  gtid, taskdata, ndeps, ndeps_noalias, dep_barrier));
585 
586  // Filter deps in dep_list
587  // TODO: Different algorithm for large dep_list ( > 10 ? )
588  for (i = 0; i < ndeps; i++) {
589  if (dep_list[i].base_addr != 0 &&
590  dep_list[i].base_addr != (kmp_intptr_t)KMP_SIZE_T_MAX) {
591  KMP_DEBUG_ASSERT(
592  dep_list[i].flag == KMP_DEP_IN || dep_list[i].flag == KMP_DEP_OUT ||
593  dep_list[i].flag == KMP_DEP_INOUT ||
594  dep_list[i].flag == KMP_DEP_MTX || dep_list[i].flag == KMP_DEP_SET);
595  for (int j = i + 1; j < ndeps; j++) {
596  if (dep_list[i].base_addr == dep_list[j].base_addr) {
597  if (dep_list[i].flag != dep_list[j].flag) {
598  // two different dependences on the same address work identically to OUT
599  dep_list[i].flag = KMP_DEP_OUT;
600  }
601  dep_list[j].base_addr = 0; // Mark j element as void
602  }
603  }
604  if (dep_list[i].flag == KMP_DEP_MTX) {
605  // limit number of mtx deps to MAX_MTX_DEPS per node
606  if (n_mtxs < MAX_MTX_DEPS && task != NULL) {
607  ++n_mtxs;
608  } else {
609  dep_list[i].flag = KMP_DEP_OUT; // downgrade mutexinoutset to inout
610  }
611  }
612  } else if (dep_list[i].flag == KMP_DEP_ALL ||
613  dep_list[i].base_addr == (kmp_intptr_t)KMP_SIZE_T_MAX) {
614  // An omp_all_memory dependence can be marked by the compiler either as
615  // (addr=0 && flag=0x80), i.e. flag KMP_DEP_ALL, or as (addr=-1).
616  // omp_all_memory overrides all other dependences, if any
617  dep_all = 1;
618  break;
619  }
620  }
621 
622  // doesn't need to be atomic as no other thread is going to be accessing this
623  // node just yet.
624  // npredecessors is set to -1 to ensure that none of the releasing tasks queues
625  // this task before we have finished processing all the dependences
626  node->dn.npredecessors = -1;
627 
628  // used to pack all npredecessors additions into a single atomic operation at
629  // the end
630  int npredecessors;
631 
632  if (!dep_all) { // regular dependences
633  npredecessors = __kmp_process_deps<true>(gtid, node, hash, dep_barrier,
634  ndeps, dep_list, task);
635  npredecessors += __kmp_process_deps<false>(
636  gtid, node, hash, dep_barrier, ndeps_noalias, noalias_dep_list, task);
637  } else { // omp_all_memory dependence
638  npredecessors = __kmp_process_dep_all(gtid, node, *hash, dep_barrier, task);
639  }
640 
641  node->dn.task = task;
642  KMP_MB();
643 
644  // Account for our initial fake value
645  npredecessors++;
646 
647  // Update predecessors and obtain current value to check if there are still
648  // any outstanding dependences (some tasks may have finished while we
649  // processed the dependences)
650  npredecessors =
651  node->dn.npredecessors.fetch_add(npredecessors) + npredecessors;
652 
653  KA_TRACE(20, ("__kmp_check_deps: T#%d found %d predecessors for task %p \n",
654  gtid, npredecessors, taskdata));
655 
656  // beyond this point the task could be queued (and executed) by a releasing
657  // task...
658  return npredecessors > 0 ? true : false;
659 }
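// The initial npredecessors value of -1 keeps completing predecessors from
// queueing this task while its dependence lists are still being processed;
// the single fetch_add at the end both publishes the real predecessor count
// and folds in any releases that happened in the meantime.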
660 
677 kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
678  kmp_task_t *new_task, kmp_int32 ndeps,
679  kmp_depend_info_t *dep_list,
680  kmp_int32 ndeps_noalias,
681  kmp_depend_info_t *noalias_dep_list) {
682 
683  kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
684  KA_TRACE(10, ("__kmpc_omp_task_with_deps(enter): T#%d loc=%p task=%p\n", gtid,
685  loc_ref, new_taskdata));
686  __kmp_assert_valid_gtid(gtid);
687  kmp_info_t *thread = __kmp_threads[gtid];
688  kmp_taskdata_t *current_task = thread->th.th_current_task;
689 
690 #if OMPX_TASKGRAPH
691  // record TDG with deps
692  if (new_taskdata->is_taskgraph &&
693  __kmp_tdg_is_recording(new_taskdata->tdg->tdg_status)) {
694  kmp_tdg_info_t *tdg = new_taskdata->tdg;
695  // extend record_map if needed
696  if (new_taskdata->td_task_id >= tdg->map_size) {
697  __kmp_acquire_bootstrap_lock(&tdg->graph_lock);
698  if (new_taskdata->td_task_id >= tdg->map_size) {
699  kmp_uint old_size = tdg->map_size;
700  kmp_uint new_size = old_size * 2;
701  kmp_node_info_t *old_record = tdg->record_map;
702  kmp_node_info_t *new_record = (kmp_node_info_t *)__kmp_allocate(
703  new_size * sizeof(kmp_node_info_t));
704  KMP_MEMCPY(new_record, tdg->record_map,
705  old_size * sizeof(kmp_node_info_t));
706  tdg->record_map = new_record;
707 
708  __kmp_free(old_record);
709 
710  for (kmp_int i = old_size; i < new_size; i++) {
711  kmp_int32 *successorsList = (kmp_int32 *)__kmp_allocate(
712  __kmp_successors_size * sizeof(kmp_int32));
713  new_record[i].task = nullptr;
714  new_record[i].successors = successorsList;
715  new_record[i].nsuccessors = 0;
716  new_record[i].npredecessors = 0;
717  new_record[i].successors_size = __kmp_successors_size;
718  KMP_ATOMIC_ST_REL(&new_record[i].npredecessors_counter, 0);
719  }
720  // update the size at the end, so that other threads do not use
721  // old_record while map_size has already been updated
722  tdg->map_size = new_size;
723  }
724  __kmp_release_bootstrap_lock(&tdg->graph_lock);
725  }
726  tdg->record_map[new_taskdata->td_task_id].task = new_task;
727  tdg->record_map[new_taskdata->td_task_id].parent_task =
728  new_taskdata->td_parent;
729  KMP_ATOMIC_INC(&tdg->num_tasks);
730  }
731 #endif
732 #if OMPT_SUPPORT
733  if (ompt_enabled.enabled) {
734  if (!current_task->ompt_task_info.frame.enter_frame.ptr)
735  current_task->ompt_task_info.frame.enter_frame.ptr =
736  OMPT_GET_FRAME_ADDRESS(0);
737  if (ompt_enabled.ompt_callback_task_create) {
738  ompt_callbacks.ompt_callback(ompt_callback_task_create)(
739  &(current_task->ompt_task_info.task_data),
740  &(current_task->ompt_task_info.frame),
741  &(new_taskdata->ompt_task_info.task_data),
742  ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 1,
743  OMPT_LOAD_OR_GET_RETURN_ADDRESS(gtid));
744  }
745 
746  new_taskdata->ompt_task_info.frame.enter_frame.ptr =
747  OMPT_GET_FRAME_ADDRESS(0);
748  }
749 
750 #if OMPT_OPTIONAL
751  /* OMPT grab all dependences if requested by the tool */
752  if (ndeps + ndeps_noalias > 0 && ompt_enabled.ompt_callback_dependences) {
753  kmp_int32 i;
754 
755  int ompt_ndeps = ndeps + ndeps_noalias;
756  ompt_dependence_t *ompt_deps = (ompt_dependence_t *)KMP_OMPT_DEPS_ALLOC(
757  thread, (ndeps + ndeps_noalias) * sizeof(ompt_dependence_t));
758 
759  KMP_ASSERT(ompt_deps != NULL);
760 
761  for (i = 0; i < ndeps; i++) {
762  ompt_deps[i].variable.ptr = (void *)dep_list[i].base_addr;
763  if (dep_list[i].base_addr == KMP_SIZE_T_MAX)
764  ompt_deps[i].dependence_type = ompt_dependence_type_out_all_memory;
765  else if (dep_list[i].flags.in && dep_list[i].flags.out)
766  ompt_deps[i].dependence_type = ompt_dependence_type_inout;
767  else if (dep_list[i].flags.out)
768  ompt_deps[i].dependence_type = ompt_dependence_type_out;
769  else if (dep_list[i].flags.in)
770  ompt_deps[i].dependence_type = ompt_dependence_type_in;
771  else if (dep_list[i].flags.mtx)
772  ompt_deps[i].dependence_type = ompt_dependence_type_mutexinoutset;
773  else if (dep_list[i].flags.set)
774  ompt_deps[i].dependence_type = ompt_dependence_type_inoutset;
775  else if (dep_list[i].flags.all)
776  ompt_deps[i].dependence_type = ompt_dependence_type_out_all_memory;
777  }
778  for (i = 0; i < ndeps_noalias; i++) {
779  ompt_deps[ndeps + i].variable.ptr = (void *)noalias_dep_list[i].base_addr;
780  if (noalias_dep_list[i].base_addr == KMP_SIZE_T_MAX)
781  ompt_deps[ndeps + i].dependence_type =
782  ompt_dependence_type_out_all_memory;
783  else if (noalias_dep_list[i].flags.in && noalias_dep_list[i].flags.out)
784  ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inout;
785  else if (noalias_dep_list[i].flags.out)
786  ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_out;
787  else if (noalias_dep_list[i].flags.in)
788  ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_in;
789  else if (noalias_dep_list[i].flags.mtx)
790  ompt_deps[ndeps + i].dependence_type =
791  ompt_dependence_type_mutexinoutset;
792  else if (noalias_dep_list[i].flags.set)
793  ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inoutset;
794  else if (noalias_dep_list[i].flags.all)
795  ompt_deps[ndeps + i].dependence_type =
796  ompt_dependence_type_out_all_memory;
797  }
798  ompt_callbacks.ompt_callback(ompt_callback_dependences)(
799  &(new_taskdata->ompt_task_info.task_data), ompt_deps, ompt_ndeps);
800  /* We can now free the allocated memory for the dependences */
801  /* For OMPD we might want to delay the free until end of this function */
802  KMP_OMPT_DEPS_FREE(thread, ompt_deps);
803  }
804 #endif /* OMPT_OPTIONAL */
805 #endif /* OMPT_SUPPORT */
806 
807  bool serial = current_task->td_flags.team_serial ||
808  current_task->td_flags.tasking_ser ||
809  current_task->td_flags.final;
810  kmp_task_team_t *task_team = thread->th.th_task_team;
811  serial = serial &&
812  !(task_team && (task_team->tt.tt_found_proxy_tasks ||
813  task_team->tt.tt_hidden_helper_task_encountered));
814 
815  if (!serial && (ndeps > 0 || ndeps_noalias > 0)) {
816  /* if no dependences have been tracked yet, create the dependence hash */
817  if (current_task->td_dephash == NULL)
818  current_task->td_dephash = __kmp_dephash_create(thread, current_task);
819 
820 #if USE_FAST_MEMORY
821  kmp_depnode_t *node =
822  (kmp_depnode_t *)__kmp_fast_allocate(thread, sizeof(kmp_depnode_t));
823 #else
824  kmp_depnode_t *node =
825  (kmp_depnode_t *)__kmp_thread_malloc(thread, sizeof(kmp_depnode_t));
826 #endif
827 
828  __kmp_init_node(node);
829  new_taskdata->td_depnode = node;
830 
831  if (__kmp_check_deps(gtid, node, new_task, &current_task->td_dephash,
832  NO_DEP_BARRIER, ndeps, dep_list, ndeps_noalias,
833  noalias_dep_list)) {
834  KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d task had blocking "
835  "dependences: "
836  "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n",
837  gtid, loc_ref, new_taskdata));
838 #if OMPT_SUPPORT
839  if (ompt_enabled.enabled) {
840  current_task->ompt_task_info.frame.enter_frame = ompt_data_none;
841  }
842 #endif
843  return TASK_CURRENT_NOT_QUEUED;
844  }
845  } else {
846  KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d ignored dependences "
847  "for task (serialized) loc=%p task=%p\n",
848  gtid, loc_ref, new_taskdata));
849  }
850 
851  KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d task had no blocking "
852  "dependences : "
853  "loc=%p task=%p, transferring to __kmp_omp_task\n",
854  gtid, loc_ref, new_taskdata));
855 
856  kmp_int32 ret = __kmp_omp_task(gtid, new_task, true);
857 #if OMPT_SUPPORT
858  if (ompt_enabled.enabled) {
859  current_task->ompt_task_info.frame.enter_frame = ompt_data_none;
860  }
861 #endif
862  return ret;
863 }
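// For reference, a user-level construct such as
//
//   #pragma omp task depend(in : x) depend(out : y)
//   { /* task body */ }
//
// is typically lowered by the compiler into a __kmpc_omp_task_alloc() call
// followed by a call to __kmpc_omp_task_with_deps() with two
// kmp_depend_info_t entries describing x (in) and y (out). This is only a
// sketch; the exact lowering is compiler-dependent.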
864 
865 #if OMPT_SUPPORT
866 void __ompt_taskwait_dep_finish(kmp_taskdata_t *current_task,
867  ompt_data_t *taskwait_task_data) {
868  if (ompt_enabled.ompt_callback_task_schedule) {
869  ompt_callbacks.ompt_callback(ompt_callback_task_schedule)(
870  taskwait_task_data, ompt_taskwait_complete, NULL);
871  }
872  current_task->ompt_task_info.frame.enter_frame.ptr = NULL;
873  *taskwait_task_data = ompt_data_none;
874 }
875 #endif /* OMPT_SUPPORT */
876 
888 void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps,
889  kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
890  kmp_depend_info_t *noalias_dep_list) {
891  __kmpc_omp_taskwait_deps_51(loc_ref, gtid, ndeps, dep_list, ndeps_noalias,
892  noalias_dep_list, false);
893 }
894 
895 /* __kmpc_omp_taskwait_deps_51 : Function for the OpenMP 5.1 nowait clause.
896    Placeholder for taskwait with a nowait clause.
897    The earlier code of __kmpc_omp_wait_deps() now
898    lives in this function.
899 */
900 void __kmpc_omp_taskwait_deps_51(ident_t *loc_ref, kmp_int32 gtid,
901  kmp_int32 ndeps, kmp_depend_info_t *dep_list,
902  kmp_int32 ndeps_noalias,
903  kmp_depend_info_t *noalias_dep_list,
904  kmp_int32 has_no_wait) {
905  KA_TRACE(10, ("__kmpc_omp_taskwait_deps(enter): T#%d loc=%p nowait#%d\n",
906  gtid, loc_ref, has_no_wait));
907  if (ndeps == 0 && ndeps_noalias == 0) {
908  KA_TRACE(10, ("__kmpc_omp_taskwait_deps(exit): T#%d has no dependences to "
909  "wait upon : loc=%p\n",
910  gtid, loc_ref));
911  return;
912  }
913  __kmp_assert_valid_gtid(gtid);
914  kmp_info_t *thread = __kmp_threads[gtid];
915  kmp_taskdata_t *current_task = thread->th.th_current_task;
916 
917 #if OMPT_SUPPORT
918  // this function represents a taskwait construct with depend clause
919  // We signal 4 events:
920  // - creation of the taskwait task
921  // - dependences of the taskwait task
922  // - schedule and finish of the taskwait task
923  ompt_data_t *taskwait_task_data = &thread->th.ompt_thread_info.task_data;
924  KMP_ASSERT(taskwait_task_data->ptr == NULL);
925  if (ompt_enabled.enabled) {
926  if (!current_task->ompt_task_info.frame.enter_frame.ptr)
927  current_task->ompt_task_info.frame.enter_frame.ptr =
928  OMPT_GET_FRAME_ADDRESS(0);
929  if (ompt_enabled.ompt_callback_task_create) {
930  ompt_callbacks.ompt_callback(ompt_callback_task_create)(
931  &(current_task->ompt_task_info.task_data),
932  &(current_task->ompt_task_info.frame), taskwait_task_data,
933  ompt_task_taskwait | ompt_task_undeferred | ompt_task_mergeable, 1,
934  OMPT_LOAD_OR_GET_RETURN_ADDRESS(gtid));
935  }
936  }
937 
938 #if OMPT_OPTIONAL
939  /* OMPT grab all dependences if requested by the tool */
940  if (ndeps + ndeps_noalias > 0 && ompt_enabled.ompt_callback_dependences) {
941  kmp_int32 i;
942 
943  int ompt_ndeps = ndeps + ndeps_noalias;
944  ompt_dependence_t *ompt_deps = (ompt_dependence_t *)KMP_OMPT_DEPS_ALLOC(
945  thread, (ndeps + ndeps_noalias) * sizeof(ompt_dependence_t));
946 
947  KMP_ASSERT(ompt_deps != NULL);
948 
949  for (i = 0; i < ndeps; i++) {
950  ompt_deps[i].variable.ptr = (void *)dep_list[i].base_addr;
951  if (dep_list[i].flags.in && dep_list[i].flags.out)
952  ompt_deps[i].dependence_type = ompt_dependence_type_inout;
953  else if (dep_list[i].flags.out)
954  ompt_deps[i].dependence_type = ompt_dependence_type_out;
955  else if (dep_list[i].flags.in)
956  ompt_deps[i].dependence_type = ompt_dependence_type_in;
957  else if (dep_list[i].flags.mtx)
958  ompt_deps[i].dependence_type =
959  ompt_dependence_type_mutexinoutset;
960  else if (dep_list[i].flags.set)
961  ompt_deps[i].dependence_type = ompt_dependence_type_inoutset;
962  }
963  for (i = 0; i < ndeps_noalias; i++) {
964  ompt_deps[ndeps + i].variable.ptr = (void *)noalias_dep_list[i].base_addr;
965  if (noalias_dep_list[i].flags.in && noalias_dep_list[i].flags.out)
966  ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inout;
967  else if (noalias_dep_list[i].flags.out)
968  ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_out;
969  else if (noalias_dep_list[i].flags.in)
970  ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_in;
971  else if (noalias_dep_list[i].flags.mtx)
972  ompt_deps[ndeps + i].dependence_type =
973  ompt_dependence_type_mutexinoutset;
974  else if (noalias_dep_list[i].flags.set)
975  ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inoutset;
976  }
977  ompt_callbacks.ompt_callback(ompt_callback_dependences)(
978  taskwait_task_data, ompt_deps, ompt_ndeps);
979  /* We can now free the allocated memory for the dependences */
980  /* For OMPD we might want to delay the free until end of this function */
981  KMP_OMPT_DEPS_FREE(thread, ompt_deps);
982  ompt_deps = NULL;
983  }
984 #endif /* OMPT_OPTIONAL */
985 #endif /* OMPT_SUPPORT */
986 
987  // We can return immediately as:
988  // - dependences are not computed in serial teams (except with proxy tasks)
989  // - if the dephash is not yet created it means we have nothing to wait for
990  bool ignore = current_task->td_flags.team_serial ||
991  current_task->td_flags.tasking_ser ||
992  current_task->td_flags.final;
993  ignore =
994  ignore && thread->th.th_task_team != NULL &&
995  thread->th.th_task_team->tt.tt_found_proxy_tasks == FALSE &&
996  thread->th.th_task_team->tt.tt_hidden_helper_task_encountered == FALSE;
997  ignore = ignore || current_task->td_dephash == NULL;
998 
999  if (ignore) {
1000  KA_TRACE(10, ("__kmpc_omp_taskwait_deps(exit): T#%d has no blocking "
1001  "dependences : loc=%p\n",
1002  gtid, loc_ref));
1003 #if OMPT_SUPPORT
1004  __ompt_taskwait_dep_finish(current_task, taskwait_task_data);
1005 #endif /* OMPT_SUPPORT */
1006  return;
1007  }
1008 
1009  kmp_depnode_t node = {0};
1010  __kmp_init_node(&node);
1011 
1012  if (!__kmp_check_deps(gtid, &node, NULL, &current_task->td_dephash,
1013  DEP_BARRIER, ndeps, dep_list, ndeps_noalias,
1014  noalias_dep_list)) {
1015  KA_TRACE(10, ("__kmpc_omp_taskwait_deps(exit): T#%d has no blocking "
1016  "dependences : loc=%p\n",
1017  gtid, loc_ref));
1018 #if OMPT_SUPPORT
1019  __ompt_taskwait_dep_finish(current_task, taskwait_task_data);
1020 #endif /* OMPT_SUPPORT */
1021  return;
1022  }
1023 
1024  int thread_finished = FALSE;
1025  kmp_flag_32<false, false> flag(
1026  (std::atomic<kmp_uint32> *)&node.dn.npredecessors, 0U);
1027  while (node.dn.npredecessors > 0) {
1028  flag.execute_tasks(thread, gtid, FALSE,
1029  &thread_finished USE_ITT_BUILD_ARG(NULL),
1030  __kmp_task_stealing_constraint);
1031  }
1032 
1033 #if OMPT_SUPPORT
1034  __ompt_taskwait_dep_finish(current_task, taskwait_task_data);
1035 #endif /* OMPT_SUPPORT */
1036  KA_TRACE(10, ("__kmpc_omp_taskwait_deps(exit): T#%d finished waiting : loc=%p\
1037  \n",
1038  gtid, loc_ref));
1039 }
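// While waiting, the thread does not simply spin: the kmp_flag_32 loop above
// calls execute_tasks(), so the waiting thread keeps executing other ready
// tasks until every predecessor of the stack-allocated depnode has completed.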