#include "kmp_wait_release.h"
#include "kmp_taskdeps.h"
#include "ompt-specific.h"

#if ENABLE_LIBOMPTARGET
// Pointer to the __tgt_target_nowait_query entry point in libomptarget,
// resolved at runtime via dlsym so libomp does not link against it directly.
static void (*tgt_target_nowait_query)(void **);

void __kmp_init_target_task() {
  *(void **)(&tgt_target_nowait_query) = KMP_DLSYM("__tgt_target_nowait_query");
}
#endif
// Forward declarations of static helpers used throughout this file.
static void __kmp_enable_tasking(kmp_task_team_t *task_team,
                                 kmp_info_t *this_thr);
static void __kmp_alloc_task_deque(kmp_info_t *thread,
                                   kmp_thread_data_t *thread_data);
static int __kmp_realloc_task_threads_data(kmp_info_t *thread,
                                           kmp_task_team_t *task_team);
static void __kmp_bottom_half_finish_proxy(kmp_int32 gtid, kmp_task_t *ptask);
#if OMPX_TASKGRAPH
static kmp_tdg_info_t *__kmp_find_tdg(kmp_int32 tdg_id);
int __kmp_taskloop_task(int gtid, void *ptask);
#endif
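// __kmp_task_is_allowed checks whether the calling thread may execute a
// candidate task under the Task Scheduling Constraint: a constrained tied
// task must be a descendant of every deferred tied task of this thread, and
// checking td_last_tied is enough because that task is itself a descendant
// of all earlier ones.  The routine also tries to take all mutexinoutset
// locks of the candidate, negating mtx_num_locks on success to mark them as
// held.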
static bool __kmp_task_is_allowed(int gtid, const kmp_int32 is_constrained,
                                  const kmp_taskdata_t *tasknew,
                                  const kmp_taskdata_t *taskcurr) {
  if (is_constrained && (tasknew->td_flags.tiedness == TASK_TIED)) {
    kmp_taskdata_t *current = taskcurr->td_last_tied;
    KMP_DEBUG_ASSERT(current != NULL);
    // Check that the task is not suspended on a barrier.
    if (current->td_flags.tasktype == TASK_EXPLICIT ||
        current->td_taskwait_thread > 0) { // <= 0 when suspended on a barrier
      kmp_int32 level = current->td_level;
      kmp_taskdata_t *parent = tasknew->td_parent;
      while (parent != current && parent->td_level > level) {
        // Walk up the ancestry to the level of the current task.
        parent = parent->td_parent;
        KMP_DEBUG_ASSERT(parent != NULL);
      }
      if (parent != current)
        return false;
    }
  }
  // Check mutexinoutset dependencies: try to acquire all locks.
  kmp_depnode_t *node = tasknew->td_depnode;
#if OMPX_TASKGRAPH
  if (!tasknew->is_taskgraph && UNLIKELY(node && (node->dn.mtx_num_locks > 0))) {
#else
  if (UNLIKELY(node && (node->dn.mtx_num_locks > 0))) {
#endif
    for (int i = 0; i < node->dn.mtx_num_locks; ++i) {
      KMP_DEBUG_ASSERT(node->dn.mtx_locks[i] != NULL);
      if (__kmp_test_lock(node->dn.mtx_locks[i], gtid))
        continue;
      // Could not get the i-th lock; release the previously acquired ones.
      for (int j = i - 1; j >= 0; --j)
        __kmp_release_lock(node->dn.mtx_locks[j], gtid);
      return false;
    }
    // Negative num_locks means all locks were acquired successfully.
    node->dn.mtx_num_locks = -node->dn.mtx_num_locks;
  }
  return true;
}
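// Task deques are ring buffers whose capacity is kept a power of two, so
// head/tail wrap-around is a single AND with TASK_DEQUE_MASK instead of a
// modulo.  __kmp_realloc_task_deque below doubles a full deque while holding
// td_deque_lock, copying the entries in head-to-tail order so the new head
// starts at index 0.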
static void __kmp_realloc_task_deque(kmp_info_t *thread,
                                     kmp_thread_data_t *thread_data) {
  kmp_int32 size = TASK_DEQUE_SIZE(thread_data->td);
  KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) == size);
  kmp_int32 new_size = 2 * size;

  KE_TRACE(10, ("__kmp_realloc_task_deque: T#%d reallocating deque[from %d to "
                "%d] for thread_data %p\n",
                __kmp_gtid_from_thread(thread), size, new_size, thread_data));

  kmp_taskdata_t **new_deque =
      (kmp_taskdata_t **)__kmp_allocate(new_size * sizeof(kmp_taskdata_t *));

  int i, j;
  for (i = thread_data->td.td_deque_head, j = 0; j < size;
       i = (i + 1) & TASK_DEQUE_MASK(thread_data->td), j++)
    new_deque[j] = thread_data->td.td_deque[i];

  __kmp_free(thread_data->td.td_deque);

  thread_data->td.td_deque_head = 0;
  thread_data->td.td_deque_tail = size;
  thread_data->td.td_deque = new_deque;
  thread_data->td.td_deque_size = new_size;
}
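// Tasks with an explicit priority live in a singly linked list of
// per-priority deques (kmp_task_pri_t), sorted by decreasing priority.  The
// next two helpers allocate a list node and find (or insert) the node that
// matches a given priority.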
static kmp_task_pri_t *__kmp_alloc_task_pri_list() {
  kmp_task_pri_t *l = (kmp_task_pri_t *)__kmp_allocate(sizeof(kmp_task_pri_t));
  kmp_thread_data_t *thread_data = &l->td;
  __kmp_init_bootstrap_lock(&thread_data->td.td_deque_lock);
  thread_data->td.td_deque_last_stolen = -1;
  KE_TRACE(20, ("__kmp_alloc_task_pri_list: T#%d allocating deque[%d] "
                "for thread_data %p\n",
                __kmp_get_gtid(), INITIAL_TASK_DEQUE_SIZE, thread_data));
  thread_data->td.td_deque = (kmp_taskdata_t **)__kmp_allocate(
      INITIAL_TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
  thread_data->td.td_deque_size = INITIAL_TASK_DEQUE_SIZE;
  return l;
}
static kmp_thread_data_t *
__kmp_get_priority_deque_data(kmp_task_team_t *task_team, kmp_int32 pri) {
  kmp_thread_data_t *thread_data;
  kmp_task_pri_t *lst = task_team->tt.tt_task_pri_list;
  if (lst->priority == pri) {
    // Found queue of tasks with given priority.
    thread_data = &lst->td;
  } else if (lst->priority < pri) {
    // All current priority queues contain tasks with lower priority.
    // Allocate a new one for the given priority and put it at the head.
    kmp_task_pri_t *list = __kmp_alloc_task_pri_list();
    thread_data = &list->td;
    list->priority = pri;
    list->next = lst;
    task_team->tt.tt_task_pri_list = list;
  } else { // task priority is less than that of the list head
    kmp_task_pri_t *next_queue = lst->next;
    while (next_queue && next_queue->priority > pri) {
      lst = next_queue;
      next_queue = lst->next;
    }
    // lst->priority > pri && (next_queue == NULL || pri >= next_queue->priority)
    if (next_queue == NULL) {
      // No queue with this priority; append a new one.
      kmp_task_pri_t *list = __kmp_alloc_task_pri_list();
      thread_data = &list->td;
      list->priority = pri;
      list->next = NULL;
      lst->next = list;
    } else if (next_queue->priority == pri) {
      // Found queue of tasks with given priority.
      thread_data = &next_queue->td;
    } else { // lst->priority > pri > next_queue->priority
      // Insert a newly allocated queue between the existing ones.
      kmp_task_pri_t *list = __kmp_alloc_task_pri_list();
      thread_data = &list->td;
      list->priority = pri;
      list->next = next_queue;
      lst->next = list;
    }
  }
  return thread_data;
}
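// __kmp_push_priority_task enqueues a task into the deque matching its
// priority.  The unsynchronized read of tt_task_pri_list is the fast path;
// tt_task_pri_lock is taken only to create or search the list, and the list
// head is re-checked under the lock (double-checked locking) in case another
// thread created it first.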
static kmp_int32 __kmp_push_priority_task(kmp_int32 gtid, kmp_info_t *thread,
                                          kmp_taskdata_t *taskdata,
                                          kmp_task_team_t *task_team,
                                          kmp_int32 pri) {
  kmp_thread_data_t *thread_data = NULL;
  KA_TRACE(20,
           ("__kmp_push_priority_task: T#%d trying to push task %p, pri %d.\n",
            gtid, taskdata, pri));

  // Find the task queue specific to the priority value.
  kmp_task_pri_t *lst = task_team->tt.tt_task_pri_list;
  if (UNLIKELY(lst == NULL)) {
    __kmp_acquire_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
    if (task_team->tt.tt_task_pri_list == NULL) {
      // List of queues is still empty; allocate one.
      kmp_task_pri_t *list = __kmp_alloc_task_pri_list();
      thread_data = &list->td;
      list->priority = pri;
      list->next = NULL;
      task_team->tt.tt_task_pri_list = list;
    } else {
      // Another thread initialized the list while we waited for the lock.
      thread_data = __kmp_get_priority_deque_data(task_team, pri);
    }
    __kmp_release_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
  } else {
    if (lst->priority == pri) {
      // Found queue of tasks with given priority.
      thread_data = &lst->td;
    } else {
      __kmp_acquire_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
      thread_data = __kmp_get_priority_deque_data(task_team, pri);
      __kmp_release_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
    }
  }
  KMP_DEBUG_ASSERT(thread_data);

  __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
  // Check if deque is full.
  if (TCR_4(thread_data->td.td_deque_ntasks) >=
      TASK_DEQUE_SIZE(thread_data->td)) {
    if (__kmp_enable_task_throttling &&
        __kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
                              thread->th.th_current_task)) {
      __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
      KA_TRACE(20, ("__kmp_push_priority_task: T#%d deque is full; returning "
                    "TASK_NOT_PUSHED for task %p\n",
                    gtid, taskdata));
      return TASK_NOT_PUSHED;
    } else {
      // Expand the deque to push a task that is not allowed to execute here.
      __kmp_realloc_task_deque(thread, thread_data);
    }
  }
  KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) <
                   TASK_DEQUE_SIZE(thread_data->td));
  // Push taskdata and wrap the tail index.
  thread_data->td.td_deque[thread_data->td.td_deque_tail] = taskdata;
  thread_data->td.td_deque_tail =
      (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
  TCW_4(thread_data->td.td_deque_ntasks,
        TCR_4(thread_data->td.td_deque_ntasks) + 1); // Adjust task count
  KMP_FSYNC_RELEASING(thread->th.th_current_task); // releasing self
  KMP_FSYNC_RELEASING(taskdata); // releasing child
  KA_TRACE(20, ("__kmp_push_priority_task: T#%d returning "
                "TASK_SUCCESSFULLY_PUSHED: task=%p ntasks=%d head=%u tail=%u\n",
                gtid, taskdata, thread_data->td.td_deque_ntasks,
                thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
  __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
  task_team->tt.tt_num_task_pri++;
  return TASK_SUCCESSFULLY_PUSHED;
}
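// __kmp_push_task decides how a deferred task is scheduled: hidden helper
// tasks encountered by a normal thread are handed to the corresponding
// shadow hidden helper thread, serialized tasks are returned as
// TASK_NOT_PUSHED so the caller executes them immediately, priority tasks go
// to the priority deques, and everything else is appended to the
// encountering thread's own deque, growing it or rejecting the push (task
// throttling) when it is full.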
static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task) {
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);

  // A hidden helper task encountered by a non-hidden-helper thread is given
  // to a hidden helper thread, starting from its shadow one.
  if (UNLIKELY(taskdata->td_flags.hidden_helper &&
               !KMP_HIDDEN_HELPER_THREAD(gtid))) {
    kmp_int32 shadow_gtid = KMP_GTID_TO_SHADOW_GTID(gtid);
    __kmpc_give_task(task, __kmp_tid_from_gtid(shadow_gtid));
    // Signal the hidden helper threads.
    __kmp_hidden_helper_worker_thread_signal();
    return TASK_SUCCESSFULLY_PUSHED;
  }

  kmp_task_team_t *task_team = thread->th.th_task_team;
  kmp_int32 tid = __kmp_tid_from_gtid(gtid);
  kmp_thread_data_t *thread_data;

  KA_TRACE(20,
           ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata));

  if (UNLIKELY(taskdata->td_flags.tiedness == TASK_UNTIED)) {
    // An untied task needs to increment the counter so that the task
    // structure is not freed prematurely.
    kmp_int32 counter = 1 + KMP_ATOMIC_INC(&taskdata->td_untied_count);
    KMP_DEBUG_USE_VAR(counter);
    KA_TRACE(
        20,
        ("__kmp_push_task: T#%d untied_count (%d) incremented for task %p\n",
         gtid, counter, taskdata));
  }

  // The first check avoids building task_team thread data if serialized.
  if (UNLIKELY(taskdata->td_flags.task_serial)) {
    KA_TRACE(20, ("__kmp_push_task: T#%d team serialized; returning "
                  "TASK_NOT_PUSHED for task %p\n",
                  gtid, taskdata));
    return TASK_NOT_PUSHED;
  }

  // Now that serialized tasks have returned, we can assume that we are not in
  // immediate exec mode.
  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
  if (UNLIKELY(!KMP_TASKING_ENABLED(task_team))) {
    __kmp_enable_tasking(task_team, thread);
  }
  KMP_DEBUG_ASSERT(TCR_4(task_team->tt.tt_found_tasks) == TRUE);
  KMP_DEBUG_ASSERT(TCR_PTR(task_team->tt.tt_threads_data) != NULL);

  if (taskdata->td_flags.priority_specified && task->data2.priority > 0 &&
      __kmp_max_task_priority > 0) {
    int pri = KMP_MIN(task->data2.priority, __kmp_max_task_priority);
    return __kmp_push_priority_task(gtid, thread, taskdata, task_team, pri);
  }

  // Find the tasking deque specific to the encountering thread.
  thread_data = &task_team->tt.tt_threads_data[tid];

  // No lock needed since only the owner can allocate.
  if (UNLIKELY(thread_data->td.td_deque == NULL)) {
    __kmp_alloc_task_deque(thread, thread_data);
  }

  int locked = 0;
  // Check if deque is full.
  if (TCR_4(thread_data->td.td_deque_ntasks) >=
      TASK_DEQUE_SIZE(thread_data->td)) {
    if (__kmp_enable_task_throttling &&
        __kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
                              thread->th.th_current_task)) {
      KA_TRACE(20, ("__kmp_push_task: T#%d deque is full; returning "
                    "TASK_NOT_PUSHED for task %p\n",
                    gtid, taskdata));
      return TASK_NOT_PUSHED;
    } else {
      __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
      locked = 1;
      if (TCR_4(thread_data->td.td_deque_ntasks) >=
          TASK_DEQUE_SIZE(thread_data->td)) {
        // Expand the deque to push a task that is not allowed to execute here.
        __kmp_realloc_task_deque(thread, thread_data);
      }
    }
  }
  // Lock the deque for the task push operation.
  if (!locked) {
    __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
    // Need to recheck: we can get a proxy task from a thread outside of OpenMP.
    if (TCR_4(thread_data->td.td_deque_ntasks) >=
        TASK_DEQUE_SIZE(thread_data->td)) {
      if (__kmp_enable_task_throttling &&
          __kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
                                thread->th.th_current_task)) {
        __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
        KA_TRACE(20, ("__kmp_push_task: T#%d deque is full on 2nd check; "
                      "returning TASK_NOT_PUSHED for task %p\n",
                      gtid, taskdata));
        return TASK_NOT_PUSHED;
      } else {
        // Expand the deque to push a task that is not allowed to execute here.
        __kmp_realloc_task_deque(thread, thread_data);
      }
    }
  }
  // Must have room since no thread but the calling one can add tasks.
  KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) <
                   TASK_DEQUE_SIZE(thread_data->td));

  thread_data->td.td_deque[thread_data->td.td_deque_tail] =
      taskdata; // Push taskdata
  // Wrap the tail index.
  thread_data->td.td_deque_tail =
      (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
  TCW_4(thread_data->td.td_deque_ntasks,
        TCR_4(thread_data->td.td_deque_ntasks) + 1); // Adjust task count
  KMP_FSYNC_RELEASING(thread->th.th_current_task); // releasing self
  KMP_FSYNC_RELEASING(taskdata); // releasing child
  KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
                "task=%p ntasks=%d head=%u tail=%u\n",
                gtid, taskdata, thread_data->td.td_deque_ntasks,
                thread_data->td.td_deque_head, thread_data->td.td_deque_tail));

  __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);

  return TASK_SUCCESSFULLY_PUSHED;
}
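// The next two routines maintain the per-thread chain of current tasks:
// popping restores th_current_task to the parent task, and pushing installs
// the implicit task of the given team for thread tid, linking the primary
// thread's previous task underneath it.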
void __kmp_pop_current_task_from_thread(kmp_info_t *this_thr) {
  KF_TRACE(10, ("__kmp_pop_current_task_from_thread(enter): T#%d "
                "this_thread=%p, curtask=%p, "
                "curtask_parent=%p\n",
                0, this_thr, this_thr->th.th_current_task,
                this_thr->th.th_current_task->td_parent));

  this_thr->th.th_current_task = this_thr->th.th_current_task->td_parent;

  KF_TRACE(10, ("__kmp_pop_current_task_from_thread(exit): T#%d "
                "this_thread=%p, curtask=%p, "
                "curtask_parent=%p\n",
                0, this_thr, this_thr->th.th_current_task,
                this_thr->th.th_current_task->td_parent));
}
void __kmp_push_current_task_to_thread(kmp_info_t *this_thr, kmp_team_t *team,
                                       int tid) {
  // The current task of the thread is the parent of the one just pushed.
  KF_TRACE(10, ("__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p "
                "curtask=%p parent_task=%p\n",
                tid, this_thr, this_thr->th.th_current_task,
                team->t.t_implicit_task_taskdata[tid].td_parent));

  KMP_DEBUG_ASSERT(this_thr != NULL);

  if (tid == 0) {
    if (this_thr->th.th_current_task != &team->t.t_implicit_task_taskdata[0]) {
      team->t.t_implicit_task_taskdata[0].td_parent =
          this_thr->th.th_current_task;
      this_thr->th.th_current_task = &team->t.t_implicit_task_taskdata[0];
    }
  } else {
    team->t.t_implicit_task_taskdata[tid].td_parent =
        team->t.t_implicit_task_taskdata[0].td_parent;
    this_thr->th.th_current_task = &team->t.t_implicit_task_taskdata[tid];
  }

  KF_TRACE(10, ("__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p "
                "curtask=%p parent_task=%p\n",
                tid, this_thr, this_thr->th.th_current_task,
                team->t.t_implicit_task_taskdata[tid].td_parent));
}
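// __kmp_task_start suspends the currently executing task and installs the
// new one as th_current_task.  The started/executing assertions are relaxed
// for untied tasks, which may be re-entered after being resumed by any
// thread.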
static void __kmp_task_start(kmp_int32 gtid, kmp_task_t *task,
                             kmp_taskdata_t *current_task) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_info_t *thread = __kmp_threads[gtid];

  KA_TRACE(10,
           ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
            gtid, taskdata, current_task));

  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);

  // Mark the currently executing task as suspended.
  current_task->td_flags.executing = 0;

  // Mark the starting task as executing and as the current task.
  thread->th.th_current_task = taskdata;

  KMP_DEBUG_ASSERT(taskdata->td_flags.started == 0 ||
                   taskdata->td_flags.tiedness == TASK_UNTIED);
  KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 0 ||
                   taskdata->td_flags.tiedness == TASK_UNTIED);
  taskdata->td_flags.started = 1;
  taskdata->td_flags.executing = 1;
  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);

  KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n", gtid, taskdata));
}
#if OMPT_SUPPORT
// __ompt_task_start: fire the task-schedule callback when a task starts.
static inline void __ompt_task_start(kmp_task_t *task,
                                     kmp_taskdata_t *current_task,
                                     kmp_int32 gtid) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  ompt_task_status_t status = ompt_task_switch;
  if (__kmp_threads[gtid]->th.ompt_thread_info.ompt_task_yielded) {
    status = ompt_task_yield;
    __kmp_threads[gtid]->th.ompt_thread_info.ompt_task_yielded = 0;
  }
  if (ompt_enabled.ompt_callback_task_schedule) {
    ompt_callbacks.ompt_callback(ompt_callback_task_schedule)(
        &(current_task->ompt_task_info.task_data), status,
        &(taskdata->ompt_task_info.task_data));
  }
  taskdata->ompt_task_info.scheduling_parent = current_task;
}

// __ompt_task_finish: fire the task-schedule callback when a task finishes.
static inline void __ompt_task_finish(kmp_task_t *task,
                                      kmp_taskdata_t *resumed_task,
                                      ompt_task_status_t status) {
  if (ompt_enabled.ompt_callback_task_schedule) {
    kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
    if (__kmp_omp_cancellation && taskdata->td_taskgroup &&
        taskdata->td_taskgroup->cancel_request == cancel_taskgroup) {
      status = ompt_task_cancel;
    }
    ompt_callbacks.ompt_callback(ompt_callback_task_schedule)(
        &(taskdata->ompt_task_info.task_data), status,
        (resumed_task ? &(resumed_task->ompt_task_info.task_data) : NULL));
  }
}
#endif // OMPT_SUPPORT
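// An if(0) (undeferred) task is executed immediately by the encountering
// thread: the template below forces task_serial and starts the task, with
// the bool template parameter selecting whether OMPT frames are set up and
// the task-create callback is fired.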
template <bool ompt>
static void __kmpc_omp_task_begin_if0_template(ident_t *loc_ref, kmp_int32 gtid,
                                               kmp_task_t *task,
                                               void *frame_address,
                                               void *return_address) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;

  KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p "
                "current_task=%p\n",
                gtid, loc_ref, taskdata, current_task));

  if (UNLIKELY(taskdata->td_flags.tiedness == TASK_UNTIED)) {
    // An untied task needs to increment the counter so that the task
    // structure is not freed prematurely.
    kmp_int32 counter = 1 + KMP_ATOMIC_INC(&taskdata->td_untied_count);
    KMP_DEBUG_USE_VAR(counter);
    KA_TRACE(20, ("__kmpc_omp_task_begin_if0: T#%d untied_count (%d) "
                  "incremented for task %p\n",
                  gtid, counter, taskdata));
  }

  taskdata->td_flags.task_serial =
      1; // Execute this task immediately, not deferred.
  __kmp_task_start(gtid, task, current_task);

#if OMPT_SUPPORT
  if (ompt) {
    if (current_task->ompt_task_info.frame.enter_frame.ptr == NULL) {
      current_task->ompt_task_info.frame.enter_frame.ptr =
          taskdata->ompt_task_info.frame.exit_frame.ptr = frame_address;
      current_task->ompt_task_info.frame.enter_frame_flags =
          taskdata->ompt_task_info.frame.exit_frame_flags =
              OMPT_FRAME_FLAGS_APP;
    }
    if (ompt_enabled.ompt_callback_task_create) {
      ompt_task_info_t *parent_info = &(current_task->ompt_task_info);
      ompt_callbacks.ompt_callback(ompt_callback_task_create)(
          &(parent_info->task_data), &(parent_info->frame),
          &(taskdata->ompt_task_info.task_data),
          TASK_TYPE_DETAILS_FORMAT(taskdata), 0, return_address);
    }
    __ompt_task_start(task, current_task, gtid);
  }
#endif // OMPT_SUPPORT

  KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n", gtid,
                loc_ref, taskdata));
}

#if OMPT_SUPPORT
OMPT_NOINLINE
static void __kmpc_omp_task_begin_if0_ompt(ident_t *loc_ref, kmp_int32 gtid,
                                           kmp_task_t *task,
                                           void *frame_address,
                                           void *return_address) {
  __kmpc_omp_task_begin_if0_template<true>(loc_ref, gtid, task, frame_address,
                                           return_address);
}
#endif // OMPT_SUPPORT

// __kmpc_omp_task_begin_if0: report that a given serialized task has started
// execution.
#ifdef __s390x__
// The backchain target attribute is required for OMPT_GET_FRAME_ADDRESS(1)
// to produce a correct value on s390x.
__attribute__((target("backchain")))
#endif
void __kmpc_omp_task_begin_if0(ident_t *loc_ref, kmp_int32 gtid,
                               kmp_task_t *task) {
#if OMPT_SUPPORT
  if (UNLIKELY(ompt_enabled.enabled)) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
    __kmpc_omp_task_begin_if0_ompt(loc_ref, gtid, task,
                                   OMPT_GET_FRAME_ADDRESS(1),
                                   OMPT_LOAD_RETURN_ADDRESS(gtid));
    return;
  }
#endif
  __kmpc_omp_task_begin_if0_template<false>(loc_ref, gtid, task, NULL, NULL);
}
// __kmpc_omp_task_begin: report that a given task has started execution.
void __kmpc_omp_task_begin(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task) {
  kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;

  KA_TRACE(
      10,
      ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
       gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task));

  __kmp_task_start(gtid, task, current_task);

  KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n", gtid,
                loc_ref, KMP_TASK_TO_TASKDATA(task)));
}
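// Task structures are reference counted through td_allocated_child_tasks
// (initialized to 1 for the task itself).  __kmp_free_task releases one
// task's storage; __kmp_free_task_and_ancestors then walks up the parent
// chain freeing every completed ancestor whose count has dropped to zero.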
static void __kmp_free_task(kmp_int32 gtid, kmp_taskdata_t *taskdata,
                            kmp_info_t *thread) {
  KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n", gtid,
                taskdata));

  // Check that all flags and counters have the expected values.
  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
  KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 0);
  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 1);
  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
  KMP_DEBUG_ASSERT(taskdata->td_allocated_child_tasks == 0 ||
                   taskdata->td_flags.task_serial == 1);
  KMP_DEBUG_ASSERT(taskdata->td_incomplete_child_tasks == 0);
  kmp_task_t *task = KMP_TASKDATA_TO_TASK(taskdata);
  // Clear data so it is not reused later by mistake.
  task->data1.destructors = NULL;
  task->data2.priority = 0;

  taskdata->td_flags.freed = 1;
#if OMPX_TASKGRAPH
  // Do not free tasks that belong to a recorded task graph; reset them
  // instead so they can be re-executed.
  if (!taskdata->is_taskgraph) {
#endif
#if USE_FAST_MEMORY
    __kmp_fast_free(thread, taskdata);
#else /* ! USE_FAST_MEMORY */
    __kmp_thread_free(thread, taskdata);
#endif
#if OMPX_TASKGRAPH
  } else {
    taskdata->td_flags.complete = 0;
    taskdata->td_flags.started = 0;
    taskdata->td_flags.freed = 0;
    taskdata->td_flags.executing = 0;
    taskdata->td_flags.task_serial =
        (taskdata->td_parent->td_flags.final ||
         taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser);

    KMP_ATOMIC_ST_RLX(&taskdata->td_untied_count, 0);
    KMP_ATOMIC_ST_RLX(&taskdata->td_incomplete_child_tasks, 0);
    // Start at one because the count covers the current task and children.
    KMP_ATOMIC_ST_RLX(&taskdata->td_allocated_child_tasks, 1);
  }
#endif

  KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n", gtid, taskdata));
}
static void __kmp_free_task_and_ancestors(kmp_int32 gtid,
                                          kmp_taskdata_t *taskdata,
                                          kmp_info_t *thread) {
  // Proxy tasks must always be allowed to free their parents because they
  // can run in the background even in serial mode.
  kmp_int32 team_serial =
      (taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser) &&
      !taskdata->td_flags.proxy;
  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);

  kmp_int32 children = KMP_ATOMIC_DEC(&taskdata->td_allocated_child_tasks) - 1;
  KMP_DEBUG_ASSERT(children >= 0);

  // Go up the ancestor tree to see if any ancestors can now be freed.
  while (children == 0) {
    kmp_taskdata_t *parent_taskdata = taskdata->td_parent;

    KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
                  "and freeing itself\n",
                  gtid, taskdata));

    // --- Deallocate my ancestor task ---
    __kmp_free_task(gtid, taskdata, thread);

    taskdata = parent_taskdata;

    if (team_serial)
      return;
    // Stop at the implicit task instead of walking all the way up, to avoid
    // premature deallocation of ancestors.
    if (taskdata->td_flags.tasktype == TASK_IMPLICIT) {
      if (taskdata->td_dephash) { // do we need to clean up the dephash?
        int children = KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks);
        kmp_tasking_flags_t flags_old = taskdata->td_flags;
        if (children == 0 && flags_old.complete == 1) {
          kmp_tasking_flags_t flags_new = flags_old;
          flags_new.complete = 0;
          if (KMP_COMPARE_AND_STORE_ACQ32(
                  RCAST(kmp_int32 *, &taskdata->td_flags),
                  *RCAST(kmp_int32 *, &flags_old),
                  *RCAST(kmp_int32 *, &flags_new))) {
            KA_TRACE(100, ("__kmp_free_task_and_ancestors: T#%d cleans "
                           "dephash of implicit task %p\n",
                           gtid, taskdata));
            // Clean up the dephash of the finished implicit task.
            __kmp_dephash_free_entries(thread, taskdata->td_dephash);
          }
        }
      }
      return;
    }
    // Predecrement simulated by the "- 1" calculation.
    children = KMP_ATOMIC_DEC(&taskdata->td_allocated_child_tasks) - 1;
    KMP_DEBUG_ASSERT(children >= 0);
  }

  KA_TRACE(
      20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
           "not freeing it yet\n",
           gtid, taskdata, children));
}
// Child task counts only need to be tracked when: (1) the team is parallel
// and tasking is not serialized; (2) the task is a proxy, detachable, or
// hidden helper task; or (3) the parent already has incomplete children
// (e.g. a serialized team that encountered a detached or hidden helper task).
static bool __kmp_track_children_task(kmp_taskdata_t *taskdata) {
  kmp_tasking_flags_t flags = taskdata->td_flags;
  bool ret = !(flags.team_serial || flags.tasking_ser);
  ret = ret || flags.proxy == TASK_PROXY ||
        flags.detachable == TASK_DETACHABLE || flags.hidden_helper;
  ret = ret ||
        KMP_ATOMIC_LD_ACQ(&taskdata->td_parent->td_incomplete_child_tasks) > 0;
#if OMPX_TASKGRAPH
  if (taskdata->td_taskgroup && taskdata->is_taskgraph)
    ret = ret || KMP_ATOMIC_LD_ACQ(&taskdata->td_taskgroup->count) > 0;
#endif
  return ret;
}
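// __kmp_task_finish is the common completion path: it decrements the untied
// counter (returning early if the task is only partially done), runs any
// compiler-generated destructor thunk, turns an unfulfilled detachable task
// into a proxy instead of completing it, re-enqueues target tasks that still
// have an async handle, releases dependences, updates the parent's child
// counter and the taskgroup counter, and finally frees the task and its
// completed ancestors.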
template <bool ompt>
static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
                              kmp_taskdata_t *resumed_task) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_task_team_t *task_team =
      thread->th.th_task_team; // might be NULL for serial teams
#if OMPX_TASKGRAPH
  bool is_taskgraph;
#endif
  kmp_int32 children = 0;

  KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming "
                "task %p\n",
                gtid, taskdata, resumed_task));

  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);

#if OMPX_TASKGRAPH
  is_taskgraph = taskdata->is_taskgraph;
#endif

  if (UNLIKELY(taskdata->td_flags.tiedness == TASK_UNTIED)) {
    // An untied task checks the counter so that the task structure is not
    // freed prematurely.
    kmp_int32 counter = KMP_ATOMIC_DEC(&taskdata->td_untied_count) - 1;
    KA_TRACE(
        20,
        ("__kmp_task_finish: T#%d untied_count (%d) decremented for task %p\n",
         gtid, counter, taskdata));
    if (counter > 0) {
      // The untied task is not done; it may be continued by another thread.
      if (resumed_task == NULL) {
        KMP_DEBUG_ASSERT(taskdata->td_flags.task_serial);
        resumed_task = taskdata->td_parent; // in a serialized task, resume the
        // parent
      }
      thread->th.th_current_task = resumed_task; // restore current_task
      resumed_task->td_flags.executing = 1; // resume previous task
      KA_TRACE(10, ("__kmp_task_finish(exit): T#%d partially done task %p, "
                    "resuming task %p\n",
                    gtid, taskdata, resumed_task));
      return;
    }
  }

  // Bookkeeping for the resuming task (note: tasking_ser => task_serial).
  KMP_DEBUG_ASSERT(
      (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
      taskdata->td_flags.task_serial);
  if (taskdata->td_flags.task_serial) {
    if (resumed_task == NULL) {
      resumed_task = taskdata->td_parent; // in a serialized task, resume the
      // parent
    }
  } else {
    KMP_DEBUG_ASSERT(resumed_task !=
                     NULL); // verify that the resumed task was passed in
  }

  // If the destructors-thunk flag is set, invoke the destructor thunk
  // generated by the compiler.
  if (UNLIKELY(taskdata->td_flags.destructors_thunk)) {
    kmp_routine_entry_t destr_thunk = task->data1.destructors;
    KMP_ASSERT(destr_thunk);
    destr_thunk(gtid, task);
  }

  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
  KMP_DEBUG_ASSERT(taskdata->td_flags.started == 1);
  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);

  bool completed = true;
  if (UNLIKELY(taskdata->td_flags.detachable == TASK_DETACHABLE)) {
    if (taskdata->td_allow_completion_event.type ==
        KMP_EVENT_ALLOW_COMPLETION) {
      // The event has not been fulfilled yet; try to detach the task.
      __kmp_acquire_tas_lock(&taskdata->td_allow_completion_event.lock, gtid);
      if (taskdata->td_allow_completion_event.type ==
          KMP_EVENT_ALLOW_COMPLETION) {
        // The task finished execution but completion is deferred.
        KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 1);
        taskdata->td_flags.executing = 0; // suspend the finishing task

#if OMPT_SUPPORT
        // For a detached, not-yet-completed task; locking avoids a race with
        // ompt_task_late_fulfill.
        if (ompt)
          __ompt_task_finish(task, resumed_task, ompt_task_detach);
#endif

        // No access to taskdata after this point: __kmp_fulfill_event might
        // free it at any time from now on.
        taskdata->td_flags.proxy = TASK_PROXY; // proxify!
        completed = false;
      }
      __kmp_release_tas_lock(&taskdata->td_allow_completion_event.lock, gtid);
    }
  }

  // Tasks with valid target async handles must be re-enqueued.
  if (taskdata->td_target_data.async_handle != NULL) {
#if OMPT_SUPPORT
    if (ompt)
      __ompt_task_finish(task, resumed_task, ompt_task_switch);
#endif
    __kmpc_give_task(task, __kmp_tid_from_gtid(gtid));
    if (KMP_HIDDEN_HELPER_THREAD(gtid))
      __kmp_hidden_helper_worker_thread_signal();
    completed = false;
  }

  if (completed) {
    taskdata->td_flags.complete = 1; // mark the task as completed
#if OMPX_TASKGRAPH
    taskdata->td_flags.onced = 1; // mark the task as having run once already
#endif

#if OMPT_SUPPORT
    // This is not a detached task; we are done here.
    if (ompt)
      __ompt_task_finish(task, resumed_task, ompt_task_complete);
#endif
    if (__kmp_track_children_task(taskdata)) {
      __kmp_release_deps(gtid, taskdata);
      // Predecrement simulated by the "- 1" calculation.
      children =
          KMP_ATOMIC_DEC(&taskdata->td_parent->td_incomplete_child_tasks) - 1;
      KMP_DEBUG_ASSERT(children >= 0);
#if OMPX_TASKGRAPH
      if (taskdata->td_taskgroup && !taskdata->is_taskgraph)
#else
      if (taskdata->td_taskgroup)
#endif
        KMP_ATOMIC_DEC(&taskdata->td_taskgroup->count);
    } else if (task_team && (task_team->tt.tt_found_proxy_tasks ||
                             task_team->tt.tt_hidden_helper_task_encountered)) {
      // If proxy or hidden helper tasks were found, a dependency chain with a
      // proxy task as origin may exist.
      __kmp_release_deps(gtid, taskdata);
    }
    // td_flags.executing must be cleared only after __kmp_release_deps has
    // run; otherwise a task executed directly from release_deps would reset
    // the flag back to 1.
    KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 1);
    taskdata->td_flags.executing = 0; // suspend the finishing task

    // Decrement the counter of hidden helper tasks still to be executed.
    if (taskdata->td_flags.hidden_helper) {
      // Hidden helper tasks can only be executed by hidden helper threads.
      KMP_ASSERT(KMP_HIDDEN_HELPER_THREAD(gtid));
      KMP_ATOMIC_DEC(&__kmp_unexecuted_hidden_helper_tasks);
    }
  }

  KA_TRACE(
      20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
           gtid, taskdata, children));

  // Restore th_current_task first so an asynchronous inquiry into the runtime
  // never sees the freed task as the current task, then free this task and
  // any ancestors without children.
  thread->th.th_current_task = resumed_task;
  if (completed)
    __kmp_free_task_and_ancestors(gtid, taskdata, thread);

  resumed_task->td_flags.executing = 1; // resume previous task

#if OMPX_TASKGRAPH
  if (is_taskgraph && __kmp_track_children_task(taskdata) &&
      taskdata->td_taskgroup) {
    // TDG: release the taskgroup barrier only here, because
    // __kmp_free_task_and_ancestors resets task parameters that could
    // otherwise be read before being reset when the task(data) is reused.
    KMP_ATOMIC_DEC(&taskdata->td_taskgroup->count);
  }
#endif

  KA_TRACE(
      10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
           gtid, taskdata, resumed_task));
}
template <bool ompt>
static void __kmpc_omp_task_complete_if0_template(ident_t *loc_ref,
                                                  kmp_int32 gtid,
                                                  kmp_task_t *task) {
  KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
                gtid, loc_ref, KMP_TASK_TO_TASKDATA(task)));
  KMP_DEBUG_ASSERT(gtid >= 0);
  // This routine will determine the task to resume itself.
  __kmp_task_finish<ompt>(gtid, task, NULL);

  KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
                gtid, loc_ref, KMP_TASK_TO_TASKDATA(task)));

#if OMPT_SUPPORT
  if (ompt) {
    ompt_frame_t *ompt_frame;
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    ompt_frame->enter_frame = ompt_data_none;
    ompt_frame->enter_frame_flags = OMPT_FRAME_FLAGS_RUNTIME;
  }
#endif
}

#if OMPT_SUPPORT
OMPT_NOINLINE
void __kmpc_omp_task_complete_if0_ompt(ident_t *loc_ref, kmp_int32 gtid,
                                       kmp_task_t *task) {
  __kmpc_omp_task_complete_if0_template<true>(loc_ref, gtid, task);
}
#endif // OMPT_SUPPORT

// __kmpc_omp_task_complete_if0: report that a task has completed execution.
void __kmpc_omp_task_complete_if0(ident_t *loc_ref, kmp_int32 gtid,
                                  kmp_task_t *task) {
#if OMPT_SUPPORT
  if (UNLIKELY(ompt_enabled.enabled)) {
    __kmpc_omp_task_complete_if0_ompt(loc_ref, gtid, task);
    return;
  }
#endif
  __kmpc_omp_task_complete_if0_template<false>(loc_ref, gtid, task);
}

// __kmpc_omp_task_complete: report that a task has completed execution.
void __kmpc_omp_task_complete(ident_t *loc_ref, kmp_int32 gtid,
                              kmp_task_t *task) {
  KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n", gtid,
                loc_ref, KMP_TASK_TO_TASKDATA(task)));

  __kmp_task_finish<false>(gtid, task, NULL);

  KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n", gtid,
                loc_ref, KMP_TASK_TO_TASKDATA(task)));
}
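// Implicit-task management: each thread of a team owns an implicit task
// (t_implicit_task_taskdata[tid]) that serves as the root of its explicit
// task tree.  The routines below initialize that task, flush its dependence
// hash at the end of a parallel region, and free it when the thread is
// destroyed.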
void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr,
                              kmp_team_t *team, int tid, int set_curr_task) {
  kmp_taskdata_t *task = &team->t.t_implicit_task_taskdata[tid];

  KF_TRACE(
      10,
      ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
       tid, team, task, set_curr_task ? "TRUE" : "FALSE"));

  task->td_task_id = KMP_GEN_TASK_ID();
  task->td_team = team;
  task->td_ident = loc_ref;
  task->td_taskwait_ident = NULL;
  task->td_taskwait_counter = 0;
  task->td_taskwait_thread = 0;

  task->td_flags.tiedness = TASK_TIED;
  task->td_flags.tasktype = TASK_IMPLICIT;
  task->td_flags.proxy = TASK_FULL;

  // All implicit tasks are executed immediately, not deferred.
  task->td_flags.task_serial = 1;
  task->td_flags.tasking_ser = (__kmp_tasking_mode == tskm_immediate_exec);
  task->td_flags.team_serial = (team->t.t_serialized) ? 1 : 0;

  task->td_flags.started = 1;
  task->td_flags.executing = 1;
  task->td_flags.complete = 0;
  task->td_flags.freed = 0;
#if OMPX_TASKGRAPH
  task->td_flags.onced = 0;
#endif

  task->td_depnode = NULL;
  task->td_last_tied = task;
  task->td_allow_completion_event.type = KMP_EVENT_UNINITIALIZED;

  if (set_curr_task) { // only do this init the first time the thread is created
    KMP_ATOMIC_ST_REL(&task->td_incomplete_child_tasks, 0);
    // Not used: the implicit task is never deallocated.
    KMP_ATOMIC_ST_REL(&task->td_allocated_child_tasks, 0);
    task->td_taskgroup = NULL; // an implicit task has no taskgroup
    task->td_dephash = NULL;
    __kmp_push_current_task_to_thread(this_thr, team, tid);
  } else {
    KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
    KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0);
  }

#if OMPT_SUPPORT
  if (UNLIKELY(ompt_enabled.enabled))
    __ompt_task_init(task, tid);
#endif

  KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n", tid,
                team, task));
}
// __kmp_finish_implicit_task: release resources associated with implicit
// tasks at the end of a parallel region (some resources are kept for reuse).
void __kmp_finish_implicit_task(kmp_info_t *thread) {
  kmp_taskdata_t *task = thread->th.th_current_task;
#if ENABLE_LIBOMPTARGET
  // Give the offload runtime an opportunity to synchronize any unfinished
  // target async regions before finishing the implicit task.
  if (UNLIKELY(kmp_target_sync_cb != NULL))
    (*kmp_target_sync_cb)(NULL, thread->th.th_info.ds.ds_gtid,
                          KMP_TASKDATA_TO_TASK(task), NULL);
#endif // ENABLE_LIBOMPTARGET
  if (task->td_dephash) {
    int children;
    task->td_flags.complete = 1;
#if OMPX_TASKGRAPH
    task->td_flags.onced = 1;
#endif
    children = KMP_ATOMIC_LD_ACQ(&task->td_incomplete_child_tasks);
    kmp_tasking_flags_t flags_old = task->td_flags;
    if (children == 0 && flags_old.complete == 1) {
      kmp_tasking_flags_t flags_new = flags_old;
      flags_new.complete = 0;
      if (KMP_COMPARE_AND_STORE_ACQ32(RCAST(kmp_int32 *, &task->td_flags),
                                      *RCAST(kmp_int32 *, &flags_old),
                                      *RCAST(kmp_int32 *, &flags_new))) {
        KA_TRACE(100, ("__kmp_finish_implicit_task: T#%d cleans "
                       "dephash of implicit task %p\n",
                       thread->th.th_info.ds.ds_gtid, task));
        __kmp_dephash_free_entries(thread, task->td_dephash);
      }
    }
  }
}
// __kmp_free_implicit_task: release resources associated with implicit tasks
// when these are destroyed.
void __kmp_free_implicit_task(kmp_info_t *thread) {
  kmp_taskdata_t *task = thread->th.th_current_task;
  if (task && task->td_dephash) {
    __kmp_dephash_free(thread, task->td_dephash);
    task->td_dephash = NULL;
  }
}
// Round a size up to a multiple of val (a power of two): used to insert
// padding between structures co-allocated with a single malloc() call.
static size_t __kmp_round_up_to_val(size_t size, size_t val) {
  if (size & (val - 1)) {
    size &= ~(val - 1);
    if (size <= KMP_SIZE_T_MAX - val) {
      size += val; // round up only if there is no overflow
    }
  }
  return size;
}
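// __kmp_task_alloc lays out a task as one contiguous allocation:
//
//   [ kmp_taskdata_t | kmp_task_t + private data | shareds ]
//
// shareds_offset is rounded up (via __kmp_round_up_to_val above) so the
// shareds block stays suitably aligned.  The routine then fills in all
// bookkeeping: parent linkage, nesting level, inherited ICVs and taskgroup,
// and the serialization flags that decide whether the task may be deferred.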
kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
                             kmp_tasking_flags_t *flags,
                             size_t sizeof_kmp_task_t, size_t sizeof_shareds,
                             kmp_routine_entry_t task_entry) {
  kmp_task_t *task;
  kmp_taskdata_t *taskdata;
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_team_t *team = thread->th.th_team;
  kmp_taskdata_t *parent_task = thread->th.th_current_task;
  size_t shareds_offset;

  if (UNLIKELY(!TCR_4(__kmp_init_middle)))
    __kmp_middle_initialize();

  if (flags->hidden_helper) {
    if (__kmp_enable_hidden_helper) {
      if (!TCR_4(__kmp_init_hidden_helper))
        __kmp_hidden_helper_initialize();
    } else {
      // Hidden helper tasks are disabled; reset the flag.
      flags->hidden_helper = FALSE;
    }
  }

  KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
                "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
                sizeof_shareds, task_entry));

  KMP_DEBUG_ASSERT(parent_task);
  if (parent_task->td_flags.final) {
    if (flags->merged_if0) {
    }
    flags->final = 1;
  }

  if (flags->tiedness == TASK_UNTIED && !team->t.t_serialized) {
    // An untied task makes the TSC algorithm check the victim thread's entire
    // deque; without one, checking the head of the deque is enough.
    KMP_CHECK_UPDATE(thread->th.th_task_team->tt.tt_untied_task_encountered, 1);
  }

  // Detachable tasks are not proxy tasks yet but could become so; doing the
  // tasking setup whenever a possible proxy task is created covers all cases.
  if (UNLIKELY(flags->proxy == TASK_PROXY ||
               flags->detachable == TASK_DETACHABLE || flags->hidden_helper)) {
    if (flags->proxy == TASK_PROXY) {
      flags->tiedness = TASK_UNTIED;
      flags->merged_if0 = 1;
    }
    // Running in a serialized parallel region or tskm_immediate_exec: we still
    // need tasking support enabled.
    if ((thread->th.th_task_team) == NULL) {
      // This should only happen if the team is serialized: set up a task team
      // and propagate it to the thread.
      KMP_DEBUG_ASSERT(team->t.t_serialized);
      KA_TRACE(30,
               ("T#%d creating task team in __kmp_task_alloc for proxy task\n",
                gtid));
      __kmp_task_team_setup(thread, team);
      thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state];
    }
    kmp_task_team_t *task_team = thread->th.th_task_team;

    // Tasking must be enabled now because the task might not be pushed.
    if (!KMP_TASKING_ENABLED(task_team)) {
      KA_TRACE(
          30,
          ("T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid));
      __kmp_enable_tasking(task_team, thread);
      kmp_int32 tid = thread->th.th_info.ds.ds_tid;
      kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[tid];
      // No lock needed since only the owner can allocate.
      if (thread_data->td.td_deque == NULL) {
        __kmp_alloc_task_deque(thread, thread_data);
      }
    }

    if ((flags->proxy == TASK_PROXY || flags->detachable == TASK_DETACHABLE) &&
        task_team->tt.tt_found_proxy_tasks == FALSE)
      TCW_4(task_team->tt.tt_found_proxy_tasks, TRUE);
    if (flags->hidden_helper &&
        task_team->tt.tt_hidden_helper_task_encountered == FALSE)
      TCW_4(task_team->tt.tt_hidden_helper_task_encountered, TRUE);
  }

  // Calculate the shared-structure offset, padding after the kmp_task_t
  // struct to keep pointers in the shareds struct aligned.
  shareds_offset = sizeof(kmp_taskdata_t) + sizeof_kmp_task_t;
  shareds_offset = __kmp_round_up_to_val(shareds_offset, sizeof(kmp_uint64));

  // Allocate a kmp_taskdata_t block and a kmp_task_t block in one piece.
  KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n", gtid,
                shareds_offset));
  KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n", gtid,
                sizeof_shareds));

#if USE_FAST_MEMORY
  taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(thread, shareds_offset +
                                                               sizeof_shareds);
#else /* ! USE_FAST_MEMORY */
  taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(thread, shareds_offset +
                                                               sizeof_shareds);
#endif /* USE_FAST_MEMORY */

  task = KMP_TASKDATA_TO_TASK(taskdata);

  // Make sure task & taskdata are appropriately aligned.
#if KMP_ARCH_X86 || KMP_ARCH_PPC64 || KMP_ARCH_S390X || !KMP_HAVE_QUAD
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)taskdata) & (sizeof(double) - 1)) == 0);
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)task) & (sizeof(double) - 1)) == 0);
#else
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)taskdata) & (sizeof(_Quad) - 1)) == 0);
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)task) & (sizeof(_Quad) - 1)) == 0);
#endif
  if (sizeof_shareds > 0) {
    // Avoid a double allocation by co-locating shareds with taskdata.
    task->shareds = &((char *)taskdata)[shareds_offset];
    // Make sure the shareds struct is aligned to pointer size.
    KMP_DEBUG_ASSERT((((kmp_uintptr_t)task->shareds) & (sizeof(void *) - 1)) ==
                     0);
  } else {
    task->shareds = NULL;
  }
  task->routine = task_entry;
  task->part_id = 0; // always start with part id 0

  taskdata->td_task_id = KMP_GEN_TASK_ID();
  taskdata->td_team = thread->th.th_team;
  taskdata->td_alloc_thread = thread;
  taskdata->td_parent = parent_task;
  taskdata->td_level = parent_task->td_level + 1; // increment nesting level
  KMP_ATOMIC_ST_RLX(&taskdata->td_untied_count, 0);
  taskdata->td_ident = loc_ref;
  taskdata->td_taskwait_ident = NULL;
  taskdata->td_taskwait_counter = 0;
  taskdata->td_taskwait_thread = 0;
  KMP_DEBUG_ASSERT(taskdata->td_parent != NULL);
  // Avoid copying ICVs for proxy tasks.
  if (flags->proxy == TASK_FULL)
    copy_icvs(&taskdata->td_icvs, &taskdata->td_parent->td_icvs);

  taskdata->td_flags = *flags;
  taskdata->td_task_team = thread->th.th_task_team;
  taskdata->td_size_alloc = shareds_offset + sizeof_shareds;
  taskdata->td_flags.tasktype = TASK_EXPLICIT;

  // For hidden helper tasks, set the team and task team to the shadow
  // thread's ones.
  if (flags->hidden_helper) {
    kmp_info_t *shadow_thread = __kmp_threads[KMP_GTID_TO_SHADOW_GTID(gtid)];
    taskdata->td_team = shadow_thread->th.th_team;
    taskdata->td_task_team = shadow_thread->th.th_task_team;
  }

  taskdata->td_flags.tasking_ser = (__kmp_tasking_mode == tskm_immediate_exec);

  taskdata->td_flags.team_serial = (team->t.t_serialized) ? 1 : 0;

  // Serialize the task if the team is serialized, so implicit parallel region
  // tasks are not left unexecuted until program termination; it also helps
  // locality to execute immediately.
  taskdata->td_flags.task_serial =
      (parent_task->td_flags.final || taskdata->td_flags.team_serial ||
       taskdata->td_flags.tasking_ser || flags->merged_if0);

  taskdata->td_flags.started = 0;
  taskdata->td_flags.executing = 0;
  taskdata->td_flags.complete = 0;
  taskdata->td_flags.freed = 0;
#if OMPX_TASKGRAPH
  taskdata->td_flags.onced = 0;
  taskdata->is_taskgraph = 0;
  taskdata->tdg = nullptr;
#endif
  KMP_ATOMIC_ST_RLX(&taskdata->td_incomplete_child_tasks, 0);
  // Start at one because the count covers the current task and its children.
  KMP_ATOMIC_ST_RLX(&taskdata->td_allocated_child_tasks, 1);
  taskdata->td_taskgroup =
      parent_task->td_taskgroup; // task inherits taskgroup from parent
  taskdata->td_dephash = NULL;
  taskdata->td_depnode = NULL;
  taskdata->td_target_data.async_handle = NULL;
  if (flags->tiedness == TASK_UNTIED)
    taskdata->td_last_tied = NULL; // will be set when the task is scheduled
  else
    taskdata->td_last_tied = taskdata;
  taskdata->td_allow_completion_event.type = KMP_EVENT_UNINITIALIZED;

#if OMPT_SUPPORT
  if (UNLIKELY(ompt_enabled.enabled))
    __ompt_task_init(taskdata, gtid);
#endif
  if (__kmp_track_children_task(taskdata)) {
    KMP_ATOMIC_INC(&parent_task->td_incomplete_child_tasks);
    if (parent_task->td_taskgroup)
      KMP_ATOMIC_INC(&parent_task->td_taskgroup->count);
    // Only track allocated child tasks for explicit tasks, since implicit
    // ones are never deallocated.
    if (taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT) {
      KMP_ATOMIC_INC(&taskdata->td_parent->td_allocated_child_tasks);
    }
    if (flags->hidden_helper) {
      taskdata->td_flags.task_serial = FALSE;
      // Increment the number of hidden helper tasks still to be executed.
      KMP_ATOMIC_INC(&__kmp_unexecuted_hidden_helper_tasks);
    }
  }

#if OMPX_TASKGRAPH
  kmp_tdg_info_t *tdg = __kmp_find_tdg(__kmp_curr_tdg_idx);
  if (tdg && __kmp_tdg_is_recording(tdg->tdg_status) &&
      (task_entry != (kmp_routine_entry_t)__kmp_taskloop_task)) {
    taskdata->is_taskgraph = 1;
    taskdata->tdg = __kmp_global_tdgs[__kmp_curr_tdg_idx];
    taskdata->td_task_id = KMP_GEN_TASK_ID();
    taskdata->td_tdg_task_id = KMP_ATOMIC_INC(&__kmp_tdg_task_id);
  }
#endif
  KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
                gtid, taskdata, taskdata->td_parent));

  return task;
}
kmp_task_t *__kmpc_omp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
                                  kmp_int32 flags, size_t sizeof_kmp_task_t,
                                  size_t sizeof_shareds,
                                  kmp_routine_entry_t task_entry) {
  kmp_task_t *retval;
  kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *)&flags;
  __kmp_assert_valid_gtid(gtid);
  input_flags->native = FALSE;
  // __kmp_task_alloc() sets up all other runtime flags.
  KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s %s) "
                "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                gtid, loc_ref, input_flags->tiedness ? "tied  " : "untied",
                input_flags->proxy ? "proxy" : "",
                input_flags->detachable ? "detachable" : "", sizeof_kmp_task_t,
                sizeof_shareds, task_entry));

  retval = __kmp_task_alloc(loc_ref, gtid, input_flags, sizeof_kmp_task_t,
                            sizeof_shareds, task_entry);

  KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval));

  return retval;
}
kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
                                         kmp_int32 flags,
                                         size_t sizeof_kmp_task_t,
                                         size_t sizeof_shareds,
                                         kmp_routine_entry_t task_entry,
                                         kmp_int64 device_id) {
  auto &input_flags = reinterpret_cast<kmp_tasking_flags_t &>(flags);
  // Target tasks are untied, as defined in the specification.
  input_flags.tiedness = TASK_UNTIED;
  input_flags.target = 1;

  if (__kmp_enable_hidden_helper)
    input_flags.hidden_helper = TRUE;

  return __kmpc_omp_task_alloc(loc_ref, gtid, flags, sizeof_kmp_task_t,
                               sizeof_shareds, task_entry);
}
kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref, kmp_int32 gtid,
                                            kmp_task_t *new_task,
                                            kmp_int32 naffins,
                                            kmp_task_affinity_info_t *affin_list) {
  KMP_DEBUG_ASSERT(affin_list != NULL);
  // The affinity hints are currently only traced, not acted upon.
  for (kmp_int32 i = 0; i < naffins; ++i) {
    KA_TRACE(30, ("__kmpc_omp_reg_task_with_affinity: T#%d aff[%d] "
                  "base_addr=0x%llx len=%zu flags={%d,%d,%d}\n",
                  gtid, i, (unsigned long long)affin_list[i].base_addr,
                  affin_list[i].len, (int)affin_list[i].flags.flag1,
                  (int)affin_list[i].flags.flag2,
                  (int)affin_list[i].flags.reserved));
  }
  return 0;
}
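// __kmp_invoke_task executes a task body on the calling thread.  A proxy
// task that already completed only needs its bottom half finished; a task
// whose taskgroup or parallel region was cancelled is discarded without
// running; GOMP-compatible ("native") thunks are called with the shareds
// pointer instead of (gtid, task); and target tasks with a pending async
// handle query the handle instead of re-running the routine.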
#ifdef __s390x__
__attribute__((target("backchain")))
#endif
static void
__kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task,
                  kmp_taskdata_t *current_task) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_info_t *thread;
  int discard = 0 /* false */;
  KA_TRACE(
      30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
           gtid, taskdata, current_task));
  KMP_DEBUG_ASSERT(task);
  if (UNLIKELY(taskdata->td_flags.proxy == TASK_PROXY &&
               taskdata->td_flags.complete == 1)) {
    // This is a proxy task that was already completed but still needs to run
    // its bottom-half finish.
    KA_TRACE(
        30,
        ("__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n",
         gtid, taskdata));

    __kmp_bottom_half_finish_proxy(gtid, task);

    KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed bottom finish for "
                  "proxy task %p, resuming task %p\n",
                  gtid, taskdata, current_task));

    return;
  }

#if OMPT_SUPPORT
  ompt_thread_info_t oldInfo;
  if (UNLIKELY(ompt_enabled.enabled)) {
    // Store the thread's state and restore it after the task.
    thread = __kmp_threads[gtid];
    oldInfo = thread->th.ompt_thread_info;
    thread->th.ompt_thread_info.wait_id = 0;
    thread->th.ompt_thread_info.state = (thread->th.th_team_serialized)
                                            ? ompt_state_work_serial
                                            : ompt_state_work_parallel;
    taskdata->ompt_task_info.frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
#endif

  // Proxy tasks are not handled by the runtime.
  if (taskdata->td_flags.proxy != TASK_PROXY) {
    __kmp_task_start(gtid, task, current_task);
  }

  // If cancellation has been enabled for this run, check for cancellation of
  // the enclosing taskgroup or parallel region.
  if (UNLIKELY(__kmp_omp_cancellation)) {
    thread = __kmp_threads[gtid];
    kmp_team_t *this_team = thread->th.th_team;
    kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup;
    if ((taskgroup && taskgroup->cancel_request) ||
        (this_team->t.t_cancel_request == cancel_parallel)) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
      ompt_data_t *task_data;
      if (UNLIKELY(ompt_enabled.ompt_callback_cancel)) {
        __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, NULL);
        ompt_callbacks.ompt_callback(ompt_callback_cancel)(
            task_data,
            ((taskgroup && taskgroup->cancel_request) ? ompt_cancel_taskgroup
                                                      : ompt_cancel_parallel) |
                ompt_cancel_discarded_task,
            NULL);
      }
#endif
      // This task belongs to a cancelled construct; discard it.
      discard = 1 /* true */;
    }
  }

  // Invoke the task routine and pass in relevant data.
  // Thunks generated by gcc take a different argument list.
  if (!discard) {
    if (taskdata->td_flags.tiedness == TASK_UNTIED) {
      taskdata->td_last_tied = current_task->td_last_tied;
      KMP_DEBUG_ASSERT(taskdata->td_last_tied);
    }
#if KMP_STATS_ENABLED
    KMP_COUNT_BLOCK(TASK_executed);
    switch (KMP_GET_THREAD_STATE()) {
    case FORK_JOIN_BARRIER:
      KMP_PUSH_PARTITIONED_TIMER(OMP_task_join_bar);
      break;
    case PLAIN_BARRIER:
      KMP_PUSH_PARTITIONED_TIMER(OMP_task_plain_bar);
      break;
    case TASKYIELD:
      KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskyield);
      break;
    case TASKWAIT:
      KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskwait);
      break;
    case TASKGROUP:
      KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskgroup);
      break;
    default:
      KMP_PUSH_PARTITIONED_TIMER(OMP_task_immediate);
      break;
    }
#endif // KMP_STATS_ENABLED

// OMPT task begin
#if OMPT_SUPPORT
    if (UNLIKELY(ompt_enabled.enabled))
      __ompt_task_start(task, current_task, gtid);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (UNLIKELY(ompt_enabled.ompt_callback_dispatch &&
                 taskdata->ompt_task_info.dispatch_chunk.iterations > 0)) {
      ompt_data_t instance = ompt_data_none;
      instance.ptr = &(taskdata->ompt_task_info.dispatch_chunk);
      ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
      ompt_callbacks.ompt_callback(ompt_callback_dispatch)(
          &(team_info->parallel_data), &(taskdata->ompt_task_info.task_data),
          ompt_dispatch_taskloop_chunk, instance);
      taskdata->ompt_task_info.dispatch_chunk = {0, 0};
    }
#endif // OMPT_SUPPORT && OMPT_OPTIONAL

#if OMPD_SUPPORT
    if (ompd_state & OMPD_ENABLE_BP)
      ompd_bp_task_begin();
#endif

#if USE_ITT_BUILD && USE_ITT_NOTIFY
    kmp_uint64 cur_time;
    kmp_int32 kmp_itt_count_task =
        __kmp_forkjoin_frames_mode == 3 && !taskdata->td_flags.task_serial &&
        current_task->td_flags.tasktype == TASK_IMPLICIT;
    if (kmp_itt_count_task) {
      thread = __kmp_threads[gtid];
      // Time outer-level explicit tasks on barriers to adjust imbalance time.
      if (thread->th.th_bar_arrive_time)
        cur_time = __itt_get_timestamp();
      else
        kmp_itt_count_task = 0; // thread is not on a barrier; skip timing
    }
    KMP_FSYNC_ACQUIRED(taskdata); // acquired self (new task)
#endif

#if ENABLE_LIBOMPTARGET
    if (taskdata->td_target_data.async_handle != NULL) {
      // A valid target async handle means the task routine already ran once;
      // query for handle completion instead of re-executing the routine.
      KMP_ASSERT(tgt_target_nowait_query);
      tgt_target_nowait_query(&taskdata->td_target_data.async_handle);
    } else
#endif
    if (task->routine != NULL) {
#ifdef KMP_GOMP_COMPAT
      if (taskdata->td_flags.native) {
        ((void (*)(void *))(*(task->routine)))(task->shareds);
      } else
#endif /* KMP_GOMP_COMPAT */
      {
        (*(task->routine))(gtid, task);
      }
    }
    KMP_POP_PARTITIONED_TIMER();

#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if (kmp_itt_count_task) {
      // Barrier imbalance: adjust arrive time with the task duration.
      thread->th.th_bar_arrive_time += (__itt_get_timestamp() - cur_time);
    }
    KMP_FSYNC_CANCEL(taskdata); // destroy self (just executed)
    KMP_FSYNC_RELEASING(taskdata->td_parent); // releasing parent
#endif
  }

#if OMPD_SUPPORT
  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_task_end();
#endif

  // Proxy tasks are not handled by the runtime.
  if (taskdata->td_flags.proxy != TASK_PROXY) {
#if OMPT_SUPPORT
    if (UNLIKELY(ompt_enabled.enabled)) {
      thread->th.ompt_thread_info = oldInfo;
      if (taskdata->td_flags.tiedness == TASK_TIED) {
        taskdata->ompt_task_info.frame.exit_frame = ompt_data_none;
      }
      __kmp_task_finish<true>(gtid, task, current_task);
    } else
#endif
      __kmp_task_finish<false>(gtid, task, current_task);
  }
#if OMPT_SUPPORT
  else if (UNLIKELY(ompt_enabled.enabled && taskdata->td_flags.target)) {
    __ompt_task_finish(task, current_task, ompt_task_switch);
  }
#endif

  KA_TRACE(
      30,
      ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n",
       gtid, taskdata, current_task));
}
// __kmpc_omp_task_parts: schedule a thread-switchable task for execution.
kmp_int32 __kmpc_omp_task_parts(ident_t *loc_ref, kmp_int32 gtid,
                                kmp_task_t *new_task) {
  kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);

  KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n", gtid,
                loc_ref, new_taskdata));

#if OMPT_SUPPORT
  kmp_taskdata_t *parent;
  if (UNLIKELY(ompt_enabled.enabled)) {
    parent = new_taskdata->td_parent;
    if (ompt_enabled.ompt_callback_task_create) {
      ompt_callbacks.ompt_callback(ompt_callback_task_create)(
          &(parent->ompt_task_info.task_data), &(parent->ompt_task_info.frame),
          &(new_taskdata->ompt_task_info.task_data),
          TASK_TYPE_DETAILS_FORMAT(new_taskdata), 0,
          OMPT_GET_RETURN_ADDRESS(0));
    }
  }
#endif

  // Always try to queue the new task; if the queue is full, execute it.
  if (__kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED) // if cannot defer
  { // Execute this task immediately.
    kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
    new_taskdata->td_flags.task_serial = 1;
    __kmp_invoke_task(gtid, new_task, current_task);
  }

  KA_TRACE(
      10,
      ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
       "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n",
       gtid, loc_ref, new_taskdata));

#if OMPT_SUPPORT
  if (UNLIKELY(ompt_enabled.enabled)) {
    parent->ompt_task_info.frame.enter_frame = ompt_data_none;
    parent->ompt_task_info.frame.enter_frame_flags = OMPT_FRAME_FLAGS_RUNTIME;
  }
#endif
  return TASK_CURRENT_NOT_QUEUED;
}
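// __kmp_omp_task is the common deferral path behind __kmpc_omp_task and the
// taskloop code.  Under OMPX_TASKGRAPH it also records the task into the
// record_map of a TDG being recorded, doubling the map under graph_lock when
// the task id outgrows it.  A task that cannot be deferred (proxy, or the
// push failed) is invoked immediately; otherwise, with a passive wait
// policy, one sleeping teammate is woken to pick the task up.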
kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
                         bool serialize_immediate) {
  kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);

#if OMPX_TASKGRAPH
  if (new_taskdata->is_taskgraph &&
      __kmp_tdg_is_recording(new_taskdata->tdg->tdg_status)) {
    kmp_tdg_info_t *tdg = new_taskdata->tdg;
    // Extend the record_map if needed.
    if (new_taskdata->td_tdg_task_id >= new_taskdata->tdg->map_size) {
      __kmp_acquire_bootstrap_lock(&tdg->graph_lock);
      // map_size could have been updated by another thread in the meantime
      // (e.g. recursive taskloop).
      if (new_taskdata->td_tdg_task_id >= tdg->map_size) {
        kmp_uint old_size = tdg->map_size;
        kmp_uint new_size = old_size * 2;
        kmp_node_info_t *old_record = tdg->record_map;
        kmp_node_info_t *new_record = (kmp_node_info_t *)__kmp_allocate(
            new_size * sizeof(kmp_node_info_t));

        KMP_MEMCPY(new_record, old_record, old_size * sizeof(kmp_node_info_t));
        tdg->record_map = new_record;

        __kmp_free(old_record);

        for (kmp_int i = old_size; i < new_size; i++) {
          kmp_int32 *successorsList = (kmp_int32 *)__kmp_allocate(
              __kmp_successors_size * sizeof(kmp_int32));
          new_record[i].task = nullptr;
          new_record[i].successors = successorsList;
          new_record[i].nsuccessors = 0;
          new_record[i].npredecessors = 0;
          new_record[i].successors_size = __kmp_successors_size;
          KMP_ATOMIC_ST_REL(&new_record[i].npredecessors_counter, 0);
        }
        // Update the size last, so other threads do not use old_record while
        // map_size is already updated.
        tdg->map_size = new_size;
      }
      __kmp_release_bootstrap_lock(&tdg->graph_lock);
    }
    // Record the task.
    if (tdg->record_map[new_taskdata->td_tdg_task_id].task == nullptr) {
      tdg->record_map[new_taskdata->td_tdg_task_id].task = new_task;
      tdg->record_map[new_taskdata->td_tdg_task_id].parent_task =
          new_taskdata->td_parent;
      KMP_ATOMIC_INC(&tdg->num_tasks);
    }
  }
#endif

  // Always try to queue the new task; if the queue is full, execute it.
  if (new_taskdata->td_flags.proxy == TASK_PROXY ||
      __kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED) // if cannot defer
  { // Execute this task immediately.
    kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
    if (serialize_immediate)
      new_taskdata->td_flags.task_serial = 1;
    __kmp_invoke_task(gtid, new_task, current_task);
  } else if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME &&
             __kmp_wpolicy_passive) {
    kmp_info_t *this_thr = __kmp_threads[gtid];
    kmp_team_t *team = this_thr->th.th_team;
    kmp_int32 nthreads = this_thr->th.th_team_nproc;
    for (int i = 0; i < nthreads; ++i) {
      kmp_info_t *thread = team->t.t_threads[i];
      if (thread == this_thr)
        continue;
      if (thread->th.th_sleep_loc != NULL) {
        __kmp_null_resume_wrapper(thread);
        break; // wake one thread at a time
      }
    }
  }
  return TASK_CURRENT_NOT_QUEUED;
}
// __kmpc_omp_task: wrapper around __kmp_omp_task to schedule a
// non-thread-switchable task from the parent thread only.
kmp_int32 __kmpc_omp_task(ident_t *loc_ref, kmp_int32 gtid,
                          kmp_task_t *new_task) {
  kmp_int32 res;
  KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK);

#if KMP_DEBUG || OMPT_SUPPORT
  kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
#endif
  KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n", gtid, loc_ref,
                new_taskdata));
  __kmp_assert_valid_gtid(gtid);

#if OMPT_SUPPORT
  kmp_taskdata_t *parent = NULL;
  if (UNLIKELY(ompt_enabled.enabled)) {
    if (!new_taskdata->td_flags.started) {
      OMPT_STORE_RETURN_ADDRESS(gtid);
      parent = new_taskdata->td_parent;
      if (!parent->ompt_task_info.frame.enter_frame.ptr) {
        parent->ompt_task_info.frame.enter_frame.ptr =
            OMPT_GET_FRAME_ADDRESS(0);
      }
      if (ompt_enabled.ompt_callback_task_create) {
        ompt_callbacks.ompt_callback(ompt_callback_task_create)(
            &(parent->ompt_task_info.task_data),
            &(parent->ompt_task_info.frame),
            &(new_taskdata->ompt_task_info.task_data),
            TASK_TYPE_DETAILS_FORMAT(new_taskdata), 0,
            OMPT_LOAD_RETURN_ADDRESS(gtid));
      }
    } else {
      // We are scheduling the continuation of an untied task; switch back to
      // the scheduling parent.
      __ompt_task_finish(new_task,
                         new_taskdata->ompt_task_info.scheduling_parent,
                         ompt_task_switch);
      new_taskdata->ompt_task_info.frame.exit_frame = ompt_data_none;
    }
  }
#endif

  res = __kmp_omp_task(gtid, new_task, true);

  KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning "
                "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
                gtid, loc_ref, new_taskdata));
#if OMPT_SUPPORT
  if (UNLIKELY(ompt_enabled.enabled && parent != NULL)) {
    parent->ompt_task_info.frame.enter_frame = ompt_data_none;
  }
#endif
  return res;
}
// __kmp_omp_taskloop_task: like __kmpc_omp_task, but used by the taskloop
// machinery, which supplies the return address explicitly.
kmp_int32 __kmp_omp_taskloop_task(ident_t *loc_ref, kmp_int32 gtid,
                                  kmp_task_t *new_task, void *codeptr_ra) {
  kmp_int32 res;
  KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK);

#if KMP_DEBUG || OMPT_SUPPORT
  kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
#endif
  KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n", gtid, loc_ref,
                new_taskdata));

#if OMPT_SUPPORT
  kmp_taskdata_t *parent = NULL;
  if (UNLIKELY(ompt_enabled.enabled && !new_taskdata->td_flags.started)) {
    parent = new_taskdata->td_parent;
    if (!parent->ompt_task_info.frame.enter_frame.ptr)
      parent->ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    if (ompt_enabled.ompt_callback_task_create) {
      ompt_callbacks.ompt_callback(ompt_callback_task_create)(
          &(parent->ompt_task_info.task_data), &(parent->ompt_task_info.frame),
          &(new_taskdata->ompt_task_info.task_data),
          TASK_TYPE_DETAILS_FORMAT(new_taskdata), 0, codeptr_ra);
    }
  }
#endif

  res = __kmp_omp_task(gtid, new_task, true);

  KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning "
                "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
                gtid, loc_ref, new_taskdata));
#if OMPT_SUPPORT
  if (UNLIKELY(ompt_enabled.enabled && parent != NULL)) {
    parent->ompt_task_info.frame.enter_frame = ompt_data_none;
  }
#endif
  return res;
}
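// __kmpc_omp_taskwait blocks the current task until all of its children have
// completed.  Rather than idling, the waiting thread spins on the
// td_incomplete_child_tasks counter through a kmp_flag_32 and keeps
// executing other ready tasks via flag.execute_tasks, so taskwait doubles as
// a scheduling point.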
template <bool ompt>
static kmp_int32 __kmpc_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid,
                                              void *frame_address,
                                              void *return_address) {
  kmp_taskdata_t *taskdata = nullptr;
  kmp_info_t *thread;
  int thread_finished = FALSE;
  KMP_SET_THREAD_STATE_BLOCK(TASKWAIT);

  KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n", gtid, loc_ref));
  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    thread = __kmp_threads[gtid];
    taskdata = thread->th.th_current_task;

#if OMPT_SUPPORT && OMPT_OPTIONAL
    ompt_data_t *my_task_data;
    ompt_data_t *my_parallel_data;

    if (ompt) {
      my_task_data = &(taskdata->ompt_task_info.task_data);
      my_parallel_data = OMPT_CUR_TEAM_DATA(thread);

      taskdata->ompt_task_info.frame.enter_frame.ptr = frame_address;

      if (ompt_enabled.ompt_callback_sync_region) {
        ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
            ompt_sync_region_taskwait, ompt_scope_begin, my_parallel_data,
            my_task_data, return_address);
      }
      if (ompt_enabled.ompt_callback_sync_region_wait) {
        ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
            ompt_sync_region_taskwait, ompt_scope_begin, my_parallel_data,
            my_task_data, return_address);
      }
    }
#endif // OMPT_SUPPORT && OMPT_OPTIONAL

#if ENABLE_LIBOMPTARGET
    // Give the offload runtime an opportunity to make progress and create any
    // necessary proxy tasks.
    if (UNLIKELY(kmp_target_sync_cb))
      (*kmp_target_sync_cb)(loc_ref, gtid, KMP_TASKDATA_TO_TASK(taskdata),
                            NULL);
#endif // ENABLE_LIBOMPTARGET

    // Debugger: the taskwait is active; record location and thread.
    taskdata->td_taskwait_counter += 1;
    taskdata->td_taskwait_ident = loc_ref;
    taskdata->td_taskwait_thread = gtid + 1;

#if USE_ITT_BUILD
    void *itt_sync_obj = NULL;
#if USE_ITT_NOTIFY
    KMP_ITT_TASKWAIT_STARTING(itt_sync_obj);
#endif /* USE_ITT_NOTIFY */
#endif /* USE_ITT_BUILD */

    bool must_wait =
        !taskdata->td_flags.team_serial && !taskdata->td_flags.final;

    must_wait = must_wait || (thread->th.th_task_team != NULL &&
                              thread->th.th_task_team->tt.tt_found_proxy_tasks);
    // If a hidden helper thread was encountered, we must wait here as well.
    must_wait =
        must_wait ||
        (__kmp_enable_hidden_helper && thread->th.th_task_team != NULL &&
         thread->th.th_task_team->tt.tt_hidden_helper_task_encountered);

    if (must_wait) {
      kmp_flag_32<false, false> flag(
          RCAST(std::atomic<kmp_uint32> *,
                &(taskdata->td_incomplete_child_tasks)),
          0U);
      while (KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks) != 0) {
        flag.execute_tasks(thread, gtid, FALSE,
                           &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
                           __kmp_task_stealing_constraint);
      }
    }
#if USE_ITT_BUILD
    KMP_ITT_TASKWAIT_FINISHED(itt_sync_obj);
    KMP_FSYNC_ACQUIRED(taskdata); // acquire self - sync with children
#endif /* USE_ITT_BUILD */

    // Debugger: the taskwait completed; location remains, thread is negated.
    taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread;

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt) {
      if (ompt_enabled.ompt_callback_sync_region_wait) {
        ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
            ompt_sync_region_taskwait, ompt_scope_end, my_parallel_data,
            my_task_data, return_address);
      }
      if (ompt_enabled.ompt_callback_sync_region) {
        ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
            ompt_sync_region_taskwait, ompt_scope_end, my_parallel_data,
            my_task_data, return_address);
      }
      taskdata->ompt_task_info.frame.enter_frame = ompt_data_none;
    }
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
  }

  KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
                "returning TASK_CURRENT_NOT_QUEUED\n",
                gtid, taskdata));

  return TASK_CURRENT_NOT_QUEUED;
}
#if OMPT_SUPPORT && OMPT_OPTIONAL
OMPT_NOINLINE
static kmp_int32 __kmpc_omp_taskwait_ompt(ident_t *loc_ref, kmp_int32 gtid,
                                          void *frame_address,
                                          void *return_address) {
  return __kmpc_omp_taskwait_template<true>(loc_ref, gtid, frame_address,
                                            return_address);
}
#endif // OMPT_SUPPORT && OMPT_OPTIONAL

// __kmpc_omp_taskwait: wait until all tasks generated by the current task
// are complete.
kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (UNLIKELY(ompt_enabled.enabled)) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
    return __kmpc_omp_taskwait_ompt(loc_ref, gtid, OMPT_GET_FRAME_ADDRESS(0),
                                    OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
  return __kmpc_omp_taskwait_template<false>(loc_ref, gtid, NULL, NULL);
}
// __kmpc_omp_taskyield: switch to a different task.
kmp_int32 __kmpc_omp_taskyield(ident_t *loc_ref, kmp_int32 gtid, int end_part) {
  kmp_taskdata_t *taskdata = NULL;
  kmp_info_t *thread;
  int thread_finished = FALSE;

  KMP_COUNT_BLOCK(OMP_TASKYIELD);
  KMP_SET_THREAD_STATE_BLOCK(TASKYIELD);

  KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
                gtid, loc_ref, end_part));
  __kmp_assert_valid_gtid(gtid);

  if (__kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel) {
    thread = __kmp_threads[gtid];
    taskdata = thread->th.th_current_task;
    // Debugger: record the taskwait-like state for this yield point.
    taskdata->td_taskwait_counter += 1;
    taskdata->td_taskwait_ident = loc_ref;
    taskdata->td_taskwait_thread = gtid + 1;

#if USE_ITT_BUILD
    void *itt_sync_obj = NULL;
#if USE_ITT_NOTIFY
    KMP_ITT_TASKWAIT_STARTING(itt_sync_obj);
#endif /* USE_ITT_NOTIFY */
#endif /* USE_ITT_BUILD */
    if (!taskdata->td_flags.team_serial) {
      kmp_task_team_t *task_team = thread->th.th_task_team;
      if (task_team != NULL) {
        if (KMP_TASKING_ENABLED(task_team)) {
#if OMPT_SUPPORT
          if (UNLIKELY(ompt_enabled.enabled))
            thread->th.ompt_thread_info.ompt_task_yielded = 1;
#endif
          __kmp_execute_tasks_32(
              thread, gtid, (kmp_flag_32<> *)NULL, FALSE,
              &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
              __kmp_task_stealing_constraint);
#if OMPT_SUPPORT
          if (UNLIKELY(ompt_enabled.enabled))
            thread->th.ompt_thread_info.ompt_task_yielded = 0;
#endif
        }
      }
    }
#if USE_ITT_BUILD
    KMP_ITT_TASKWAIT_FINISHED(itt_sync_obj);
#endif /* USE_ITT_BUILD */

    // Debugger: the yield completed; location remains, thread is negated.
    taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread;
  }

  KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
                "returning TASK_CURRENT_NOT_QUEUED\n",
                gtid, taskdata));

  return TASK_CURRENT_NOT_QUEUED;
}
// Task reduction flags: lazy_priv requests lazy allocation of the private
// copies (see __kmpc_task_reduction_get_th_data).
typedef struct kmp_task_red_flags {
  unsigned lazy_priv : 1; // hint: allocate private copies of the data lazily
  unsigned reserved31 : 31;
} kmp_task_red_flags_t;
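// __kmp_task_reduction_init publishes per-taskgroup reduction storage: one
// kmp_taskred_data_t per reduction item.  Unless lazy_priv is requested,
// each item eagerly gets an array of nth private copies padded to cache-line
// multiples; with lazy_priv only an array of pointers is allocated, and the
// per-thread objects are created on first use by
// __kmpc_task_reduction_get_th_data.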
template <typename T>
void *__kmp_task_reduction_init(int gtid, int num, T *data) {
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskgroup_t *tg = thread->th.th_current_task->td_taskgroup;
  kmp_uint32 nth = thread->th.th_team_nproc;
  kmp_taskred_data_t *arr;

  // Check the input data just in case.
  KMP_ASSERT(tg != NULL);
  KMP_ASSERT(data != NULL);
  KMP_ASSERT(num > 0);
  if (nth == 1 && !__kmp_enable_hidden_helper) {
    KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, tg %p, exiting nth=1\n",
                  gtid, tg));
    return (void *)tg;
  }
  KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, taskgroup %p, #items %d\n",
                gtid, tg, num));
  arr = (kmp_taskred_data_t *)__kmp_thread_malloc(
      thread, num * sizeof(kmp_taskred_data_t));
  for (int i = 0; i < num; ++i) {
    size_t size = data[i].reduce_size - 1;
    // Round the size up to a cache line per thread-specific item.
    size += CACHE_LINE - size % CACHE_LINE;
    KMP_ASSERT(data[i].reduce_comb != NULL); // combiner is mandatory
    arr[i].reduce_shar = data[i].reduce_shar;
    arr[i].reduce_size = size;
    arr[i].flags = data[i].flags;
    arr[i].reduce_comb = data[i].reduce_comb;
    arr[i].reduce_init = data[i].reduce_init;
    arr[i].reduce_fini = data[i].reduce_fini;
    __kmp_assign_orig<T>(arr[i], data[i]);
    if (!arr[i].flags.lazy_priv) {
      // Allocate a cache-line aligned block and initialize all copies.
      arr[i].reduce_priv = __kmp_allocate(nth * size);
      arr[i].reduce_pend = (char *)(arr[i].reduce_priv) + nth * size;
      if (arr[i].reduce_init != NULL) {
        for (size_t j = 0; j < nth; ++j) {
          __kmp_call_init<T>(arr[i], j * size);
        }
      }
    } else {
      // Only allocate space for pointers now; objects are lazily
      // allocated/initialized if and when requested.
      arr[i].reduce_priv = __kmp_allocate(nth * sizeof(void *));
    }
  }
  tg->reduce_data = (void *)arr;
  tg->reduce_num_data = num;
  return (void *)tg;
}

void *__kmpc_task_reduction_init(int gtid, int num, void *data) {
#if OMPX_TASKGRAPH
  kmp_tdg_info_t *tdg = __kmp_find_tdg(__kmp_curr_tdg_idx);
  if (tdg && __kmp_tdg_is_recording(tdg->tdg_status)) {
    kmp_tdg_info_t *this_tdg = __kmp_global_tdgs[__kmp_curr_tdg_idx];
    this_tdg->rec_taskred_data =
        __kmp_allocate(sizeof(kmp_task_red_input_t) * num);
    this_tdg->rec_num_taskred = num;
    KMP_MEMCPY(this_tdg->rec_taskred_data, data,
               sizeof(kmp_task_red_input_t) * num);
  }
#endif
  return __kmp_task_reduction_init(gtid, num, (kmp_task_red_input_t *)data);
}

void *__kmpc_taskred_init(int gtid, int num, void *data) {
#if OMPX_TASKGRAPH
  kmp_tdg_info_t *tdg = __kmp_find_tdg(__kmp_curr_tdg_idx);
  if (tdg && __kmp_tdg_is_recording(tdg->tdg_status)) {
    kmp_tdg_info_t *this_tdg = __kmp_global_tdgs[__kmp_curr_tdg_idx];
    this_tdg->rec_taskred_data =
        __kmp_allocate(sizeof(kmp_taskred_input_t) * num);
    this_tdg->rec_num_taskred = num;
    KMP_MEMCPY(this_tdg->rec_taskred_data, data,
               sizeof(kmp_taskred_input_t) * num);
  }
#endif
  return __kmp_task_reduction_init(gtid, num, (kmp_taskred_input_t *)data);
}
template <typename T>
void __kmp_task_reduction_init_copy(kmp_info_t *thr, int num, T *data,
                                    kmp_taskgroup_t *tg, void *reduce_data) {
  kmp_taskred_data_t *arr;
  KA_TRACE(20, ("__kmp_task_reduction_init_copy: Th %p, init taskgroup %p,"
                " from data %p\n",
                thr, tg, reduce_data));
  arr = (kmp_taskred_data_t *)__kmp_thread_malloc(
      thr, num * sizeof(kmp_taskred_data_t));
  // Threads share private copies, thunk routines, sizes, flags, etc.
  KMP_MEMCPY(arr, reduce_data, num * sizeof(kmp_taskred_data_t));
  for (int i = 0; i < num; ++i) {
    arr[i].reduce_shar = data[i].reduce_shar; // init unique shared pointers
  }
  tg->reduce_data = (void *)arr;
  tg->reduce_num_data = num;
}
// __kmpc_task_reduction_get_th_data: get the thread-specific location of a
// reduction data item, allocating it lazily if needed.
void *__kmpc_task_reduction_get_th_data(int gtid, void *tskgrp, void *data) {
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_int32 nth = thread->th.th_team_nproc;
  if (nth == 1 && !__kmp_enable_hidden_helper)
    return data; // nothing to do

  kmp_taskgroup_t *tg = (kmp_taskgroup_t *)tskgrp;
  if (tg == NULL)
    tg = thread->th.th_current_task->td_taskgroup;
  KMP_ASSERT(tg != NULL);
  kmp_taskred_data_t *arr;
  kmp_int32 num;
  kmp_int32 tid = thread->th.th_info.ds.ds_tid;

#if OMPX_TASKGRAPH
  if ((thread->th.th_current_task->is_taskgraph) &&
      (!__kmp_tdg_is_recording(
          __kmp_global_tdgs[__kmp_curr_tdg_idx]->tdg_status))) {
    tg = thread->th.th_current_task->td_taskgroup;
    KMP_ASSERT(tg != NULL);
    KMP_ASSERT(tg->reduce_data != NULL);
    arr = (kmp_taskred_data_t *)(tg->reduce_data);
    num = tg->reduce_num_data;
  }
#endif

  KMP_ASSERT(data != NULL);
  while (tg != NULL) {
    arr = (kmp_taskred_data_t *)(tg->reduce_data);
    num = tg->reduce_num_data;
    for (int i = 0; i < num; ++i) {
      if (!arr[i].flags.lazy_priv) {
        if (data == arr[i].reduce_shar ||
            (data >= arr[i].reduce_priv && data < arr[i].reduce_pend))
          return (char *)(arr[i].reduce_priv) + tid * arr[i].reduce_size;
      } else {
        // Check the shared location first.
        void **p_priv = (void **)(arr[i].reduce_priv);
        if (data == arr[i].reduce_shar)
          goto found;
        // Check whether we got a thread-specific location as parameter.
        for (int j = 0; j < nth; ++j)
          if (data == p_priv[j])
            goto found;
        continue; // not found, continue search
      found:
        if (p_priv[tid] == NULL) {
          // Allocate the thread-specific object lazily.
          p_priv[tid] = __kmp_allocate(arr[i].reduce_size);
          if (arr[i].reduce_init != NULL) {
            if (arr[i].reduce_orig != NULL) { // new interface
              ((void (*)(void *, void *))arr[i].reduce_init)(
                  p_priv[tid], arr[i].reduce_orig);
            } else { // old interface (single parameter)
              ((void (*)(void *))arr[i].reduce_init)(p_priv[tid]);
            }
          }
        }
        return p_priv[tid];
      }
    }
    KMP_ASSERT(tg->parent);
    tg = tg->parent;
  }
  KMP_ASSERT2(0, "Unknown task reduction item");
  return NULL; // ERROR: this line is never executed
}
// Finalize task reduction.
// Called from __kmpc_end_taskgroup()
static void __kmp_task_reduction_fini(kmp_info_t *th, kmp_taskgroup_t *tg) {
  kmp_int32 nth = th->th.th_team_nproc;
  KMP_DEBUG_ASSERT(nth > 1 ||
                   __kmp_enable_hidden_helper); // never called if nth == 1,
  // unless hidden helpers are enabled
  kmp_taskred_data_t *arr = (kmp_taskred_data_t *)tg->reduce_data;
  kmp_int32 num = tg->reduce_num_data;
  for (int i = 0; i < num; ++i) {
    void *sh_data = arr[i].reduce_shar;
    void (*f_fini)(void *) = (void (*)(void *))(arr[i].reduce_fini);
    void (*f_comb)(void *, void *) =
        (void (*)(void *, void *))(arr[i].reduce_comb);
    if (!arr[i].flags.lazy_priv) {
      void *pr_data = arr[i].reduce_priv;
      size_t size = arr[i].reduce_size;
      for (int j = 0; j < nth; ++j) {
        void *priv_data = (char *)pr_data + j * size;
        f_comb(sh_data, priv_data); // combine results
        if (f_fini)
          f_fini(priv_data); // finalize if needed
      }
    } else {
      void **pr_data = (void **)(arr[i].reduce_priv);
      for (int j = 0; j < nth; ++j) {
        if (pr_data[j] != NULL) {
          f_comb(sh_data, pr_data[j]); // combine results
          if (f_fini)
            f_fini(pr_data[j]); // finalize if needed
          __kmp_free(pr_data[j]);
        }
      }
    }
    __kmp_free(arr[i].reduce_priv);
  }
  __kmp_thread_free(th, arr);
  tg->reduce_data = NULL;
  tg->reduce_num_data = 0;
}
// Cleanup task reduction data for parallel or worksharing,
// do not touch task private data other threads are still working with.
static void __kmp_task_reduction_clean(kmp_info_t *th, kmp_taskgroup_t *tg) {
  __kmp_thread_free(th, tg->reduce_data);
  tg->reduce_data = NULL;
  tg->reduce_num_data = 0;
}
template <typename T>
void *__kmp_task_reduction_modifier_init(ident_t *loc, int gtid, int is_ws,
                                         int num, T *data) {
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *thr = __kmp_threads[gtid];
  kmp_int32 nth = thr->th.th_team_nproc;
  __kmpc_taskgroup(loc, gtid); // form new taskgroup first
  if (nth == 1) {
    KA_TRACE(10,
             ("__kmpc_reduction_modifier_init: T#%d, tg %p, exiting nth=1\n",
              gtid, thr->th.th_current_task->td_taskgroup));
    return (void *)thr->th.th_current_task->td_taskgroup;
  }
  kmp_team_t *team = thr->th.th_team;
  void *reduce_data;
  kmp_taskgroup_t *tg;
  reduce_data = KMP_ATOMIC_LD_RLX(&team->t.t_tg_reduce_data[is_ws]);
  if (reduce_data == NULL &&
      __kmp_atomic_compare_store(&team->t.t_tg_reduce_data[is_ws], reduce_data,
                                 (void *)1)) {
    // single thread enters this block to initialize common reduction data
    KMP_DEBUG_ASSERT(reduce_data == NULL);
    tg = (kmp_taskgroup_t *)__kmp_task_reduction_init<T>(gtid, num, data);
    reduce_data = __kmp_thread_malloc(thr, num * sizeof(kmp_taskred_data_t));
    KMP_MEMCPY(reduce_data, tg->reduce_data, num * sizeof(kmp_taskred_data_t));
    KMP_DEBUG_ASSERT(KMP_ATOMIC_LD_RLX(&team->t.t_tg_fini_counter[0]) == 0);
    KMP_DEBUG_ASSERT(KMP_ATOMIC_LD_RLX(&team->t.t_tg_fini_counter[1]) == 0);
    KMP_ATOMIC_ST_REL(&team->t.t_tg_reduce_data[is_ws], reduce_data);
  } else {
    while (
        (reduce_data = KMP_ATOMIC_LD_ACQ(&team->t.t_tg_reduce_data[is_ws])) ==
        (void *)1) { // wait for initialization
      KMP_YIELD(TRUE);
    }
    KMP_DEBUG_ASSERT(reduce_data > (void *)1); // should be a valid pointer here
    tg = thr->th.th_current_task->td_taskgroup;
    __kmp_task_reduction_init_copy<T>(thr, num, data, tg, reduce_data);
  }
  return tg;
}
// Initialize task reduction for a parallel or worksharing construct.
void *__kmpc_task_reduction_modifier_init(ident_t *loc, int gtid, int is_ws,
                                          int num, void *data) {
  return __kmp_task_reduction_modifier_init(loc, gtid, is_ws, num,
                                            (kmp_task_red_input_t *)data);
}

void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int is_ws, int num,
                                   void *data) {
  return __kmp_task_reduction_modifier_init(loc, gtid, is_ws, num,
                                            (kmp_taskred_input_t *)data);
}

// Finalize task reduction for a parallel or worksharing construct.
void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid, int is_ws) {
  __kmpc_end_taskgroup(loc, gtid);
}
// __kmpc_taskgroup: Start a new taskgroup
void __kmpc_taskgroup(ident_t *loc, int gtid) {
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *taskdata = thread->th.th_current_task;
  kmp_taskgroup_t *tg_new =
      (kmp_taskgroup_t *)__kmp_thread_malloc(thread, sizeof(kmp_taskgroup_t));
  KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new));
  KMP_ATOMIC_ST_RLX(&tg_new->count, 0);
  KMP_ATOMIC_ST_RLX(&tg_new->cancel_request, cancel_noreq);
  tg_new->parent = taskdata->td_taskgroup;
  tg_new->reduce_data = NULL;
  tg_new->reduce_num_data = 0;
  tg_new->gomp_data = NULL;
  taskdata->td_taskgroup = tg_new;

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (UNLIKELY(ompt_enabled.ompt_callback_sync_region)) {
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
    kmp_team_t *team = thread->th.th_team;
    ompt_data_t my_task_data = taskdata->ompt_task_info.task_data;
    ompt_data_t my_parallel_data = team->t.ompt_team_info.parallel_data;

    ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
        ompt_sync_region_taskgroup, ompt_scope_begin, &(my_parallel_data),
        &(my_task_data), codeptr);
  }
#endif
}
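
// Sketch of the lowering (illustrative, not runtime code): a structured block
//
//   #pragma omp taskgroup
//   { make_tasks(); } // make_tasks() is a placeholder
//
// becomes, roughly,
//
//   __kmpc_taskgroup(loc, gtid);
//   make_tasks();
//   __kmpc_end_taskgroup(loc, gtid); // waits for tg_new->count to reach 0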
// __kmpc_end_taskgroup: Wait until all tasks generated by the current task
// and its descendants are complete
void __kmpc_end_taskgroup(ident_t *loc, int gtid) {
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *taskdata = thread->th.th_current_task;
  kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup;
  int thread_finished = FALSE;

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_team_t *team;
  ompt_data_t my_task_data;
  ompt_data_t my_parallel_data;
  void *codeptr = nullptr;
  if (UNLIKELY(ompt_enabled.enabled)) {
    team = thread->th.th_team;
    my_task_data = taskdata->ompt_task_info.task_data;
    my_parallel_data = team->t.ompt_team_info.parallel_data;
    codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
  }
#endif

  KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc));
  KMP_DEBUG_ASSERT(taskgroup != NULL);
  KMP_SET_THREAD_STATE_BLOCK(TASKGROUP);

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    // mark task as waiting not on a barrier
    taskdata->td_taskwait_counter += 1;
    taskdata->td_taskwait_ident = loc;
    taskdata->td_taskwait_thread = gtid + 1;
#if USE_ITT_BUILD
    // For ITT the taskgroup wait is similar to taskwait until we need to
    // distinguish them
    void *itt_sync_obj = NULL;
    KMP_ITT_TASKWAIT_STARTING(itt_sync_obj);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (UNLIKELY(ompt_enabled.ompt_callback_sync_region_wait)) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_taskgroup, ompt_scope_begin, &(my_parallel_data),
          &(my_task_data), codeptr);
    }
#endif

#if ENABLE_LIBOMPTARGET
    // Give an opportunity to the offload runtime to make progress and create
    // any necessary proxy tasks
    if (UNLIKELY(kmp_target_sync_cb))
      (*kmp_target_sync_cb)(loc, gtid, KMP_TASKDATA_TO_TASK(taskdata), NULL);
#endif // ENABLE_LIBOMPTARGET

    if (!taskdata->td_flags.team_serial ||
        (thread->th.th_task_team != NULL &&
         (thread->th.th_task_team->tt.tt_found_proxy_tasks ||
          thread->th.th_task_team->tt.tt_hidden_helper_task_encountered))) {
      kmp_flag_32<false, false> flag(
          RCAST(std::atomic<kmp_uint32> *, &(taskgroup->count)), 0U);
      while (KMP_ATOMIC_LD_ACQ(&taskgroup->count) != 0) {
        flag.execute_tasks(thread, gtid, FALSE,
                           &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
                           __kmp_task_stealing_constraint);
      }
    }
    taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread; // end waiting

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (UNLIKELY(ompt_enabled.ompt_callback_sync_region_wait)) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_taskgroup, ompt_scope_end, &(my_parallel_data),
          &(my_task_data), codeptr);
    }
#endif

#if USE_ITT_BUILD
    KMP_ITT_TASKWAIT_FINISHED(itt_sync_obj);
    KMP_FSYNC_ACQUIRED(taskdata); // acquire self - sync with descendants
#endif /* USE_ITT_BUILD */
  }
  KMP_DEBUG_ASSERT(taskgroup->count == 0);

  if (taskgroup->reduce_data != NULL &&
      !taskgroup->gomp_data) { // need to reduce?
    int cnt;
    void *reduce_data;
    kmp_team_t *t = thread->th.th_team;
    kmp_taskred_data_t *arr = (kmp_taskred_data_t *)taskgroup->reduce_data;
    // check if <priv> data of the first reduction variable is shared for team
    void *priv0 = arr[0].reduce_priv;
    if ((reduce_data = KMP_ATOMIC_LD_ACQ(&t->t.t_tg_reduce_data[0])) != NULL &&
        ((kmp_taskred_data_t *)reduce_data)[0].reduce_priv == priv0) {
      // finishing task reduction on parallel
      cnt = KMP_ATOMIC_INC(&t->t.t_tg_fini_counter[0]);
      if (cnt == thread->th.th_team_nproc - 1) {
        // we are the last thread passing __kmpc_reduction_modifier_fini()
        // finalize task reduction:
        __kmp_task_reduction_fini(thread, taskgroup);
        // cleanup fields in the team structure:
        __kmp_thread_free(thread, reduce_data);
        KMP_ATOMIC_ST_REL(&t->t.t_tg_reduce_data[0], NULL);
        KMP_ATOMIC_ST_REL(&t->t.t_tg_fini_counter[0], 0);
      } else {
        // we are not the last thread, so do not finalize the reduction,
        // just clean our own copy of the data
        __kmp_task_reduction_clean(thread, taskgroup);
      }
    } else if ((reduce_data = KMP_ATOMIC_LD_ACQ(&t->t.t_tg_reduce_data[1])) !=
                   NULL &&
               ((kmp_taskred_data_t *)reduce_data)[0].reduce_priv == priv0) {
      // finishing task reduction on worksharing
      cnt = KMP_ATOMIC_INC(&t->t.t_tg_fini_counter[1]);
      if (cnt == thread->th.th_team_nproc - 1) {
        // we are the last thread passing __kmpc_reduction_modifier_fini()
        __kmp_task_reduction_fini(thread, taskgroup);
        // cleanup fields in team structure:
        __kmp_thread_free(thread, reduce_data);
        KMP_ATOMIC_ST_REL(&t->t.t_tg_reduce_data[1], NULL);
        KMP_ATOMIC_ST_REL(&t->t.t_tg_fini_counter[1], 0);
      } else {
        // we are not the last thread, just clean our own copy of the data
        __kmp_task_reduction_clean(thread, taskgroup);
      }
    } else {
      // finishing task reduction on taskgroup
      __kmp_task_reduction_fini(thread, taskgroup);
    }
  }
  // Restore parent taskgroup for the current task
  taskdata->td_taskgroup = taskgroup->parent;
  __kmp_thread_free(thread, taskgroup);

  KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n",
                gtid, taskdata));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (UNLIKELY(ompt_enabled.ompt_callback_sync_region)) {
    ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
        ompt_sync_region_taskgroup, ompt_scope_end, &(my_parallel_data),
        &(my_task_data), codeptr);
  }
#endif
}
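
// Note on the two t_tg_reduce_data slots consulted above: slot 0 carries
// reduction data published by a "parallel" reduction modifier and slot 1 by a
// "worksharing" one. The matching t_tg_fini_counter slot counts threads
// passing the fini path; the last thread (cnt == nproc - 1) finalizes and
// frees the shared array, while every other thread only cleans its own copy.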
static kmp_task_t *__kmp_get_priority_task(kmp_int32 gtid,
                                           kmp_task_team_t *task_team,
                                           kmp_int32 is_constrained) {
  kmp_task_t *task = NULL;
  kmp_taskdata_t *taskdata;
  kmp_taskdata_t *current;
  kmp_thread_data_t *thread_data;
  int ntasks = task_team->tt.tt_num_task_pri;
  if (ntasks == 0) {
    KA_TRACE(
        20, ("__kmp_get_priority_task(exit #1): T#%d No tasks to get\n", gtid));
    return NULL;
  }
  do {
    // decrement num_tasks to "reserve" one task to get for execution
    if (__kmp_atomic_compare_store(&task_team->tt.tt_num_task_pri, ntasks,
                                   ntasks - 1))
      break;
    ntasks = task_team->tt.tt_num_task_pri;
  } while (ntasks > 0);
  if (ntasks == 0) {
    KA_TRACE(20, ("__kmp_get_priority_task(exit #2): T#%d No tasks to get\n",
                  __kmp_get_gtid()));
    return NULL;
  }
  // We got a "ticket" to get a "reserved" priority task
  int deque_ntasks;
  kmp_task_pri_t *list = task_team->tt.tt_task_pri_list;
  do {
    KMP_ASSERT(list != NULL);
    thread_data = &list->td;
    __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
    deque_ntasks = thread_data->td.td_deque_ntasks;
    if (deque_ntasks == 0) {
      __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
      KA_TRACE(20, ("__kmp_get_priority_task: T#%d No tasks to get from %p\n",
                    __kmp_get_gtid(), thread_data));
      list = list->next;
    }
  } while (deque_ntasks == 0);
  KMP_DEBUG_ASSERT(deque_ntasks);
  int target = thread_data->td.td_deque_head;
  current = __kmp_threads[gtid]->th.th_current_task;
  taskdata = thread_data->td.td_deque[target];
  if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) {
    // Bump head pointer and Wrap.
    thread_data->td.td_deque_head =
        (target + 1) & TASK_DEQUE_MASK(thread_data->td);
  } else {
    if (!task_team->tt.tt_untied_task_encountered) {
      // The TSC does not allow to steal victim task
      __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
      KA_TRACE(20, ("__kmp_get_priority_task(exit #3): T#%d could not get task "
                    "from %p: task_team=%p ntasks=%d head=%u tail=%u\n",
                    gtid, thread_data, task_team, deque_ntasks, target,
                    thread_data->td.td_deque_tail));
      task_team->tt.tt_num_task_pri++; // atomic inc, restore value
      return NULL;
    }
    int i;
    // walk through the deque trying to steal any task
    taskdata = NULL;
    for (i = 1; i < deque_ntasks; ++i) {
      target = (target + 1) & TASK_DEQUE_MASK(thread_data->td);
      taskdata = thread_data->td.td_deque[target];
      if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) {
        break; // found task to execute
      } else {
        taskdata = NULL;
      }
    }
    if (taskdata == NULL) {
      // No appropriate candidate found to execute
      __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
      KA_TRACE(
          10, ("__kmp_get_priority_task(exit #4): T#%d could not get task from "
               "%p: task_team=%p ntasks=%d head=%u tail=%u\n",
               gtid, thread_data, task_team, deque_ntasks,
               thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
      task_team->tt.tt_num_task_pri++; // atomic inc, restore value
      return NULL;
    }
    int prev = target;
    for (i = i + 1; i < deque_ntasks; ++i) {
      // shift remaining tasks in the deque left by 1
      target = (target + 1) & TASK_DEQUE_MASK(thread_data->td);
      thread_data->td.td_deque[prev] = thread_data->td.td_deque[target];
      prev = target;
    }
    KMP_DEBUG_ASSERT(
        thread_data->td.td_deque_tail ==
        (kmp_uint32)((target + 1) & TASK_DEQUE_MASK(thread_data->td)));
    thread_data->td.td_deque_tail = target; // tail -= 1 (wrapped)
  }
  thread_data->td.td_deque_ntasks = deque_ntasks - 1;
  __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
  task = KMP_TASKDATA_TO_TASK(taskdata);
  return task;
}
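
// Illustrative user code (assumed example, not runtime code) that feeds the
// priority list serviced above:
//
//   #pragma omp task priority(9) // hint: larger value = more urgent
//   work(); // work() is a placeholder
//
// Tasks carrying a priority are pushed to the deques hanging off
// task_team->tt.tt_task_pri_list rather than the per-thread deques.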
// __kmp_remove_my_task: remove a task from my own deque
static kmp_task_t *__kmp_remove_my_task(kmp_info_t *thread, kmp_int32 gtid,
                                        kmp_task_team_t *task_team,
                                        kmp_int32 is_constrained) {
  kmp_task_t *task;
  kmp_taskdata_t *taskdata;
  kmp_thread_data_t *thread_data;
  kmp_uint32 tail;

  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
  KMP_DEBUG_ASSERT(task_team->tt.tt_threads_data !=
                   NULL); // Caller should check this condition

  thread_data = &task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];

  KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
                gtid, thread_data->td.td_deque_ntasks,
                thread_data->td.td_deque_head, thread_data->td.td_deque_tail));

  if (TCR_4(thread_data->td.td_deque_ntasks) == 0) {
    KA_TRACE(10,
             ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: "
              "ntasks=%d head=%u tail=%u\n",
              gtid, thread_data->td.td_deque_ntasks,
              thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
    return NULL;
  }

  __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);

  if (TCR_4(thread_data->td.td_deque_ntasks) == 0) {
    __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
    KA_TRACE(10,
             ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: "
              "ntasks=%d head=%u tail=%u\n",
              gtid, thread_data->td.td_deque_ntasks,
              thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
    return NULL;
  }

  tail = (thread_data->td.td_deque_tail - 1) &
         TASK_DEQUE_MASK(thread_data->td); // Wrap index.
  taskdata = thread_data->td.td_deque[tail];

  if (!__kmp_task_is_allowed(gtid, is_constrained, taskdata,
                             thread->th.th_current_task)) {
    // The TSC does not allow to take the tail task
    __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
    KA_TRACE(10,
             ("__kmp_remove_my_task(exit #3): T#%d TSC blocks tail task: "
              "ntasks=%d head=%u tail=%u\n",
              gtid, thread_data->td.td_deque_ntasks,
              thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
    return NULL;
  }

  thread_data->td.td_deque_tail = tail;
  TCW_4(thread_data->td.td_deque_ntasks, thread_data->td.td_deque_ntasks - 1);

  __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);

  KA_TRACE(10, ("__kmp_remove_my_task(exit #4): T#%d task %p removed: "
                "ntasks=%d head=%u tail=%u\n",
                gtid, taskdata, thread_data->td.td_deque_ntasks,
                thread_data->td.td_deque_head, thread_data->td.td_deque_tail));

  task = KMP_TASKDATA_TO_TASK(taskdata);
  return task;
}
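
// Scheduling note (summary of the code above and below, no new behavior): a
// thread pops from the tail of its own deque (LIFO, __kmp_remove_my_task)
// while thieves take from the victim's head (FIFO, __kmp_steal_task). LIFO
// keeps the owner on its most recently created, cache-warm tasks; FIFO steals
// tend to grab the oldest, and often largest, pieces of work.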
// __kmp_steal_task: remove a task from another thread's deque
// Assume that the calling thread has already checked the existence of the
// task_team thread_data before calling this routine.
static kmp_task_t *__kmp_steal_task(kmp_int32 victim_tid, kmp_int32 gtid,
                                    kmp_task_team_t *task_team,
                                    std::atomic<kmp_int32> *unfinished_threads,
                                    int *thread_finished,
                                    kmp_int32 is_constrained) {
  kmp_task_t *task;
  kmp_taskdata_t *taskdata;
  kmp_taskdata_t *current;
  kmp_thread_data_t *victim_td, *threads_data;
  kmp_int32 target;
  kmp_info_t *victim_thr;

  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);

  threads_data = task_team->tt.tt_threads_data;
  KMP_DEBUG_ASSERT(threads_data != NULL); // Caller should check this condition
  KMP_DEBUG_ASSERT(victim_tid >= 0);
  KMP_DEBUG_ASSERT(victim_tid < task_team->tt.tt_max_threads);

  victim_td = &threads_data[victim_tid];
  victim_thr = victim_td->td.td_thr;

  KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: "
                "task_team=%p ntasks=%d head=%u tail=%u\n",
                gtid, __kmp_gtid_from_thread(victim_thr), task_team,
                victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
                victim_td->td.td_deque_tail));

  if (TCR_4(victim_td->td.td_deque_ntasks) == 0) {
    KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: "
                  "task_team=%p ntasks=%d head=%u tail=%u\n",
                  gtid, __kmp_gtid_from_thread(victim_thr), task_team,
                  victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
                  victim_td->td.td_deque_tail));
    return NULL;
  }

  __kmp_acquire_bootstrap_lock(&victim_td->td.td_deque_lock);

  int ntasks = TCR_4(victim_td->td.td_deque_ntasks);
  // Check again after we acquire the lock
  if (ntasks == 0) {
    __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
    KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: "
                  "task_team=%p ntasks=%d head=%u tail=%u\n",
                  gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,
                  victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
    return NULL;
  }

  KMP_DEBUG_ASSERT(victim_td->td.td_deque != NULL);
  current = __kmp_threads[gtid]->th.th_current_task;
  taskdata = victim_td->td.td_deque[victim_td->td.td_deque_head];
  if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) {
    // Bump head pointer and Wrap.
    victim_td->td.td_deque_head =
        (victim_td->td.td_deque_head + 1) & TASK_DEQUE_MASK(victim_td->td);
  } else {
    if (!task_team->tt.tt_untied_task_encountered) {
      // The TSC does not allow to steal victim task
      __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
      KA_TRACE(10, ("__kmp_steal_task(exit #3): T#%d could not steal from "
                    "T#%d: task_team=%p ntasks=%d head=%u tail=%u\n",
                    gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,
                    victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
      return NULL;
    }
    int i;
    // walk through the victim's deque trying to steal any task
    target = victim_td->td.td_deque_head;
    taskdata = NULL;
    for (i = 1; i < ntasks; ++i) {
      target = (target + 1) & TASK_DEQUE_MASK(victim_td->td);
      taskdata = victim_td->td.td_deque[target];
      if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) {
        break; // found victim task
      } else {
        taskdata = NULL;
      }
    }
    if (taskdata == NULL) {
      // No appropriate candidate to steal found
      __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
      KA_TRACE(10, ("__kmp_steal_task(exit #4): T#%d could not steal from "
                    "T#%d: task_team=%p ntasks=%d head=%u tail=%u\n",
                    gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,
                    victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
      return NULL;
    }
    int prev = target;
    for (i = i + 1; i < ntasks; ++i) {
      // shift remaining tasks in the deque left by 1
      target = (target + 1) & TASK_DEQUE_MASK(victim_td->td);
      victim_td->td.td_deque[prev] = victim_td->td.td_deque[target];
      prev = target;
    }
    KMP_DEBUG_ASSERT(
        victim_td->td.td_deque_tail ==
        (kmp_uint32)((target + 1) & TASK_DEQUE_MASK(victim_td->td)));
    victim_td->td.td_deque_tail = target; // tail -= 1 (wrapped)
  }
  if (*thread_finished) {
    // We need to un-mark this victim as a finished victim. This must be done
    // before releasing the lock, or else other threads (starting with the
    // primary thread victim) might be prematurely released from the barrier!
    kmp_int32 count = KMP_ATOMIC_INC(unfinished_threads);
    KA_TRACE(
        20,
        ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
         gtid, count + 1, task_team));
    *thread_finished = FALSE;
  }
  TCW_4(victim_td->td.td_deque_ntasks, ntasks - 1);

  __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);

  KA_TRACE(10,
           ("__kmp_steal_task(exit #5): T#%d stole task %p from T#%d: "
            "task_team=%p ntasks=%d head=%u tail=%u\n",
            gtid, taskdata, __kmp_gtid_from_thread(victim_thr), task_team,
            ntasks, victim_td->td.td_deque_head, victim_td->td.td_deque_tail));

  task = KMP_TASKDATA_TO_TASK(taskdata);
  return task;
}
// __kmp_execute_tasks_template: execute tasks (own, then stolen) until the
// flag's condition is satisfied or no more work can be found.
template <class C>
static inline int __kmp_execute_tasks_template(
    kmp_info_t *thread, kmp_int32 gtid, C *flag, int final_spin,
    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
    kmp_int32 is_constrained) {
  kmp_task_team_t *task_team = thread->th.th_task_team;
  kmp_thread_data_t *threads_data;
  kmp_task_t *task;
  kmp_info_t *other_thread;
  kmp_taskdata_t *current_task = thread->th.th_current_task;
  std::atomic<kmp_int32> *unfinished_threads;
  kmp_int32 nthreads, victim_tid = -2, use_own_tasks = 1, new_victim = 0,
                      tid = thread->th.th_info.ds.ds_tid;

  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
  KMP_DEBUG_ASSERT(thread == __kmp_threads[gtid]);

  if (task_team == NULL || current_task == NULL)
    return FALSE;

  KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d "
                "*thread_finished=%d\n",
                gtid, final_spin, *thread_finished));

  thread->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
  threads_data = (kmp_thread_data_t *)TCR_PTR(task_team->tt.tt_threads_data);

  KMP_DEBUG_ASSERT(threads_data != NULL);

  nthreads = task_team->tt.tt_nproc;
  unfinished_threads = &(task_team->tt.tt_unfinished_threads);
  KMP_DEBUG_ASSERT(*unfinished_threads >= 0);

  while (1) { // Outer loop keeps trying to find tasks in case of single thread
    // getting tasks from target constructs
    while (1) { // Inner loop to find a task and execute it
      task = NULL;
#if ENABLE_LIBOMPTARGET
      // Give an opportunity to the offload runtime to make progress
      if (UNLIKELY(kmp_target_sync_cb))
        (*kmp_target_sync_cb)(NULL, gtid, KMP_TASKDATA_TO_TASK(current_task),
                              NULL);
#endif // ENABLE_LIBOMPTARGET
      if (task_team->tt.tt_num_task_pri) { // get priority task first
        task = __kmp_get_priority_task(gtid, task_team, is_constrained);
      }
      if (task == NULL && use_own_tasks) { // check own queue next
        task = __kmp_remove_my_task(thread, gtid, task_team, is_constrained);
      }
      if ((task == NULL) && (nthreads > 1)) { // Steal a task finally
        int asleep = 1;
        use_own_tasks = 0;
        // Try to steal from the last place I stole from successfully.
        if (victim_tid == -2) { // haven't stolen anything yet
          victim_tid = threads_data[tid].td.td_deque_last_stolen;
          if (victim_tid != -1) // if we have a last stolen from victim
            other_thread = threads_data[victim_tid].td.td_thr;
        }
        if (victim_tid != -1) { // found last victim
          asleep = 0;
        } else if (!new_victim) { // no recent steals and we haven't already
          // used a new victim; select a random thread
          do { // Find a different thread to steal work from.
            // Pick a random thread. Cycle through threads starting from a
            // random spot rather than scanning from zero.
            victim_tid = __kmp_get_random(thread) % (nthreads - 1);
            if (victim_tid >= tid) {
              ++victim_tid; // Adjusts random distribution to exclude self
            }
            // Found a potential victim
            other_thread = threads_data[victim_tid].td.td_thr;
            // If the victim is sleeping, wake it up: a sleeping thread cannot
            // service its queue, and __kmp_enable_tasking() may not have woken
            // every thread waiting at the barrier.
            asleep = 0;
            if ((__kmp_tasking_mode == tskm_task_teams) &&
                (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
                (TCR_PTR(CCAST(void *, other_thread->th.th_sleep_loc)) !=
                 NULL)) {
              asleep = 1;
              __kmp_null_resume_wrapper(other_thread);
              // The resumed thread might spawn tasks quickly, so don't assert
              // its queue is empty; just try a different victim.
            }
          } while (asleep);
        }

        if (!asleep) {
          // We have a victim to try to steal from
          task =
              __kmp_steal_task(victim_tid, gtid, task_team, unfinished_threads,
                               thread_finished, is_constrained);
        }
        if (task != NULL) { // set last stolen to victim
          if (threads_data[tid].td.td_deque_last_stolen != victim_tid) {
            threads_data[tid].td.td_deque_last_stolen = victim_tid;
            // new_victim tracks that we already tried one successful new
            // victim; don't keep switching unless it generates local tasks
            new_victim = 1;
          }
        } else { // No tasks found; unset last_stolen
          KMP_CHECK_UPDATE(threads_data[tid].td.td_deque_last_stolen, -1);
          victim_tid = -2; // no successful victim found
        }
      }

      if (task == NULL)
        break; // break out of tasking loop

// Found a task; execute it
#if USE_ITT_BUILD && USE_ITT_NOTIFY
      if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
        if (itt_sync_obj == NULL) { // we are at fork barrier where we could
          // not get the object reliably
          itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
        }
        __kmp_itt_task_starting(itt_sync_obj);
      }
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
      __kmp_invoke_task(gtid, task, current_task);
#if USE_ITT_BUILD
      if (itt_sync_obj != NULL)
        __kmp_itt_task_finished(itt_sync_obj);
#endif /* USE_ITT_BUILD */
      // If we are only partway through the barrier and the condition is met,
      // return now so the barrier gather/release pattern can proceed.
      if (flag == NULL || (!final_spin && flag->done_check())) {
        KA_TRACE(
            15,
            ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n",
             gtid));
        return TRUE;
      }
      if (thread->th.th_task_team == NULL) {
        break;
      }
      KMP_YIELD(__kmp_library == library_throughput); // Yield before next task
      // If executing a stolen task placed more tasks on our own run queue,
      // reset use_own_tasks
      if (!use_own_tasks && TCR_4(threads_data[tid].td.td_deque_ntasks) != 0) {
        KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned "
                      "other tasks, restart\n",
                      gtid));
        use_own_tasks = 1;
        new_victim = 0;
      }
    }

    // The task source has been exhausted. If in the final spin loop of the
    // barrier, check if the termination condition is satisfied.
    if (final_spin &&
        KMP_ATOMIC_LD_ACQ(&current_task->td_incomplete_child_tasks) == 0) {
      // First, decrement the #unfinished threads, if that has not already been
      // done. This decrement might be to the spin location, and result in the
      // termination condition being satisfied.
      if (!*thread_finished) {
        kmp_int32 count = -1 + KMP_ATOMIC_DEC(unfinished_threads);
        KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d dec "
                      "unfinished_threads to %d task_team=%p\n",
                      gtid, count, task_team));
        *thread_finished = TRUE;
      }

      // It is now unsafe to reference thread->th.th_team: the primary thread
      // may pass through the barrier and reset th_team for the next region.
      if (flag != NULL && flag->done_check()) {
        KA_TRACE(
            15,
            ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n",
             gtid));
        return TRUE;
      }
    }

    // If this thread's task team is NULL, the primary thread has recognized
    // that there are no more tasks; bail out
    if (thread->th.th_task_team == NULL) {
      KA_TRACE(15,
               ("__kmp_execute_tasks_template: T#%d no more tasks\n", gtid));
      return FALSE;
    }

    // Check the flag again to avoid being trapped in an infinite loop when an
    // if0 task depends on a hidden helper task outside any parallel region.
    if (flag == NULL || (!final_spin && flag->done_check())) {
      KA_TRACE(15,
               ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n",
                gtid));
      return TRUE;
    }

    // We could be getting tasks from target constructs; if this is the only
    // thread, keep trying to execute tasks from own queue
    if (nthreads == 1 &&
        KMP_ATOMIC_LD_ACQ(&current_task->td_incomplete_child_tasks))
      use_own_tasks = 1;
    else {
      KA_TRACE(15,
               ("__kmp_execute_tasks_template: T#%d can't find work\n", gtid));
      return FALSE;
    }
  }
}
template <bool C, bool S>
int __kmp_execute_tasks_32(
    kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32<C, S> *flag, int final_spin,
    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
    kmp_int32 is_constrained) {
  return __kmp_execute_tasks_template(
      thread, gtid, flag, final_spin,
      thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
}

template <bool C, bool S>
int __kmp_execute_tasks_64(
    kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64<C, S> *flag, int final_spin,
    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
    kmp_int32 is_constrained) {
  return __kmp_execute_tasks_template(
      thread, gtid, flag, final_spin,
      thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
}

template <bool C, bool S>
int __kmp_atomic_execute_tasks_64(
    kmp_info_t *thread, kmp_int32 gtid, kmp_atomic_flag_64<C, S> *flag,
    int final_spin, int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
    kmp_int32 is_constrained) {
  return __kmp_execute_tasks_template(
      thread, gtid, flag, final_spin,
      thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
}

int __kmp_execute_tasks_oncore(
    kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
    kmp_int32 is_constrained) {
  return __kmp_execute_tasks_template(
      thread, gtid, flag, final_spin,
      thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
}
template int
__kmp_execute_tasks_32<false, false>(kmp_info_t *, kmp_int32,
                                     kmp_flag_32<false, false> *, int,
                                     int *USE_ITT_BUILD_ARG(void *), kmp_int32);

template int __kmp_execute_tasks_64<false, true>(kmp_info_t *, kmp_int32,
                                                 kmp_flag_64<false, true> *,
                                                 int,
                                                 int *USE_ITT_BUILD_ARG(void *),
                                                 kmp_int32);

template int __kmp_execute_tasks_64<true, false>(kmp_info_t *, kmp_int32,
                                                 kmp_flag_64<true, false> *,
                                                 int,
                                                 int *USE_ITT_BUILD_ARG(void *),
                                                 kmp_int32);

template int __kmp_atomic_execute_tasks_64<false, true>(
    kmp_info_t *, kmp_int32, kmp_atomic_flag_64<false, true> *, int,
    int *USE_ITT_BUILD_ARG(void *), kmp_int32);

template int __kmp_atomic_execute_tasks_64<true, false>(
    kmp_info_t *, kmp_int32, kmp_atomic_flag_64<true, false> *, int,
    int *USE_ITT_BUILD_ARG(void *), kmp_int32);
// __kmp_enable_tasking: Allocate task team and resume threads sleeping at the
// next barrier so they can assist in executing enqueued tasks.
// The first thread in allocates the task team atomically.
static void __kmp_enable_tasking(kmp_task_team_t *task_team,
                                 kmp_info_t *this_thr) {
  kmp_thread_data_t *threads_data;
  int nthreads, i, is_init_thread;

  KA_TRACE(10, ("__kmp_enable_tasking(enter): T#%d\n",
                __kmp_gtid_from_thread(this_thr)));

  KMP_DEBUG_ASSERT(task_team != NULL);
  KMP_DEBUG_ASSERT(this_thr->th.th_team != NULL);

  nthreads = task_team->tt.tt_nproc;
  KMP_DEBUG_ASSERT(nthreads > 0);
  KMP_DEBUG_ASSERT(nthreads == this_thr->th.th_team->t.t_nproc);

  // Allocate or increase the size of threads_data if necessary
  is_init_thread = __kmp_realloc_task_threads_data(this_thr, task_team);

  if (!is_init_thread) {
    // Some other thread already set up the array.
    KA_TRACE(
        20,
        ("__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
         __kmp_gtid_from_thread(this_thr)));
    return;
  }
  threads_data = (kmp_thread_data_t *)TCR_PTR(task_team->tt.tt_threads_data);
  KMP_DEBUG_ASSERT(threads_data != NULL);

  if (__kmp_tasking_mode == tskm_task_teams &&
      (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME)) {
    // Release any threads sleeping at the barrier, so that they can steal
    // tasks and execute them. In extra barrier mode, tasks do not sleep at
    // the separate tasking barrier, so this isn't a problem.
    for (i = 0; i < nthreads; i++) {
      void *sleep_loc;
      kmp_info_t *thread = threads_data[i].td.td_thr;

      if (i == this_thr->th.th_info.ds.ds_tid) {
        continue;
      }
      // Since we haven't locked the thread's suspend mutex lock at this
      // point, there is a small window where a thread might be putting
      // itself to sleep, but hasn't set the th_sleep_loc field yet.
      // To work around this, __kmp_execute_tasks_template() periodically
      // checks whether other threads are sleeping and awakens them if so.
      if ((sleep_loc = TCR_PTR(CCAST(void *, thread->th.th_sleep_loc))) !=
          NULL) {
        KF_TRACE(50, ("__kmp_enable_tasking: T#%d waking up thread T#%d\n",
                      __kmp_gtid_from_thread(this_thr),
                      __kmp_gtid_from_thread(thread)));
        __kmp_null_resume_wrapper(thread);
      } else {
        KF_TRACE(50, ("__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
                      __kmp_gtid_from_thread(this_thr),
                      __kmp_gtid_from_thread(thread)));
      }
    }
  }

  KA_TRACE(10, ("__kmp_enable_tasking(exit): T#%d\n",
                __kmp_gtid_from_thread(this_thr)));
}
// Free list for task_team data structures, kept for fast reuse.
static kmp_task_team_t *__kmp_free_task_teams = NULL;
// Lock for task team data structures
kmp_bootstrap_lock_t __kmp_task_team_lock =
    KMP_BOOTSTRAP_LOCK_INITIALIZER(__kmp_task_team_lock);
// __kmp_alloc_task_deque:
// Allocates a task deque for a particular thread and initializes the
// necessary data structures relating to the deque. This only happens once
// per thread per task team since task teams are recycled. No lock is needed
// during allocation since each thread allocates its own deque.
static void __kmp_alloc_task_deque(kmp_info_t *thread,
                                   kmp_thread_data_t *thread_data) {
  __kmp_init_bootstrap_lock(&thread_data->td.td_deque_lock);
  KMP_DEBUG_ASSERT(thread_data->td.td_deque == NULL);

  // Initialize last stolen task field to "none"
  thread_data->td.td_deque_last_stolen = -1;

  KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) == 0);
  KMP_DEBUG_ASSERT(thread_data->td.td_deque_head == 0);
  KMP_DEBUG_ASSERT(thread_data->td.td_deque_tail == 0);

  KE_TRACE(
      10,
      ("__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
       __kmp_gtid_from_thread(thread), INITIAL_TASK_DEQUE_SIZE, thread_data));
  // Allocate space for the task deque and zero it.
  thread_data->td.td_deque = (kmp_taskdata_t **)__kmp_allocate(
      INITIAL_TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
  thread_data->td.td_deque_size = INITIAL_TASK_DEQUE_SIZE;
}
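
// The deque is a power-of-two ring buffer, so index arithmetic wraps with a
// mask rather than a modulo. A minimal sketch of the invariant (assuming the
// size stays a power of two, which INITIAL_TASK_DEQUE_SIZE and the doubling
// in __kmp_realloc_task_deque guarantee):
//
//   // mask == size - 1, e.g. size 256 -> mask 0xff
//   next_tail = (tail + 1) & mask;   // push position wraps for free
//   last = deque[(tail - 1) & mask]; // pop position wraps the same way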
// __kmp_free_task_deque:
// Deallocates a task deque for a particular thread. Happens at library
// deallocation, so there is no need to reset all thread data fields.
static void __kmp_free_task_deque(kmp_thread_data_t *thread_data) {
  if (thread_data->td.td_deque != NULL) {
    __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
    TCW_4(thread_data->td.td_deque_ntasks, 0);
    __kmp_free(thread_data->td.td_deque);
    thread_data->td.td_deque = NULL;
    __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
  }
}
// __kmp_realloc_task_threads_data:
// Allocates a threads_data array for a task team, either by allocating an
// initial array or enlarging an existing one. Only the first thread to get
// the lock allocates or enlarges the array and re-initializes its elements.
// That thread returns "TRUE", the rest return "FALSE".
// Assumes that the new array size is given by task_team->tt.tt_nproc;
// the current size is given by task_team->tt.tt_max_threads.
static int __kmp_realloc_task_threads_data(kmp_info_t *thread,
                                           kmp_task_team_t *task_team) {
  kmp_thread_data_t **threads_data_p;
  kmp_int32 nthreads, maxthreads;
  int is_init_thread = FALSE;

  if (TCR_4(task_team->tt.tt_found_tasks)) {
    // Already reallocated and initialized.
    return FALSE;
  }

  threads_data_p = &task_team->tt.tt_threads_data;
  nthreads = task_team->tt.tt_nproc;
  maxthreads = task_team->tt.tt_max_threads;

  // All threads must lock when they encounter the first task of the implicit
  // task region to make sure threads_data fields are (re)initialized before
  // being used.
  __kmp_acquire_bootstrap_lock(&task_team->tt.tt_threads_lock);

  if (!TCR_4(task_team->tt.tt_found_tasks)) {
    // first thread to enable tasking
    kmp_team_t *team = thread->th.th_team;
    int i;

    is_init_thread = TRUE;
    if (maxthreads < nthreads) {

      if (*threads_data_p != NULL) {
        kmp_thread_data_t *old_data = *threads_data_p;
        kmp_thread_data_t *new_data = NULL;

        KE_TRACE(
            10,
            ("__kmp_realloc_task_threads_data: T#%d reallocating "
             "threads data for task_team %p, new_size = %d, old_size = %d\n",
             __kmp_gtid_from_thread(thread), task_team, nthreads, maxthreads));
        // Reallocate threads_data to have more elements than current array.
        // Cannot use __kmp_thread_realloc() because threads not around for
        // kmp_reap_task_team(). Note all new array entries are initialized
        // to zero by __kmp_allocate().
        new_data = (kmp_thread_data_t *)__kmp_allocate(
            nthreads * sizeof(kmp_thread_data_t));
        // copy old data to new data
        KMP_MEMCPY_S((void *)new_data, nthreads * sizeof(kmp_thread_data_t),
                     (void *)old_data, maxthreads * sizeof(kmp_thread_data_t));

        // Install the new data and free the old data.
        (*threads_data_p) = new_data;
        __kmp_free(old_data);
      } else {
        KE_TRACE(10, ("__kmp_realloc_task_threads_data: T#%d allocating "
                      "threads data for task_team %p, size = %d\n",
                      __kmp_gtid_from_thread(thread), task_team, nthreads));
        // Make the initial allocation for the threads_data array, zeroing
        // entries. Cannot use __kmp_thread_calloc() because threads not
        // around for kmp_reap_task_team().
        *threads_data_p = (kmp_thread_data_t *)__kmp_allocate(
            nthreads * sizeof(kmp_thread_data_t));
      }
      task_team->tt.tt_max_threads = nthreads;
    } else {
      // If array has (more than) enough elements, go ahead and use it
      KMP_DEBUG_ASSERT(*threads_data_p != NULL);
    }

    // initialize threads_data pointers back to thread_info structures
    for (i = 0; i < nthreads; i++) {
      kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
      thread_data->td.td_thr = team->t.t_threads[i];

      if (thread_data->td.td_deque_last_stolen >= nthreads) {
        // The last stolen field survives across teams/barriers, and the
        // number of threads may have changed; reset it to "none".
        thread_data->td.td_deque_last_stolen = -1;
      }
    }

    KMP_MB();
    TCW_SYNC_4(task_team->tt.tt_found_tasks, TRUE);
  }

  __kmp_release_bootstrap_lock(&task_team->tt.tt_threads_lock);
  return is_init_thread;
}
// __kmp_free_task_threads_data:
// Deallocates a threads_data array for a task team, including any attached
// tasking deques. Only occurs at library shutdown.
static void __kmp_free_task_threads_data(kmp_task_team_t *task_team) {
  __kmp_acquire_bootstrap_lock(&task_team->tt.tt_threads_lock);
  if (task_team->tt.tt_threads_data != NULL) {
    int i;
    for (i = 0; i < task_team->tt.tt_max_threads; i++) {
      __kmp_free_task_deque(&task_team->tt.tt_threads_data[i]);
    }
    __kmp_free(task_team->tt.tt_threads_data);
    task_team->tt.tt_threads_data = NULL;
  }
  __kmp_release_bootstrap_lock(&task_team->tt.tt_threads_lock);
}
// __kmp_free_task_pri_list:
// Deallocates tasking deques used for priority tasks.
// Only occurs at library shutdown.
static void __kmp_free_task_pri_list(kmp_task_team_t *task_team) {
  __kmp_acquire_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
  if (task_team->tt.tt_task_pri_list != NULL) {
    kmp_task_pri_t *list = task_team->tt.tt_task_pri_list;
    while (list != NULL) {
      kmp_task_pri_t *next = list->next;
      __kmp_free_task_deque(&list->td);
      __kmp_free(list);
      list = next;
    }
    task_team->tt.tt_task_pri_list = NULL;
  }
  __kmp_release_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
}
static inline void __kmp_task_team_init(kmp_task_team_t *task_team,
                                        kmp_team_t *team) {
  int team_nth = team->t.t_nproc;
  // Only need to init if task team isn't active or team size changed
  if (!task_team->tt.tt_active || team_nth != task_team->tt.tt_nproc) {
    TCW_4(task_team->tt.tt_found_tasks, FALSE);
    TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
    TCW_4(task_team->tt.tt_hidden_helper_task_encountered, FALSE);
    TCW_4(task_team->tt.tt_nproc, team_nth);
    KMP_ATOMIC_ST_REL(&task_team->tt.tt_unfinished_threads, team_nth);
    TCW_4(task_team->tt.tt_active, TRUE);
  }
}
// __kmp_allocate_task_team:
// Allocates a task team associated with a specific team, taking it from
// the global task team free list if possible. Also initializes data
// structures.
static kmp_task_team_t *__kmp_allocate_task_team(kmp_info_t *thread,
                                                 kmp_team_t *team) {
  kmp_task_team_t *task_team = NULL;

  KA_TRACE(20, ("__kmp_allocate_task_team: T#%d entering; team = %p\n",
                (thread ? __kmp_gtid_from_thread(thread) : -1), team));

  if (TCR_PTR(__kmp_free_task_teams) != NULL) {
    // Take a task team from the task team pool
    __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
    if (__kmp_free_task_teams != NULL) {
      task_team = __kmp_free_task_teams;
      TCW_PTR(__kmp_free_task_teams, task_team->tt.tt_next);
      task_team->tt.tt_next = NULL;
    }
    __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
  }

  if (task_team == NULL) {
    KE_TRACE(10, ("__kmp_allocate_task_team: T#%d allocating "
                  "task team for team %p\n",
                  __kmp_gtid_from_thread(thread), team));
    // Allocate a new task team if one is not available. Cannot use
    // __kmp_thread_malloc because threads not around for kmp_reap_task_team.
    task_team = (kmp_task_team_t *)__kmp_allocate(sizeof(kmp_task_team_t));
    __kmp_init_bootstrap_lock(&task_team->tt.tt_threads_lock);
    __kmp_init_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
#if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
    // suppress race conditions detection on synchronization flags in debug
    // mode
    __itt_suppress_mark_range(
        __itt_suppress_range, __itt_suppress_threading_errors,
        &task_team->tt.tt_found_tasks, sizeof(task_team->tt.tt_found_tasks));
    __itt_suppress_mark_range(__itt_suppress_range,
                              __itt_suppress_threading_errors,
                              CCAST(kmp_uint32 *, &task_team->tt.tt_active),
                              sizeof(task_team->tt.tt_active));
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG */
    // Note: __kmp_allocate zeroes returned memory, otherwise fields such as
    // tt.tt_threads_data would need to be initialized here.
  }

  __kmp_task_team_init(task_team, team);

  KA_TRACE(20, ("__kmp_allocate_task_team: T#%d exiting; task_team = %p "
                "unfinished_threads init'd to %d\n",
                (thread ? __kmp_gtid_from_thread(thread) : -1), task_team,
                KMP_ATOMIC_LD_RLX(&task_team->tt.tt_unfinished_threads)));
  return task_team;
}
// __kmp_free_task_team:
// Frees the task team associated with a specific thread, and adds it
// to the global task team free list.
void __kmp_free_task_team(kmp_info_t *thread, kmp_task_team_t *task_team) {
  KA_TRACE(20, ("__kmp_free_task_team: T#%d task_team = %p\n",
                thread ? __kmp_gtid_from_thread(thread) : -1, task_team));

  // Put task team back on free list
  __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);

  KMP_DEBUG_ASSERT(task_team->tt.tt_next == NULL);
  task_team->tt.tt_next = __kmp_free_task_teams;
  TCW_PTR(__kmp_free_task_teams, task_team);

  __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
}
// __kmp_reap_task_teams:
// Free all the task teams on the task team free list.
// Should only be done during library shutdown.
// Cannot do anything that needs a thread structure or gtid since they are
// already gone.
void __kmp_reap_task_teams(void) {
  kmp_task_team_t *task_team;

  if (TCR_PTR(__kmp_free_task_teams) != NULL) {
    // Free all task_teams on the free list
    __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
    while ((task_team = __kmp_free_task_teams) != NULL) {
      __kmp_free_task_teams = task_team->tt.tt_next;
      task_team->tt.tt_next = NULL;

      // Free threads_data if that exists.
      if (task_team->tt.tt_threads_data != NULL) {
        __kmp_free_task_threads_data(task_team);
      }
      if (task_team->tt.tt_task_pri_list != NULL) {
        __kmp_free_task_pri_list(task_team);
      }
      __kmp_free(task_team);
    }
    __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
  }
}
// View the array of two task team pointers as a pair of pointers:
//  1) a single task_team pointer
//  2) next pointer for stack
// Serial teams can create a stack of task teams for nested serial teams.
void __kmp_push_task_team_node(kmp_info_t *thread, kmp_team_t *team) {
  KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
  kmp_task_team_list_t *current =
      (kmp_task_team_list_t *)(&team->t.t_task_team[0]);
  kmp_task_team_list_t *node =
      (kmp_task_team_list_t *)__kmp_allocate(sizeof(kmp_task_team_list_t));
  node->task_team = current->task_team;
  node->next = current->next;
  thread->th.th_task_team = current->task_team = NULL;
  current->next = node;
}

// Serial team pops a task team off the stack
void __kmp_pop_task_team_node(kmp_info_t *thread, kmp_team_t *team) {
  KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
  kmp_task_team_list_t *current =
      (kmp_task_team_list_t *)(&team->t.t_task_team[0]);
  if (current->task_team) {
    __kmp_free_task_team(thread, current->task_team);
  }
  kmp_task_team_list_t *next = current->next;
  if (next) {
    current->task_team = next->task_team;
    current->next = next->next;
    KMP_DEBUG_ASSERT(next != current);
    __kmp_free(next);
  }
  thread->th.th_task_team = current->task_team;
}
// __kmp_wait_to_unref_task_teams:
// Some threads could still be in the fork barrier release code, possibly
// trying to steal tasks. Wait for each thread to unreference its task team.
void __kmp_wait_to_unref_task_teams(void) {
  kmp_info_t *thread;
  kmp_uint32 spins;
  kmp_uint64 time;
  int done;

  KMP_INIT_YIELD(spins);
  KMP_INIT_BACKOFF(time);

  for (;;) {
    done = TRUE;

    for (thread = CCAST(kmp_info_t *, __kmp_thread_pool); thread != NULL;
         thread = thread->th.th_next_pool) {
      int exit_val;
      (void)exit_val;
      if (TCR_PTR(thread->th.th_task_team) == NULL) {
        KA_TRACE(10, ("__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
                      __kmp_gtid_from_thread(thread)));
        continue;
      }
      if (!__kmp_is_thread_alive(thread, &exit_val)) {
        thread->th.th_task_team = NULL;
        continue;
      }

      done = FALSE; // Because th_task_team pointer is not NULL for this thread

      KA_TRACE(10, ("__kmp_wait_to_unref_task_team: Waiting for T#%d to "
                    "unreference task_team\n",
                    __kmp_gtid_from_thread(thread)));

      if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
        void *sleep_loc;
        // If the thread is sleeping, awaken it.
        if ((sleep_loc = TCR_PTR(CCAST(void *, thread->th.th_sleep_loc))) !=
            NULL) {
          KA_TRACE(
              10,
              ("__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
               __kmp_gtid_from_thread(thread), __kmp_gtid_from_thread(thread)));
          __kmp_null_resume_wrapper(thread);
        }
      }
    }
    if (done) {
      break;
    }

    // If oversubscribed or have waited a bit, yield.
    KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);
  }
}
// __kmp_task_team_setup: Create a task_team for the current team, but use
// an already created, unused one if it already exists.
void __kmp_task_team_setup(kmp_info_t *this_thr, kmp_team_t *team) {
  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);

  // For the serial and root teams, set up the first task team pointer to
  // point to the task team. The other pointer is a stack of task teams from
  // previous serial levels.
  if (team == this_thr->th.th_serial_team ||
      team == this_thr->th.th_root->r.r_root_team) {
    KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
    if (team->t.t_task_team[0] == NULL) {
      team->t.t_task_team[0] = __kmp_allocate_task_team(this_thr, team);
      KA_TRACE(
          20, ("__kmp_task_team_setup: Primary T#%d created new task_team %p"
               " for serial/root team %p\n",
               __kmp_gtid_from_thread(this_thr), team->t.t_task_team[0], team));
    } else
      __kmp_task_team_init(team->t.t_task_team[0], team);
    return;
  }

  // If this task_team hasn't been created yet, allocate it. It will be used
  // in the region after the next. If it exists, it is the current task team
  // and shouldn't be touched yet as it may still be in use.
  if (team->t.t_task_team[this_thr->th.th_task_state] == NULL) {
    team->t.t_task_team[this_thr->th.th_task_state] =
        __kmp_allocate_task_team(this_thr, team);
    KA_TRACE(20, ("__kmp_task_team_setup: Primary T#%d created new task_team %p"
                  " for team %d at parity=%d\n",
                  __kmp_gtid_from_thread(this_thr),
                  team->t.t_task_team[this_thr->th.th_task_state], team->t.t_id,
                  this_thr->th.th_task_state));
  }

  // After threads exit the release, they will call sync, and then point to
  // this other task_team; make sure it is allocated and properly initialized.
  // As threads spin in the barrier release phase, they will continue to use
  // the previous task_team struct (above) until they receive the signal to
  // stop checking for tasks (they can't safely reference the kmp_team_t
  // struct, which could be reallocated by the primary thread).
  int other_team = 1 - this_thr->th.th_task_state;
  KMP_DEBUG_ASSERT(other_team >= 0 && other_team < 2);
  if (team->t.t_task_team[other_team] == NULL) { // setup other team as well
    team->t.t_task_team[other_team] = __kmp_allocate_task_team(this_thr, team);
    KA_TRACE(20, ("__kmp_task_team_setup: Primary T#%d created second new "
                  "task_team %p for team %d at parity=%d\n",
                  __kmp_gtid_from_thread(this_thr),
                  team->t.t_task_team[other_team], team->t.t_id, other_team));
  } else { // Leave the old task team struct in place for the upcoming region
    kmp_task_team_t *task_team = team->t.t_task_team[other_team];
    __kmp_task_team_init(task_team, team);
    // if team size has changed, the first thread to enable tasking will
    // realloc threads_data in __kmp_enable_tasking()
    KA_TRACE(20, ("__kmp_task_team_setup: Primary T#%d reset next task_team "
                  "%p for team %d at parity=%d\n",
                  __kmp_gtid_from_thread(this_thr),
                  team->t.t_task_team[other_team], team->t.t_id, other_team));
  }

  // For regular threads, task enabling is called when the task is going to
  // be pushed to a dequeue. For the hidden helper thread, we need it ahead
  // of time so that some operations can be performed without race conditions.
  if (this_thr == __kmp_hidden_helper_main_thread) {
    for (int i = 0; i < 2; ++i) {
      kmp_task_team_t *task_team = team->t.t_task_team[i];
      if (KMP_TASKING_ENABLED(task_team)) {
        continue;
      }
      __kmp_enable_tasking(task_team, this_thr);
      for (int j = 0; j < task_team->tt.tt_nproc; ++j) {
        kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[j];
        if (thread_data->td.td_deque == NULL) {
          __kmp_alloc_task_deque(__kmp_hidden_helper_threads[j], thread_data);
        }
      }
    }
  }
}
// __kmp_task_team_sync: Propagation of task team data from team to threads,
// which happens just after the release phase of a team barrier. This may be
// called by any thread. This is not called for serial or root teams.
void __kmp_task_team_sync(kmp_info_t *this_thr, kmp_team_t *team) {
  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
  KMP_DEBUG_ASSERT(team != this_thr->th.th_serial_team);
  KMP_DEBUG_ASSERT(team != this_thr->th.th_root->r.r_root_team);

  // Toggle the th_task_state field, to switch which task_team this thread
  // refers to
  this_thr->th.th_task_state = (kmp_uint8)(1 - this_thr->th.th_task_state);

  // It is now safe to propagate the task team pointer from the team struct to
  // the current thread.
  TCW_PTR(this_thr->th.th_task_team,
          team->t.t_task_team[this_thr->th.th_task_state]);
  KA_TRACE(20,
           ("__kmp_task_team_sync: Thread T#%d task team switched to task_team "
            "%p from Team #%d (parity=%d)\n",
            __kmp_gtid_from_thread(this_thr), this_thr->th.th_task_team,
            team->t.t_id, this_thr->th.th_task_state));
}
// __kmp_task_team_wait: Primary thread waits for outstanding tasks after the
// barrier gather phase. Only called by the primary thread.
//
// wait is a flag that defaults to 1 (see kmp.h); waiting can be turned off by
// passing in 0 as the last argument, in which case the primary thread does
// not wait for unfinished_threads to reach 0.
void __kmp_task_team_wait(
    kmp_info_t *this_thr,
    kmp_team_t *team USE_ITT_BUILD_ARG(void *itt_sync_obj), int wait) {
  kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];

  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
  KMP_DEBUG_ASSERT(task_team == this_thr->th.th_task_team);

  if ((task_team != NULL) && KMP_TASKING_ENABLED(task_team)) {
    if (wait) {
      KA_TRACE(20, ("__kmp_task_team_wait: Primary T#%d waiting for all tasks "
                    "(for unfinished_threads to reach 0) on task_team = %p\n",
                    __kmp_gtid_from_thread(this_thr), task_team));
      // Worker threads may have dropped through to the release phase, but
      // could still be executing tasks. Wait here for tasks to complete. To
      // avoid memory contention, only the primary thread checks the
      // termination condition.
      kmp_flag_32<false, false> flag(
          RCAST(std::atomic<kmp_uint32> *,
                &task_team->tt.tt_unfinished_threads),
          0U);
      flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    // Deactivate the old task team, so that the worker threads will stop
    // referencing it while spinning.
    KA_TRACE(
        20,
        ("__kmp_task_team_wait: Primary T#%d deactivating task_team %p: "
         "setting active to false, setting local and team's pointer to NULL\n",
         __kmp_gtid_from_thread(this_thr), task_team));
    TCW_SYNC_4(task_team->tt.tt_found_proxy_tasks, FALSE);
    TCW_SYNC_4(task_team->tt.tt_hidden_helper_task_encountered, FALSE);
    KMP_CHECK_UPDATE(task_team->tt.tt_untied_task_encountered, 0);
    TCW_SYNC_4(task_team->tt.tt_active, FALSE);
    KMP_MB();

    TCW_PTR(this_thr->th.th_task_team, NULL);
  }
}
// __kmp_tasking_barrier:
// This routine is called only when __kmp_tasking_mode == tskm_extra_barrier.
// Internal function to execute all tasks prior to a regular barrier or a join
// barrier. It is equivalent to the code of the barrier, with the exception
// that tasks never sleep here.
void __kmp_tasking_barrier(kmp_team_t *team, kmp_info_t *thread, int gtid) {
  std::atomic<kmp_uint32> *spin = RCAST(
      std::atomic<kmp_uint32> *,
      &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads);
  int flag = FALSE;
  KMP_DEBUG_ASSERT(__kmp_tasking_mode == tskm_extra_barrier);

#if USE_ITT_BUILD
  KMP_FSYNC_SPIN_INIT(spin, NULL);
#endif /* USE_ITT_BUILD */
  kmp_flag_32<false, false> spin_flag(spin, 0U);
  while (!spin_flag.execute_tasks(thread, gtid, TRUE,
                                  &flag USE_ITT_BUILD_ARG(NULL), 0)) {
#if USE_ITT_BUILD
    KMP_FSYNC_SPIN_PREPARE(RCAST(void *, spin));
#endif /* USE_ITT_BUILD */

    if (TCR_4(__kmp_global.g.g_done)) {
      if (__kmp_global.g.g_abort)
        __kmp_abort_thread();
      break;
    }
    KMP_YIELD(TRUE);
  }
#if USE_ITT_BUILD
  KMP_FSYNC_SPIN_ACQUIRED(RCAST(void *, spin));
#endif /* USE_ITT_BUILD */
}
// __kmp_give_task puts a task into a given thread's queue if:
//  - the queue for that thread was created
//  - there's space in that queue
// Because of this, __kmp_push_task needs to check if there's space after
// getting the lock
static bool __kmp_give_task(kmp_info_t *thread, kmp_int32 tid, kmp_task_t *task,
                            kmp_int32 pass) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_task_team_t *task_team = taskdata->td_task_team;

  KA_TRACE(20, ("__kmp_give_task: trying to give task %p to thread %d.\n",
                taskdata, tid));

  // If task_team is NULL something went really bad...
  KMP_DEBUG_ASSERT(task_team != NULL);

  bool result = false;
  kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[tid];

  if (thread_data->td.td_deque == NULL) {
    // There's no queue in this thread; go find another one. We're guaranteed
    // that at least one thread has a queue.
    KA_TRACE(30,
             ("__kmp_give_task: thread %d has no queue while giving task %p.\n",
              tid, taskdata));
    return result;
  }

  if (TCR_4(thread_data->td.td_deque_ntasks) >=
      TASK_DEQUE_SIZE(thread_data->td)) {
    KA_TRACE(
        30,
        ("__kmp_give_task: queue is full while giving task %p to thread %d.\n",
         taskdata, tid));

    // if this deque is bigger than the pass ratio give a chance to another
    // thread
    if (TASK_DEQUE_SIZE(thread_data->td) / INITIAL_TASK_DEQUE_SIZE >= pass)
      return result;

    __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
    if (TCR_4(thread_data->td.td_deque_ntasks) >=
        TASK_DEQUE_SIZE(thread_data->td)) {
      // expand deque to push the task which is not allowed to execute
      __kmp_realloc_task_deque(thread, thread_data);
    }

  } else {

    __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);

    if (TCR_4(thread_data->td.td_deque_ntasks) >=
        TASK_DEQUE_SIZE(thread_data->td)) {
      KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to "
                    "thread %d.\n",
                    taskdata, tid));

      // if this deque is bigger than the pass ratio give a chance to another
      // thread
      if (TASK_DEQUE_SIZE(thread_data->td) / INITIAL_TASK_DEQUE_SIZE >= pass)
        goto release_and_exit;

      __kmp_realloc_task_deque(thread, thread_data);
    }
  }

  // lock is held here, and there is space in the deque

  thread_data->td.td_deque[thread_data->td.td_deque_tail] = taskdata;
  // Wrap index.
  thread_data->td.td_deque_tail =
      (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
  TCW_4(thread_data->td.td_deque_ntasks,
        TCR_4(thread_data->td.td_deque_ntasks) + 1);

  result = true;
  KA_TRACE(30, ("__kmp_give_task: successfully gave task %p to thread %d.\n",
                taskdata, tid));

release_and_exit:
  __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);

  return result;
}
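
// Worked example of the "pass ratio" check above (descriptive, assuming
// INITIAL_TASK_DEQUE_SIZE is 256): a deque that has already doubled twice has
// TASK_DEQUE_SIZE 1024, so size/initial == 4 and a full deque is skipped on
// passes 1..4, giving other threads a chance first; only once every thread's
// deque has grown past the current pass is this deque doubled again. This
// spreads proxy tasks around before growing memory.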
#define PROXY_TASK_FLAG 0x40000000
/* The finish of a proxy task is divided in two pieces:
    - the top half can be run from a thread outside the team
    - the bottom half must be run from a thread within the team.
   The bottom half is queued back into the team, and it must be queued before
   the parent's td_incomplete_child_tasks counter is decremented (which lets
   threads leave the barriers). Since the bottom half could then free the task
   before the second top half runs, the PROXY_TASK_FLAG bit in the proxy
   task's own td_incomplete_child_tasks synchronizes the two halves. */
static void __kmp_first_top_half_finish_proxy(kmp_taskdata_t *taskdata) {
  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
  KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);

  taskdata->td_flags.complete = 1; // mark the task as completed
#if OMPX_TASKGRAPH
  taskdata->td_flags.onced = 1;
#endif

  if (taskdata->td_taskgroup)
    KMP_ATOMIC_DEC(&taskdata->td_taskgroup->count);

  // Create an imaginary child for this task so the bottom half cannot
  // release the task before we complete the second top half
  KMP_ATOMIC_OR(&taskdata->td_incomplete_child_tasks, PROXY_TASK_FLAG);
}

static void __kmp_second_top_half_finish_proxy(kmp_taskdata_t *taskdata) {
#if KMP_DEBUG
  kmp_int32 children = 0;
  // Predecrement simulated by "- 1" calculation
  children = -1 +
#endif
      KMP_ATOMIC_DEC(&taskdata->td_parent->td_incomplete_child_tasks);
  KMP_DEBUG_ASSERT(children >= 0);

  // Remove the imaginary child
  KMP_ATOMIC_AND(&taskdata->td_incomplete_child_tasks, ~PROXY_TASK_FLAG);
}
static void __kmp_bottom_half_finish_proxy(kmp_int32 gtid, kmp_task_t *ptask) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
  kmp_info_t *thread = __kmp_threads[gtid];

  KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
  KMP_DEBUG_ASSERT(taskdata->td_flags.complete ==
                   1); // top half must have run before bottom half

  // Wait until PROXY_TASK_FLAG is cleared by the second top half, so we know
  // both top halves are finished.
  while ((KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks) &
          PROXY_TASK_FLAG) > 0)
    KMP_YIELD(TRUE);

  __kmp_release_deps(gtid, taskdata);
  __kmp_free_task_and_ancestors(gtid, taskdata, thread);
}
// Execute the completion of a proxy task from a thread that is part of the
// team. Runs both top halves and the bottom half directly.
void __kmpc_proxy_task_completed(kmp_int32 gtid, kmp_task_t *ptask) {
  KMP_DEBUG_ASSERT(ptask != NULL);
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
  KA_TRACE(
      10, ("__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n",
           gtid, taskdata));
  __kmp_assert_valid_gtid(gtid);
  KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);

  __kmp_first_top_half_finish_proxy(taskdata);
  __kmp_second_top_half_finish_proxy(taskdata);
  __kmp_bottom_half_finish_proxy(gtid, ptask);

  KA_TRACE(10,
           ("__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n",
            gtid, taskdata));
}
void __kmpc_give_task(kmp_task_t *ptask, kmp_int32 start = 0) {
  KMP_DEBUG_ASSERT(ptask != NULL);
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);

  // Enqueue task to complete bottom half completion from a thread within the
  // corresponding team
  kmp_team_t *team = taskdata->td_team;
  kmp_int32 nthreads = team->t.t_nproc;
  kmp_info_t *thread;

  // This should be similar to start_k = __kmp_get_random(thread) % nthreads,
  // but we cannot use __kmp_get_random here
  kmp_int32 start_k = start % nthreads;
  kmp_int32 pass = 1;
  kmp_int32 k = start_k;

  do {
    // For now we're just linearly trying to find a thread
    thread = team->t.t_threads[k];
    k = (k + 1) % nthreads;

    // we did a full pass over all the threads
    if (k == start_k)
      pass = pass << 1;

  } while (!__kmp_give_task(thread, k, ptask, pass));

  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME && __kmp_wpolicy_passive) {
    // awake at least one thread to execute the given task
    for (int i = 0; i < nthreads; ++i) {
      thread = team->t.t_threads[i];
      if (thread->th.th_sleep_loc != NULL) {
        __kmp_null_resume_wrapper(thread);
        break;
      }
    }
  }
}
// Execute the completion of a proxy task from a thread that could not belong
// to the team.
void __kmpc_proxy_task_completed_ooo(kmp_task_t *ptask) {
  KMP_DEBUG_ASSERT(ptask != NULL);
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);

  KA_TRACE(
      10,
      ("__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n",
       taskdata));

  KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);

  __kmp_first_top_half_finish_proxy(taskdata);

  // Enqueue task to complete bottom half completion from a thread within the
  // corresponding team
  __kmpc_give_task(ptask);

  __kmp_second_top_half_finish_proxy(taskdata);

  KA_TRACE(
      10,
      ("__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n",
       taskdata));
}
kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref, int gtid,
                                                kmp_task_t *task) {
  kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(task);
  if (td->td_allow_completion_event.type == KMP_EVENT_UNINITIALIZED) {
    td->td_allow_completion_event.type = KMP_EVENT_ALLOW_COMPLETION;
    td->td_allow_completion_event.ed.task = task;
    __kmp_init_tas_lock(&td->td_allow_completion_event.lock);
  }
  return &td->td_allow_completion_event;
}
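
// Illustrative user code (assumed example, not runtime code) for the event
// machinery above and __kmp_fulfill_event below: the detach clause hands the
// task's completion event to the program, which fulfills it later, possibly
// from a foreign thread, turning the task into a proxy completed via the
// routines above.
//
//   omp_event_handle_t ev;
//   #pragma omp task detach(ev)
//   start_async_io(&ev); // placeholder asynchronous operation
//   ...
//   omp_fulfill_event(ev); // marks the detached task complete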
void __kmp_fulfill_event(kmp_event_t *event) {
  if (event->type == KMP_EVENT_ALLOW_COMPLETION) {
    kmp_task_t *ptask = event->ed.task;
    kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
    bool detached = false;
    int gtid = __kmp_get_gtid();

    // The associated task might have completed or could be completing at this
    // point; take the lock to avoid races.
    __kmp_acquire_tas_lock(&event->lock, gtid);
    if (taskdata->td_flags.proxy == TASK_PROXY) {
      detached = true;
    } else {
#if OMPT_SUPPORT
      // The OMPT event must occur under mutual exclusion,
      // otherwise the tool might access ptask after free
      if (UNLIKELY(ompt_enabled.enabled))
        __ompt_task_finish(ptask, NULL, ompt_task_early_fulfill);
#endif
    }
    event->type = KMP_EVENT_UNINITIALIZED;
    __kmp_release_tas_lock(&event->lock, gtid);

    if (detached) {
#if OMPT_SUPPORT
      // We free ptask afterwards and know the task is finished,
      // so locking is not necessary
      if (UNLIKELY(ompt_enabled.enabled))
        __ompt_task_finish(ptask, NULL, ompt_task_late_fulfill);
#endif
      // If the task detached, complete the proxy task
      if (gtid >= 0) {
        kmp_team_t *team = taskdata->td_team;
        kmp_info_t *thread = __kmp_get_thread();
        if (thread->th.th_team == team) {
          __kmpc_proxy_task_completed(gtid, ptask);
          return;
        }
      }

      // fallback
      __kmpc_proxy_task_completed_ooo(ptask);
    }
  }
}
// __kmp_task_dup_alloc: duplicate a task (for taskloop); the copy shares the
// parent, taskgroup, and shareds layout of the source task.
kmp_task_t *__kmp_task_dup_alloc(kmp_info_t *thread, kmp_task_t *task_src
#if OMPX_TASKGRAPH
                                 ,
                                 int taskloop_recur
#endif
) {
  kmp_task_t *task;
  kmp_taskdata_t *taskdata;
  kmp_taskdata_t *taskdata_src = KMP_TASK_TO_TASKDATA(task_src);
  kmp_taskdata_t *parent_task = taskdata_src->td_parent; // same parent task
  size_t shareds_offset;
  size_t task_size;

  KA_TRACE(10, ("__kmp_task_dup_alloc(enter): Th %p, source task %p\n", thread,
                task_src));
  KMP_DEBUG_ASSERT(taskdata_src->td_flags.proxy ==
                   TASK_FULL); // it should not be a proxy task
  KMP_DEBUG_ASSERT(taskdata_src->td_flags.tasktype == TASK_EXPLICIT);
  task_size = taskdata_src->td_size_alloc;

  // Allocate a task from the thread's free list, or from the heap.
  KA_TRACE(30, ("__kmp_task_dup_alloc: Th %p, malloc size %ld\n", thread,
                task_size));
#if USE_FAST_MEMORY
  taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(thread, task_size);
#else
  taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(thread, task_size);
#endif /* USE_FAST_MEMORY */
  KMP_MEMCPY(taskdata, taskdata_src, task_size);

  task = KMP_TASKDATA_TO_TASK(taskdata);

  // Initialize new task (only specific fields not affected by memcpy)
#if OMPX_TASKGRAPH
  if (taskdata->is_taskgraph && !taskloop_recur &&
      __kmp_tdg_is_recording(taskdata_src->tdg->tdg_status))
    taskdata->td_tdg_task_id = KMP_ATOMIC_INC(&__kmp_tdg_task_id);
#endif
  taskdata->td_task_id = KMP_GEN_TASK_ID();
  if (task->shareds != NULL) { // need to set up the shareds pointer
    shareds_offset = (char *)task_src->shareds - (char *)taskdata_src;
    task->shareds = &((char *)taskdata)[shareds_offset];
    KMP_DEBUG_ASSERT((((kmp_uintptr_t)task->shareds) & (sizeof(void *) - 1)) ==
                     0);
  }
  taskdata->td_alloc_thread = thread;
  taskdata->td_parent = parent_task;
  // task inherits the taskgroup from the parent task
  taskdata->td_taskgroup = parent_task->td_taskgroup;
  // tied task needs to initialize td_last_tied at creation,
  // untied one does this when it is scheduled for execution
  if (taskdata->td_flags.tiedness == TASK_TIED)
    taskdata->td_last_tied = taskdata;

  // Only need to keep track of child task counts if team parallel and tasking
  // not serialized
  if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser)) {
    KMP_ATOMIC_INC(&parent_task->td_incomplete_child_tasks);
    if (parent_task->td_taskgroup)
      KMP_ATOMIC_INC(&parent_task->td_taskgroup->count);
    // Only need to keep track of allocated child tasks for explicit tasks
    // since implicit ones are not deallocated
    if (taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT)
      KMP_ATOMIC_INC(&taskdata->td_parent->td_allocated_child_tasks);
  }

  KA_TRACE(20,
           ("__kmp_task_dup_alloc(exit): Th %p, created task %p, parent=%p\n",
            thread, taskdata, taskdata->td_parent));
#if OMPT_SUPPORT
  if (UNLIKELY(ompt_enabled.enabled))
    __ompt_task_init(taskdata, thread->th.th_info.ds.ds_gtid);
#endif
  return task;
}
// these routines are called from __kmp_taskloop by the compiler
typedef void (*p_task_dup_t)(kmp_task_t *, kmp_task_t *, kmp_int32);

KMP_BUILD_ASSERT(sizeof(long) == 4 || sizeof(long) == 8);
// class to encapsulate manipulating loop bounds in a taskloop task.
// this abstracts away the Intel vs GOMP taskloop interface for setting/getting
// loop bounds.
class kmp_taskloop_bounds_t {
  kmp_task_t *task;
  const kmp_taskdata_t *taskdata;
  size_t lower_offset;
  size_t upper_offset;

public:
  kmp_taskloop_bounds_t(kmp_task_t *_task, kmp_uint64 *lb, kmp_uint64 *ub)
      : task(_task), taskdata(KMP_TASK_TO_TASKDATA(task)),
        lower_offset((char *)lb - (char *)task),
        upper_offset((char *)ub - (char *)task) {
    KMP_DEBUG_ASSERT((char *)lb > (char *)_task);
    KMP_DEBUG_ASSERT((char *)ub > (char *)_task);
  }
  kmp_taskloop_bounds_t(kmp_task_t *_task, const kmp_taskloop_bounds_t &bounds)
      : task(_task), taskdata(KMP_TASK_TO_TASKDATA(_task)),
        lower_offset(bounds.lower_offset), upper_offset(bounds.upper_offset) {}
  size_t get_lower_offset() const { return lower_offset; }
  size_t get_upper_offset() const { return upper_offset; }
  kmp_uint64 get_lb() const {
    kmp_int64 retval;
#if defined(KMP_GOMP_COMPAT)
    // Intel task just returns the lower bound normally
    if (!taskdata->td_flags.native) {
      retval = *(kmp_int64 *)((char *)task + lower_offset);
    } else {
      // GOMP task has to take into account the sizeof(long)
      if (taskdata->td_size_loop_bounds == 4) {
        kmp_int32 *lb = RCAST(kmp_int32 *, task->shareds);
        retval = (kmp_int64)*lb;
      } else {
        kmp_int64 *lb = RCAST(kmp_int64 *, task->shareds);
        retval = (kmp_int64)*lb;
      }
    }
#else
    retval = *(kmp_int64 *)((char *)task + lower_offset);
#endif // defined(KMP_GOMP_COMPAT)
    return retval;
  }
  kmp_uint64 get_ub() const {
    kmp_int64 retval;
#if defined(KMP_GOMP_COMPAT)
    // Intel task just returns the upper bound normally
    if (!taskdata->td_flags.native) {
      retval = *(kmp_int64 *)((char *)task + upper_offset);
    } else {
      // GOMP task has to take into account the sizeof(long)
      if (taskdata->td_size_loop_bounds == 4) {
        kmp_int32 *ub = RCAST(kmp_int32 *, task->shareds) + 1;
        retval = (kmp_int64)*ub;
      } else {
        kmp_int64 *ub = RCAST(kmp_int64 *, task->shareds) + 1;
        retval = (kmp_int64)*ub;
      }
    }
#else
    retval = *(kmp_int64 *)((char *)task + upper_offset);
#endif // defined(KMP_GOMP_COMPAT)
    return retval;
  }
  void set_lb(kmp_uint64 lb) {
#if defined(KMP_GOMP_COMPAT)
    // Intel task just sets the lower bound normally
    if (!taskdata->td_flags.native) {
      *(kmp_uint64 *)((char *)task + lower_offset) = lb;
    } else {
      // GOMP task has to take into account the sizeof(long)
      if (taskdata->td_size_loop_bounds == 4) {
        kmp_uint32 *lower = RCAST(kmp_uint32 *, task->shareds);
        *lower = (kmp_uint32)lb;
      } else {
        kmp_uint64 *lower = RCAST(kmp_uint64 *, task->shareds);
        *lower = (kmp_uint64)lb;
      }
    }
#else
    *(kmp_uint64 *)((char *)task + lower_offset) = lb;
#endif // defined(KMP_GOMP_COMPAT)
  }
  void set_ub(kmp_uint64 ub) {
#if defined(KMP_GOMP_COMPAT)
    // Intel task just sets the upper bound normally
    if (!taskdata->td_flags.native) {
      *(kmp_uint64 *)((char *)task + upper_offset) = ub;
    } else {
      // GOMP task has to take into account the sizeof(long)
      if (taskdata->td_size_loop_bounds == 4) {
        kmp_uint32 *upper = RCAST(kmp_uint32 *, task->shareds) + 1;
        *upper = (kmp_uint32)ub;
      } else {
        kmp_uint64 *upper = RCAST(kmp_uint64 *, task->shareds) + 1;
        *upper = (kmp_uint64)ub;
      }
    }
#else
    *(kmp_uint64 *)((char *)task + upper_offset) = ub;
#endif // defined(KMP_GOMP_COMPAT)
  }
};
// __kmp_taskloop_linear: Start tasks of the taskloop linearly
//
// loc        Source location information
// gtid       Global thread ID
// task       Pattern task, exposes the loop iteration range
// lb         Pointer to loop lower bound in task structure
// ub         Pointer to loop upper bound in task structure
// st         Loop stride
// ub_glob    Global upper bound (used for lastprivate check)
// num_tasks  Number of tasks to execute
// grainsize  Number of loop iterations per task
// extras     Number of chunks with grainsize+1 iterations
// last_chunk Reduction of grainsize for last task
// tc         Iterations count
// task_dup   Tasks duplication routine
// codeptr_ra Return address for OMPT events
void __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
                           kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
                           kmp_uint64 ub_glob, kmp_uint64 num_tasks,
                           kmp_uint64 grainsize, kmp_uint64 extras,
                           kmp_int64 last_chunk, kmp_uint64 tc,
#if OMPT_SUPPORT
                           void *codeptr_ra,
#endif
                           void *task_dup) {
  KMP_TIME_PARTITIONED_BLOCK(OMP_taskloop_scheduling);
  p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
  // compiler provides global bounds here
  kmp_taskloop_bounds_t task_bounds(task, lb, ub);
  kmp_uint64 lower = task_bounds.get_lb();
  kmp_uint64 upper = task_bounds.get_ub();
  kmp_uint64 i;
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *current_task = thread->th.th_current_task;
  kmp_task_t *next_task;
  kmp_int32 lastpriv = 0;

  KMP_DEBUG_ASSERT(tc == num_tasks * grainsize +
                             (last_chunk < 0 ? last_chunk : extras));
  KMP_DEBUG_ASSERT(num_tasks > extras);
  KMP_DEBUG_ASSERT(num_tasks > 0);
  KA_TRACE(20, ("__kmp_taskloop_linear: T#%d: %lld tasks, grainsize %lld, "
                "extras %lld, last_chunk %lld, i=%lld,%lld(%d)%lld, dup %p\n",
                gtid, num_tasks, grainsize, extras, last_chunk, lower, upper,
                ub_glob, st, task_dup));

  // Launch num_tasks tasks, assign grainsize iterations each task
  for (i = 0; i < num_tasks; ++i) {
    kmp_uint64 chunk_minus_1;
    if (extras == 0) {
      chunk_minus_1 = grainsize - 1;
    } else {
      chunk_minus_1 = grainsize;
      --extras; // first extras iterations get bigger chunk (grainsize+1)
    }
    upper = lower + st * chunk_minus_1;
    if (i == num_tasks - 1) {
      // schedule the last task, set lastprivate flag if needed
      if (st == 1) { // most common case
        KMP_DEBUG_ASSERT(upper == *ub);
        if (upper == ub_glob)
          lastpriv = 1;
      } else if (st > 0) { // positive loop stride
        KMP_DEBUG_ASSERT((kmp_uint64)st > *ub - upper);
        if ((kmp_uint64)st > ub_glob - upper)
          lastpriv = 1;
      } else { // negative loop stride
        KMP_DEBUG_ASSERT(upper + st < *ub);
        if (upper - ub_glob < (kmp_uint64)(-st))
          lastpriv = 1;
      }
    }

#if OMPX_TASKGRAPH
    next_task = __kmp_task_dup_alloc(thread, task, /* taskloop_recur */ 0);
#else
    next_task = __kmp_task_dup_alloc(thread, task); // allocate new task
#endif

    kmp_taskdata_t *next_taskdata = KMP_TASK_TO_TASKDATA(next_task);
    kmp_taskloop_bounds_t next_task_bounds =
        kmp_taskloop_bounds_t(next_task, task_bounds);

    // adjust task-specific bounds
    next_task_bounds.set_lb(lower);
    if (next_taskdata->td_flags.native) {
      // nothing needed to be done for native tasks except the upper bound
      next_task_bounds.set_ub(upper + (st > 0 ? 1 : -1));
    } else {
      next_task_bounds.set_ub(upper);
    }
    if (ptask_dup != NULL) // set lastprivate flag, construct firstprivates,
      // etc.
      ptask_dup(next_task, task, lastpriv);
    KA_TRACE(40,
             ("__kmp_taskloop_linear: T#%d; task #%llu: task %p: lower %lld, "
              "upper %lld stride %lld, (offsets %p %p)\n",
              gtid, i, next_task, lower, upper, st,
              next_task_bounds.get_lower_offset(),
              next_task_bounds.get_upper_offset()));
#if OMPT_SUPPORT
    __kmp_omp_taskloop_task(NULL, gtid, next_task,
                            codeptr_ra); // schedule new task
#if OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_dispatch) {
      OMPT_GET_DISPATCH_CHUNK(next_taskdata->ompt_task_info.dispatch_chunk,
                              lower, upper, st);
    }
#endif // OMPT_OPTIONAL
#else
    __kmp_omp_task(gtid, next_task, true); // schedule new task
#endif

    lower = upper + st; // adjust lower bound for the next iteration
  }
  // free the pattern task and exit
  __kmp_task_start(gtid, task, current_task); // make internal bookkeeping
  // do not execute the pattern task, just do internal bookkeeping work
  __kmp_task_finish<false>(gtid, task, current_task);
}
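
// Illustrative source-level counterpart (a sketch; body() is a placeholder
// and the splitting numbers assume the usual grainsize rebalancing done by
// the elided scheduling code in __kmp_taskloop):
//
//   #pragma omp taskloop grainsize(1000)
//   for (long i = 0; i < 9500; ++i) body(i);
//
// would reach this routine with tc = 9500, num_tasks = 9, grainsize
// rebalanced to 1055 and extras = 5, so five tasks run 1056 iterations and
// four run 1055 (5*1056 + 4*1055 == 9500 == num_tasks*grainsize + extras).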
// Structure to keep taskloop parameters for the auxiliary task,
// kept in the shareds of the task structure.
typedef struct __taskloop_params {
  kmp_task_t *task;
  kmp_uint64 *lb;
  kmp_uint64 *ub;
  void *task_dup;
  kmp_int64 st;
  kmp_uint64 ub_glob;
  kmp_uint64 num_tasks;
  kmp_uint64 grainsize;
  kmp_uint64 extras;
  kmp_int64 last_chunk;
  kmp_uint64 tc;
  kmp_uint64 num_t_min;
#if OMPT_SUPPORT
  void *codeptr_ra;
#endif
} __taskloop_params_t;

void __kmp_taskloop_recur(ident_t *, int, kmp_task_t *, kmp_uint64 *,
                          kmp_uint64 *, kmp_int64, kmp_uint64, kmp_uint64,
                          kmp_uint64, kmp_uint64, kmp_int64, kmp_uint64,
#if OMPT_SUPPORT
                          void *,
#endif
                          void *);
// Execute part of the taskloop submitted as a task.
int __kmp_taskloop_task(int gtid, void *ptask) {
  __taskloop_params_t *p =
      (__taskloop_params_t *)((kmp_task_t *)ptask)->shareds;
  kmp_task_t *task = p->task;
  kmp_uint64 *lb = p->lb;
  kmp_uint64 *ub = p->ub;
  void *task_dup = p->task_dup;
  kmp_int64 st = p->st;
  kmp_uint64 ub_glob = p->ub_glob;
  kmp_uint64 num_tasks = p->num_tasks;
  kmp_uint64 grainsize = p->grainsize;
  kmp_uint64 extras = p->extras;
  kmp_int64 last_chunk = p->last_chunk;
  kmp_uint64 tc = p->tc;
  kmp_uint64 num_t_min = p->num_t_min;
#if OMPT_SUPPORT
  void *codeptr_ra = p->codeptr_ra;
#endif
#if KMP_DEBUG
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  KMP_DEBUG_ASSERT(task != NULL);
  KA_TRACE(20,
           ("__kmp_taskloop_task: T#%d, task %p: %lld tasks, grainsize"
            " %lld, extras %lld, last_chunk %lld, i=%lld,%lld(%d), dup %p\n",
            gtid, taskdata, num_tasks, grainsize, extras, last_chunk, *lb, *ub,
            st, task_dup));
#endif
  KMP_DEBUG_ASSERT(num_tasks * 2 + 1 > num_t_min);
  if (num_tasks > num_t_min)
    __kmp_taskloop_recur(NULL, gtid, task, lb, ub, st, ub_glob, num_tasks,
                         grainsize, extras, last_chunk, tc, num_t_min,
#if OMPT_SUPPORT
                         codeptr_ra,
#endif
                         task_dup);
  else
    __kmp_taskloop_linear(NULL, gtid, task, lb, ub, st, ub_glob, num_tasks,
                          grainsize, extras, last_chunk, tc,
#if OMPT_SUPPORT
                          codeptr_ra,
#endif
                          task_dup);

  KA_TRACE(40, ("__kmp_taskloop_task(exit): T#%d\n", gtid));
  return 0;
}
// Schedule part of the taskloop as a task, execute the rest of the taskloop.
//
// loc        Source location information
// gtid       Global thread ID
// task       Pattern task, exposes the loop iteration range
// lb         Pointer to loop lower bound in task structure
// ub         Pointer to loop upper bound in task structure
// st         Loop stride
// ub_glob    Global upper bound (used for lastprivate check)
// num_tasks  Number of tasks to execute
// grainsize  Number of loop iterations per task
// extras     Number of chunks with grainsize+1 iterations
// last_chunk Reduction of grainsize for last task
// tc         Iterations count
// num_t_min  Threshold to launch tasks recursively
// codeptr_ra Return address for OMPT events
// task_dup   Tasks duplication routine
void __kmp_taskloop_recur(ident_t *loc, int gtid, kmp_task_t *task,
                          kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
                          kmp_uint64 ub_glob, kmp_uint64 num_tasks,
                          kmp_uint64 grainsize, kmp_uint64 extras,
                          kmp_int64 last_chunk, kmp_uint64 tc,
                          kmp_uint64 num_t_min,
#if OMPT_SUPPORT
                          void *codeptr_ra,
#endif
                          void *task_dup) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  KMP_DEBUG_ASSERT(task != NULL);
  KMP_DEBUG_ASSERT(num_tasks > num_t_min);
  KA_TRACE(20,
           ("__kmp_taskloop_recur: T#%d, task %p: %lld tasks, grainsize"
            " %lld, extras %lld, last_chunk %lld, i=%lld,%lld(%d), dup %p\n",
            gtid, taskdata, num_tasks, grainsize, extras, last_chunk, *lb, *ub,
            st, task_dup));
  p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
  kmp_uint64 lower = *lb;
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_task_t *next_task;
  size_t lower_offset =
      (char *)lb - (char *)task; // remember offset of lb in the task structure
  size_t upper_offset =
      (char *)ub - (char *)task; // remember offset of ub in the task structure

  KMP_DEBUG_ASSERT(tc == num_tasks * grainsize +
                             (last_chunk < 0 ? last_chunk : extras));
  KMP_DEBUG_ASSERT(num_tasks > extras);
  KMP_DEBUG_ASSERT(num_tasks > 0);

  // split the loop in two halves
  kmp_uint64 lb1, ub0, tc0, tc1, ext0, ext1;
  kmp_int64 last_chunk0 = 0, last_chunk1 = 0;
  kmp_uint64 gr_size0 = grainsize;
  kmp_uint64 n_tsk0 = num_tasks >> 1; // num_tasks/2 to execute
  kmp_uint64 n_tsk1 = num_tasks - n_tsk0; // to schedule as a task
  if (last_chunk < 0) {
    ext0 = ext1 = 0;
    last_chunk1 = last_chunk;
    tc0 = grainsize * n_tsk0;
    tc1 = tc - tc0;
  } else if (n_tsk0 <= extras) {
    gr_size0++; // integrate extras into grainsize
    ext0 = 0; // no extra iterations in 1st half
    ext1 = extras - n_tsk0; // remaining extras
    tc0 = gr_size0 * n_tsk0; // iterations for 1st half
    tc1 = tc - tc0; // iterations for 2nd half
  } else { // n_tsk0 > extras
    ext1 = 0; // no extra iterations in 2nd half
    ext0 = extras; // all extras in the 1st half
    tc1 = grainsize * n_tsk1; // iterations for 2nd half
    tc0 = tc - tc1; // iterations for 1st half
  }
  ub0 = lower + st * (tc0 - 1); // upper bound of the 1st half
  lb1 = ub0 + st; // lower bound of the 2nd half

  // create pattern task for 2nd half of the loop
#if OMPX_TASKGRAPH
  next_task = __kmp_task_dup_alloc(thread, task,
                                   /* taskloop_recur */ 1);
#else
  next_task = __kmp_task_dup_alloc(thread, task); // duplicate the task
#endif
  // adjust lower bound (upper bound is not changed) for the 2nd half
  *(kmp_uint64 *)((char *)next_task + lower_offset) = lb1;
  if (ptask_dup != NULL) // construct firstprivates, etc.
    ptask_dup(next_task, task, 0);
  *ub = ub0; // adjust upper bound for the 1st half

  // create auxiliary task for 2nd half of the loop;
  // make sure the new task has the same parent as the pattern task
  kmp_taskdata_t *current_task = thread->th.th_current_task;
  thread->th.th_current_task = taskdata->td_parent;
  kmp_task_t *new_task =
      __kmpc_omp_task_alloc(loc, gtid, 1, 3 * sizeof(void *),
                            sizeof(__taskloop_params_t), &__kmp_taskloop_task);
  // restore current task
  thread->th.th_current_task = current_task;
  __taskloop_params_t *p = (__taskloop_params_t *)new_task->shareds;
  p->task = next_task;
  p->lb = (kmp_uint64 *)((char *)next_task + lower_offset);
  p->ub = (kmp_uint64 *)((char *)next_task + upper_offset);
  p->task_dup = task_dup;
  p->st = st;
  p->ub_glob = ub_glob;
  p->num_tasks = n_tsk1;
  p->grainsize = grainsize;
  p->extras = ext1;
  p->last_chunk = last_chunk1;
  p->tc = tc1;
  p->num_t_min = num_t_min;
#if OMPT_SUPPORT
  p->codeptr_ra = codeptr_ra;
#endif

#if OMPX_TASKGRAPH
  kmp_taskdata_t *new_task_data = KMP_TASK_TO_TASKDATA(new_task);
  new_task_data->tdg = taskdata->tdg;
  new_task_data->is_taskgraph = 0;
#endif

#if OMPT_SUPPORT
  // schedule new task with correct return address for OMPT events
  __kmp_omp_taskloop_task(NULL, gtid, new_task, codeptr_ra);
#else
  __kmp_omp_task(gtid, new_task, true); // schedule new task
#endif

  // execute the 1st half of current subrange
  if (n_tsk0 > num_t_min)
    __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0, gr_size0,
                         ext0, last_chunk0, tc0, num_t_min,
#if OMPT_SUPPORT
                         codeptr_ra,
#endif
                         task_dup);
  else
    __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0,
                          gr_size0, ext0, last_chunk0, tc0,
#if OMPT_SUPPORT
                          codeptr_ra,
#endif
                          task_dup);

  KA_TRACE(40, ("__kmp_taskloop_recur(exit): T#%d\n", gtid));
}
// __kmp_taskloop: internal implementation shared by all taskloop entry points.
static void __kmp_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
                           kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
                           int nogroup, int sched, kmp_uint64 grainsize,
                           int modifier, void *task_dup) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  KMP_DEBUG_ASSERT(task != NULL);
  if (nogroup == 0) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
    OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
    __kmpc_taskgroup(loc, gtid);
  }

#if OMPX_TASKGRAPH
  KMP_ATOMIC_DEC(&__kmp_tdg_task_id);
#endif
  // =========================================================================
  // calculate loop parameters
  kmp_taskloop_bounds_t task_bounds(task, lb, ub);
  kmp_uint64 tc;
  // compiler provides global bounds here
  kmp_uint64 lower = task_bounds.get_lb();
  kmp_uint64 upper = task_bounds.get_ub();
  kmp_uint64 ub_glob = upper; // global upper used to calc lastprivate flag
  kmp_uint64 num_tasks = 0, extras = 0;
  kmp_int64 last_chunk =
      0; // reduce grainsize of last task by last_chunk in strict mode
  kmp_uint64 num_tasks_min = __kmp_taskloop_min_tasks;
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *current_task = thread->th.th_current_task;

  KA_TRACE(20, ("__kmp_taskloop: T#%d, task %p, lb %lld, ub %lld, st %lld, "
                "grain %llu(%d, %d), dup %p\n",
                gtid, taskdata, lower, upper, st, grainsize, sched, modifier,
                task_dup));

  // compute trip count
  if (st == 1) { // most common case
    tc = upper - lower + 1;
  } else if (st < 0) {
    tc = (lower - upper) / (-st) + 1;
  } else { // st > 0
    tc = (upper - lower) / st + 1;
  }
  if (tc == 0) {
    KA_TRACE(20, ("__kmp_taskloop(exit): T#%d zero-trip loop\n", gtid));
    // free the pattern task and exit
    __kmp_task_start(gtid, task, current_task);
    // do not execute anything for zero-trip loop
    __kmp_task_finish<false>(gtid, task, current_task);
    return;
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_taskloop, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), tc, OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (num_tasks_min == 0)
    num_tasks_min =
        KMP_MIN(thread->th.th_team_nproc * 10, INITIAL_TASK_DEQUE_SIZE);

  // compute num_tasks/grainsize based on the input provided
  switch (sched) {
  case 0: // no schedule clause specified, we can choose the default
    // let's try to schedule (team_size * 10) tasks
    grainsize = thread->th.th_team_nproc * static_cast<kmp_uint64>(10);
    KMP_FALLTHROUGH();
  case 2: // num_tasks provided
    if (grainsize > tc) {
      num_tasks = tc; // too big num_tasks requested, adjust values
      grainsize = 1;
      extras = 0;
    } else {
      num_tasks = grainsize;
      grainsize = tc / num_tasks;
      extras = tc % num_tasks;
    }
    break;
  case 1: // grainsize provided
    if (grainsize > tc) {
      num_tasks = 1;
      grainsize = tc; // too big grainsize requested, adjust values
      extras = 0;
    } else {
      if (modifier) { // strict grainsize
        num_tasks = (tc + grainsize - 1) / grainsize;
        last_chunk = tc - (num_tasks * grainsize);
        extras = 0;
      } else { // exact number of iterations per chunk is not required
        num_tasks = tc / grainsize;
        // adjust grainsize for balanced distribution of iterations
        grainsize = tc / num_tasks;
        extras = tc % num_tasks;
      }
    }
    break;
  default:
    KMP_ASSERT2(0, "unknown scheduling of taskloop");
  }

  KMP_DEBUG_ASSERT(tc == num_tasks * grainsize +
                             (last_chunk < 0 ? last_chunk : extras));
  KMP_DEBUG_ASSERT(num_tasks > extras);
  KMP_DEBUG_ASSERT(num_tasks > 0);

  // if(0) specified, the taskloop is executed serially
  if (if_val == 0) {
    taskdata->td_flags.task_serial = 1;
    taskdata->td_flags.tiedness = TASK_TIED; // serial task cannot be untied
    // always start serial tasks linearly
    __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
                          grainsize, extras, last_chunk, tc,
#if OMPT_SUPPORT
                          OMPT_GET_RETURN_ADDRESS(0),
#endif
                          task_dup);
    // !taskdata->td_flags.native => currently force linear spawning of tasks
    // for GOMP_taskloop
  } else if (num_tasks > num_tasks_min && !taskdata->td_flags.native) {
    KA_TRACE(20, ("__kmp_taskloop: T#%d, go recursive: tc %llu, #tasks %llu"
                  "(%lld), grain %llu, extras %llu, last_chunk %lld\n",
                  gtid, tc, num_tasks, num_tasks_min, grainsize, extras,
                  last_chunk));
    __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
                         grainsize, extras, last_chunk, tc, num_tasks_min,
#if OMPT_SUPPORT
                         OMPT_GET_RETURN_ADDRESS(0),
#endif
                         task_dup);
  } else {
    KA_TRACE(20, ("__kmp_taskloop: T#%d, go linear: tc %llu, #tasks %llu"
                  "(%lld), grain %llu, extras %llu, last_chunk %lld\n",
                  gtid, tc, num_tasks, num_tasks_min, grainsize, extras,
                  last_chunk));
    __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
                          grainsize, extras, last_chunk, tc,
#if OMPT_SUPPORT
                          OMPT_GET_RETURN_ADDRESS(0),
#endif
                          task_dup);
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_taskloop, ompt_scope_end, &(team_info->parallel_data),
        &(task_info->task_data), tc, OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (nogroup == 0) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
    OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
    __kmpc_end_taskgroup(loc, gtid);
  }
  KA_TRACE(20, ("__kmp_taskloop(exit): T#%d\n", gtid));
}
// Execute the taskloop construct (OpenMP 4.5 entry point, no 'strict'
// modifier; see __kmpc_taskloop_5 for the OpenMP 5.1 variant).
void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
                     kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup,
                     int sched, kmp_uint64 grainsize, void *task_dup) {
  __kmp_assert_valid_gtid(gtid);
  KA_TRACE(20, ("__kmpc_taskloop(enter): T#%d\n", gtid));
  __kmp_taskloop(loc, gtid, task, if_val, lb, ub, st, nogroup, sched, grainsize,
                 0, task_dup);
  KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d\n", gtid));
}
// Execute the taskloop construct with the 'strict' modifier of the grainsize
// and num_tasks clauses (OpenMP 5.1).
void __kmpc_taskloop_5(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
                       kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
                       int nogroup, int sched, kmp_uint64 grainsize,
                       int modifier, void *task_dup) {
  __kmp_assert_valid_gtid(gtid);
  KA_TRACE(20, ("__kmpc_taskloop_5(enter): T#%d\n", gtid));
  __kmp_taskloop(loc, gtid, task, if_val, lb, ub, st, nogroup, sched, grainsize,
                 modifier, task_dup);
  KA_TRACE(20, ("__kmpc_taskloop_5(exit): T#%d\n", gtid));
}
// Returns a pointer to the current task's target async handle,
// or NULL if no task is present.
void **__kmpc_omp_get_target_async_handle_ptr(kmp_int32 gtid) {
  if (gtid == KMP_GTID_DNE)
    return NULL;
  kmp_info_t *thread = __kmp_thread_from_gtid(gtid);
  kmp_taskdata_t *taskdata = thread->th.th_current_task;
  if (!taskdata)
    return NULL;
  return &taskdata->td_target_data.async_handle;
}

// Returns TRUE if the current task of the given thread has a task team
// allocated to it, FALSE otherwise.
bool __kmpc_omp_has_task_team(kmp_int32 gtid) {
  if (gtid == KMP_GTID_DNE)
    return FALSE;
  kmp_info_t *thread = __kmp_thread_from_gtid(gtid);
  kmp_taskdata_t *taskdata = thread->th.th_current_task;
  if (!taskdata)
    return FALSE;
  return taskdata->td_task_team != NULL;
}
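
// Both accessors above tolerate KMP_GTID_DNE and a missing current task so
// they can be probed from contexts where no OpenMP task is active; the async
// handle they expose is the per-task slot used for asynchronous (nowait)
// target operations (usage by the offload runtime is an assumption here).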
#if OMPX_TASKGRAPH
// __kmp_find_tdg: identify a TDG through its ID
// tdg_id: ID of the TDG
// returns: pointer to the TDG if one with this ID exists and is not in its
//          initial state, otherwise nullptr
static kmp_tdg_info_t *__kmp_find_tdg(kmp_int32 tdg_id) {
  kmp_tdg_info_t *res = nullptr;
  if (__kmp_max_tdgs == 0)
    return res;

  if (__kmp_global_tdgs == NULL)
    __kmp_global_tdgs = (kmp_tdg_info_t **)__kmp_allocate(
        sizeof(kmp_tdg_info_t *) * __kmp_max_tdgs);

  if ((__kmp_global_tdgs[tdg_id]) &&
      (__kmp_global_tdgs[tdg_id]->tdg_status != KMP_TDG_NONE))
    res = __kmp_global_tdgs[tdg_id];
  return res;
}
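
// Note: tdg_id is used as a direct index into __kmp_global_tdgs, so callers
// must pass ids in [0, __kmp_max_tdgs); __kmpc_start_record_task below
// asserts this before recording.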
// __kmp_print_tdg_dot: prints the TDG to a dot file
// tdg:  pointer to the TDG
// gtid: Global Thread ID
void __kmp_print_tdg_dot(kmp_tdg_info_t *tdg, kmp_int32 gtid) {
  kmp_int32 tdg_id = tdg->tdg_id;
  KA_TRACE(10, ("__kmp_print_tdg_dot(enter): T#%d tdg_id=%d \n", gtid, tdg_id));

  char file_name[20];
  sprintf(file_name, "tdg_%d.dot", tdg_id);
  kmp_safe_raii_file_t tdg_file(file_name, "w");

  kmp_int32 num_tasks = KMP_ATOMIC_LD_RLX(&tdg->num_tasks);
  fprintf(tdg_file,
          "digraph TDG {\n"
          "   compound=true\n"
          "   subgraph cluster {\n"
          "      label=TDG_%d\n",
          tdg_id);
  for (kmp_int32 i = 0; i < num_tasks; i++) {
    fprintf(tdg_file, "      %d[style=bold]\n", i);
  }
  fprintf(tdg_file, "   }\n");
  for (kmp_int32 i = 0; i < num_tasks; i++) {
    kmp_int32 nsuccessors = tdg->record_map[i].nsuccessors;
    kmp_int32 *successors = tdg->record_map[i].successors;
    if (nsuccessors > 0) {
      for (kmp_int32 j = 0; j < nsuccessors; j++)
        fprintf(tdg_file, "   %d -> %d \n", i, successors[j]);
    }
  }
  fprintf(tdg_file, "}");
  KA_TRACE(10, ("__kmp_print_tdg_dot(exit): T#%d tdg_id=%d \n", gtid, tdg_id));
}
// __kmp_exec_tdg: launch the execution of a previously recorded TDG
// gtid: Global Thread ID
// tdg:  pointer to the TDG to execute
void __kmp_exec_tdg(kmp_int32 gtid, kmp_tdg_info_t *tdg) {
  KMP_DEBUG_ASSERT(tdg->tdg_status == KMP_TDG_READY);
  KA_TRACE(10, ("__kmp_exec_tdg(enter): T#%d tdg_id=%d num_roots=%d\n", gtid,
                tdg->tdg_id, tdg->num_roots));
  kmp_node_info_t *this_record_map = tdg->record_map;
  kmp_int32 *this_root_tasks = tdg->root_tasks;
  kmp_int32 this_num_roots = tdg->num_roots;
  kmp_int32 this_num_tasks = KMP_ATOMIC_LD_RLX(&tdg->num_tasks);

  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *parent_task = thread->th.th_current_task;

  if (tdg->rec_taskred_data) {
    __kmpc_taskred_init(gtid, tdg->rec_num_taskred, tdg->rec_taskred_data);
  }

  for (kmp_int32 j = 0; j < this_num_tasks; j++) {
    kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(this_record_map[j].task);

    td->td_parent = parent_task;
    this_record_map[j].parent_task = parent_task;

    kmp_taskgroup_t *parent_taskgroup =
        this_record_map[j].parent_task->td_taskgroup;

    KMP_ATOMIC_ST_RLX(&this_record_map[j].npredecessors_counter,
                      this_record_map[j].npredecessors);
    KMP_ATOMIC_INC(&this_record_map[j].parent_task->td_incomplete_child_tasks);

    if (parent_taskgroup) {
      KMP_ATOMIC_INC(&parent_taskgroup->count);
      // The taskgroup is different, so we must update it
      td->td_taskgroup = parent_taskgroup;
    } else if (td->td_taskgroup != nullptr) {
      // If the parent doesn't have a taskgroup, remove it from the task
      td->td_taskgroup = nullptr;
    }
    if (this_record_map[j].parent_task->td_flags.tasktype == TASK_EXPLICIT)
      KMP_ATOMIC_INC(&this_record_map[j].parent_task->td_allocated_child_tasks);
  }

  for (kmp_int32 j = 0; j < this_num_roots; ++j) {
    __kmp_omp_task(gtid, this_record_map[this_root_tasks[j]].task, true);
  }
  KA_TRACE(10, ("__kmp_exec_tdg(exit): T#%d tdg_id=%d num_roots=%d\n", gtid,
                tdg->tdg_id, tdg->num_roots));
}
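
// Replay note: only root tasks (npredecessors == 0) are enqueued here; every
// other recorded task is released through the dependence machinery as its
// npredecessors_counter, re-armed in the loop above, drains to zero when
// predecessors complete.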
// __kmp_start_record: set up a TDG structure and turn the
// recording flag to true
// gtid:   Global Thread ID of the encountering thread
// flags:  Flags associated with the TDG
// tdg_id: ID of the TDG to record
static inline void __kmp_start_record(kmp_int32 gtid,
                                      kmp_taskgraph_flags_t *flags,
                                      kmp_int32 tdg_id) {
  kmp_tdg_info_t *tdg =
      (kmp_tdg_info_t *)__kmp_allocate(sizeof(kmp_tdg_info_t));
  __kmp_global_tdgs[__kmp_curr_tdg_idx] = tdg;
  // Initializing the TDG structure
  tdg->tdg_id = tdg_id;
  tdg->map_size = INIT_MAPSIZE;
  tdg->num_roots = -1;
  tdg->root_tasks = nullptr;
  tdg->tdg_status = KMP_TDG_RECORDING;
  tdg->rec_num_taskred = 0;
  tdg->rec_taskred_data = nullptr;
  KMP_ATOMIC_ST_RLX(&tdg->num_tasks, 0);

  // Initializing the list of nodes in this TDG
  kmp_node_info_t *this_record_map =
      (kmp_node_info_t *)__kmp_allocate(INIT_MAPSIZE * sizeof(kmp_node_info_t));
  for (kmp_int32 i = 0; i < INIT_MAPSIZE; i++) {
    kmp_int32 *successorsList =
        (kmp_int32 *)__kmp_allocate(__kmp_successors_size * sizeof(kmp_int32));
    this_record_map[i].task = nullptr;
    this_record_map[i].successors = successorsList;
    this_record_map[i].nsuccessors = 0;
    this_record_map[i].npredecessors = 0;
    this_record_map[i].successors_size = __kmp_successors_size;
    KMP_ATOMIC_ST_RLX(&this_record_map[i].npredecessors_counter, 0);
  }

  __kmp_global_tdgs[__kmp_curr_tdg_idx]->record_map = this_record_map;
}
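
// Sizing note: the record map starts with INIT_MAPSIZE node slots, each with
// room for __kmp_successors_size successor ids; recording a larger graph
// grows these buffers later (the growth path lives with the task-recording
// code, an assumption not shown in this excerpt).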
// __kmpc_start_record_task: wrapper around __kmp_start_record that marks
// the beginning of the record process of a task region
// loc_ref:     Location of TDG, not used yet
// gtid:        Global Thread ID of the encountering thread
// input_flags: Flags associated with the TDG
// tdg_id:      ID of the TDG to record
// returns:     1 if the region is to be recorded, 0 if it was replayed
kmp_int32 __kmpc_start_record_task(ident_t *loc_ref, kmp_int32 gtid,
                                   kmp_int32 input_flags, kmp_int32 tdg_id) {

  kmp_int32 res;
  kmp_taskgraph_flags_t *flags = (kmp_taskgraph_flags_t *)&input_flags;
  KA_TRACE(10,
           ("__kmpc_start_record_task(enter): T#%d loc=%p flags=%d tdg_id=%d\n",
            gtid, loc_ref, input_flags, tdg_id));

  if (__kmp_max_tdgs == 0) {
    KA_TRACE(
        10,
        ("__kmpc_start_record_task(abandon): T#%d loc=%p flags=%d tdg_id = %d, "
         "__kmp_max_tdgs = 0\n",
         gtid, loc_ref, input_flags, tdg_id));
    return 1;
  }

  __kmpc_taskgroup(loc_ref, gtid);
  if (kmp_tdg_info_t *tdg = __kmp_find_tdg(tdg_id)) {
    // TODO: use re_record flag
    __kmp_exec_tdg(gtid, tdg);
    res = 0;
  } else {
    __kmp_curr_tdg_idx = tdg_id;
    KMP_DEBUG_ASSERT(__kmp_curr_tdg_idx < __kmp_max_tdgs);
    __kmp_start_record(gtid, flags, tdg_id);
    __kmp_num_tdg++;
    res = 1;
  }
  KA_TRACE(10, ("__kmpc_start_record_task(exit): T#%d TDG %d starts to %s\n",
                gtid, tdg_id, res ? "record" : "execute"));
  return res;
}
// __kmp_end_record: set up a TDG after recording it
// gtid: Global thread ID
// tdg:  pointer to the TDG
void __kmp_end_record(kmp_int32 gtid, kmp_tdg_info_t *tdg) {
  // Store roots
  kmp_node_info_t *this_record_map = tdg->record_map;
  kmp_int32 this_num_tasks = KMP_ATOMIC_LD_RLX(&tdg->num_tasks);
  kmp_int32 *this_root_tasks =
      (kmp_int32 *)__kmp_allocate(this_num_tasks * sizeof(kmp_int32));
  kmp_int32 this_map_size = tdg->map_size;
  kmp_int32 this_num_roots = 0;
  kmp_info_t *thread = __kmp_threads[gtid];

  for (kmp_int32 i = 0; i < this_num_tasks; i++) {
    if (this_record_map[i].npredecessors == 0) {
      this_root_tasks[this_num_roots++] = i;
    }
  }

  // Update with roots info and mapsize
  tdg->map_size = this_map_size;
  tdg->num_roots = this_num_roots;
  tdg->root_tasks = this_root_tasks;
  KMP_DEBUG_ASSERT(tdg->tdg_status == KMP_TDG_RECORDING);
  tdg->tdg_status = KMP_TDG_READY;

  if (thread->th.th_current_task->td_dephash) {
    __kmp_dephash_free(thread, thread->th.th_current_task->td_dephash);
    thread->th.th_current_task->td_dephash = NULL;
  }

  // Reset predecessor counters for future replays
  for (kmp_int32 i = 0; i < this_num_tasks; i++) {
    KMP_ATOMIC_ST_RLX(&this_record_map[i].npredecessors_counter,
                      this_record_map[i].npredecessors);
  }
  KMP_ATOMIC_ST_RLX(&__kmp_tdg_task_id, 0);

  if (__kmp_tdg_dot)
    __kmp_print_tdg_dot(tdg, gtid);
}
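
// Root-detection example (illustrative): for a diamond-shaped recording
// 0 -> {1, 2} -> 3, only node 0 has npredecessors == 0, so root_tasks
// becomes {0} and num_roots == 1; nodes 1-3 are reached during replay via
// their predecessor counters.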
// __kmpc_end_record_task: wrapper around __kmp_end_record that marks
// the end of the recording phase
// loc_ref:     Source location information
// gtid:        Global thread ID
// input_flags: Flags attached to the graph
// tdg_id:      ID of the TDG just finished recording
void __kmpc_end_record_task(ident_t *loc_ref, kmp_int32 gtid,
                            kmp_int32 input_flags, kmp_int32 tdg_id) {
  kmp_tdg_info_t *tdg = __kmp_find_tdg(tdg_id);

  KA_TRACE(10, ("__kmpc_end_record_task(enter): T#%d loc=%p finishes recording"
                " tdg=%d with flags=%d\n",
                gtid, loc_ref, tdg_id, input_flags));
  if (__kmp_max_tdgs) {
    // TODO: use input_flags->nowait
    __kmpc_end_taskgroup(loc_ref, gtid);
    if (__kmp_tdg_is_recording(tdg->tdg_status))
      __kmp_end_record(gtid, tdg);
  }
  KA_TRACE(10, ("__kmpc_end_record_task(exit): T#%d loc=%p finished recording"
                " tdg=%d, its status is now READY\n",
                gtid, loc_ref, tdg_id));
}
#endif // OMPX_TASKGRAPH