Skip to content

Commit 1e265c2

Browse files
authored
Merge pull request #80096 from mikeash/task-group-cancellation-lock-fix
[Concurrency] Fix a race when using cancelAll on a task group concurrently with child tasks being removed.
2 parents 8c43428 + 38c4fce commit 1e265c2

File tree

4 files changed

+66
-36
lines changed

4 files changed

+66
-36
lines changed

include/swift/ABI/TaskStatus.h

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -133,10 +133,13 @@ class ChildTaskStatusRecord : public TaskStatusRecord {
133133
/// Group child tasks DO NOT have their own `ChildTaskStatusRecord` entries,
134134
/// and are only tracked by their respective `TaskGroupTaskStatusRecord`.
135135
class TaskGroupTaskStatusRecord : public TaskStatusRecord {
136-
public:
137-
AsyncTask *FirstChild;
136+
// FirstChild may be read concurrently to check for the presence of children,
137+
// so it needs to be atomic. The pointer is never dereferenced in that case,
138+
// so we can universally use memory_order_relaxed on it.
139+
std::atomic<AsyncTask *> FirstChild;
138140
AsyncTask *LastChild;
139141

142+
public:
140143
TaskGroupTaskStatusRecord()
141144
: TaskStatusRecord(TaskStatusRecordKind::TaskGroup),
142145
FirstChild(nullptr),
@@ -155,7 +158,9 @@ class TaskGroupTaskStatusRecord : public TaskStatusRecord {
155158
/// Return the first child linked by this record. This may be null;
156159
/// if not, it (and all of its successors) are guaranteed to satisfy
157160
/// `isChildTask()`.
158-
AsyncTask *getFirstChild() const { return FirstChild; }
161+
AsyncTask *getFirstChild() const {
162+
return FirstChild.load(std::memory_order_relaxed);
163+
}
159164

160165
/// Attach the passed in `child` task to this group.
161166
void attachChild(AsyncTask *child) {
@@ -165,9 +170,9 @@ class TaskGroupTaskStatusRecord : public TaskStatusRecord {
165170
auto oldLastChild = LastChild;
166171
LastChild = child;
167172

168-
if (!FirstChild) {
173+
if (!getFirstChild()) {
169174
// This is the first child we ever attach, so store it as FirstChild.
170-
FirstChild = child;
175+
FirstChild.store(child, std::memory_order_relaxed);
171176
return;
172177
}
173178

@@ -176,15 +181,18 @@ class TaskGroupTaskStatusRecord : public TaskStatusRecord {
176181

177182
void detachChild(AsyncTask *child) {
178183
assert(child && "cannot remove a null child from group");
179-
if (FirstChild == child) {
180-
FirstChild = getNextChildTask(child);
181-
if (FirstChild == nullptr) {
184+
185+
AsyncTask *prev = getFirstChild();
186+
187+
if (prev == child) {
188+
AsyncTask *next = getNextChildTask(child);
189+
FirstChild.store(next, std::memory_order_relaxed);
190+
if (next == nullptr) {
182191
LastChild = nullptr;
183192
}
184193
return;
185194
}
186195

187-
AsyncTask *prev = FirstChild;
188196
// Remove the child from the linked list, i.e.:
189197
// prev -> afterPrev -> afterChild
190198
// ==

stdlib/public/Concurrency/TaskGroup.cpp

Lines changed: 12 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -474,9 +474,8 @@ class TaskGroupBase : public TaskGroupTaskStatusRecord {
474474
bool statusCancel();
475475

476476
/// Cancel the group and all of its child tasks recursively.
477-
/// This also sets
478-
bool cancelAll();
479-
477+
/// This also sets the cancelled bit in the group status.
478+
bool cancelAll(AsyncTask *task);
480479
};
481480

482481
#if !SWIFT_CONCURRENCY_EMBEDDED
@@ -1378,7 +1377,8 @@ void DiscardingTaskGroup::offer(AsyncTask *completedTask, AsyncContext *context)
13781377
// Discarding results mode immediately treats a child failure as group cancellation.
13791378
// "All for one, one for all!" - any task failing must cause the group and all sibling tasks to be cancelled,
13801379
// such that the discarding group can exit as soon as possible.
1381-
cancelAll();
1380+
auto parent = completedTask->childFragment()->getParent();
1381+
cancelAll(parent);
13821382

13831383
if (afterComplete.hasWaitingTask() && afterComplete.pendingTasks(this) == 0) {
13841384
// We grab the waiting task while holding the group lock, because this
@@ -2097,10 +2097,12 @@ static bool swift_taskGroup_isCancelledImpl(TaskGroup *group) {
20972097

20982098
SWIFT_CC(swift)
20992099
static void swift_taskGroup_cancelAllImpl(TaskGroup *group) {
2100-
asBaseImpl(group)->cancelAll();
2100+
// TaskGroup is not a Sendable type, so this can only be called from the
2101+
// owning task.
2102+
asBaseImpl(group)->cancelAll(swift_task_getCurrent());
21012103
}
21022104

2103-
bool TaskGroupBase::cancelAll() {
2105+
bool TaskGroupBase::cancelAll(AsyncTask *owningTask) {
21042106
SWIFT_TASK_DEBUG_LOG("cancel all tasks in group = %p", this);
21052107

21062108
// Flag the task group itself as cancelled. If this was already
@@ -2114,8 +2116,8 @@ bool TaskGroupBase::cancelAll() {
21142116

21152117
// Cancel all the child tasks. TaskGroup is not a Sendable type,
21162118
// so cancelAll() can only be called from the owning task. This
2117-
// satisfies the precondition on cancelAllChildren().
2118-
_swift_taskGroup_cancelAllChildren(asAbstract(this));
2119+
// satisfies the precondition on cancelAllChildren_unlocked().
2120+
_swift_taskGroup_cancelAllChildren_unlocked(asAbstract(this), owningTask);
21192121

21202122
return true;
21212123
}
@@ -2124,22 +2126,8 @@ SWIFT_CC(swift)
21242126
static void swift_task_cancel_group_child_tasksImpl(TaskGroup *group) {
21252127
// TaskGroup is not a Sendable type, and so this operation (which is not
21262128
// currently exposed in the API) can only be called from the owning
2127-
// task. This satisfies the precondition on cancelAllChildren().
2128-
_swift_taskGroup_cancelAllChildren(group);
2129-
}
2130-
2131-
/// Cancel all the children of the given task group.
2132-
///
2133-
/// The caller must guarantee that this is either called from the
2134-
/// owning task of the task group or while holding the owning task's
2135-
/// status record lock.
2136-
void swift::_swift_taskGroup_cancelAllChildren(TaskGroup *group) {
2137-
// Because only the owning task of the task group can modify the
2138-
// child list of a task group status record, and it can only do so
2139-
// while holding the owning task's status record lock, we do not need
2140-
// any additional synchronization within this function.
2141-
for (auto childTask: group->getTaskRecord()->children())
2142-
swift_task_cancel(childTask);
2129+
// task. This satisfies the precondition on cancelAllChildren_unlocked().
2130+
_swift_taskGroup_cancelAllChildren_unlocked(group, swift_task_getCurrent());
21432131
}
21442132

21452133
// =============================================================================

stdlib/public/Concurrency/TaskPrivate.h

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -90,11 +90,16 @@ AsyncTask *_swift_task_setCurrent(AsyncTask *newTask);
9090

9191
/// Cancel all the child tasks that belong to `group`.
9292
///
93-
/// The caller must guarantee that this is either called from the
94-
/// owning task of the task group or while holding the owning task's
95-
/// status record lock.
93+
/// The caller must guarantee that this is called while holding the owning
94+
/// task's status record lock.
9695
void _swift_taskGroup_cancelAllChildren(TaskGroup *group);
9796

97+
/// Cancel all the child tasks that belong to `group`.
98+
///
99+
/// The caller must guarantee that this is called from the owning task.
100+
void _swift_taskGroup_cancelAllChildren_unlocked(TaskGroup *group,
101+
AsyncTask *owningTask);
102+
98103
/// Remove the given task from the given task group.
99104
///
100105
/// This is an internal API; clients outside of the TaskGroup implementation

stdlib/public/Concurrency/TaskStatus.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -943,6 +943,35 @@ void swift::_swift_taskGroup_detachChild(TaskGroup *group,
943943
});
944944
}
945945

946+
/// Cancel all the child tasks that belong to `group`.
947+
///
948+
/// The caller must guarantee that this is called while holding the owning
949+
/// task's status record lock.
950+
void swift::_swift_taskGroup_cancelAllChildren(TaskGroup *group) {
951+
// Because only the owning task of the task group can modify the
952+
// child list of a task group status record, and it can only do so
953+
// while holding the owning task's status record lock, we do not need
954+
// any additional synchronization within this function.
955+
for (auto childTask : group->getTaskRecord()->children())
956+
swift_task_cancel(childTask);
957+
}
958+
959+
/// Cancel all the child tasks that belong to `group`.
960+
///
961+
/// The caller must guarantee that this is called from the owning task.
962+
void swift::_swift_taskGroup_cancelAllChildren_unlocked(TaskGroup *group,
963+
AsyncTask *owningTask) {
964+
// Early out. If there are no children, there's nothing to do. We can safely
965+
// check this without locking, since this can only be concurrently mutated
966+
// from a child task. If there are no children then no more can be added.
967+
if (!group->getTaskRecord()->getFirstChild())
968+
return;
969+
970+
withStatusRecordLock(owningTask, [&group](ActiveTaskStatus status) {
971+
_swift_taskGroup_cancelAllChildren(group);
972+
});
973+
}
974+
946975
/**************************************************************************/
947976
/****************************** CANCELLATION ******************************/
948977
/**************************************************************************/

0 commit comments

Comments
 (0)