Skip to content

Commit 43c01fb

Browse files
committed
io-wq: re-set NUMA node affinities if CPUs come online
We correctly set io-wq NUMA node affinities when the io-wq context is set up, but if an entire node's CPU set is offlined and then brought back online, the per-node affinities are broken. Ensure that we set them again whenever a CPU comes online, so that we always track the right node affinity. The usual cpuhp notifiers are used to drive it. Reported-by: Zhang Qiang <qiang.zhang@windriver.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent ff57716 commit 43c01fb

1 file changed

Lines changed: 56 additions & 4 deletions

File tree

fs/io-wq.c

Lines changed: 56 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,9 @@
1919
#include <linux/task_work.h>
2020
#include <linux/blk-cgroup.h>
2121
#include <linux/audit.h>
22+
#include <linux/cpu.h>
2223

24+
#include "../kernel/sched/sched.h"
2325
#include "io-wq.h"
2426

2527
#define WORKER_IDLE_TIMEOUT (5 * HZ)
@@ -123,9 +125,13 @@ struct io_wq {
123125
refcount_t refs;
124126
struct completion done;
125127

128+
struct hlist_node cpuhp_node;
129+
126130
refcount_t use_refs;
127131
};
128132

133+
static enum cpuhp_state io_wq_online;
134+
129135
static bool io_worker_get(struct io_worker *worker)
130136
{
131137
return refcount_inc_not_zero(&worker->ref);
@@ -1091,17 +1097,20 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
10911097
return ERR_PTR(-ENOMEM);
10921098

10931099
wq->wqes = kcalloc(nr_node_ids, sizeof(struct io_wqe *), GFP_KERNEL);
1094-
if (!wq->wqes) {
1095-
kfree(wq);
1096-
return ERR_PTR(-ENOMEM);
1097-
}
1100+
if (!wq->wqes)
1101+
goto err_wq;
1102+
1103+
ret = cpuhp_state_add_instance_nocalls(io_wq_online, &wq->cpuhp_node);
1104+
if (ret)
1105+
goto err_wqes;
10981106

10991107
wq->free_work = data->free_work;
11001108
wq->do_work = data->do_work;
11011109

11021110
/* caller must already hold a reference to this */
11031111
wq->user = data->user;
11041112

1113+
ret = -ENOMEM;
11051114
for_each_node(node) {
11061115
struct io_wqe *wqe;
11071116
int alloc_node = node;
@@ -1145,9 +1154,12 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
11451154
ret = PTR_ERR(wq->manager);
11461155
complete(&wq->done);
11471156
err:
1157+
cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
11481158
for_each_node(node)
11491159
kfree(wq->wqes[node]);
1160+
err_wqes:
11501161
kfree(wq->wqes);
1162+
err_wq:
11511163
kfree(wq);
11521164
return ERR_PTR(ret);
11531165
}
@@ -1164,6 +1176,8 @@ static void __io_wq_destroy(struct io_wq *wq)
11641176
{
11651177
int node;
11661178

1179+
cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
1180+
11671181
set_bit(IO_WQ_BIT_EXIT, &wq->state);
11681182
if (wq->manager)
11691183
kthread_stop(wq->manager);
@@ -1191,3 +1205,41 @@ struct task_struct *io_wq_get_task(struct io_wq *wq)
11911205
{
11921206
return wq->manager;
11931207
}
1208+
1209+
static bool io_wq_worker_affinity(struct io_worker *worker, void *data)
1210+
{
1211+
struct task_struct *task = worker->task;
1212+
struct rq_flags rf;
1213+
struct rq *rq;
1214+
1215+
rq = task_rq_lock(task, &rf);
1216+
do_set_cpus_allowed(task, cpumask_of_node(worker->wqe->node));
1217+
task->flags |= PF_NO_SETAFFINITY;
1218+
task_rq_unlock(rq, task, &rf);
1219+
return false;
1220+
}
1221+
1222+
static int io_wq_cpu_online(unsigned int cpu, struct hlist_node *node)
1223+
{
1224+
struct io_wq *wq = hlist_entry_safe(node, struct io_wq, cpuhp_node);
1225+
int i;
1226+
1227+
rcu_read_lock();
1228+
for_each_node(i)
1229+
io_wq_for_each_worker(wq->wqes[i], io_wq_worker_affinity, NULL);
1230+
rcu_read_unlock();
1231+
return 0;
1232+
}
1233+
1234+
static __init int io_wq_init(void)
1235+
{
1236+
int ret;
1237+
1238+
ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "io-wq/online",
1239+
io_wq_cpu_online, NULL);
1240+
if (ret < 0)
1241+
return ret;
1242+
io_wq_online = ret;
1243+
return 0;
1244+
}
1245+
subsys_initcall(io_wq_init);

0 commit comments

Comments
 (0)