From 4a6efa01e225d0e08b88c507165616afb94b89ac Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Thu, 11 Jun 2026 16:42:00 +0900 Subject: [PATCH] block: serialize whole elevator change steps for the same queue When elevator_change() is called concurrently for the same queue, the elevator_change_done() function runs concurrently as well. This function adds or deletes kobjects for the debugfs entry of the queue. The concurrent calls then cause memory corruption of the kobjects and result in a process hang. The core part of the elevator switch is protected by queue freeze and q->elevator_lock. However, since commit 559dc11143eb ("block: move elv_register[unregister]_queue out of elevator_lock"), elevator_change_done() is no longer serialized, hence the memory corruption and the hang. The failures are observed when udev-worker writes to a sysfs queue/scheduler attribute file while the blktests test case block/005 writes to the same attribute file. The failure can also be reproduced by running two processes that write to the same queue/scheduler file concurrently. The failure has been observed since another commit, 370ac285f23a ("block: avoid cpu_hotplug_lock depedency on freeze_lock"), which changed the behavior of queue freeze and thereby exposed the failure. Fix the failure by adding a new per-queue lock 'elevator_queue_lock', which serializes all the elevator switch steps for the same queue, including the elevator_change_done() call. 
Fixes: 559dc11143eb ("block: move elv_register[unregister]_queue out of elevator_lock") Signed-off-by: Shin'ichiro Kawasaki --- block/blk-core.c | 1 + block/elevator.c | 9 +++++++++ include/linux/blkdev.h | 7 +++++++ 3 files changed, 17 insertions(+) diff --git a/block/blk-core.c b/block/blk-core.c index 365641266c9e8..940c1933d2330 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -473,6 +473,7 @@ struct request_queue *blk_alloc_queue(struct queue_limits *lim, int node_id) refcount_set(&q->refs, 1); mutex_init(&q->debugfs_mutex); mutex_init(&q->elevator_lock); + mutex_init(&q->elevator_queue_lock); mutex_init(&q->sysfs_lock); mutex_init(&q->limits_lock); mutex_init(&q->rq_qos_mutex); diff --git a/block/elevator.c b/block/elevator.c index 3bcd37c2aa340..65bdea27aa8a9 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -665,6 +665,13 @@ static int elevator_change(struct request_queue *q, struct elv_change_ctx *ctx) return ret; } + /* + * Acquire elevator_queue_lock to serialize the debugfs (un)register + * steps for the same queue. The elevator switch core part is protected + * by queue freezing and ->elevator_lock. + */ + mutex_lock(&q->elevator_queue_lock); + memflags = blk_mq_freeze_queue(q); /* * May be called before adding disk, when there isn't any FS I/O, @@ -690,6 +697,8 @@ static int elevator_change(struct request_queue *q, struct elv_change_ctx *ctx) if (!ctx->new) blk_mq_free_sched_res(&ctx->res, ctx->type, set); + mutex_unlock(&q->elevator_queue_lock); + return ret; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9213a5716f95a..dd0704b407deb 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -612,6 +612,13 @@ struct request_queue { */ struct mutex elevator_lock; + /* + * Serializes the whole elevator change operation for the same queue, + * including the debugfs (un)register steps. Must be acquired before + * freezing the queue and acquiring elevator_lock. 
+ */ + struct mutex elevator_queue_lock; + struct mutex sysfs_lock; /* * Protects queue limits and also sysfs attribute read_ahead_kb.