Skip to content

Commit aa1c09c

Browse files
damien-lemoal
authored and axboe committed
null_blk: Fix locking in zoned mode
When the zoned mode is enabled in null_blk, serializing read, write and zone management operations for each zone is necessary to protect device level information for managing zone resources (zone open and closed counters) as well as each zone condition and write pointer position. Commit 35bc10b ("null_blk: synchronization fix for zoned device") introduced a spinlock to implement this serialization. However, when memory backing is also enabled, GFP_NOIO memory allocations are executed under the spinlock, resulting in might_sleep() warnings. Furthermore, the zone_lock spinlock is locked/unlocked using spin_lock_irq/spin_unlock_irq, similarly to the memory backing code with the nullb->lock spinlock. This nested use of irq locks wrecks the irq enabled/disabled state. Fix all this by introducing a bitmap of per-zone locks, with locking implemented using wait_on_bit_lock_io() and clear_and_wake_up_bit(). This locking mechanism allows keeping a zone locked while executing null_process_cmd(), serializing all operations to the zone while allowing the code to sleep during memory backing allocation with GFP_NOIO. Device level zone resource management information is protected using a spinlock which is not held while executing null_process_cmd(). Fixes: 35bc10b ("null_blk: synchronization fix for zoned device") Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent f9c9104 commit aa1c09c

2 files changed

Lines changed: 82 additions & 25 deletions

File tree

drivers/block/null_blk.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,8 @@ struct nullb_device {
4747
unsigned int nr_zones_closed;
4848
struct blk_zone *zones;
4949
sector_t zone_size_sects;
50-
spinlock_t zone_lock;
50+
spinlock_t zone_dev_lock;
51+
unsigned long *zone_locks;
5152

5253
unsigned long size; /* device size in MB */
5354
unsigned long completion_nsec; /* time in ns to complete a request */

drivers/block/null_blk_zoned.c

Lines changed: 80 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
// SPDX-License-Identifier: GPL-2.0
22
#include <linux/vmalloc.h>
3+
#include <linux/bitmap.h>
34
#include "null_blk.h"
45

56
#define CREATE_TRACE_POINTS
@@ -45,7 +46,13 @@ int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q)
4546
if (!dev->zones)
4647
return -ENOMEM;
4748

48-
spin_lock_init(&dev->zone_lock);
49+
spin_lock_init(&dev->zone_dev_lock);
50+
dev->zone_locks = bitmap_zalloc(dev->nr_zones, GFP_KERNEL);
51+
if (!dev->zone_locks) {
52+
kvfree(dev->zones);
53+
return -ENOMEM;
54+
}
55+
4956
if (dev->zone_nr_conv >= dev->nr_zones) {
5057
dev->zone_nr_conv = dev->nr_zones - 1;
5158
pr_info("changed the number of conventional zones to %u",
@@ -124,15 +131,26 @@ int null_register_zoned_dev(struct nullb *nullb)
124131

125132
void null_free_zoned_dev(struct nullb_device *dev)
126133
{
134+
bitmap_free(dev->zone_locks);
127135
kvfree(dev->zones);
128136
}
129137

138+
static inline void null_lock_zone(struct nullb_device *dev, unsigned int zno)
139+
{
140+
wait_on_bit_lock_io(dev->zone_locks, zno, TASK_UNINTERRUPTIBLE);
141+
}
142+
143+
static inline void null_unlock_zone(struct nullb_device *dev, unsigned int zno)
144+
{
145+
clear_and_wake_up_bit(zno, dev->zone_locks);
146+
}
147+
130148
int null_report_zones(struct gendisk *disk, sector_t sector,
131149
unsigned int nr_zones, report_zones_cb cb, void *data)
132150
{
133151
struct nullb *nullb = disk->private_data;
134152
struct nullb_device *dev = nullb->dev;
135-
unsigned int first_zone, i;
153+
unsigned int first_zone, i, zno;
136154
struct blk_zone zone;
137155
int error;
138156

@@ -143,17 +161,17 @@ int null_report_zones(struct gendisk *disk, sector_t sector,
143161
nr_zones = min(nr_zones, dev->nr_zones - first_zone);
144162
trace_nullb_report_zones(nullb, nr_zones);
145163

146-
for (i = 0; i < nr_zones; i++) {
164+
zno = first_zone;
165+
for (i = 0; i < nr_zones; i++, zno++) {
147166
/*
148167
* Stacked DM target drivers will remap the zone information by
149168
* modifying the zone information passed to the report callback.
150169
* So use a local copy to avoid corruption of the device zone
151170
* array.
152171
*/
153-
spin_lock_irq(&dev->zone_lock);
154-
memcpy(&zone, &dev->zones[first_zone + i],
155-
sizeof(struct blk_zone));
156-
spin_unlock_irq(&dev->zone_lock);
172+
null_lock_zone(dev, zno);
173+
memcpy(&zone, &dev->zones[zno], sizeof(struct blk_zone));
174+
null_unlock_zone(dev, zno);
157175

158176
error = cb(&zone, i, data);
159177
if (error)
@@ -163,6 +181,10 @@ int null_report_zones(struct gendisk *disk, sector_t sector,
163181
return nr_zones;
164182
}
165183

184+
/*
185+
* This is called in the case of memory backing from null_process_cmd()
186+
* with the target zone already locked.
187+
*/
166188
size_t null_zone_valid_read_len(struct nullb *nullb,
167189
sector_t sector, unsigned int len)
168190
{
@@ -299,22 +321,27 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
299321
if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
300322
return null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors);
301323

324+
null_lock_zone(dev, zno);
325+
spin_lock(&dev->zone_dev_lock);
326+
302327
switch (zone->cond) {
303328
case BLK_ZONE_COND_FULL:
304329
/* Cannot write to a full zone */
305-
return BLK_STS_IOERR;
330+
ret = BLK_STS_IOERR;
331+
goto unlock;
306332
case BLK_ZONE_COND_EMPTY:
307333
case BLK_ZONE_COND_CLOSED:
308334
ret = null_check_zone_resources(dev, zone);
309335
if (ret != BLK_STS_OK)
310-
return ret;
336+
goto unlock;
311337
break;
312338
case BLK_ZONE_COND_IMP_OPEN:
313339
case BLK_ZONE_COND_EXP_OPEN:
314340
break;
315341
default:
316342
/* Invalid zone condition */
317-
return BLK_STS_IOERR;
343+
ret = BLK_STS_IOERR;
344+
goto unlock;
318345
}
319346

320347
/*
@@ -330,11 +357,14 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
330357
else
331358
cmd->rq->__sector = sector;
332359
} else if (sector != zone->wp) {
333-
return BLK_STS_IOERR;
360+
ret = BLK_STS_IOERR;
361+
goto unlock;
334362
}
335363

336-
if (zone->wp + nr_sectors > zone->start + zone->capacity)
337-
return BLK_STS_IOERR;
364+
if (zone->wp + nr_sectors > zone->start + zone->capacity) {
365+
ret = BLK_STS_IOERR;
366+
goto unlock;
367+
}
338368

339369
if (zone->cond == BLK_ZONE_COND_CLOSED) {
340370
dev->nr_zones_closed--;
@@ -345,9 +375,11 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
345375
if (zone->cond != BLK_ZONE_COND_EXP_OPEN)
346376
zone->cond = BLK_ZONE_COND_IMP_OPEN;
347377

378+
spin_unlock(&dev->zone_dev_lock);
348379
ret = null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors);
380+
spin_lock(&dev->zone_dev_lock);
349381
if (ret != BLK_STS_OK)
350-
return ret;
382+
goto unlock;
351383

352384
zone->wp += nr_sectors;
353385
if (zone->wp == zone->start + zone->capacity) {
@@ -357,7 +389,13 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
357389
dev->nr_zones_imp_open--;
358390
zone->cond = BLK_ZONE_COND_FULL;
359391
}
360-
return BLK_STS_OK;
392+
ret = BLK_STS_OK;
393+
394+
unlock:
395+
spin_unlock(&dev->zone_dev_lock);
396+
null_unlock_zone(dev, zno);
397+
398+
return ret;
361399
}
362400

363401
static blk_status_t null_open_zone(struct nullb_device *dev, struct blk_zone *zone)
@@ -468,21 +506,33 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op,
468506
sector_t sector)
469507
{
470508
struct nullb_device *dev = cmd->nq->dev;
471-
unsigned int zone_no = null_zone_no(dev, sector);
472-
struct blk_zone *zone = &dev->zones[zone_no];
473-
blk_status_t ret = BLK_STS_OK;
509+
unsigned int zone_no;
510+
struct blk_zone *zone;
511+
blk_status_t ret;
474512
size_t i;
475513

476-
switch (op) {
477-
case REQ_OP_ZONE_RESET_ALL:
514+
if (op == REQ_OP_ZONE_RESET_ALL) {
478515
for (i = dev->zone_nr_conv; i < dev->nr_zones; i++) {
516+
null_lock_zone(dev, i);
479517
zone = &dev->zones[i];
480518
if (zone->cond != BLK_ZONE_COND_EMPTY) {
519+
spin_lock(&dev->zone_dev_lock);
481520
null_reset_zone(dev, zone);
521+
spin_unlock(&dev->zone_dev_lock);
482522
trace_nullb_zone_op(cmd, i, zone->cond);
483523
}
524+
null_unlock_zone(dev, i);
484525
}
485526
return BLK_STS_OK;
527+
}
528+
529+
zone_no = null_zone_no(dev, sector);
530+
zone = &dev->zones[zone_no];
531+
532+
null_lock_zone(dev, zone_no);
533+
spin_lock(&dev->zone_dev_lock);
534+
535+
switch (op) {
486536
case REQ_OP_ZONE_RESET:
487537
ret = null_reset_zone(dev, zone);
488538
break;
@@ -496,22 +546,27 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op,
496546
ret = null_finish_zone(dev, zone);
497547
break;
498548
default:
499-
return BLK_STS_NOTSUPP;
549+
ret = BLK_STS_NOTSUPP;
550+
break;
500551
}
501552

553+
spin_unlock(&dev->zone_dev_lock);
554+
502555
if (ret == BLK_STS_OK)
503556
trace_nullb_zone_op(cmd, zone_no, zone->cond);
504557

558+
null_unlock_zone(dev, zone_no);
559+
505560
return ret;
506561
}
507562

508563
blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd, enum req_opf op,
509564
sector_t sector, sector_t nr_sectors)
510565
{
511-
blk_status_t sts;
512566
struct nullb_device *dev = cmd->nq->dev;
567+
unsigned int zno = null_zone_no(dev, sector);
568+
blk_status_t sts;
513569

514-
spin_lock_irq(&dev->zone_lock);
515570
switch (op) {
516571
case REQ_OP_WRITE:
517572
sts = null_zone_write(cmd, sector, nr_sectors, false);
@@ -527,9 +582,10 @@ blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd, enum req_opf op,
527582
sts = null_zone_mgmt(cmd, op, sector);
528583
break;
529584
default:
585+
null_lock_zone(dev, zno);
530586
sts = null_process_cmd(cmd, op, sector, nr_sectors);
587+
null_unlock_zone(dev, zno);
531588
}
532-
spin_unlock_irq(&dev->zone_lock);
533589

534590
return sts;
535591
}

0 commit comments

Comments
 (0)