Skip to content

Commit ddc6291

Browse files
committed
Merge tag 'nvme-5.10-2020-10-23' of git://git.infradead.org/nvme into block-5.10
Pull NVMe fixes from Christoph: "nvme fixes for 5.10 - rdma error handling fixes (Chao Leng) - fc error handling and reconnect fixes (James Smart) - fix the qid displace when tracing ioctl command (Keith Busch) - don't use BLK_MQ_REQ_NOWAIT for passthru (Chaitanya Kulkarni) - fix MTDT for passthru (Logan Gunthorpe) - blacklist Write Same on more devices (Kai-Heng Feng) - fix an uninitialized work struct (zhenwei pi)" * tag 'nvme-5.10-2020-10-23' of git://git.infradead.org/nvme: nvme-fc: shorten reconnect delay if possible for FC nvme-fc: wait for queues to freeze before calling update_hr_hw_queues nvme-fc: fix error loop in create_hw_io_queues nvme-fc: fix io timeout to abort I/O nvmet: don't use BLK_MQ_REQ_NOWAIT for passthru nvmet: cleanup nvmet_passthru_map_sg() nvmet: limit passthru MTDS by BIO_MAX_PAGES nvmet: fix uninitialized work for zero kato nvme-pci: disable Write Zeroes on Sandisk Skyhawk nvme: use queuedata for nvme_req_qid nvme-rdma: fix crash due to incorrect cqe nvme-rdma: fix crash when connect rejected
2 parents fd78874 + f673714 commit ddc6291

6 files changed

Lines changed: 115 additions & 54 deletions

File tree

drivers/nvme/host/fc.c

Lines changed: 94 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,10 @@ enum nvme_fc_queue_flags {
2626
};
2727

2828
#define NVME_FC_DEFAULT_DEV_LOSS_TMO 60 /* seconds */
29+
#define NVME_FC_DEFAULT_RECONNECT_TMO 2 /* delay between reconnects
30+
* when connected and a
31+
* connection failure.
32+
*/
2933

3034
struct nvme_fc_queue {
3135
struct nvme_fc_ctrl *ctrl;
@@ -1837,8 +1841,10 @@ __nvme_fc_abort_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_fcp_op *op)
18371841
opstate = atomic_xchg(&op->state, FCPOP_STATE_ABORTED);
18381842
if (opstate != FCPOP_STATE_ACTIVE)
18391843
atomic_set(&op->state, opstate);
1840-
else if (test_bit(FCCTRL_TERMIO, &ctrl->flags))
1844+
else if (test_bit(FCCTRL_TERMIO, &ctrl->flags)) {
1845+
op->flags |= FCOP_FLAGS_TERMIO;
18411846
ctrl->iocnt++;
1847+
}
18421848
spin_unlock_irqrestore(&ctrl->lock, flags);
18431849

18441850
if (opstate != FCPOP_STATE_ACTIVE)
@@ -1874,7 +1880,8 @@ __nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl,
18741880

18751881
if (opstate == FCPOP_STATE_ABORTED) {
18761882
spin_lock_irqsave(&ctrl->lock, flags);
1877-
if (test_bit(FCCTRL_TERMIO, &ctrl->flags)) {
1883+
if (test_bit(FCCTRL_TERMIO, &ctrl->flags) &&
1884+
op->flags & FCOP_FLAGS_TERMIO) {
18781885
if (!--ctrl->iocnt)
18791886
wake_up(&ctrl->ioabort_wait);
18801887
}
@@ -2314,7 +2321,7 @@ nvme_fc_create_hw_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize)
23142321
return 0;
23152322

23162323
delete_queues:
2317-
for (; i >= 0; i--)
2324+
for (; i > 0; i--)
23182325
__nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[i], i);
23192326
return ret;
23202327
}
@@ -2433,7 +2440,7 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
24332440
return;
24342441

24352442
dev_warn(ctrl->ctrl.device,
2436-
"NVME-FC{%d}: transport association error detected: %s\n",
2443+
"NVME-FC{%d}: transport association event: %s\n",
24372444
ctrl->cnum, errmsg);
24382445
dev_warn(ctrl->ctrl.device,
24392446
"NVME-FC{%d}: resetting controller\n", ctrl->cnum);
@@ -2446,15 +2453,20 @@ nvme_fc_timeout(struct request *rq, bool reserved)
24462453
{
24472454
struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
24482455
struct nvme_fc_ctrl *ctrl = op->ctrl;
2456+
struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu;
2457+
struct nvme_command *sqe = &cmdiu->sqe;
24492458

24502459
/*
2451-
* we can't individually ABTS an io without affecting the queue,
2452-
* thus killing the queue, and thus the association.
2453-
* So resolve by performing a controller reset, which will stop
2454-
* the host/io stack, terminate the association on the link,
2455-
* and recreate an association on the link.
2460+
* Attempt to abort the offending command. Command completion
2461+
* will detect the aborted io and will fail the connection.
24562462
*/
2457-
nvme_fc_error_recovery(ctrl, "io timeout error");
2463+
dev_info(ctrl->ctrl.device,
2464+
"NVME-FC{%d.%d}: io timeout: opcode %d fctype %d w10/11: "
2465+
"x%08x/x%08x\n",
2466+
ctrl->cnum, op->queue->qnum, sqe->common.opcode,
2467+
sqe->connect.fctype, sqe->common.cdw10, sqe->common.cdw11);
2468+
if (__nvme_fc_abort_op(ctrl, op))
2469+
nvme_fc_error_recovery(ctrl, "io timeout abort failed");
24582470

24592471
/*
24602472
* the io abort has been initiated. Have the reset timer
@@ -2726,6 +2738,7 @@ nvme_fc_complete_rq(struct request *rq)
27262738
struct nvme_fc_ctrl *ctrl = op->ctrl;
27272739

27282740
atomic_set(&op->state, FCPOP_STATE_IDLE);
2741+
op->flags &= ~FCOP_FLAGS_TERMIO;
27292742

27302743
nvme_fc_unmap_data(ctrl, rq, op);
27312744
nvme_complete_rq(rq);
@@ -2876,11 +2889,14 @@ nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl)
28762889
if (ret)
28772890
goto out_delete_hw_queues;
28782891

2879-
if (prior_ioq_cnt != nr_io_queues)
2892+
if (prior_ioq_cnt != nr_io_queues) {
28802893
dev_info(ctrl->ctrl.device,
28812894
"reconnect: revising io queue count from %d to %d\n",
28822895
prior_ioq_cnt, nr_io_queues);
2883-
blk_mq_update_nr_hw_queues(&ctrl->tag_set, nr_io_queues);
2896+
nvme_wait_freeze(&ctrl->ctrl);
2897+
blk_mq_update_nr_hw_queues(&ctrl->tag_set, nr_io_queues);
2898+
nvme_unfreeze(&ctrl->ctrl);
2899+
}
28842900

28852901
return 0;
28862902

@@ -3090,26 +3106,19 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
30903106
return ret;
30913107
}
30923108

3109+
30933110
/*
3094-
* This routine stops operation of the controller on the host side.
3095-
* On the host os stack side: Admin and IO queues are stopped,
3096-
* outstanding ios on them terminated via FC ABTS.
3097-
* On the link side: the association is terminated.
3111+
* This routine runs through all outstanding commands on the association
3112+
* and aborts them. This routine is typically called by the
3113+
* delete_association routine. It is also called due to an error during
3114+
* reconnect. In that scenario, it is most likely a command that initializes
3115+
* the controller, including fabric Connect commands on io queues, that
3116+
* may have timed out or failed thus the io must be killed for the connect
3117+
* thread to see the error.
30983118
*/
30993119
static void
3100-
nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
3120+
__nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues)
31013121
{
3102-
struct nvmefc_ls_rcv_op *disls = NULL;
3103-
unsigned long flags;
3104-
3105-
if (!test_and_clear_bit(ASSOC_ACTIVE, &ctrl->flags))
3106-
return;
3107-
3108-
spin_lock_irqsave(&ctrl->lock, flags);
3109-
set_bit(FCCTRL_TERMIO, &ctrl->flags);
3110-
ctrl->iocnt = 0;
3111-
spin_unlock_irqrestore(&ctrl->lock, flags);
3112-
31133122
/*
31143123
* If io queues are present, stop them and terminate all outstanding
31153124
* ios on them. As FC allocates FC exchange for each io, the
@@ -3127,6 +3136,8 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
31273136
blk_mq_tagset_busy_iter(&ctrl->tag_set,
31283137
nvme_fc_terminate_exchange, &ctrl->ctrl);
31293138
blk_mq_tagset_wait_completed_request(&ctrl->tag_set);
3139+
if (start_queues)
3140+
nvme_start_queues(&ctrl->ctrl);
31303141
}
31313142

31323143
/*
@@ -3143,13 +3154,34 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
31433154

31443155
/*
31453156
* clean up the admin queue. Same thing as above.
3146-
* use blk_mq_tagset_busy_itr() and the transport routine to
3147-
* terminate the exchanges.
31483157
*/
31493158
blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
31503159
blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
31513160
nvme_fc_terminate_exchange, &ctrl->ctrl);
31523161
blk_mq_tagset_wait_completed_request(&ctrl->admin_tag_set);
3162+
}
3163+
3164+
/*
3165+
* This routine stops operation of the controller on the host side.
3166+
* On the host os stack side: Admin and IO queues are stopped,
3167+
* outstanding ios on them terminated via FC ABTS.
3168+
* On the link side: the association is terminated.
3169+
*/
3170+
static void
3171+
nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
3172+
{
3173+
struct nvmefc_ls_rcv_op *disls = NULL;
3174+
unsigned long flags;
3175+
3176+
if (!test_and_clear_bit(ASSOC_ACTIVE, &ctrl->flags))
3177+
return;
3178+
3179+
spin_lock_irqsave(&ctrl->lock, flags);
3180+
set_bit(FCCTRL_TERMIO, &ctrl->flags);
3181+
ctrl->iocnt = 0;
3182+
spin_unlock_irqrestore(&ctrl->lock, flags);
3183+
3184+
__nvme_fc_abort_outstanding_ios(ctrl, false);
31533185

31543186
/* kill the aens as they are a separate path */
31553187
nvme_fc_abort_aen_ops(ctrl);
@@ -3263,22 +3295,27 @@ static void
32633295
__nvme_fc_terminate_io(struct nvme_fc_ctrl *ctrl)
32643296
{
32653297
/*
3266-
* if state is connecting - the error occurred as part of a
3267-
* reconnect attempt. The create_association error paths will
3268-
* clean up any outstanding io.
3269-
*
3270-
* if it's a different state - ensure all pending io is
3271-
* terminated. Given this can delay while waiting for the
3272-
* aborted io to return, we recheck adapter state below
3273-
* before changing state.
3298+
* if state is CONNECTING - the error occurred as part of a
3299+
* reconnect attempt. Abort any ios on the association and
3300+
* let the create_association error paths resolve things.
32743301
*/
3275-
if (ctrl->ctrl.state != NVME_CTRL_CONNECTING) {
3276-
nvme_stop_keep_alive(&ctrl->ctrl);
3277-
3278-
/* will block while waiting for io to terminate */
3279-
nvme_fc_delete_association(ctrl);
3302+
if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) {
3303+
__nvme_fc_abort_outstanding_ios(ctrl, true);
3304+
return;
32803305
}
32813306

3307+
/*
3308+
* For any other state, kill the association. As this routine
3309+
* is a common io abort routine for resetting and such, after
3310+
* the association is terminated, ensure that the state is set
3311+
* to CONNECTING.
3312+
*/
3313+
3314+
nvme_stop_keep_alive(&ctrl->ctrl);
3315+
3316+
/* will block while waiting for io to terminate */
3317+
nvme_fc_delete_association(ctrl);
3318+
32823319
if (ctrl->ctrl.state != NVME_CTRL_CONNECTING &&
32833320
!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING))
32843321
dev_err(ctrl->ctrl.device,
@@ -3403,7 +3440,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
34033440
{
34043441
struct nvme_fc_ctrl *ctrl;
34053442
unsigned long flags;
3406-
int ret, idx;
3443+
int ret, idx, ctrl_loss_tmo;
34073444

34083445
if (!(rport->remoteport.port_role &
34093446
(FC_PORT_ROLE_NVME_DISCOVERY | FC_PORT_ROLE_NVME_TARGET))) {
@@ -3429,6 +3466,19 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
34293466
goto out_free_ctrl;
34303467
}
34313468

3469+
/*
3470+
* if ctrl_loss_tmo is being enforced and the default reconnect delay
3471+
* is being used, change to a shorter reconnect delay for FC.
3472+
*/
3473+
if (opts->max_reconnects != -1 &&
3474+
opts->reconnect_delay == NVMF_DEF_RECONNECT_DELAY &&
3475+
opts->reconnect_delay > NVME_FC_DEFAULT_RECONNECT_TMO) {
3476+
ctrl_loss_tmo = opts->max_reconnects * opts->reconnect_delay;
3477+
opts->reconnect_delay = NVME_FC_DEFAULT_RECONNECT_TMO;
3478+
opts->max_reconnects = DIV_ROUND_UP(ctrl_loss_tmo,
3479+
opts->reconnect_delay);
3480+
}
3481+
34323482
ctrl->ctrl.opts = opts;
34333483
ctrl->ctrl.nr_reconnects = 0;
34343484
if (lport->dev)

drivers/nvme/host/nvme.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,7 @@ static inline struct nvme_request *nvme_req(struct request *req)
176176

177177
static inline u16 nvme_req_qid(struct request *req)
178178
{
179-
if (!req->rq_disk)
179+
if (!req->q->queuedata)
180180
return 0;
181181
return blk_mq_unique_tag_to_hwq(blk_mq_unique_tag(req)) + 1;
182182
}

drivers/nvme/host/pci.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3185,6 +3185,8 @@ static const struct pci_device_id nvme_id_table[] = {
31853185
NVME_QUIRK_IGNORE_DEV_SUBNQN, },
31863186
{ PCI_DEVICE(0x1c5c, 0x1504), /* SK Hynix PC400 */
31873187
.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
3188+
{ PCI_DEVICE(0x15b7, 0x2001), /* Sandisk Skyhawk */
3189+
.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
31883190
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001),
31893191
.driver_data = NVME_QUIRK_SINGLE_VECTOR },
31903192
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) },

drivers/nvme/host/rdma.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1730,10 +1730,11 @@ static void nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
17301730
req->result = cqe->result;
17311731

17321732
if (wc->wc_flags & IB_WC_WITH_INVALIDATE) {
1733-
if (unlikely(wc->ex.invalidate_rkey != req->mr->rkey)) {
1733+
if (unlikely(!req->mr ||
1734+
wc->ex.invalidate_rkey != req->mr->rkey)) {
17341735
dev_err(queue->ctrl->ctrl.device,
17351736
"Bogus remote invalidation for rkey %#x\n",
1736-
req->mr->rkey);
1737+
req->mr ? req->mr->rkey : 0);
17371738
nvme_rdma_error_recovery(queue->ctrl);
17381739
}
17391740
} else if (req->mr) {
@@ -1926,7 +1927,6 @@ static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
19261927
complete(&queue->cm_done);
19271928
return 0;
19281929
case RDMA_CM_EVENT_REJECTED:
1929-
nvme_rdma_destroy_queue_ib(queue);
19301930
cm_error = nvme_rdma_conn_rejected(queue, ev);
19311931
break;
19321932
case RDMA_CM_EVENT_ROUTE_ERROR:

drivers/nvme/target/core.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1126,7 +1126,8 @@ static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
11261126
* in case a host died before it enabled the controller. Hence, simply
11271127
* reset the keep alive timer when the controller is enabled.
11281128
*/
1129-
mod_delayed_work(system_wq, &ctrl->ka_work, ctrl->kato * HZ);
1129+
if (ctrl->kato)
1130+
mod_delayed_work(system_wq, &ctrl->ka_work, ctrl->kato * HZ);
11301131
}
11311132

11321133
static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl)

drivers/nvme/target/passthru.c

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ static u16 nvmet_passthru_override_id_ctrl(struct nvmet_req *req)
2626
struct nvme_ctrl *pctrl = ctrl->subsys->passthru_ctrl;
2727
u16 status = NVME_SC_SUCCESS;
2828
struct nvme_id_ctrl *id;
29-
u32 max_hw_sectors;
29+
int max_hw_sectors;
3030
int page_shift;
3131

3232
id = kzalloc(sizeof(*id), GFP_KERNEL);
@@ -48,6 +48,13 @@ static u16 nvmet_passthru_override_id_ctrl(struct nvmet_req *req)
4848
max_hw_sectors = min_not_zero(pctrl->max_segments << (PAGE_SHIFT - 9),
4949
pctrl->max_hw_sectors);
5050

51+
/*
52+
* nvmet_passthru_map_sg is limited to using a single bio so limit
53+
* the mdts based on BIO_MAX_PAGES as well
54+
*/
55+
max_hw_sectors = min_not_zero(BIO_MAX_PAGES << (PAGE_SHIFT - 9),
56+
max_hw_sectors);
57+
5158
page_shift = NVME_CAP_MPSMIN(ctrl->cap) + 12;
5259

5360
id->mdts = ilog2(max_hw_sectors) + 9 - page_shift;
@@ -180,18 +187,20 @@ static void nvmet_passthru_req_done(struct request *rq,
180187

181188
static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq)
182189
{
183-
int sg_cnt = req->sg_cnt;
184190
struct scatterlist *sg;
185191
int op_flags = 0;
186192
struct bio *bio;
187193
int i, ret;
188194

195+
if (req->sg_cnt > BIO_MAX_PAGES)
196+
return -EINVAL;
197+
189198
if (req->cmd->common.opcode == nvme_cmd_flush)
190199
op_flags = REQ_FUA;
191200
else if (nvme_is_write(req->cmd))
192201
op_flags = REQ_SYNC | REQ_IDLE;
193202

194-
bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES));
203+
bio = bio_alloc(GFP_KERNEL, req->sg_cnt);
195204
bio->bi_end_io = bio_put;
196205
bio->bi_opf = req_op(rq) | op_flags;
197206

@@ -201,7 +210,6 @@ static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq)
201210
bio_put(bio);
202211
return -EINVAL;
203212
}
204-
sg_cnt--;
205213
}
206214

207215
ret = blk_rq_append_bio(rq, &bio);
@@ -236,7 +244,7 @@ static void nvmet_passthru_execute_cmd(struct nvmet_req *req)
236244
q = ns->queue;
237245
}
238246

239-
rq = nvme_alloc_request(q, req->cmd, BLK_MQ_REQ_NOWAIT, NVME_QID_ANY);
247+
rq = nvme_alloc_request(q, req->cmd, 0, NVME_QID_ANY);
240248
if (IS_ERR(rq)) {
241249
status = NVME_SC_INTERNAL;
242250
goto out_put_ns;

0 commit comments

Comments
 (0)