Message ID | 20220308082048.9774-1-njavali@marvell.com |
---|---|
Headers | show |
Series | qla2xxx driver fixes | expand |
> On Mar 8, 2022, at 12:20 AM, Nilesh Javali <njavali@marvell.com> wrote: > > From: Quinn Tran <qutran@marvell.com> > > User experienced no task management error while target device > is responding with error. The RSP_CODE field in the status > iocb is in little endian. Driver assumes it's big endian, > where it picked up erroneous data. > > Convert the data back to big endian as is on the wire, > where current code will pick up correct status. > > Cc: stable@vger.kernel.org > Fixes: faef62d13463 ("[SCSI] qla2xxx: Fix Task Management command asynchronous handling") > Signed-off-by: Quinn Tran <qutran@marvell.com> > Signed-off-by: Nilesh Javali <njavali@marvell.com> > --- > drivers/scsi/qla2xxx/qla_isr.c | 1 + > 1 file changed, 1 insertion(+) > > diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c > index aaf6504570fd..198b782d7790 100644 > --- a/drivers/scsi/qla2xxx/qla_isr.c > +++ b/drivers/scsi/qla2xxx/qla_isr.c > @@ -2498,6 +2498,7 @@ qla24xx_tm_iocb_entry(scsi_qla_host_t *vha, struct req_que *req, void *tsk) > iocb->u.tmf.data = QLA_FUNCTION_FAILED; > } else if ((le16_to_cpu(sts->scsi_status) & > SS_RESPONSE_INFO_LEN_VALID)) { > + host_to_fcp_swap(sts->data, sizeof(sts->data)); > if (le32_to_cpu(sts->rsp_data_len) < 4) { > ql_log(ql_log_warn, fcport->vha, 0x503b, > "Async-%s error - hdl=%x not enough response(%d).\n", > -- > 2.19.0.rc0 > Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com> -- Himanshu Madhani Oracle Linux Engineering
> On Mar 8, 2022, at 12:20 AM, Nilesh Javali <njavali@marvell.com> wrote: > > From: Arun Easi <aeasi@marvell.com> > > Driver registration of localport can race when it > happens at the remote port discovery time. Fix > this by calling the registration under a mutex. > > Reported-by: Marco Patalano <mpatalan@redhat.com> > Tested-by: Marco Patalano <mpatalan@redhat.com> > Cc: stable@vger.kernel.org > Fixes: e84067d74301 ("scsi: qla2xxx: Add FC-NVMe F/W initialization and transport registration") > Signed-off-by: Arun Easi <aeasi@marvell.com> > Signed-off-by: Nilesh Javali <njavali@marvell.com> > --- > drivers/scsi/qla2xxx/qla_nvme.c | 30 ++++++++++++++++++++---------- > 1 file changed, 20 insertions(+), 10 deletions(-) > > diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c > index 5723082d94d6..3bf5cbd754a7 100644 > --- a/drivers/scsi/qla2xxx/qla_nvme.c > +++ b/drivers/scsi/qla2xxx/qla_nvme.c > @@ -782,8 +782,6 @@ int qla_nvme_register_hba(struct scsi_qla_host *vha) > ha = vha->hw; > tmpl = &qla_nvme_fc_transport; > > - WARN_ON(vha->nvme_local_port); > - > if (ql2xnvme_queues < MIN_NVME_HW_QUEUES || ql2xnvme_queues > MAX_NVME_HW_QUEUES) { > ql_log(ql_log_warn, vha, 0xfffd, > "ql2xnvme_queues=%d is out of range(MIN:%d - MAX:%d). Resetting ql2xnvme_queues to:%d\n", > @@ -797,7 +795,7 @@ int qla_nvme_register_hba(struct scsi_qla_host *vha) > (uint8_t)(ha->max_qpairs ? 
ha->max_qpairs : 1)); > > ql_log(ql_log_info, vha, 0xfffb, > - "Number of NVME queues used for this port: %d\n", > + "Number of NVME queues used for this port: %d\n", > qla_nvme_fc_transport.max_hw_queues); > > pinfo.node_name = wwn_to_u64(vha->node_name); > @@ -805,13 +803,25 @@ int qla_nvme_register_hba(struct scsi_qla_host *vha) > pinfo.port_role = FC_PORT_ROLE_NVME_INITIATOR; > pinfo.port_id = vha->d_id.b24; > > - ql_log(ql_log_info, vha, 0xffff, > - "register_localport: host-traddr=nn-0x%llx:pn-0x%llx on portID:%x\n", > - pinfo.node_name, pinfo.port_name, pinfo.port_id); > - qla_nvme_fc_transport.dma_boundary = vha->host->dma_boundary; > - > - ret = nvme_fc_register_localport(&pinfo, tmpl, > - get_device(&ha->pdev->dev), &vha->nvme_local_port); > + mutex_lock(&ha->vport_lock); > + /* > + * Check again for nvme_local_port to see if any other thread raced > + * with this one and finished registration. > + */ > + if (!vha->nvme_local_port) { > + ql_log(ql_log_info, vha, 0xffff, > + "register_localport: host-traddr=nn-0x%llx:pn-0x%llx on portID:%x\n", > + pinfo.node_name, pinfo.port_name, pinfo.port_id); > + qla_nvme_fc_transport.dma_boundary = vha->host->dma_boundary; > + > + ret = nvme_fc_register_localport(&pinfo, tmpl, > + get_device(&ha->pdev->dev), > + &vha->nvme_local_port); > + mutex_unlock(&ha->vport_lock); > + } else { > + mutex_unlock(&ha->vport_lock); > + return 0; > + } > if (ret) { > ql_log(ql_log_warn, vha, 0xffff, > "register_localport failed: ret=%x\n", ret); > -- > 2.19.0.rc0 > Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com> -- Himanshu Madhani Oracle Linux Engineering
> On Mar 8, 2022, at 12:20 AM, Nilesh Javali <njavali@marvell.com> wrote: > > From: Arun Easi <aeasi@marvell.com> > > During purex packet handling driver was incorrectly > freeing a pre-allocated structure. Fix this by > skipping that entry. > > System crashed with the following stack during a > module unload test. > > Call Trace: > sbitmap_init_node+0x7f/0x1e0 > sbitmap_queue_init_node+0x24/0x150 > blk_mq_init_bitmaps+0x3d/0xa0 > blk_mq_init_tags+0x68/0x90 > blk_mq_alloc_map_and_rqs+0x44/0x120 > blk_mq_alloc_set_map_and_rqs+0x63/0x150 > blk_mq_alloc_tag_set+0x11b/0x230 > scsi_add_host_with_dma.cold+0x3f/0x245 > qla2x00_probe_one+0xd5a/0x1b80 [qla2xxx] > > Call Trace with slub_debug and debug kernel: > kasan_report_invalid_free+0x50/0x80 > __kasan_slab_free+0x137/0x150 > slab_free_freelist_hook+0xc6/0x190 > kfree+0xe8/0x2e0 > qla2x00_free_device+0x3bb/0x5d0 [qla2xxx] > qla2x00_remove_one+0x668/0xcf0 [qla2xxx] > > Reported-by: Marco Patalano <mpatalan@redhat.com> > Tested-by: Marco Patalano <mpatalan@redhat.com> > Cc: stable@vger.kernel.org > Fixes: 62e9dd177732 ("scsi: qla2xxx: Change in PUREX to handle FPIN ELS requests") > Signed-off-by: Arun Easi <aeasi@marvell.com> > Signed-off-by: Nilesh Javali <njavali@marvell.com> > --- > drivers/scsi/qla2xxx/qla_os.c | 2 ++ > 1 file changed, 2 insertions(+) > > diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c > index a4546346c18b..d572a76d0fa0 100644 > --- a/drivers/scsi/qla2xxx/qla_os.c > +++ b/drivers/scsi/qla2xxx/qla_os.c > @@ -3904,6 +3904,8 @@ qla24xx_free_purex_list(struct purex_list *list) > spin_lock_irqsave(&list->lock, flags); > list_for_each_entry_safe(item, next, &list->head, list) { > list_del(&item->list); > + if (item == &item->vha->default_item) > + continue; > kfree(item); > } > spin_unlock_irqrestore(&list->lock, flags); > -- > 2.19.0.rc0 > Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com> -- Himanshu Madhani Oracle Linux Engineering
> On Mar 8, 2022, at 12:20 AM, Nilesh Javali <njavali@marvell.com> wrote: > > From: Quinn Tran <qutran@marvell.com> > > User experience device lost. The log shows Get port data base > command was queued up, failed, and requeued again. Every time > it is requeued, it set the FCF_ASYNC_ACTIVE. This prevents any > recovery code from occurring because driver thinks a recovery is in > progress for this session. In essence, this session is hung. > The reason it gets into this place is the session deletion got > in front of this call due to link perturbation. > > Break the requeue cycle and exit. > The session deletion code will trigger a session relogin. > > Cc: stable@vger.kernel.org > Fixes: 726b85487067 ("qla2xxx: Add framework for async fabric discovery") > Signed-off-by: Quinn Tran <qutran@marvell.com> > Signed-off-by: Nilesh Javali <njavali@marvell.com> > --- > drivers/scsi/qla2xxx/qla_def.h | 4 ++++ > drivers/scsi/qla2xxx/qla_init.c | 19 +++++++++++++++++-- > 2 files changed, 21 insertions(+), 2 deletions(-) > > diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h > index 47d7fa1c7ae8..b0579bce5b88 100644 > --- a/drivers/scsi/qla2xxx/qla_def.h > +++ b/drivers/scsi/qla2xxx/qla_def.h > @@ -5437,4 +5437,8 @@ struct ql_vnd_tgt_stats_resp { > #include "qla_gbl.h" > #include "qla_dbg.h" > #include "qla_inline.h" > + > +#define SESSION_DELETE(_fcport) (_fcport->disc_state == DSC_DELETE_PEND || \ > + _fcport->disc_state == DSC_DELETED) > + Would you be open to changing the macro name to IS_SESSION_DELETED()? Since the macro checks whether the session is pending deletion or has already been deleted, the name SESSION_DELETE is not reader-friendly: it reads like an action rather than a predicate. 
> #endif > diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c > index 6ffe44b805b6..3c58a2911937 100644 > --- a/drivers/scsi/qla2xxx/qla_init.c > +++ b/drivers/scsi/qla2xxx/qla_init.c > @@ -575,6 +575,14 @@ qla2x00_async_adisc(struct scsi_qla_host *vha, fc_port_t *fcport, > struct srb_iocb *lio; > int rval = QLA_FUNCTION_FAILED; > > + if (SESSION_DELETE(fcport)) { > + ql_log(ql_log_warn, vha, 0xffff, > + "%s: %8phC is being delete - not sending command.\n", > + __func__, fcport->port_name); > + fcport->flags &= ~FCF_ASYNC_ACTIVE; > + return rval; > + } > + > if (!vha->flags.online || (fcport->flags & FCF_ASYNC_SENT)) > return rval; > > @@ -1338,8 +1346,15 @@ int qla24xx_async_gpdb(struct scsi_qla_host *vha, fc_port_t *fcport, u8 opt) > struct port_database_24xx *pd; > struct qla_hw_data *ha = vha->hw; > > - if (!vha->flags.online || (fcport->flags & FCF_ASYNC_SENT) || > - fcport->loop_id == FC_NO_LOOP_ID) { > + if (SESSION_DELETE(fcport)) { > + ql_log(ql_log_warn, vha, 0xffff, > + "%s: %8phC is being delete - not sending command.\n", > + __func__, fcport->port_name); > + fcport->flags &= ~FCF_ASYNC_ACTIVE; > + return rval; > + } > + > + if (!vha->flags.online || fcport->flags & FCF_ASYNC_SENT) { > ql_log(ql_log_warn, vha, 0xffff, > "%s: %8phC online %d flags %x - not sending command.\n", > __func__, fcport->port_name, vha->flags.online, fcport->flags); > -- > 2.19.0.rc0 > -- Himanshu Madhani Oracle Linux Engineering