--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -1028,6 +1028,7 @@ struct lpfc_hba {
* Firmware supports Forced Link Speed
* capability
*/
+#define HBA_PCI_ERR 0x80000 /* The PCI slot is offline */
#define HBA_FLOGI_ISSUED 0x100000 /* FLOGI was issued */
#define HBA_CGN_RSVD1 0x200000 /* Reserved CGN flag */
#define HBA_CGN_DAY_WRAP 0x400000 /* HBA Congestion info day wraps */
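HBA_PCI_ERR claims the previously unused 0x80000 bit between the forced-link-speed flag above and HBA_FLOGI_ISSUED (0x100000). The rest of the series sets it as soon as error_detected reports a frozen or failed channel, tests it before acting on error attention, and clears it once slot_reset restores the device. A minimal userspace sketch of that lifecycle, with a pthread mutex standing in for hbalock and a stand-in struct rather than the real lpfc_hba:

    /* Minimal sketch of the hba_flag bit idiom; fake_hba is a stand-in. */
    #include <pthread.h>
    #include <stdint.h>
    #include <stdio.h>

    #define HBA_PCI_ERR      0x80000  /* The PCI slot is offline */
    #define HBA_FLOGI_ISSUED 0x100000 /* FLOGI was issued */

    struct fake_hba {
        pthread_mutex_t lock;   /* stands in for hbalock */
        uint32_t hba_flag;
    };

    int main(void)
    {
        struct fake_hba hba = { PTHREAD_MUTEX_INITIALIZER, 0 };

        /* error_detected path: mark the slot offline */
        pthread_mutex_lock(&hba.lock);
        hba.hba_flag |= HBA_PCI_ERR;
        pthread_mutex_unlock(&hba.lock);

        /* elsewhere: skip work that needs a live PCI channel */
        pthread_mutex_lock(&hba.lock);
        if (hba.hba_flag & HBA_PCI_ERR)
            printf("slot offline, skipping hardware access\n");
        pthread_mutex_unlock(&hba.lock);

        /* slot_reset path: clear the bit once the slot is restored */
        pthread_mutex_lock(&hba.lock);
        hba.hba_flag &= ~HBA_PCI_ERR;
        pthread_mutex_unlock(&hba.lock);
        return 0;
    }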
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -5250,6 +5250,7 @@ lpfc_unreg_rpi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
rc = lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT);
if (rc == MBX_NOT_FINISHED) {
+ ndlp->nlp_flag &= ~NLP_UNREG_INP;
mempool_free(mbox, phba->mbox_mem_pool);
acc_plogi = 1;
}
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -1606,6 +1606,11 @@ void
lpfc_sli4_offline_eratt(struct lpfc_hba *phba)
{
spin_lock_irq(&phba->hbalock);
+ if (phba->link_state == LPFC_HBA_ERROR &&
+ phba->hba_flag & HBA_PCI_ERR) {
+ spin_unlock_irq(&phba->hbalock);
+ return;
+ }
phba->link_state = LPFC_HBA_ERROR;
spin_unlock_irq(&phba->hbalock);
@@ -1945,7 +1950,6 @@ lpfc_handle_eratt_s4(struct lpfc_hba *phba)
if (pci_channel_offline(phba->pcidev)) {
lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT,
"3166 pci channel is offline\n");
- lpfc_sli4_offline_eratt(phba);
return;
}
@@ -3643,6 +3647,7 @@ lpfc_offline_prep(struct lpfc_hba *phba, int mbx_action)
struct lpfc_vport **vports;
struct Scsi_Host *shost;
int i;
+ int offline = 0;
if (vport->fc_flag & FC_OFFLINE_MODE)
return;
@@ -3651,6 +3656,8 @@ lpfc_offline_prep(struct lpfc_hba *phba, int mbx_action)
lpfc_linkdown(phba);
+ offline = pci_channel_offline(phba->pcidev);
+
/* Issue an unreg_login to all nodes on all vports */
vports = lpfc_create_vport_work_array(phba);
if (vports != NULL) {
@@ -3673,7 +3680,14 @@ lpfc_offline_prep(struct lpfc_hba *phba, int mbx_action)
ndlp->nlp_flag &= ~NLP_NPR_ADISC;
spin_unlock_irq(&ndlp->lock);
- lpfc_unreg_rpi(vports[i], ndlp);
+ if (offline) {
+ spin_lock_irq(&ndlp->lock);
+ ndlp->nlp_flag &= ~(NLP_UNREG_INP |
+ NLP_RPI_REGISTERED);
+ spin_unlock_irq(&ndlp->lock);
+ } else {
+ lpfc_unreg_rpi(vports[i], ndlp);
+ }
/*
* Whenever an SLI4 port goes offline, free the
* RPI. Get a new RPI when the adapter port
@@ -14080,6 +14094,10 @@ lpfc_pci_resume_one_s4(struct device *dev_d)
return error;
}
+ /* Init cpu_map array */
+ lpfc_cpu_map_array_init(phba);
+ /* Init hba_eq_hdl array */
+ lpfc_hba_eq_hdl_array_init(phba);
/* Configure and enable interrupt */
intr_mode = lpfc_sli4_enable_intr(phba, phba->intr_mode);
if (intr_mode == LPFC_INTR_ERROR) {
@@ -15032,14 +15050,17 @@ lpfc_io_error_detected_s4(struct pci_dev *pdev, pci_channel_state_t state)
lpfc_sli4_prep_dev_for_recover(phba);
return PCI_ERS_RESULT_CAN_RECOVER;
case pci_channel_io_frozen:
+ phba->hba_flag |= HBA_PCI_ERR;
/* Fatal error, prepare for slot reset */
lpfc_sli4_prep_dev_for_reset(phba);
return PCI_ERS_RESULT_NEED_RESET;
case pci_channel_io_perm_failure:
+ phba->hba_flag |= HBA_PCI_ERR;
/* Permanent failure, prepare for device down */
lpfc_sli4_prep_dev_for_perm_failure(phba);
return PCI_ERS_RESULT_DISCONNECT;
default:
+ phba->hba_flag |= HBA_PCI_ERR;
/* Unknown state, prepare and request slot reset */
lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT,
"2825 Unknown PCI error state: x%x\n", state);
@@ -15083,6 +15104,7 @@ lpfc_io_slot_reset_s4(struct pci_dev *pdev)
pci_restore_state(pdev);
+ phba->hba_flag &= ~HBA_PCI_ERR;
/*
 * pci_restore_state() now clears the device's saved_state flag, so the
 * restored state must be saved again.
@@ -15105,6 +15127,7 @@ lpfc_io_slot_reset_s4(struct pci_dev *pdev)
return PCI_ERS_RESULT_DISCONNECT;
} else
phba->intr_mode = intr_mode;
+ lpfc_cpu_affinity_check(phba, phba->cfg_irq_chann);
/* Log the current active interrupt mode */
lpfc_log_intr_mode(phba, phba->intr_mode);
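For context, the _s4 callbacks above plug into the kernel's PCI error-recovery (AER/EEH) state machine through struct pci_error_handlers. A minimal sketch of that wiring, with hypothetical my_* callbacks standing in for the lpfc dispatchers:

    #include <linux/pci.h>

    static pci_ers_result_t my_error_detected(struct pci_dev *pdev,
                                              pci_channel_state_t state)
    {
        /* frozen: request a slot reset; permanent failure: give up */
        if (state == pci_channel_io_perm_failure)
            return PCI_ERS_RESULT_DISCONNECT;
        return PCI_ERS_RESULT_NEED_RESET;
    }

    static pci_ers_result_t my_slot_reset(struct pci_dev *pdev)
    {
        /* slot came back: restore config space before touching the device */
        pci_restore_state(pdev);
        return PCI_ERS_RESULT_RECOVERED;
    }

    static void my_resume(struct pci_dev *pdev)
    {
        /* recovery finished: safe to restart normal I/O */
    }

    static const struct pci_error_handlers my_err_handler = {
        .error_detected = my_error_detected,
        .slot_reset     = my_slot_reset,
        .resume         = my_resume,
    };
    /* a driver points .err_handler in its struct pci_driver at this table */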
@@ -15306,6 +15329,10 @@ lpfc_io_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba;
pci_ers_result_t rc = PCI_ERS_RESULT_DISCONNECT;
+ if (phba->link_state == LPFC_HBA_ERROR &&
+ phba->hba_flag & HBA_IOQ_FLUSH)
+ return PCI_ERS_RESULT_NEED_RESET;
+
switch (phba->pci_dev_grp) {
case LPFC_PCI_DEV_LP:
rc = lpfc_io_error_detected_s3(pdev, state);
--- a/drivers/scsi/lpfc/lpfc_nvme.c
+++ b/drivers/scsi/lpfc/lpfc_nvme.c
@@ -937,6 +937,7 @@ lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn,
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
int cpu;
#endif
+ int offline = 0;
/* Sanity check on return of outstanding command */
if (!lpfc_ncmd) {
@@ -1098,11 +1099,12 @@ lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn,
nCmd->transferred_length = 0;
nCmd->rcv_rsplen = 0;
nCmd->status = NVME_SC_INTERNAL;
+ offline = pci_channel_offline(vport->phba->pcidev);
}
}
/* pick up SLI4 exchange busy condition */
- if (bf_get(lpfc_wcqe_c_xb, wcqe))
+ if (bf_get(lpfc_wcqe_c_xb, wcqe) && !offline)
lpfc_ncmd->flags |= LPFC_SBUF_XBUSY;
else
lpfc_ncmd->flags &= ~LPFC_SBUF_XBUSY;
@@ -2169,6 +2171,10 @@ lpfc_nvme_lport_unreg_wait(struct lpfc_vport *vport,
abts_nvme = 0;
for (i = 0; i < phba->cfg_hdw_queue; i++) {
qp = &phba->sli4_hba.hdwq[i];
+ if (!vport || !vport->localport ||
+ !qp || !qp->io_wq)
+ return;
+
pring = qp->io_wq->pring;
if (!pring)
continue;
@@ -2176,6 +2182,10 @@ lpfc_nvme_lport_unreg_wait(struct lpfc_vport *vport,
abts_scsi += qp->abts_scsi_io_bufs;
abts_nvme += qp->abts_nvme_io_bufs;
}
+ if (!vport || !vport->localport ||
+ vport->phba->hba_flag & HBA_PCI_ERR)
+ return;
+
lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT,
"6176 Lport x%px Localport x%px wait "
"timed out. Pending %d [%d:%d]. "
@@ -2215,6 +2225,8 @@ lpfc_nvme_destroy_localport(struct lpfc_vport *vport)
return;
localport = vport->localport;
+ if (!localport)
+ return;
lport = (struct lpfc_nvme_lport *)localport->private;
lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME,
@@ -2531,7 +2543,8 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
* return value is ignored. The upcall is a courtesy to the
* transport.
*/
- if (vport->load_flag & FC_UNLOADING)
+ if (vport->load_flag & FC_UNLOADING ||
+ unlikely(vport->phba->hba_flag & HBA_PCI_ERR))
(void)nvme_fc_set_remoteport_devloss(remoteport, 0);
ret = nvme_fc_unregister_remoteport(remoteport);
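Zeroing dev_loss_tmo before unregistering tells the nvme-fc transport not to hold I/O for a devloss window that can never be serviced; this hunk extends that to the PCI-error case. Roughly the sequence, condensed into a hypothetical wrapper (both transport calls are real nvme-fc APIs):

    #include <linux/nvme-fc-driver.h>

    static int drop_remoteport_now(struct nvme_fc_remote_port *remoteport)
    {
        /* courtesy upcall; a failure here doesn't change the teardown */
        (void)nvme_fc_set_remoteport_devloss(remoteport, 0);
        /* start transport teardown; completion arrives via remoteport_delete */
        return nvme_fc_unregister_remoteport(remoteport);
    }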
@@ -2559,6 +2572,42 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
vport->localport, ndlp->rport, ndlp->nlp_DID);
}
+/**
+ * lpfc_sli4_nvme_pci_offline_aborted - NVME abort processing for an offline PCI channel
+ * @phba: pointer to lpfc hba data structure.
+ * @lpfc_ncmd: The nvme job structure for the request being aborted.
+ *
+ * This routine is invoked when the PCI channel is offline to complete a
+ * SLI4 fast-path aborted NVME xri. The NVME command is completed to the
+ * transport and its buffer is released without touching the hardware.
+ **/
+void
+lpfc_sli4_nvme_pci_offline_aborted(struct lpfc_hba *phba,
+ struct lpfc_io_buf *lpfc_ncmd)
+{
+ struct nvmefc_fcp_req *nvme_cmd = NULL;
+
+ lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
+ "6533 %s nvme_cmd %p tag x%x abort complete and "
+ "xri released\n", __func__,
+ lpfc_ncmd->nvmeCmd,
+ lpfc_ncmd->cur_iocbq.iotag);
+
+ /* An aborted NVME command must not complete before the abort
+ * exchange itself has fully completed. Once it has, the buffer
+ * is returned via the put list.
+ */
+ if (lpfc_ncmd->nvmeCmd) {
+ nvme_cmd = lpfc_ncmd->nvmeCmd;
+ nvme_cmd->transferred_length = 0;
+ nvme_cmd->rcv_rsplen = 0;
+ nvme_cmd->status = NVME_SC_INTERNAL;
+ nvme_cmd->done(nvme_cmd);
+ lpfc_ncmd->nvmeCmd = NULL;
+ }
+ lpfc_release_nvme_buf(phba, lpfc_ncmd);
+}
+
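The helper leans on the nvme-fc completion contract: the LLDD fills in the result fields of the nvmefc_fcp_req and invokes ->done() exactly once, after which it must not touch the request. Condensed (the types are the real nvme-fc API; the function name is hypothetical):

    #include <linux/nvme.h>
    #include <linux/nvme-fc-driver.h>

    static void complete_as_transport_error(struct nvmefc_fcp_req *nvme_cmd)
    {
        nvme_cmd->transferred_length = 0;
        nvme_cmd->rcv_rsplen = 0;
        nvme_cmd->status = NVME_SC_INTERNAL;  /* transport-level failure */
        nvme_cmd->done(nvme_cmd);             /* hand back to nvme-fc, exactly once */
    }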
/**
* lpfc_sli4_nvme_xri_aborted - Fast-path process of NVME xri abort
* @phba: pointer to lpfc hba data structure.
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -493,8 +493,8 @@ void
lpfc_sli4_io_xri_aborted(struct lpfc_hba *phba,
struct sli4_wcqe_xri_aborted *axri, int idx)
{
- uint16_t xri = bf_get(lpfc_wcqe_xa_xri, axri);
- uint16_t rxid = bf_get(lpfc_wcqe_xa_remote_xid, axri);
+ u16 xri = 0;
+ u16 rxid = 0;
struct lpfc_io_buf *psb, *next_psb;
struct lpfc_sli4_hdw_queue *qp;
unsigned long iflag = 0;
@@ -504,15 +504,22 @@ lpfc_sli4_io_xri_aborted(struct lpfc_hba *phba,
int rrq_empty = 0;
struct lpfc_sli_ring *pring = phba->sli4_hba.els_wq->pring;
struct scsi_cmnd *cmd;
+ int offline = 0;
if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP))
return;
-
+ offline = pci_channel_offline(phba->pcidev);
+ if (!offline) {
+ xri = bf_get(lpfc_wcqe_xa_xri, axri);
+ rxid = bf_get(lpfc_wcqe_xa_remote_xid, axri);
+ }
qp = &phba->sli4_hba.hdwq[idx];
spin_lock_irqsave(&phba->hbalock, iflag);
spin_lock(&qp->abts_io_buf_list_lock);
list_for_each_entry_safe(psb, next_psb,
&qp->lpfc_abts_io_buf_list, list) {
+ if (offline)
+ xri = psb->cur_iocbq.sli4_xritag;
if (psb->cur_iocbq.sli4_xritag == xri) {
list_del_init(&psb->list);
psb->flags &= ~LPFC_SBUF_XBUSY;
@@ -521,8 +528,15 @@ lpfc_sli4_io_xri_aborted(struct lpfc_hba *phba,
qp->abts_nvme_io_bufs--;
spin_unlock(&qp->abts_io_buf_list_lock);
spin_unlock_irqrestore(&phba->hbalock, iflag);
- lpfc_sli4_nvme_xri_aborted(phba, axri, psb);
- return;
+ if (!offline) {
+ lpfc_sli4_nvme_xri_aborted(phba, axri,
+ psb);
+ return;
+ }
+ lpfc_sli4_nvme_pci_offline_aborted(phba, psb);
+ spin_lock_irqsave(&phba->hbalock, iflag);
+ spin_lock(&qp->abts_io_buf_list_lock);
+ continue;
}
qp->abts_scsi_io_bufs--;
spin_unlock(&qp->abts_io_buf_list_lock);
@@ -534,13 +548,13 @@ lpfc_sli4_io_xri_aborted(struct lpfc_hba *phba,
rrq_empty = list_empty(&phba->active_rrq_list);
spin_unlock_irqrestore(&phba->hbalock, iflag);
- if (ndlp) {
+ if (ndlp && !offline) {
lpfc_set_rrq_active(phba, ndlp,
psb->cur_iocbq.sli4_lxritag, rxid, 1);
lpfc_sli4_abts_err_handler(phba, ndlp, axri);
}
- if (phba->cfg_fcp_wait_abts_rsp) {
+ if (phba->cfg_fcp_wait_abts_rsp || offline) {
spin_lock_irqsave(&psb->buf_lock, iflag);
cmd = psb->pCmd;
psb->pCmd = NULL;
@@ -567,25 +581,30 @@ lpfc_sli4_io_xri_aborted(struct lpfc_hba *phba,
lpfc_release_scsi_buf_s4(phba, psb);
if (rrq_empty)
lpfc_worker_wake_up(phba);
- return;
+ if (!offline)
+ return;
+ spin_lock_irqsave(&phba->hbalock, iflag);
+ spin_lock(&qp->abts_io_buf_list_lock);
+ continue;
}
}
spin_unlock(&qp->abts_io_buf_list_lock);
- for (i = 1; i <= phba->sli.last_iotag; i++) {
- iocbq = phba->sli.iocbq_lookup[i];
-
- if (!(iocbq->iocb_flag & LPFC_IO_FCP) ||
- (iocbq->iocb_flag & LPFC_IO_LIBDFC))
- continue;
- if (iocbq->sli4_xritag != xri)
- continue;
- psb = container_of(iocbq, struct lpfc_io_buf, cur_iocbq);
- psb->flags &= ~LPFC_SBUF_XBUSY;
- spin_unlock_irqrestore(&phba->hbalock, iflag);
- if (!list_empty(&pring->txq))
- lpfc_worker_wake_up(phba);
- return;
+ if (!offline) {
+ for (i = 1; i <= phba->sli.last_iotag; i++) {
+ iocbq = phba->sli.iocbq_lookup[i];
+ if (!(iocbq->iocb_flag & LPFC_IO_FCP) ||
+ (iocbq->iocb_flag & LPFC_IO_LIBDFC))
+ continue;
+ if (iocbq->sli4_xritag != xri)
+ continue;
+ psb = container_of(iocbq, struct lpfc_io_buf, cur_iocbq);
+ psb->flags &= ~LPFC_SBUF_XBUSY;
+ spin_unlock_irqrestore(&phba->hbalock, iflag);
+ if (!list_empty(&pring->txq))
+ lpfc_worker_wake_up(phba);
+ return;
+ }
}
spin_unlock_irqrestore(&phba->hbalock, iflag);
}
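Note the control flow in the offline branch above: both locks are dropped to complete each buffer, then retaken before the walk continues; the walk relies on list_for_each_entry_safe() having captured the next entry before the locks were dropped. A runnable userspace sketch of the same unlock-work-relock idiom, popping from the head as a slightly more defensive variant (the list and lock types stand in for the kernel's):

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct node { int xri; struct node *next; };

    static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

    static void flush_all(struct node **head)
    {
        pthread_mutex_lock(&list_lock);
        while (*head) {
            /* detach one entry while the lock is held */
            struct node *n = *head;
            *head = n->next;
            pthread_mutex_unlock(&list_lock);

            /* complete the aborted I/O without the lock held */
            printf("completing xri %d\n", n->xri);
            free(n);

            /* retake the lock before touching the list again */
            pthread_mutex_lock(&list_lock);
        }
        pthread_mutex_unlock(&list_lock);
    }

    int main(void)
    {
        struct node *head = NULL;
        for (int i = 0; i < 3; i++) {
            struct node *n = malloc(sizeof(*n));
            n->xri = i;
            n->next = head;
            head = n;
        }
        flush_all(&head);
        return 0;
    }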
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -1404,7 +1404,8 @@ __lpfc_sli_release_iocbq_s4(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq)
}
if ((iocbq->iocb_flag & LPFC_EXCHANGE_BUSY) &&
- (sglq->state != SGL_XRI_ABORTED)) {
+ (!(unlikely(pci_channel_offline(phba->pcidev)))) &&
+ sglq->state != SGL_XRI_ABORTED) {
spin_lock_irqsave(&phba->sli4_hba.sgl_list_lock,
iflag);
@@ -4583,10 +4584,12 @@ lpfc_sli_flush_io_rings(struct lpfc_hba *phba)
lpfc_sli_cancel_iocbs(phba, &txq,
IOSTAT_LOCAL_REJECT,
IOERR_SLI_DOWN);
- /* Flush the txcmpq */
+ /* Flush the txcmplq */
lpfc_sli_cancel_iocbs(phba, &txcmplq,
IOSTAT_LOCAL_REJECT,
IOERR_SLI_DOWN);
+ if (unlikely(pci_channel_offline(phba->pcidev)))
+ lpfc_sli4_io_xri_aborted(phba, NULL, 0);
}
} else {
pring = &psli->sli3_ring[LPFC_FCP_RING];
@@ -22019,8 +22022,26 @@ lpfc_get_io_buf_from_multixri_pools(struct lpfc_hba *phba,
qp = &phba->sli4_hba.hdwq[hwqid];
lpfc_ncmd = NULL;
+ if (!qp) {
+ lpfc_printf_log(phba, KERN_INFO,
+ LOG_SLI | LOG_NVME_ABTS | LOG_FCP,
+ "5556 NULL qp for hwqid x%x\n", hwqid);
+ return lpfc_ncmd;
+ }
multixri_pool = qp->p_multixri_pool;
+ if (!multixri_pool) {
+ lpfc_printf_log(phba, KERN_INFO,
+ LOG_SLI | LOG_NVME_ABTS | LOG_FCP,
+ "5557 NULL multixri for hwqid x%x\n", hwqid);
+ return lpfc_ncmd;
+ }
pvt_pool = &multixri_pool->pvt_pool;
+ if (!pvt_pool) {
+ lpfc_printf_log(phba, KERN_INFO,
+ LOG_SLI | LOG_NVME_ABTS | LOG_FCP,
+ "5558 NULL pvt_pool for hwqid x%x\n", hwqid);
+ return lpfc_ncmd;
+ }
multixri_pool->io_req_count++;
/* If pvt_pool is empty, move some XRIs from public to private pool */
@@ -22096,6 +22117,12 @@ struct lpfc_io_buf *lpfc_get_io_buf(struct lpfc_hba *phba,
qp = &phba->sli4_hba.hdwq[hwqid];
lpfc_cmd = NULL;
+ if (!qp) {
+ lpfc_printf_log(phba, KERN_WARNING,
+ LOG_SLI | LOG_NVME_ABTS | LOG_FCP,
+ "5555 NULL qp for hwqid x%x\n", hwqid);
+ return lpfc_cmd;
+ }
if (phba->cfg_xri_rebalancing)
lpfc_cmd = lpfc_get_io_buf_from_multixri_pools(
--- a/drivers/scsi/lpfc/lpfc_crtn.h
+++ b/drivers/scsi/lpfc/lpfc_crtn.h
@@ -1116,6 +1116,8 @@ void lpfc_sli4_fcf_redisc_event_proc(struct lpfc_hba *);
int lpfc_sli4_resume_rpi(struct lpfc_nodelist *,
void (*)(struct lpfc_hba *, LPFC_MBOXQ_t *), void *);
void lpfc_sli4_els_xri_abort_event_proc(struct lpfc_hba *phba);
+void lpfc_sli4_nvme_pci_offline_aborted(struct lpfc_hba *phba,
+ struct lpfc_io_buf *lpfc_ncmd);
void lpfc_sli4_nvme_xri_aborted(struct lpfc_hba *phba,
struct sli4_wcqe_xri_aborted *axri,
struct lpfc_io_buf *lpfc_ncmd);