@@ -896,6 +896,11 @@ enum lpfc_irq_chann_mode {
NHT_MODE,
};
+enum lpfc_hba_bit_flags {
+ FABRIC_COMANDS_BLOCKED,
+ HBA_PCI_ERR,
+};
+
struct lpfc_hba {
/* SCSI interface function jump table entries */
struct lpfc_io_buf * (*lpfc_get_scsi_buf)
@@ -1042,7 +1047,6 @@ struct lpfc_hba {
* Firmware supports Forced Link Speed
* capability
*/
-#define HBA_PCI_ERR 0x80000 /* The PCI slot is offline */
#define HBA_FLOGI_ISSUED 0x100000 /* FLOGI was issued */
#define HBA_SHORT_CMF 0x200000 /* shorter CMF timer routine */
#define HBA_CGN_DAY_WRAP 0x400000 /* HBA Congestion info day wraps */
@@ -1349,7 +1353,6 @@ struct lpfc_hba {
atomic_t fabric_iocb_count;
struct timer_list fabric_block_timer;
unsigned long bit_flags;
-#define FABRIC_COMANDS_BLOCKED 0
atomic_t num_rsrc_err;
atomic_t num_cmd_success;
unsigned long last_rsrc_error_time;
@@ -670,3 +670,6 @@ struct lpfc_vmid *lpfc_get_vmid_from_hashtable(struct lpfc_vport *vport,
uint32_t hash, uint8_t *buf);
void lpfc_vmid_vport_cleanup(struct lpfc_vport *vport);
int lpfc_issue_els_qfpa(struct lpfc_vport *vport);
+
+void lpfc_sli_rpi_release(struct lpfc_vport *vport,
+ struct lpfc_nodelist *ndlp);
@@ -109,8 +109,8 @@ lpfc_rport_invalid(struct fc_rport *rport)
ndlp = rdata->pnode;
if (!rdata->pnode) {
- pr_err("**** %s: NULL ndlp on rport x%px SID x%x\n",
- __func__, rport, rport->scsi_target_id);
+ pr_info("**** %s: NULL ndlp on rport x%px SID x%x\n",
+ __func__, rport, rport->scsi_target_id);
return -EINVAL;
}
@@ -169,9 +169,10 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport)
lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NODE,
"3181 dev_loss_callbk x%06x, rport x%px flg x%x "
- "load_flag x%x refcnt %d\n",
+ "load_flag x%x refcnt %d state %d xpt x%x\n",
ndlp->nlp_DID, ndlp->rport, ndlp->nlp_flag,
- vport->load_flag, kref_read(&ndlp->kref));
+ vport->load_flag, kref_read(&ndlp->kref),
+ ndlp->nlp_state, ndlp->fc4_xpt_flags);
/* Don't schedule a worker thread event if the vport is going down.
* The teardown process cleans up the node via lpfc_drop_node.
@@ -181,6 +182,11 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport)
ndlp->rport = NULL;
ndlp->fc4_xpt_flags &= ~SCSI_XPT_REGD;
+ /* clear the NLP_XPT_REGD if the node is not registered
+ * with nvme-fc
+ */
+ if (ndlp->fc4_xpt_flags == NLP_XPT_REGD)
+ ndlp->fc4_xpt_flags &= ~NLP_XPT_REGD;
/* Remove the node reference from remote_port_add now.
* The driver will not call remote_port_delete.
@@ -225,18 +231,36 @@ lpfc_dev_loss_tmo_callbk(struct fc_rport *rport)
ndlp->rport = NULL;
spin_unlock_irqrestore(&ndlp->lock, iflags);
- /* We need to hold the node by incrementing the reference
- * count until this queued work is done
- */
- evtp->evt_arg1 = lpfc_nlp_get(ndlp);
+ if (phba->worker_thread) {
+ /* We need to hold the node by incrementing the reference
+ * count until this queued work is done
+ */
+ evtp->evt_arg1 = lpfc_nlp_get(ndlp);
+
+ spin_lock_irqsave(&phba->hbalock, iflags);
+ if (evtp->evt_arg1) {
+ evtp->evt = LPFC_EVT_DEV_LOSS;
+ list_add_tail(&evtp->evt_listp, &phba->work_list);
+ lpfc_worker_wake_up(phba);
+ }
+ spin_unlock_irqrestore(&phba->hbalock, iflags);
+ } else {
+ lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NODE,
+ "3188 worker thread is stopped %s x%06x, "
+ " rport x%px flg x%x load_flag x%x refcnt "
+ "%d\n", __func__, ndlp->nlp_DID,
+ ndlp->rport, ndlp->nlp_flag,
+ vport->load_flag, kref_read(&ndlp->kref));
+ if (!(ndlp->fc4_xpt_flags & NVME_XPT_REGD)) {
+ spin_lock_irqsave(&ndlp->lock, iflags);
+ /* Node is in dev loss. No further transaction. */
+ ndlp->nlp_flag &= ~NLP_IN_DEV_LOSS;
+ spin_unlock_irqrestore(&ndlp->lock, iflags);
+ lpfc_disc_state_machine(vport, ndlp, NULL,
+ NLP_EVT_DEVICE_RM);
+ }
- spin_lock_irqsave(&phba->hbalock, iflags);
- if (evtp->evt_arg1) {
- evtp->evt = LPFC_EVT_DEV_LOSS;
- list_add_tail(&evtp->evt_listp, &phba->work_list);
- lpfc_worker_wake_up(phba);
}
- spin_unlock_irqrestore(&phba->hbalock, iflags);
return;
}
@@ -503,11 +527,12 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp)
lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT,
"0203 Devloss timeout on "
"WWPN %02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x "
- "NPort x%06x Data: x%x x%x x%x\n",
+ "NPort x%06x Data: x%x x%x x%x refcnt %d\n",
*name, *(name+1), *(name+2), *(name+3),
*(name+4), *(name+5), *(name+6), *(name+7),
ndlp->nlp_DID, ndlp->nlp_flag,
- ndlp->nlp_state, ndlp->nlp_rpi);
+ ndlp->nlp_state, ndlp->nlp_rpi,
+ kref_read(&ndlp->kref));
} else {
lpfc_printf_vlog(vport, KERN_INFO, LOG_TRACE_EVENT,
"0204 Devloss timeout on "
@@ -755,18 +780,22 @@ lpfc_work_list_done(struct lpfc_hba *phba)
int free_evt;
int fcf_inuse;
uint32_t nlp_did;
+ bool hba_pci_err;
spin_lock_irq(&phba->hbalock);
while (!list_empty(&phba->work_list)) {
list_remove_head((&phba->work_list), evtp, typeof(*evtp),
evt_listp);
spin_unlock_irq(&phba->hbalock);
+ hba_pci_err = test_bit(HBA_PCI_ERR, &phba->bit_flags);
free_evt = 1;
switch (evtp->evt) {
case LPFC_EVT_ELS_RETRY:
ndlp = (struct lpfc_nodelist *) (evtp->evt_arg1);
- lpfc_els_retry_delay_handler(ndlp);
- free_evt = 0; /* evt is part of ndlp */
+ if (!hba_pci_err) {
+ lpfc_els_retry_delay_handler(ndlp);
+ free_evt = 0; /* evt is part of ndlp */
+ }
/* decrement the node reference count held
* for this queued work
*/
@@ -788,8 +817,10 @@ lpfc_work_list_done(struct lpfc_hba *phba)
break;
case LPFC_EVT_RECOVER_PORT:
ndlp = (struct lpfc_nodelist *)(evtp->evt_arg1);
- lpfc_sli_abts_recover_port(ndlp->vport, ndlp);
- free_evt = 0;
+ if (!hba_pci_err) {
+ lpfc_sli_abts_recover_port(ndlp->vport, ndlp);
+ free_evt = 0;
+ }
/* decrement the node reference count held for
* this queued work
*/
@@ -859,14 +890,18 @@ lpfc_work_done(struct lpfc_hba *phba)
struct lpfc_vport **vports;
struct lpfc_vport *vport;
int i;
+ bool hba_pci_err;
+ hba_pci_err = test_bit(HBA_PCI_ERR, &phba->bit_flags);
spin_lock_irq(&phba->hbalock);
ha_copy = phba->work_ha;
phba->work_ha = 0;
spin_unlock_irq(&phba->hbalock);
+ if (hba_pci_err)
+ ha_copy = 0;
/* First, try to post the next mailbox command to SLI4 device */
- if (phba->pci_dev_grp == LPFC_PCI_DEV_OC)
+ if (phba->pci_dev_grp == LPFC_PCI_DEV_OC && !hba_pci_err)
lpfc_sli4_post_async_mbox(phba);
if (ha_copy & HA_ERATT) {
@@ -886,7 +921,7 @@ lpfc_work_done(struct lpfc_hba *phba)
lpfc_handle_latt(phba);
/* Handle VMID Events */
- if (lpfc_is_vmid_enabled(phba)) {
+ if (lpfc_is_vmid_enabled(phba) && !hba_pci_err) {
if (phba->pport->work_port_events &
WORKER_CHECK_VMID_ISSUE_QFPA) {
lpfc_check_vmid_qfpa_issue(phba);
@@ -936,6 +971,8 @@ lpfc_work_done(struct lpfc_hba *phba)
work_port_events = vport->work_port_events;
vport->work_port_events &= ~work_port_events;
spin_unlock_irq(&vport->work_port_lock);
+ if (hba_pci_err)
+ continue;
if (work_port_events & WORKER_DISC_TMO)
lpfc_disc_timeout_handler(vport);
if (work_port_events & WORKER_ELS_TMO)
@@ -1173,12 +1210,14 @@ lpfc_linkdown(struct lpfc_hba *phba)
struct lpfc_vport **vports;
LPFC_MBOXQ_t *mb;
int i;
+ int offline;
if (phba->link_state == LPFC_LINK_DOWN)
return 0;
/* Block all SCSI stack I/Os */
lpfc_scsi_dev_block(phba);
+ offline = pci_channel_offline(phba->pcidev);
phba->defer_flogi_acc_flag = false;
@@ -1219,7 +1258,7 @@ lpfc_linkdown(struct lpfc_hba *phba)
lpfc_destroy_vport_work_array(phba, vports);
/* Clean up any SLI3 firmware default rpi's */
- if (phba->sli_rev > LPFC_SLI_REV3)
+ if (phba->sli_rev > LPFC_SLI_REV3 || offline)
goto skip_unreg_did;
mb = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
@@ -4712,6 +4751,11 @@ lpfc_nlp_unreg_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
spin_lock_irqsave(&ndlp->lock, iflags);
if (!(ndlp->fc4_xpt_flags & NLP_XPT_REGD)) {
spin_unlock_irqrestore(&ndlp->lock, iflags);
+ lpfc_printf_vlog(vport, KERN_INFO, LOG_SLI,
+ "0999 %s Not regd: ndlp x%px rport x%px DID "
+ "x%x FLG x%x XPT x%x\n",
+ __func__, ndlp, ndlp->rport, ndlp->nlp_DID,
+ ndlp->nlp_flag, ndlp->fc4_xpt_flags);
return;
}
@@ -4722,6 +4766,13 @@ lpfc_nlp_unreg_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
ndlp->fc4_xpt_flags & SCSI_XPT_REGD) {
vport->phba->nport_event_cnt++;
lpfc_unregister_remote_port(ndlp);
+ } else if (!ndlp->rport) {
+ lpfc_printf_vlog(vport, KERN_INFO, LOG_SLI,
+ "1999 %s NDLP in devloss x%px DID x%x FLG x%x"
+ " XPT x%x refcnt %d\n",
+ __func__, ndlp, ndlp->nlp_DID, ndlp->nlp_flag,
+ ndlp->fc4_xpt_flags,
+ kref_read(&ndlp->kref));
}
if (ndlp->fc4_xpt_flags & NVME_XPT_REGD) {
@@ -6097,12 +6148,34 @@ lpfc_disc_flush_list(struct lpfc_vport *vport)
}
}
+/*
+ * lpfc_notify_xport_npr - notifies xport of node disappearance
+ * @vport: Pointer to Virtual Port object.
+ *
+ * Transitions all ndlps to NPR state. When lpfc_nlp_set_state
+ * calls lpfc_nlp_state_cleanup, the ndlp->rport is unregistered
+ * and transport notified that the node is gone.
+ * Return Code:
+ * none
+ */
+static void
+lpfc_notify_xport_npr(struct lpfc_vport *vport)
+{
+ struct lpfc_nodelist *ndlp, *next_ndlp;
+
+ list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes,
+ nlp_listp) {
+ lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
+ }
+}
void
lpfc_cleanup_discovery_resources(struct lpfc_vport *vport)
{
lpfc_els_flush_rscn(vport);
lpfc_els_flush_cmd(vport);
lpfc_disc_flush_list(vport);
+ if (pci_channel_offline(vport->phba->pcidev))
+ lpfc_notify_xport_npr(vport);
}
/*****************************************************************************/
@@ -1642,7 +1642,7 @@ lpfc_sli4_offline_eratt(struct lpfc_hba *phba)
{
spin_lock_irq(&phba->hbalock);
if (phba->link_state == LPFC_HBA_ERROR &&
- phba->hba_flag & HBA_PCI_ERR) {
+ test_bit(HBA_PCI_ERR, &phba->bit_flags)) {
spin_unlock_irq(&phba->hbalock);
return;
}
@@ -3682,7 +3682,8 @@ lpfc_offline_prep(struct lpfc_hba *phba, int mbx_action)
struct lpfc_vport **vports;
struct Scsi_Host *shost;
int i;
- int offline = 0;
+ int offline;
+ bool hba_pci_err;
if (vport->fc_flag & FC_OFFLINE_MODE)
return;
@@ -3692,6 +3693,7 @@ lpfc_offline_prep(struct lpfc_hba *phba, int mbx_action)
lpfc_linkdown(phba);
offline = pci_channel_offline(phba->pcidev);
+ hba_pci_err = test_bit(HBA_PCI_ERR, &phba->bit_flags);
/* Issue an unreg_login to all nodes on all vports */
vports = lpfc_create_vport_work_array(phba);
@@ -3715,11 +3717,14 @@ lpfc_offline_prep(struct lpfc_hba *phba, int mbx_action)
ndlp->nlp_flag &= ~NLP_NPR_ADISC;
spin_unlock_irq(&ndlp->lock);
- if (offline) {
+ if (offline || hba_pci_err) {
spin_lock_irq(&ndlp->lock);
ndlp->nlp_flag &= ~(NLP_UNREG_INP |
NLP_RPI_REGISTERED);
spin_unlock_irq(&ndlp->lock);
+ if (phba->sli_rev == LPFC_SLI_REV4)
+ lpfc_sli_rpi_release(vports[i],
+ ndlp);
} else {
lpfc_unreg_rpi(vports[i], ndlp);
}
@@ -13374,15 +13379,12 @@ lpfc_sli4_hba_unset(struct lpfc_hba *phba)
/* Disable FW logging to host memory */
lpfc_ras_stop_fwlog(phba);
- /* Unset the queues shared with the hardware then release all
- * allocated resources.
- */
- lpfc_sli4_queue_unset(phba);
- lpfc_sli4_queue_destroy(phba);
-
/* Reset SLI4 HBA FCoE function */
lpfc_pci_function_reset(phba);
+ /* release all queue allocated resources. */
+ lpfc_sli4_queue_destroy(phba);
+
/* Free RAS DMA memory */
if (phba->ras_fwlog.ras_enabled)
lpfc_sli4_ras_dma_free(phba);
@@ -15057,24 +15059,28 @@ lpfc_sli4_prep_dev_for_recover(struct lpfc_hba *phba)
static void
lpfc_sli4_prep_dev_for_reset(struct lpfc_hba *phba)
{
- lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT,
- "2826 PCI channel disable preparing for reset\n");
+ int offline = pci_channel_offline(phba->pcidev);
+
+ lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+ "2826 PCI channel disable preparing for reset offline"
+ " %d\n", offline);
/* Block any management I/Os to the device */
lpfc_block_mgmt_io(phba, LPFC_MBX_NO_WAIT);
- /* Block all SCSI devices' I/Os on the host */
- lpfc_scsi_dev_block(phba);
+ /* HBA_PCI_ERR was set in io_error_detect */
+ lpfc_offline_prep(phba, LPFC_MBX_NO_WAIT);
/* Flush all driver's outstanding I/Os as we are to reset */
lpfc_sli_flush_io_rings(phba);
+ lpfc_offline(phba);
/* stop all timers */
lpfc_stop_hba_timers(phba);
+ lpfc_sli4_queue_destroy(phba);
/* Disable interrupt and pci device */
lpfc_sli4_disable_intr(phba);
- lpfc_sli4_queue_destroy(phba);
pci_disable_device(phba->pcidev);
}
@@ -15123,6 +15129,7 @@ lpfc_io_error_detected_s4(struct pci_dev *pdev, pci_channel_state_t state)
{
struct Scsi_Host *shost = pci_get_drvdata(pdev);
struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba;
+ bool hba_pci_err;
switch (state) {
case pci_channel_io_normal:
@@ -15130,17 +15137,24 @@ lpfc_io_error_detected_s4(struct pci_dev *pdev, pci_channel_state_t state)
lpfc_sli4_prep_dev_for_recover(phba);
return PCI_ERS_RESULT_CAN_RECOVER;
case pci_channel_io_frozen:
- phba->hba_flag |= HBA_PCI_ERR;
+ hba_pci_err = test_and_set_bit(HBA_PCI_ERR, &phba->bit_flags);
/* Fatal error, prepare for slot reset */
- lpfc_sli4_prep_dev_for_reset(phba);
+ if (!hba_pci_err)
+ lpfc_sli4_prep_dev_for_reset(phba);
+ else
+ lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+ "2832 Already handling PCI error "
+ "state: x%x\n", state);
return PCI_ERS_RESULT_NEED_RESET;
case pci_channel_io_perm_failure:
- phba->hba_flag |= HBA_PCI_ERR;
+ set_bit(HBA_PCI_ERR, &phba->bit_flags);
/* Permanent failure, prepare for device down */
lpfc_sli4_prep_dev_for_perm_failure(phba);
return PCI_ERS_RESULT_DISCONNECT;
default:
- phba->hba_flag |= HBA_PCI_ERR;
+ hba_pci_err = test_and_set_bit(HBA_PCI_ERR, &phba->bit_flags);
+ if (!hba_pci_err)
+ lpfc_sli4_prep_dev_for_reset(phba);
/* Unknown state, prepare and request slot reset */
lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT,
"2825 Unknown PCI error state: x%x\n", state);
@@ -15174,17 +15188,21 @@ lpfc_io_slot_reset_s4(struct pci_dev *pdev)
struct lpfc_hba *phba = ((struct lpfc_vport *)shost->hostdata)->phba;
struct lpfc_sli *psli = &phba->sli;
uint32_t intr_mode;
+ bool hba_pci_err;
dev_printk(KERN_INFO, &pdev->dev, "recovering from a slot reset.\n");
if (pci_enable_device_mem(pdev)) {
printk(KERN_ERR "lpfc: Cannot re-enable "
- "PCI device after reset.\n");
+ "PCI device after reset.\n");
return PCI_ERS_RESULT_DISCONNECT;
}
pci_restore_state(pdev);
- phba->hba_flag &= ~HBA_PCI_ERR;
+ hba_pci_err = test_and_clear_bit(HBA_PCI_ERR, &phba->bit_flags);
+ if (!hba_pci_err)
+ dev_info(&pdev->dev,
+ "hba_pci_err was not set, recovering slot reset.\n");
/*
* As the new kernel behavior of pci_restore_state() API call clears
* device saved_state flag, need to save the restored state again.
@@ -15239,8 +15257,6 @@ lpfc_io_resume_s4(struct pci_dev *pdev)
*/
if (!(phba->sli.sli_flag & LPFC_SLI_ACTIVE)) {
/* Perform device reset */
- lpfc_offline_prep(phba, LPFC_MBX_WAIT);
- lpfc_offline(phba);
lpfc_sli_brdrestart(phba);
/* Bring the device back online */
lpfc_online(phba);
@@ -2169,8 +2169,7 @@ lpfc_nvme_lport_unreg_wait(struct lpfc_vport *vport,
abts_nvme = 0;
for (i = 0; i < phba->cfg_hdw_queue; i++) {
qp = &phba->sli4_hba.hdwq[i];
- if (!vport || !vport->localport ||
- !qp || !qp->io_wq)
+ if (!vport->localport || !qp || !qp->io_wq)
return;
pring = qp->io_wq->pring;
@@ -2180,8 +2179,9 @@ lpfc_nvme_lport_unreg_wait(struct lpfc_vport *vport,
abts_scsi += qp->abts_scsi_io_bufs;
abts_nvme += qp->abts_nvme_io_bufs;
}
- if (!vport || !vport->localport ||
- vport->phba->hba_flag & HBA_PCI_ERR)
+ if (!vport->localport ||
+ test_bit(HBA_PCI_ERR, &vport->phba->bit_flags) ||
+ vport->load_flag & FC_UNLOADING)
return;
lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT,
@@ -2541,8 +2541,7 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
* return values is ignored. The upcall is a courtesy to the
* transport.
*/
- if (vport->load_flag & FC_UNLOADING ||
- unlikely(vport->phba->hba_flag & HBA_PCI_ERR))
+ if (vport->load_flag & FC_UNLOADING)
(void)nvme_fc_set_remoteport_devloss(remoteport, 0);
ret = nvme_fc_unregister_remoteport(remoteport);
@@ -2828,6 +2828,12 @@ __lpfc_sli_rpi_release(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
ndlp->nlp_flag &= ~NLP_UNREG_INP;
}
+void
+lpfc_sli_rpi_release(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
+{
+ __lpfc_sli_rpi_release(vport, ndlp);
+}
+
/**
* lpfc_sli_def_mbox_cmpl - Default mailbox completion handler
* @phba: Pointer to HBA context object.
@@ -4624,11 +4630,6 @@ lpfc_sli_flush_io_rings(struct lpfc_hba *phba)
struct lpfc_iocbq *piocb, *next_iocb;
spin_lock_irq(&phba->hbalock);
- if (phba->hba_flag & HBA_IOQ_FLUSH ||
- !phba->sli4_hba.hdwq) {
- spin_unlock_irq(&phba->hbalock);
- return;
- }
/* Indicate the I/O queues are flushed */
phba->hba_flag |= HBA_IOQ_FLUSH;
spin_unlock_irq(&phba->hbalock);
@@ -10997,6 +10998,10 @@ lpfc_sli_issue_iocb(struct lpfc_hba *phba, uint32_t ring_number,
unsigned long iflags;
int rc;
+ /* If the PCI channel is in offline state, do not post iocbs. */
+ if (unlikely(pci_channel_offline(phba->pcidev)))
+ return IOCB_ERROR;
+
if (phba->sli_rev == LPFC_SLI_REV4) {
lpfc_sli_prep_wqe(phba, piocb);