Message ID | 20210423233455.27243-1-jsmart2021@gmail.com |
---|---|
Headers | show |
Series | efct: Broadcom (Emulex) FC Target driver | expand |
On 4/23/21 6:34 PM, James Smart wrote: > +static ssize_t > +efct_lio_tpg_enable_show(struct config_item *item, char *page) > +{ > + struct se_portal_group *se_tpg = to_tpg(item); > + struct efct_lio_tpg *tpg = > + container_of(se_tpg, struct efct_lio_tpg, tpg); > + > + return snprintf(page, PAGE_SIZE, "%d\n", atomic_read(&tpg->enabled)); Did you use to use enabled differently or is there a bug? I couldn't figure out why it was an atomic. > +} > + > +static int > +efct_lio_npiv_check_prod_write_protect(struct se_portal_group *se_tpg) > +{ > + struct efct_lio_tpg *tpg = > + container_of(se_tpg, struct efct_lio_tpg, tpg); > + > + return tpg->tpg_attrib.prod_mode_write_protect; > +} > + > +static u32 efct_lio_tpg_get_inst_index(struct se_portal_group *se_tpg) > +{ > + return EFC_SUCCESS; I'm not sure if you meant to return 0 here. Most drivers have it hard coded as 1. iscsi uses it as a global target index. qla returns the tpgt value the user passed in. > +} > + > +static int efct_lio_check_stop_free(struct se_cmd *se_cmd) > +{ > + struct efct_scsi_tgt_io *ocp = > + container_of(se_cmd, struct efct_scsi_tgt_io, cmd); > + struct efct_io *io = container_of(ocp, struct efct_io, tgt_io); > + > + efct_set_lio_io_state(io, EFCT_LIO_STATE_TFO_CHK_STOP_FREE); > + return target_put_sess_cmd(se_cmd); > +} > + > +static int > +efct_lio_abort_tgt_cb(struct efct_io *io, > + enum efct_scsi_io_status scsi_status, > + u32 flags, void *arg) > +{ > + efct_lio_io_printf(io, "Abort done, status:%d\n", scsi_status); > + return EFC_SUCCESS; > +} > + > +static void > +efct_lio_aborted_task(struct se_cmd *se_cmd) > +{ > + struct efct_scsi_tgt_io *ocp = > + container_of(se_cmd, struct efct_scsi_tgt_io, cmd); > + struct efct_io *io = container_of(ocp, struct efct_io, tgt_io); > + > + efct_set_lio_io_state(io, EFCT_LIO_STATE_TFO_ABORTED_TASK); > + > + if (!(se_cmd->transport_state & CMD_T_ABORTED) || ocp->rsp_sent) Are you getting cmds here where CMD_T_ABORTED is not set? > + return; > + > + /* command has been aborted, cleanup here */ > + ocp->aborting = true; > + ocp->err = EFCT_SCSI_STATUS_ABORTED; > + /* terminate the exchange */ > + efct_scsi_tgt_abort_io(io, efct_lio_abort_tgt_cb, NULL); > +} > + > +static void efct_lio_release_cmd(struct se_cmd *se_cmd) > +{ > + struct efct_scsi_tgt_io *ocp = > + container_of(se_cmd, struct efct_scsi_tgt_io, cmd); > + struct efct_io *io = container_of(ocp, struct efct_io, tgt_io); > + struct efct *efct = io->efct; > + > + efct_set_lio_io_state(io, EFCT_LIO_STATE_TFO_RELEASE_CMD); > + efct_set_lio_io_state(io, EFCT_LIO_STATE_SCSI_CMPL_CMD); > + efct_scsi_io_complete(io); > + atomic_sub_return(1, &efct->tgt_efct.ios_in_use); > +} > + > +static void efct_lio_close_session(struct se_session *se_sess) > +{ > + struct efc_node *node = se_sess->fabric_sess_ptr; > + struct efct *efct = NULL; > + int rc; > + > + pr_debug("se_sess=%p node=%p", se_sess, node); > + > + if (!node) { > + pr_debug("node is NULL"); > + return; > + } > + > + efct = node->efc->base; > + rc = efct_xport_control(efct->xport, EFCT_XPORT_POST_NODE_EVENT, node, > + EFCT_XPORT_SHUTDOWN); Sorry for the lazy review comment. I'm just checking the lio entry points. Does this wait for efct_lio_remove_session to complete? > + if (rc != 0) { > + efc_log_debug(efct, > + "Failed to shutdown session %p node %p\n", > + se_sess, node); > + return; > + } > +} > + > +static u32 efct_lio_sess_get_index(struct se_session *se_sess) > +{ > + return EFC_SUCCESS; Could you hard code this to 0 for me? Every driver but iscsi has it hard coded to 0, and I'm going to kill it. I've sent patches mixed in other sets and just have to break it out. It just makes it easier, because we do not have to worry what EFC_SUCCESS is and if we actually meant to return 0 here as an actual index or were trying to indicate the function was successful. > +} > + > +static void efct_lio_set_default_node_attrs(struct se_node_acl *nacl) > +{ > +} > + > +static int efct_lio_get_cmd_state(struct se_cmd *se_cmd) > +{ > + return EFC_SUCCESS; Did you want to print the tgt_io state? > +} > + > +static int > +efct_lio_sg_map(struct efct_io *io) > +{ > + struct efct_scsi_tgt_io *ocp = &io->tgt_io; > + struct se_cmd *cmd = &ocp->cmd; > + > + ocp->seg_map_cnt = pci_map_sg(io->efct->pci, cmd->t_data_sg, > + cmd->t_data_nents, cmd->data_direction); > + if (ocp->seg_map_cnt == 0) > + return -EFAULT; > + return EFC_SUCCESS; Did you mean to mix in the EFC return codes with the -Exyz ones? Here it's fine, but I think in some places you might be returning EFC_FAIL which is 1, and then in lio core we might be checking for less than 0 for failure. > +} > + > +static void > +efct_lio_sg_unmap(struct efct_io *io) > +{ > + struct efct_scsi_tgt_io *ocp = &io->tgt_io; > + struct se_cmd *cmd = &ocp->cmd; > + > + if (WARN_ON(!ocp->seg_map_cnt || !cmd->t_data_sg)) > + return; > + > + pci_unmap_sg(io->efct->pci, cmd->t_data_sg, > + ocp->seg_map_cnt, cmd->data_direction); > + ocp->seg_map_cnt = 0; > +} > + > +static int > +efct_lio_status_done(struct efct_io *io, > + enum efct_scsi_io_status scsi_status, > + u32 flags, void *arg) > +{ > + struct efct_scsi_tgt_io *ocp = &io->tgt_io; > + > + efct_set_lio_io_state(io, EFCT_LIO_STATE_SCSI_RSP_DONE); > + if (scsi_status != EFCT_SCSI_STATUS_GOOD) { > + efct_lio_io_printf(io, "callback completed with error=%d\n", > + scsi_status); > + ocp->err = scsi_status; > + } > + if (ocp->seg_map_cnt) > + efct_lio_sg_unmap(io); > + > + efct_lio_io_printf(io, "status=%d, err=%d flags=0x%x, dir=%d\n", > + scsi_status, ocp->err, flags, ocp->ddir); > + > + efct_set_lio_io_state(io, EFCT_LIO_STATE_TGT_GENERIC_FREE); > + transport_generic_free_cmd(&io->tgt_io.cmd, 0); > + return EFC_SUCCESS; > +} > + > +static int > +efct_lio_datamove_done(struct efct_io *io, enum efct_scsi_io_status scsi_status, > + u32 flags, void *arg); > + > +static int > +efct_lio_write_pending(struct se_cmd *cmd) > +{ > + struct efct_scsi_tgt_io *ocp = > + container_of(cmd, struct efct_scsi_tgt_io, cmd); > + struct efct_io *io = container_of(ocp, struct efct_io, tgt_io); > + struct efct_scsi_sgl *sgl = io->sgl; > + struct scatterlist *sg; > + u32 flags = 0, cnt, curcnt; > + u64 length = 0; > + int rc = 0; > + > + efct_set_lio_io_state(io, EFCT_LIO_STATE_TFO_WRITE_PENDING); > + efct_lio_io_printf(io, "trans_state=0x%x se_cmd_flags=0x%x\n", > + cmd->transport_state, cmd->se_cmd_flags); > + > + if (ocp->seg_cnt == 0) { > + ocp->seg_cnt = cmd->t_data_nents; > + ocp->cur_seg = 0; > + if (efct_lio_sg_map(io)) { > + efct_lio_io_printf(io, "efct_lio_sg_map failed\n"); > + return -EFAULT; > + } > + } > + curcnt = (ocp->seg_map_cnt - ocp->cur_seg); > + curcnt = (curcnt < io->sgl_allocated) ? curcnt : io->sgl_allocated; > + /* find current sg */ > + for (cnt = 0, sg = cmd->t_data_sg; cnt < ocp->cur_seg; cnt++, > + sg = sg_next(sg)) > + ;/* do nothing */ > + > + for (cnt = 0; cnt < curcnt; cnt++, sg = sg_next(sg)) { > + sgl[cnt].addr = sg_dma_address(sg); > + sgl[cnt].dif_addr = 0; > + sgl[cnt].len = sg_dma_len(sg); > + length += sgl[cnt].len; > + ocp->cur_seg++; > + } > + if (ocp->cur_seg == ocp->seg_cnt) > + flags = EFCT_SCSI_LAST_DATAPHASE; > + rc = efct_scsi_recv_wr_data(io, flags, sgl, curcnt, length, > + efct_lio_datamove_done, NULL); > + return rc; You can just remove rc and do a return. Maybe you meant to have a debug function print out the value. > +} > + > +static int > +efct_lio_queue_data_in(struct se_cmd *cmd) > +{ > + struct efct_scsi_tgt_io *ocp = > + container_of(cmd, struct efct_scsi_tgt_io, cmd); > + struct efct_io *io = container_of(ocp, struct efct_io, tgt_io); > + struct efct_scsi_sgl *sgl = io->sgl; > + struct scatterlist *sg = NULL; > + uint flags = 0, cnt = 0, curcnt = 0; > + u64 length = 0; > + int rc = 0; > + > + efct_set_lio_io_state(io, EFCT_LIO_STATE_TFO_QUEUE_DATA_IN); > + > + if (ocp->seg_cnt == 0) { > + if (cmd->data_length) { > + ocp->seg_cnt = cmd->t_data_nents; > + ocp->cur_seg = 0; > + if (efct_lio_sg_map(io)) { > + efct_lio_io_printf(io, > + "efct_lio_sg_map failed\n"); > + return -EAGAIN; > + } > + } else { > + /* If command length is 0, send the response status */ > + struct efct_scsi_cmd_resp rsp; > + > + memset(&rsp, 0, sizeof(rsp)); > + efct_lio_io_printf(io, > + "cmd : %p length 0, send status\n", > + cmd); > + return efct_scsi_send_resp(io, 0, &rsp, > + efct_lio_status_done, NULL); Here I think we can mix and match -Exyz with EFC_FAIL. If we got EFC_FAIL in transport_complete_qf we would complete the cmd with success but I think you wanted to fail it or maybe even retry it in some cases. I didn't check the other IO paths, so you should check them out. > + > +static int efct_session_cb(struct se_portal_group *se_tpg, > + struct se_session *se_sess, void *private) > +{ > + struct efc_node *node = private; > + struct efct_node *tgt_node = NULL; > + struct efct *efct = node->efc->base; > + > + tgt_node = kzalloc(sizeof(*tgt_node), GFP_KERNEL); > + if (!tgt_node) > + return EFC_FAIL; The caller of the callback uses the ERR macros which assume -Exyz values so this won't work. > + > + /* initialize refcount */ You can drop the comment since the function name says the same thing. > + kref_init(&tgt_node->ref); > + tgt_node->release = _efct_tgt_node_free; > + > + tgt_node->session = se_sess; > + node->tgt_node = tgt_node; > + tgt_node->efct = efct; > + > + tgt_node->node = node; > + > + tgt_node->node_fc_id = node->rnode.fc_id; > + tgt_node->port_fc_id = node->nport->fc_id; > + tgt_node->vpi = node->nport->indicator; > + tgt_node->rpi = node->rnode.indicator; > + > + spin_lock_init(&tgt_node->active_ios_lock); > + INIT_LIST_HEAD(&tgt_node->active_ios); > + > + return EFC_SUCCESS; > +} > + > +int efct_scsi_tgt_new_device(struct efct *efct) > +{ > + int rc = 0; > + u32 total_ios; > + > + /* Get the max settings */ > + efct->tgt_efct.max_sge = sli_get_max_sge(&efct->hw.sli); > + efct->tgt_efct.max_sgl = sli_get_max_sgl(&efct->hw.sli); > + > + /* initialize IO watermark fields */ > + atomic_set(&efct->tgt_efct.ios_in_use, 0); > + total_ios = efct->hw.config.n_io; > + efc_log_debug(efct, "total_ios=%d\n", total_ios); > + efct->tgt_efct.watermark_min = > + (total_ios * EFCT_WATERMARK_LOW_PCT) / 100; > + efct->tgt_efct.watermark_max = > + (total_ios * EFCT_WATERMARK_HIGH_PCT) / 100; > + atomic_set(&efct->tgt_efct.io_high_watermark, > + efct->tgt_efct.watermark_max); > + atomic_set(&efct->tgt_efct.watermark_hit, 0); > + atomic_set(&efct->tgt_efct.initiator_count, 0); > + > + lio_wq = create_singlethread_workqueue("efct_lio_worker"); > + if (!lio_wq) { > + efc_log_err(efct, "workqueue create failed\n"); > + return -ENOMEM; > + } > + > + spin_lock_init(&efct->tgt_efct.efct_lio_lock); > + INIT_LIST_HEAD(&efct->tgt_efct.vport_list); > + > + return rc; You don't need rc. You also probably wanted to use the EFC_FAL/SUCCESS values here because I think the caller does. > +} > + > +int efct_scsi_tgt_del_device(struct efct *efct) > +{ > + flush_workqueue(lio_wq); > + > + return EFC_SUCCESS; > +} > + > +int > +efct_scsi_tgt_new_nport(struct efc *efc, struct efc_nport *nport) > +{ > + struct efct *efct = nport->efc->base; > + > + efc_log_debug(efct, "New SPORT: %s bound to %s\n", nport->display_name, > + efct->tgt_efct.lio_nport->wwpn_str); > + > + return EFC_SUCCESS; > +} > + > +void > +efct_scsi_tgt_del_nport(struct efc *efc, struct efc_nport *nport) > +{ > + efc_log_debug(efc, "Del SPORT: %s\n", nport->display_name); > +} > + > +static void efct_lio_setup_session(struct work_struct *work) > +{ > + struct efct_lio_wq_data *wq_data = > + container_of(work, struct efct_lio_wq_data, work); > + struct efct *efct = wq_data->efct; > + struct efc_node *node = wq_data->ptr; > + char wwpn[WWN_NAME_LEN]; > + struct efct_lio_tpg *tpg = NULL; > + struct efct_node *tgt_node; > + struct se_portal_group *se_tpg; > + struct se_session *se_sess; > + int watermark; > + int ini_count; > + u64 id; > + > + /* Check to see if it's belongs to vport, > + * if not get physical port > + */ > + tpg = efct_get_vport_tpg(node); > + if (tpg) { > + se_tpg = &tpg->tpg; > + } else if (efct->tgt_efct.tpg) { > + tpg = efct->tgt_efct.tpg; > + se_tpg = &tpg->tpg; > + } else { > + efc_log_err(efct, "failed to init session\n"); > + return; > + } > + > + /* > + * Format the FCP Initiator port_name into colon > + * separated values to match the format by our explicit > + * ConfigFS NodeACLs. > + */ > + efct_format_wwn(wwpn, sizeof(wwpn), "", efc_node_get_wwpn(node)); > + > + se_sess = target_setup_session(se_tpg, 0, 0, TARGET_PROT_NORMAL, wwpn, > + node, efct_session_cb); > + if (IS_ERR(se_sess)) { > + efc_log_err(efct, "failed to setup session\n"); > + kfree(wq_data); > + efc_scsi_sess_reg_complete(node, EFC_FAIL); > + return; > + } > + > + efc_log_debug(efct, "new initiator sess=%p node=%p id: %llx\n", > + se_sess, node, id); > + > + tgt_node = node->tgt_node; > + id = (u64) tgt_node->port_fc_id << 32 | tgt_node->node_fc_id; > + > + if (xa_err(xa_store(&efct->lookup, id, tgt_node, GFP_KERNEL))) > + efc_log_err(efct, "Node lookup store failed\n"); > + > + efc_scsi_sess_reg_complete(node, EFC_SUCCESS); > + > + /* update IO watermark: increment initiator count */ > + ini_count = atomic_add_return(1, &efct->tgt_efct.initiator_count); > + watermark = efct->tgt_efct.watermark_max - > + ini_count * EFCT_IO_WATERMARK_PER_INITIATOR; > + watermark = (efct->tgt_efct.watermark_min > watermark) ? > + efct->tgt_efct.watermark_min : watermark; > + atomic_set(&efct->tgt_efct.io_high_watermark, watermark); > + > + kfree(wq_data); > +} > + > +int efct_scsi_new_initiator(struct efc *efc, struct efc_node *node) > +{ > + struct efct *efct = node->efc->base; > + struct efct_lio_wq_data *wq_data; > + > + /* > + * Since LIO only supports initiator validation at thread level, > + * we are open minded and accept all callers. > + */ > + wq_data = kzalloc(sizeof(*wq_data), GFP_ATOMIC); > + if (!wq_dlibefc_function_templateata) > + return -ENOMEM; I think in other libefc_function_template callouts you return EFCT_SCSI_CALL_COMPLETE on failure. Probably should stuck to one type of return value. > + > + wq_data->ptr = node; > + wq_data->efct = efct; > + INIT_WORK(&wq_data->work, efct_lio_setup_session); > + queue_work(lio_wq, &wq_data->work); > + return EFCT_SCSI_CALL_ASYNC; > +} > + > +static void efct_lio_remove_session(struct work_struct *work) > +{ > + struct efct_lio_wq_data *wq_data = > + container_of(work, struct efct_lio_wq_data, work); > + struct efct *efct = wq_data->efct; > + struct efc_node *node = wq_data->ptr; > + struct efct_node *tgt_node = NULL; No need to set to NULL since we set it a couple lines later. This happens a lot in the driver, so do a quick scan. > + struct se_session *se_sess; > + > + tgt_node = node->tgt_node; > + se_sess = tgt_node->session; > + > + if (!se_sess) { > + /* base driver has sent back-to-back requests > + * to unreg session with no intervening > + * register > + */ I didn't get this part. Should this be if (!node->tgt_node) Also does there need to be some code that sets node->tgt_node to NULL after the removal? It looks like tgt_node->session will always be set. We set it in efct_session_cb when we allocate the tgt_node and I didn't see any place it's set to null. I think if we did call efct_lio_remove_session on a node back to back without a registered session on the second call, then we would end up with refcount warnings or errors from accessing a freed tgt_node. So you probably wanted a node->tgt_node check before queueing the work. > + efc_log_err(efct, "unreg session for NULL session\n"); > + efc_scsi_del_initiator_complete(node->efc, node); > + return; > + } > + > + efc_log_debug(efct, "unreg session se_sess=%p node=%p\n", > + se_sess, node); > + > + /* first flag all session commands to complete */ > + target_stop_session(se_sess); > + > + /* now wait for session commands to complete */ > + target_wait_for_sess_cmds(se_sess); > + target_remove_session(se_sess); > + > + kref_put(&tgt_node->ref, tgt_node->release); How the refcount and session removal work was not clear but I was lazy. I see: - target_remove_session is going to free the session. - We are using refcounts for the efct_node. - We check for if efct_node->session is NULL in some places which I don't think can happen. Is the above put going to be the last put? If so, are refcounts needed? I mean does the code work by stopping the internal driver threads/irqs from sending new IO to lio then we run this function? Or could you have something like: efct_dispatch_frame -> efct_dispatch_fcp_cmd -> efct_scsi_recv_cmd running when this happening? Was there some place like here where efct_node->session was set to NULL and that's what we try to detect in efct_scsi_recv_cmd? I might have misread the code because sometimes efct_node is node (like in efct_dispatch_frame) and sometimes its tgt_node like above. > + > + kfree(wq_data); > +} > + > +int efct_scsi_del_initiator(struct efc *efc, struct efc_node *node, int reason) > +{ > + struct efct *efct = node->efc->base; > + struct efct_node *tgt_node = node->tgt_node; > + struct efct_lio_wq_data *wq_data; > + int watermark; > + int ini_count; > + u64 id; > + > + if (reason == EFCT_SCSI_INITIATOR_MISSING) > + return EFCT_SCSI_CALL_COMPLETE; > + > + wq_data = kzalloc(sizeof(*wq_data), GFP_ATOMIC); > + if (!wq_data) > + return EFCT_SCSI_CALL_COMPLETE; > + > + id = (u64) tgt_node->port_fc_id << 32 | tgt_node->node_fc_id; > + xa_erase(&efct->lookup, id); > + > + wq_data->ptr = node; > + wq_data->efct = efct; > + INIT_WORK(&wq_data->work, efct_lio_remove_session); > + queue_work(lio_wq, &wq_data->work); I think you can stick the work inside the efct_node struct. > + > + /* > + * update IO watermark: decrement initiator count > + */ > + ini_count = atomic_sub_return(1, &efct->tgt_efct.initiator_count); > + > + watermark = efct->tgt_efct.watermark_max - > + ini_count * EFCT_IO_WATERMARK_PER_INITIATOR; > + watermark = (efct->tgt_efct.watermark_min > watermark) ? > + efct->tgt_efct.watermark_min : watermark; > + atomic_set(&efct->tgt_efct.io_high_watermark, watermark); > + > + return EFCT_SCSI_CALL_ASYNC; > +} > + > +void efct_scsi_recv_cmd(struct efct_io *io, uint64_t lun, u8 *cdb, > + u32 cdb_len, u32 flags) > +{ > + struct efct_scsi_tgt_io *ocp = &io->tgt_io; > + struct se_cmd *se_cmd = &io->tgt_io.cmd; > + struct efct *efct = io->efct; > + char *ddir; > + struct efct_node *tgt_node = NULL; > + struct se_session *se_sess; > + int rc = 0; > + > + memset(ocp, 0, sizeof(struct efct_scsi_tgt_io)); > + efct_set_lio_io_state(io, EFCT_LIO_STATE_SCSI_RECV_CMD); > + atomic_add_return(1, &efct->tgt_efct.ios_in_use); > + > + /* set target timeout */ > + io->timeout = efct->target_io_timer_sec; > + > + if (flags & EFCT_SCSI_CMD_SIMPLE) > + ocp->task_attr = TCM_SIMPLE_TAG; > + else if (flags & EFCT_SCSI_CMD_HEAD_OF_QUEUE) > + ocp->task_attr = TCM_HEAD_TAG; > + else if (flags & EFCT_SCSI_CMD_ORDERED) > + ocp->task_attr = TCM_ORDERED_TAG; > + else if (flags & EFCT_SCSI_CMD_ACA) > + ocp->task_attr = TCM_ACA_TAG; > + > + switch (flags & (EFCT_SCSI_CMD_DIR_IN | EFCT_SCSI_CMD_DIR_OUT)) { > + case EFCT_SCSI_CMD_DIR_IN: > + ddir = "FROM_INITIATOR"; > + ocp->ddir = DMA_TO_DEVICE; > + break; > + case EFCT_SCSI_CMD_DIR_OUT: > + ddir = "TO_INITIATOR"; > + ocp->ddir = DMA_FROM_DEVICE; > + break; > + case EFCT_SCSI_CMD_DIR_IN | EFCT_SCSI_CMD_DIR_OUT: > + ddir = "BIDIR"; > + ocp->ddir = DMA_BIDIRECTIONAL; > + break; > + default: > + ddir = "NONE"; > + ocp->ddir = DMA_NONE; > + break; > + } > + > + ocp->lun = lun; > + efct_lio_io_printf(io, "new cmd=0x%x ddir=%s dl=%u\n", > + cdb[0], ddir, io->exp_xfer_len); > + > + tgt_node = io->node; > + se_sess = tgt_node->session; > + if (!se_sess) { Can this happen? I think when tgt_node is allocated in efct_session_cb and at that time we set the session pointer, so these 2 structs are bound for life. > + efc_log_err(efct, "No session found to submit IO se_cmd: %p\n", > + &ocp->cmd); > + efct_scsi_io_free(io); > + return; > + } > + > + efct_set_lio_io_state(io, EFCT_LIO_STATE_TGT_SUBMIT_CMD); > + rc = target_init_cmd(se_cmd, se_sess, &io->tgt_io.sense_buffer[0], > + ocp->lun, io->exp_xfer_len, ocp->task_attr, > + ocp->ddir, TARGET_SCF_ACK_KREF); > + if (rc) { > + efc_log_err(efct, "failed to init cmd se_cmd: %p\n", se_cmd); > + efct_scsi_io_free(io); > + return; > + } > + > + if (target_submit_prep(se_cmd, cdb, NULL, 0, NULL, 0, > + NULL, 0, GFP_ATOMIC)) > + return; > + > + target_submit(se_cmd); > +} > + > +int > +efct_scsi_recv_tmf(struct efct_io *tmfio, u32 lun, enum efct_scsi_tmf_cmd cmd, > + struct efct_io *io_to_abort, u32 flags) > +{ > + unsigned char tmr_func; > + struct efct *efct = tmfio->efct; > + struct efct_scsi_tgt_io *ocp = &tmfio->tgt_io; > + struct efct_node *tgt_node = NULL; > + struct se_session *se_sess; > + int rc; > + > + memset(ocp, 0, sizeof(struct efct_scsi_tgt_io)); > + efct_set_lio_io_state(tmfio, EFCT_LIO_STATE_SCSI_RECV_TMF); > + atomic_add_return(1, &efct->tgt_efct.ios_in_use); > + efct_lio_tmfio_printf(tmfio, "%s: new tmf %x lun=%u\n", > + tmfio->display_name, cmd, lun); > + > + switch (cmd) { > + case EFCT_SCSI_TMF_ABORT_TASK: > + tmr_func = TMR_ABORT_TASK; > + break; > + case EFCT_SCSI_TMF_ABORT_TASK_SET: > + tmr_func = TMR_ABORT_TASK_SET; > + break; > + case EFCT_SCSI_TMF_CLEAR_TASK_SET: > + tmr_func = TMR_CLEAR_TASK_SET; > + break; > + case EFCT_SCSI_TMF_LOGICAL_UNIT_RESET: > + tmr_func = TMR_LUN_RESET; > + break; > + case EFCT_SCSI_TMF_CLEAR_ACA: > + tmr_func = TMR_CLEAR_ACA; > + break; > + case EFCT_SCSI_TMF_TARGET_RESET: > + tmr_func = TMR_TARGET_WARM_RESET; > + break; > + case EFCT_SCSI_TMF_QUERY_ASYNCHRONOUS_EVENT: > + case EFCT_SCSI_TMF_QUERY_TASK_SET: > + default: > + goto tmf_fail; > + } > + > + tmfio->tgt_io.tmf = tmr_func; > + tmfio->tgt_io.lun = lun; > + tmfio->tgt_io.io_to_abort = io_to_abort; > + > + tgt_node = tmfio->node; > + > + se_sess = tgt_node->session; > + if (!se_sess) > + return EFC_SUCCESS; > + > + rc = target_submit_tmr(&ocp->cmd, se_sess, NULL, lun, ocp, tmr_func, > + GFP_ATOMIC, tmfio->init_task_tag, TARGET_SCF_ACK_KREF); > + > + efct_set_lio_io_state(tmfio, EFCT_LIO_STATE_TGT_SUBMIT_TMR); Is it possible that the ocp->cmd could complete from another CPU and be freed by the time this returns? It seems unlikely but did you want a get() on the cmd just in case?
On 4/24/21 3:34 PM, Mike Christie wrote: > Or could you have something like: > > efct_dispatch_frame -> efct_dispatch_fcp_cmd -> efct_scsi_recv_cmd > > running when this happening? Was there some place like here where > efct_node->session was set to NULL and that's what we try to detect > in efct_scsi_recv_cmd? Oh yeah, I think if you can still have IO running from inside the driver you want to move the target_remove_session call to _efct_tgt_node_free. We will then handle the case where a cmd has passed efct_node_find and got a ref to the efct_node but not yet reached target_init_cmd. When the cmd hits target_init_cmd lio will return a failure, efct_dispatch_frame will drop its ref, and then we will free the efct_node and lio session.