*
* When the host needs to go down, all users must be quiesced and their
* memory freed. This is accomplished by putting the contexts in error
- * state which will notify the user and let them 'drive' the tear-down.
+ * state which will notify the user and let them 'drive' the tear down.
* Meanwhile, this routine camps until all user contexts have been removed.
*/
void cxlflash_stop_term_user_contexts(struct cxlflash_cfg *cfg)
dev_dbg(dev, "%s: Wait for user contexts to quiesce...\n",
__func__);
- wake_up_all(&cfg->limbo_waitq);
+ wake_up_all(&cfg->reset_waitq);
ssleep(1);
}
}
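The camping loop the comment describes would follow a shape like the one below. This is a minimal sketch, assuming the context table is a MAX_CONTEXT-sized array (cfg->ctx_tbl[]) that drains as users detach; the routine's actual body may differ:

	while (true) {
		bool found = false;
		int i;

		/* Re-scan the context table for any remaining user */
		for (i = 0; i < MAX_CONTEXT; i++)
			if (cfg->ctx_tbl[i]) {
				found = true;
				break;
			}
		if (!found)
			break;		/* all user contexts removed */

		dev_dbg(dev, "%s: Wait for user contexts to quiesce...\n",
			__func__);
		wake_up_all(&cfg->reset_waitq);	/* kick any waiting threads */
		ssleep(1);			/* camp and re-check */
	}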
if (likely(ctxid < MAX_CONTEXT)) {
while (true) {
- rc = mutex_lock_interruptible(&cfg->ctx_tbl_list_mutex);
- if (rc)
- goto out;
-
+ mutex_lock(&cfg->ctx_tbl_list_mutex);
ctxi = cfg->ctx_tbl[ctxid];
if (ctxi)
if ((file && (ctxi->file != file)) ||
{
struct device *dev = &cfg->dev->dev;
struct afu *afu = cfg->afu;
- struct sisl_ctrl_map *ctrl_map = ctxi->ctrl_map;
+ struct sisl_ctrl_map __iomem *ctrl_map = ctxi->ctrl_map;
int rc = 0;
u64 val;
* @sdev: SCSI device associated with LUN.
* @lli: LUN destined for capacity request.
*
+ * The READ_CAP16 can take quite a while to complete. Should an EEH occur while
+ * in scsi_execute(), the EEH handler will attempt to recover. As part of the
+ * recovery, the handler drains all currently running ioctls, waiting until they
+ * have completed before proceeding with a reset. As this routine is used on the
+ * ioctl path, this can create a condition where the EEH handler becomes stuck,
+ * infinitely waiting for this ioctl thread. To avoid this behavior, temporarily
+ * unmark this thread as an ioctl thread by releasing the ioctl read semaphore.
+ * This will allow the EEH handler to proceed with a recovery while this thread
+ * is still running. Once the scsi_execute() returns, reacquire the ioctl read
+ * semaphore and check the adapter state in case it changed while inside of
+ * scsi_execute(). The state check will wait if the adapter is still being
+ * recovered or return a failure if the recovery failed. In the event that the
+ * adapter reset failed, simply return the failure as the ioctl would be unable
+ * to continue.
+ *
+ * Note that the above puts a requirement on this routine to only be called on
+ * an ioctl thread.
+ *
* Return: 0 on success, -errno on failure
*/
static int read_cap16(struct scsi_device *sdev, struct llun_info *lli)
dev_dbg(dev, "%s: %ssending cmd(0x%x)\n", __func__,
retry_cnt ? "re" : "", scsi_cmd[0]);
+ /* Drop the ioctl read semaphore across lengthy call */
+ up_read(&cfg->ioctl_rwsem);
result = scsi_execute(sdev, scsi_cmd, DMA_FROM_DEVICE, cmd_buf,
CMD_BUFSIZE, sense_buf, to, CMD_RETRIES, 0, NULL);
+ down_read(&cfg->ioctl_rwsem);
+ rc = check_state(cfg);
+ if (rc) {
+ dev_err(dev, "%s: Failed state! result=0x%08X\n",
+ __func__, result);
+ rc = -ENODEV;
+ goto out;
+ }
if (driver_byte(result) == DRIVER_SENSE) {
result &= ~(0xFF<<24); /* DRIVER_SENSE is not an error */
* as the buffer is allocated on an aligned boundary.
*/
mutex_lock(&gli->mutex);
- gli->max_lba = be64_to_cpu(*((u64 *)&cmd_buf[0]));
- gli->blk_len = be32_to_cpu(*((u32 *)&cmd_buf[8]));
+ gli->max_lba = be64_to_cpu(*((__be64 *)&cmd_buf[0]));
+ gli->blk_len = be32_to_cpu(*((__be32 *)&cmd_buf[8]));
mutex_unlock(&gli->mutex);
out:
kfree(ctxi->rht_needs_ws);
kfree(ctxi->rht_lun);
kfree(ctxi);
- atomic_dec_if_positive(&cfg->num_user_contexts);
}
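As an aside on the capacity parse in read_cap16() above: the casts exist only to feed the endian converters, so the same bytes could be pulled with the kernel's unaligned helpers instead. A sketch of that alternative, not what this patch does:

	#include <asm/unaligned.h>

	/*
	 * READ CAPACITY(16) data: bytes 0-7 carry the last LBA and bytes
	 * 8-11 the block length, both big endian.
	 */
	mutex_lock(&gli->mutex);
	gli->max_lba = get_unaligned_be64(&cmd_buf[0]);
	gli->blk_len = get_unaligned_be32(&cmd_buf[8]);
	mutex_unlock(&gli->mutex);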
/**
INIT_LIST_HEAD(&ctxi->luns);
INIT_LIST_HEAD(&ctxi->list); /* initialize for list_empty() */
- atomic_inc(&cfg->num_user_contexts);
mutex_lock(&ctxi->mutex);
out:
return ctxi;
return rc;
}
-/*
- * Local fops for adapter file descriptor
- */
-static const struct file_operations cxlflash_cxl_fops = {
+const struct file_operations cxlflash_cxl_fops = {
.owner = THIS_MODULE,
.mmap = cxlflash_cxl_mmap,
.release = cxlflash_cxl_release,
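Dropping the static qualifier means the fops are now referenced from another compilation unit (note the matching removal of the per-attach copy further down). Presumably a declaration accompanies it in the driver's shared header; the location is assumed:

	/* e.g. in the driver's common header (location assumed) */
	extern const struct file_operations cxlflash_cxl_fops;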
*
* Return: 0 on success, -errno on failure
*/
-static int check_state(struct cxlflash_cfg *cfg)
+int check_state(struct cxlflash_cfg *cfg)
{
struct device *dev = &cfg->dev->dev;
int rc = 0;
retry:
switch (cfg->state) {
- case STATE_LIMBO:
- dev_dbg(dev, "%s: Limbo state, going to wait...\n", __func__);
+ case STATE_RESET:
+ dev_dbg(dev, "%s: Reset state, going to wait...\n", __func__);
up_read(&cfg->ioctl_rwsem);
- rc = wait_event_interruptible(cfg->limbo_waitq,
- cfg->state != STATE_LIMBO);
+ rc = wait_event_interruptible(cfg->reset_waitq,
+ cfg->state != STATE_RESET);
down_read(&cfg->ioctl_rwsem);
if (unlikely(rc))
break;
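Since check_state() is likewise exported above, callers on the ioctl path gate hardware access on it. A hypothetical caller, with everything except check_state() and ioctl_rwsem invented for illustration:

	static int example_ioctl_handler(struct cxlflash_cfg *cfg)
	{
		int rc;

		down_read(&cfg->ioctl_rwsem);	/* mark thread as an ioctl */
		rc = check_state(cfg);		/* may drop and retake the
						 * semaphore while resetting */
		if (rc)
			goto out;

		/* ... adapter is in a good state; do the real work ... */
	out:
		up_read(&cfg->ioctl_rwsem);
		return rc;
	}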
int fd = -1;
- /* On first attach set fileops */
- if (atomic_read(&cfg->num_user_contexts) == 0)
- cfg->cxl_fops = cxlflash_cxl_fops;
-
if (attach->num_interrupts > 4) {
dev_dbg(dev, "%s: Cannot support this many interrupts %llu\n",
__func__, attach->num_interrupts);
* quite possible for this routine to act as the kernel's EEH detection
* source (MMIO read of mbox_r). Because of this, there is a window of
* time where an EEH might have been detected but not yet 'serviced'
- * (callback invoked, causing the device to enter limbo state). To avoid
+ * (callback invoked, causing the device to enter reset state). To avoid
* looping in this routine during that window, a 1 second sleep is in place
* between the time the MMIO failure is detected and the time a wait on the
- * limbo wait queue is attempted via check_state().
+ * reset wait queue is attempted via check_state().
*
* Return: 0 on success, -errno on failure
*/
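A sketch of the window handling this comment describes, assuming the detection is an MMIO read of mbox_r returning all ones (a common EEH signature) and that reg, rc, and the out label exist in the enclosing routine; this is not the literal body:

	reg = readq_be(&afu->ctrl_map->mbox_r);
	if (reg == -1) {
		/*
		 * MMIO failed: an EEH was likely detected but not yet
		 * serviced. Sleep so the EEH callback can move the device
		 * into the reset state before check_state() waits on the
		 * reset wait queue.
		 */
		dev_dbg(dev, "%s: MMIO fail, wait for recovery.\n", __func__);
		ssleep(1);
		rc = check_state(cfg);
		if (rc)
			goto out;
	}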
* inquiry (i.e. the Unit attention is due to the WWN changing).
*/
if (verify->hint & DK_CXLFLASH_VERIFY_HINT_SENSE) {
+ /*
+ * Can't hold mutex across process_sense/read_cap16,
+ * since we could have an intervening EEH event.
+ */
+ ctxi->unavail = true;
+ mutex_unlock(&ctxi->mutex);
rc = process_sense(sdev, verify);
if (unlikely(rc)) {
dev_err(dev, "%s: Failed to validate sense data (%d)\n",
__func__, rc);
+ mutex_lock(&ctxi->mutex);
+ ctxi->unavail = false;
goto out;
}
+ mutex_lock(&ctxi->mutex);
+ ctxi->unavail = false;
}
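The unavail flag set above only helps if the context lookup path honors it. A hypothetical guard on the lookup side (the actual get_context() logic is not part of this hunk):

	/*
	 * Refuse to hand out a context marked unavailable while another
	 * thread holds it across a lengthy call such as process_sense().
	 */
	mutex_lock(&ctxi->mutex);
	if (ctxi->unavail) {
		mutex_unlock(&ctxi->mutex);
		ctxi = NULL;	/* treat as if no context was found */
	}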
switch (gli->mode) {