Merge "scsi: ufs: fix race between hibern8 failure recovery and error handler"

This commit is contained in:
Linux Build Service Account 2016-08-26 14:48:28 -07:00 committed by Gerrit - the friendly Code Review server
commit cd6b388dab
3 changed files with 95 additions and 59 deletions

View file

@ -1412,8 +1412,10 @@ static ssize_t ufsdbg_reset_controller_write(struct file *filp,
struct ufs_hba *hba = filp->f_mapping->host->i_private; struct ufs_hba *hba = filp->f_mapping->host->i_private;
unsigned long flags; unsigned long flags;
spin_lock_irqsave(hba->host->host_lock, flags); pm_runtime_get_sync(hba->dev);
ufshcd_hold(hba, false);
spin_lock_irqsave(hba->host->host_lock, flags);
/* /*
* simulating a dummy error in order to "convince" * simulating a dummy error in order to "convince"
* eh_work to actually reset the controller * eh_work to actually reset the controller
@ -1421,9 +1423,13 @@ static ssize_t ufsdbg_reset_controller_write(struct file *filp,
hba->saved_err |= INT_FATAL_ERRORS; hba->saved_err |= INT_FATAL_ERRORS;
hba->silence_err_logs = true; hba->silence_err_logs = true;
schedule_work(&hba->eh_work); schedule_work(&hba->eh_work);
spin_unlock_irqrestore(hba->host->host_lock, flags); spin_unlock_irqrestore(hba->host->host_lock, flags);
flush_work(&hba->eh_work);
ufshcd_release(hba, false);
pm_runtime_put_sync(hba->dev);
return cnt; return cnt;
} }

View file

@ -2630,6 +2630,13 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
return SCSI_MLQUEUE_HOST_BUSY; return SCSI_MLQUEUE_HOST_BUSY;
spin_lock_irqsave(hba->host->host_lock, flags); spin_lock_irqsave(hba->host->host_lock, flags);
/* if error handling is in progress, return host busy */
if (ufshcd_eh_in_progress(hba)) {
err = SCSI_MLQUEUE_HOST_BUSY;
goto out_unlock;
}
switch (hba->ufshcd_state) { switch (hba->ufshcd_state) {
case UFSHCD_STATE_OPERATIONAL: case UFSHCD_STATE_OPERATIONAL:
break; break;
@ -2647,13 +2654,6 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
cmd->scsi_done(cmd); cmd->scsi_done(cmd);
goto out_unlock; goto out_unlock;
} }
/* if error handling is in progress, don't issue commands */
if (ufshcd_eh_in_progress(hba)) {
set_host_byte(cmd, DID_ERROR);
cmd->scsi_done(cmd);
goto out_unlock;
}
spin_unlock_irqrestore(hba->host->host_lock, flags); spin_unlock_irqrestore(hba->host->host_lock, flags);
hba->req_abort_count = 0; hba->req_abort_count = 0;
@ -4039,32 +4039,50 @@ out:
static int ufshcd_link_recovery(struct ufs_hba *hba) static int ufshcd_link_recovery(struct ufs_hba *hba)
{ {
int ret; int ret = 0;
unsigned long flags; unsigned long flags;
spin_lock_irqsave(hba->host->host_lock, flags); /*
hba->ufshcd_state = UFSHCD_STATE_RESET; * Check if there is any race with fatal error handling.
ufshcd_set_eh_in_progress(hba); * If so, wait for it to complete. Even though fatal error
* handling does reset and restore in some cases, don't assume
* anything out of it. We are just avoiding race here.
*/
do {
spin_lock_irqsave(hba->host->host_lock, flags);
if (!(work_pending(&hba->eh_work) ||
hba->ufshcd_state == UFSHCD_STATE_RESET))
break;
spin_unlock_irqrestore(hba->host->host_lock, flags);
dev_dbg(hba->dev, "%s: reset in progress\n", __func__);
flush_work(&hba->eh_work);
} while (1);
/*
* we don't know if previous reset had really reset the host controller
* or not. So let's force reset here to be sure.
*/
hba->ufshcd_state = UFSHCD_STATE_ERROR;
hba->force_host_reset = true;
schedule_work(&hba->eh_work);
/* wait for the reset work to finish */
do {
if (!(work_pending(&hba->eh_work) ||
hba->ufshcd_state == UFSHCD_STATE_RESET))
break;
spin_unlock_irqrestore(hba->host->host_lock, flags);
dev_dbg(hba->dev, "%s: reset in progress\n", __func__);
flush_work(&hba->eh_work);
spin_lock_irqsave(hba->host->host_lock, flags);
} while (1);
if (!((hba->ufshcd_state == UFSHCD_STATE_OPERATIONAL) &&
ufshcd_is_link_active(hba)))
ret = -ENOLINK;
spin_unlock_irqrestore(hba->host->host_lock, flags); spin_unlock_irqrestore(hba->host->host_lock, flags);
ret = ufshcd_vops_full_reset(hba);
if (ret)
dev_warn(hba->dev,
"full reset returned %d, trying to recover the link\n",
ret);
ret = ufshcd_host_reset_and_restore(hba);
spin_lock_irqsave(hba->host->host_lock, flags);
if (ret)
hba->ufshcd_state = UFSHCD_STATE_ERROR;
ufshcd_clear_eh_in_progress(hba);
spin_unlock_irqrestore(hba->host->host_lock, flags);
if (ret)
dev_err(hba->dev, "%s: link recovery failed, err %d",
__func__, ret);
return ret; return ret;
} }
@ -4087,8 +4105,7 @@ static int __ufshcd_uic_hibern8_enter(struct ufs_hba *hba)
* If link recovery fails then return error so that caller * If link recovery fails then return error so that caller
* don't retry the hibern8 enter again. * don't retry the hibern8 enter again.
*/ */
if (ufshcd_link_recovery(hba)) ret = ufshcd_link_recovery(hba);
ret = -ENOLINK;
} else { } else {
dev_dbg(hba->dev, "%s: Hibern8 Enter at %lld us", __func__, dev_dbg(hba->dev, "%s: Hibern8 Enter at %lld us", __func__,
ktime_to_us(ktime_get())); ktime_to_us(ktime_get()));
@ -5604,11 +5621,9 @@ static void ufshcd_err_handler(struct work_struct *work)
hba = container_of(work, struct ufs_hba, eh_work); hba = container_of(work, struct ufs_hba, eh_work);
ufsdbg_set_err_state(hba);
pm_runtime_get_sync(hba->dev);
ufshcd_hold_all(hba);
spin_lock_irqsave(hba->host->host_lock, flags); spin_lock_irqsave(hba->host->host_lock, flags);
ufsdbg_set_err_state(hba);
if (hba->ufshcd_state == UFSHCD_STATE_RESET) if (hba->ufshcd_state == UFSHCD_STATE_RESET)
goto out; goto out;
@ -5644,7 +5659,8 @@ static void ufshcd_err_handler(struct work_struct *work)
} }
} }
if ((hba->saved_err & INT_FATAL_ERRORS) || hba->saved_ce_err || if ((hba->saved_err & INT_FATAL_ERRORS)
|| hba->saved_ce_err || hba->force_host_reset ||
((hba->saved_err & UIC_ERROR) && ((hba->saved_err & UIC_ERROR) &&
(hba->saved_uic_err & (UFSHCD_UIC_DL_PA_INIT_ERROR | (hba->saved_uic_err & (UFSHCD_UIC_DL_PA_INIT_ERROR |
UFSHCD_UIC_DL_NAC_RECEIVED_ERROR | UFSHCD_UIC_DL_NAC_RECEIVED_ERROR |
@ -5732,6 +5748,7 @@ skip_pending_xfer_clear:
hba->saved_err = 0; hba->saved_err = 0;
hba->saved_uic_err = 0; hba->saved_uic_err = 0;
hba->saved_ce_err = 0; hba->saved_ce_err = 0;
hba->force_host_reset = false;
} }
skip_err_handling: skip_err_handling:
@ -5743,12 +5760,9 @@ skip_err_handling:
} }
hba->silence_err_logs = false; hba->silence_err_logs = false;
ufshcd_clear_eh_in_progress(hba);
out: out:
ufshcd_clear_eh_in_progress(hba);
spin_unlock_irqrestore(hba->host->host_lock, flags); spin_unlock_irqrestore(hba->host->host_lock, flags);
ufshcd_scsi_unblock_requests(hba);
ufshcd_release_all(hba);
pm_runtime_put_sync(hba->dev);
} }
static void ufshcd_update_uic_reg_hist(struct ufs_uic_err_reg_hist *reg_hist, static void ufshcd_update_uic_reg_hist(struct ufs_uic_err_reg_hist *reg_hist,
@ -5849,8 +5863,11 @@ static void ufshcd_check_errors(struct ufs_hba *hba)
/* handle fatal errors only when link is functional */ /* handle fatal errors only when link is functional */
if (hba->ufshcd_state == UFSHCD_STATE_OPERATIONAL) { if (hba->ufshcd_state == UFSHCD_STATE_OPERATIONAL) {
/* block commands from scsi mid-layer */ /*
__ufshcd_scsi_block_requests(hba); * Set error handling in progress flag early so that we
* don't issue new requests any more.
*/
ufshcd_set_eh_in_progress(hba);
hba->ufshcd_state = UFSHCD_STATE_ERROR; hba->ufshcd_state = UFSHCD_STATE_ERROR;
schedule_work(&hba->eh_work); schedule_work(&hba->eh_work);
@ -6354,6 +6371,11 @@ static int ufshcd_reset_and_restore(struct ufs_hba *hba)
int retries = MAX_HOST_RESET_RETRIES; int retries = MAX_HOST_RESET_RETRIES;
do { do {
err = ufshcd_vops_full_reset(hba);
if (err)
dev_warn(hba->dev, "%s: full reset returned %d\n",
__func__, err);
err = ufshcd_host_reset_and_restore(hba); err = ufshcd_host_reset_and_restore(hba);
} while (err && --retries); } while (err && --retries);
@ -6383,13 +6405,12 @@ static int ufshcd_reset_and_restore(struct ufs_hba *hba)
*/ */
static int ufshcd_eh_host_reset_handler(struct scsi_cmnd *cmd) static int ufshcd_eh_host_reset_handler(struct scsi_cmnd *cmd)
{ {
int err; int err = SUCCESS;
unsigned long flags; unsigned long flags;
struct ufs_hba *hba; struct ufs_hba *hba;
hba = shost_priv(cmd->device->host); hba = shost_priv(cmd->device->host);
ufshcd_hold_all(hba);
/* /*
* Check if there is any race with fatal error handling. * Check if there is any race with fatal error handling.
* If so, wait for it to complete. Even though fatal error * If so, wait for it to complete. Even though fatal error
@ -6402,29 +6423,37 @@ static int ufshcd_eh_host_reset_handler(struct scsi_cmnd *cmd)
hba->ufshcd_state == UFSHCD_STATE_RESET)) hba->ufshcd_state == UFSHCD_STATE_RESET))
break; break;
spin_unlock_irqrestore(hba->host->host_lock, flags); spin_unlock_irqrestore(hba->host->host_lock, flags);
dev_dbg(hba->dev, "%s: reset in progress\n", __func__); dev_err(hba->dev, "%s: reset in progress - 1\n", __func__);
flush_work(&hba->eh_work); flush_work(&hba->eh_work);
} while (1); } while (1);
hba->ufshcd_state = UFSHCD_STATE_RESET; /*
ufshcd_set_eh_in_progress(hba); * we don't know if previous reset had really reset the host controller
spin_unlock_irqrestore(hba->host->host_lock, flags); * or not. So let's force reset here to be sure.
*/
hba->ufshcd_state = UFSHCD_STATE_ERROR;
hba->force_host_reset = true;
schedule_work(&hba->eh_work);
ufshcd_update_error_stats(hba, UFS_ERR_EH); /* wait for the reset work to finish */
err = ufshcd_reset_and_restore(hba); do {
if (!(work_pending(&hba->eh_work) ||
hba->ufshcd_state == UFSHCD_STATE_RESET))
break;
spin_unlock_irqrestore(hba->host->host_lock, flags);
dev_err(hba->dev, "%s: reset in progress - 2\n", __func__);
flush_work(&hba->eh_work);
spin_lock_irqsave(hba->host->host_lock, flags);
} while (1);
spin_lock_irqsave(hba->host->host_lock, flags); if (!((hba->ufshcd_state == UFSHCD_STATE_OPERATIONAL) &&
if (!err) { ufshcd_is_link_active(hba))) {
err = SUCCESS;
hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
} else {
err = FAILED; err = FAILED;
hba->ufshcd_state = UFSHCD_STATE_ERROR; hba->ufshcd_state = UFSHCD_STATE_ERROR;
} }
ufshcd_clear_eh_in_progress(hba);
spin_unlock_irqrestore(hba->host->host_lock, flags); spin_unlock_irqrestore(hba->host->host_lock, flags);
ufshcd_release_all(hba);
return err; return err;
} }

View file

@ -815,6 +815,7 @@ struct ufs_hba {
u32 saved_uic_err; u32 saved_uic_err;
u32 saved_ce_err; u32 saved_ce_err;
bool silence_err_logs; bool silence_err_logs;
bool force_host_reset;
/* Device management request data */ /* Device management request data */
struct ufs_dev_cmd dev_cmd; struct ufs_dev_cmd dev_cmd;