mhi: core: Fix PM state machine sync issue

Certain corner cases have been identified in the MHI core power management
state machine whereby the driver state can go out of sync with the
device.

The cases are as follows: During an MHI resume operation, the device
is not guaranteed to be fully recovered from LPM as the framework
requires, so RPM suspend can be invoked on a device which is still
in the middle of resuming.
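
The mhi_initiate_m0() hunk below closes this window by waiting for the
device to actually report M0/M1 before the resume path completes. A minimal
sketch of that wait, assuming the driver's mhi_device_ctxt fields and
mhi_log() helper as they appear in the diff (the wrapper function itself is
hypothetical):

/* Sketch only: block the resume path until the device reports M0 or M1 so
 * that a runtime-PM suspend cannot be issued against a device which is
 * still resuming. */
static int mhi_wait_for_resume(struct mhi_device_ctxt *mhi_dev_ctxt)
{
	int r;

	r = wait_event_interruptible_timeout(
			*mhi_dev_ctxt->mhi_ev_wq.m0_event,
			mhi_dev_ctxt->mhi_state == MHI_STATE_M0 ||
			mhi_dev_ctxt->mhi_state == MHI_STATE_M1,
			msecs_to_jiffies(MHI_MAX_RESUME_TIMEOUT));
	if (!r || r == -ERESTARTSYS) {
		mhi_log(MHI_MSG_ERROR,
			"Failed to get M0 event ret %d\n", r);
		return r ? r : -ETIMEDOUT;
	}
	return 0;
}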

During an RPM suspend action, if that action is cancelled, a timing
window exists in which previously cached commands are not played back
to the device. Those commands rot on the ring: they are never
acknowledged, which causes a non-recoverable timeout error on the
client channels.
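
The last hunk below addresses this by ringing the command doorbells as well
as the channel doorbells when the cancelled suspend is unwound. A sketch of
that replay step, reusing ring_all_chan_dbs()/ring_all_cmd_dbs() and the
data_pending flag from the diff (the wrapper itself is hypothetical):

/* Sketch only: after a cancelled suspend, replay both channel and command
 * doorbells so that cached commands are re-issued to the device instead of
 * sitting unacknowledged until the client channels time out. */
static void mhi_replay_cached_work(struct mhi_device_ctxt *mhi_dev_ctxt)
{
	atomic_inc(&mhi_dev_ctxt->flags.data_pending);
	ring_all_chan_dbs(mhi_dev_ctxt);
	ring_all_cmd_dbs(mhi_dev_ctxt);
	atomic_dec(&mhi_dev_ctxt->flags.data_pending);
}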

During an RPM suspend operation, a timing window exists in which the
device may still be transitioning to M2 when M3 is set. This causes a
device error that is recoverable only by a full reset.
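
One way to picture the guard is gating M3 entry on the m2_transition flag
that process_m1_transition() clears in the hunk below; this is a
hypothetical sketch of the idea, not the exact change made here:

/* Sketch only: do not request M3 while the device may still be moving to
 * M2; process_m1_transition() clears m2_transition once M2 entry has
 * completed. The polling loop is purely illustrative. */
static void mhi_request_m3(struct mhi_device_ctxt *mhi_dev_ctxt)
{
	while (atomic_read(&mhi_dev_ctxt->flags.m2_transition))
		usleep_range(1000, 2000);

	mhi_set_m_state(mhi_dev_ctxt, MHI_STATE_M3);
}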

CRs-Fixed: 856202
Change-Id: I232365ba77b02b9aec87fef4cecc3d991243afe2
Signed-off-by: Andrei Danaila <adanaila@codeaurora.org>

@@ -173,6 +173,7 @@ static enum MHI_STATUS mhi_process_event_ring(
 	case MHI_PKT_TYPE_SYS_ERR_EVENT:
 		mhi_log(MHI_MSG_INFO,
 			"MHI System Error Detected. Triggering Reset\n");
+		BUG();
 		if (!mhi_trigger_reset(mhi_dev_ctxt))
 			mhi_log(MHI_MSG_ERROR,
 				"Failed to reset for SYSERR recovery\n");

@@ -231,11 +231,14 @@ static enum MHI_STATUS process_m1_transition(
 		if (r) {
 			mhi_log(MHI_MSG_ERROR,
 				"Failed to remove counter ret %d\n", r);
+			BUG_ON(mhi_dev_ctxt->dev_info->
+			       plat_dev->dev.power.runtime_error);
 		}
 	}
 	atomic_set(&mhi_dev_ctxt->flags.m2_transition, 0);
 	mhi_log(MHI_MSG_INFO, "M2 transition complete.\n");
 	write_unlock_irqrestore(&mhi_dev_ctxt->xfer_lock, flags);
+	BUG_ON(atomic_read(&mhi_dev_ctxt->outbound_acks) < 0);
 	return MHI_STATUS_SUCCESS;
 }
@@ -849,6 +852,16 @@ int mhi_initiate_m0(struct mhi_device_ctxt *mhi_dev_ctxt)
 			mhi_set_m_state(mhi_dev_ctxt, MHI_STATE_M0);
 		}
 		write_unlock_irqrestore(&mhi_dev_ctxt->xfer_lock, flags);
+		r = wait_event_interruptible_timeout(
+				*mhi_dev_ctxt->mhi_ev_wq.m0_event,
+				mhi_dev_ctxt->mhi_state == MHI_STATE_M0 ||
+				mhi_dev_ctxt->mhi_state == MHI_STATE_M1,
+				msecs_to_jiffies(MHI_MAX_RESUME_TIMEOUT));
+		WARN_ON(!r || -ERESTARTSYS == r);
+		if (!r || -ERESTARTSYS == r)
+			mhi_log(MHI_MSG_ERROR,
+				"Failed to get M0 event ret %d\n", r);
+		r = 0;
 	}
 exit:
 	mutex_unlock(&mhi_dev_ctxt->pm_lock);
@@ -879,6 +892,7 @@ int mhi_initiate_m3(struct mhi_device_ctxt *mhi_dev_ctxt)
 			"Failed to set bus freq ret %d\n", r);
 		goto exit;
 		break;
+	case MHI_STATE_M0:
 	case MHI_STATE_M1:
 	case MHI_STATE_M2:
 		mhi_log(MHI_MSG_INFO,
@@ -977,6 +991,7 @@ exit:
 		atomic_inc(&mhi_dev_ctxt->flags.data_pending);
 		write_unlock_irqrestore(&mhi_dev_ctxt->xfer_lock, flags);
 		ring_all_chan_dbs(mhi_dev_ctxt);
+		ring_all_cmd_dbs(mhi_dev_ctxt);
 		atomic_dec(&mhi_dev_ctxt->flags.data_pending);
 		mhi_deassert_device_wake(mhi_dev_ctxt);
 	}