From ddfc1dd716fd1cdec78776b5b0eb0ff3e95bea2d Mon Sep 17 00:00:00 2001 From: Sharat Masetty Date: Fri, 28 Jul 2017 16:00:02 +0530 Subject: [PATCH] drm/msm: Fix race condition in the submit path There is a race condition issue between the IRQ context trying to trigger preemption and the user context trying to submit commands to the GPU. The check in a5xx_flush() API only updates the wptr if the GPU is not in preemption. In the cases where we move from PREEMPT_START to PREEMPT_NONE there is a small window where the preempt state is still in START but the CPU context switches to the user thread which is in the a5xx_flush() call to update the wptr, but fails to update the wptr to the GPU since the preempt state is not PREEMPT_NONE. This leads to a GPU stall. Introduce a new intermediate state PREEMPT_ABORT and change preempt_trigger() to use gpu's current ring instead of the ring retrieved from get_next_ring() while in this state. Change-Id: I333e9de19824bd373901bbc8afc829de04635017 CRs-Fixed: 2081164 Signed-off-by: Sharat Masetty --- drivers/gpu/drm/msm/adreno/a5xx_gpu.h | 8 +++++++- drivers/gpu/drm/msm/adreno/a5xx_preempt.c | 15 +++++++++++++-- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h index e637237fa811..c30b65785ab6 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h @@ -70,6 +70,8 @@ struct a5xx_gpu { * PREEMPT_NONE - no preemption in progress. Next state START. * PREEMPT_START - The trigger is evaulating if preemption is possible. Next * states: TRIGGERED, NONE + * PREEMPT_ABORT - An intermediate state before moving back to NONE. Next + * state: NONE. * PREEMPT_TRIGGERED: A preemption has been executed on the hardware. Next * states: FAULTED, PENDING * PREEMPT_FAULTED: A preemption timed out (never completed). This will trigger @@ -81,6 +83,7 @@ struct a5xx_gpu { enum preempt_state { PREEMPT_NONE = 0, PREEMPT_START, + PREEMPT_ABORT, PREEMPT_TRIGGERED, PREEMPT_FAULTED, PREEMPT_PENDING, @@ -184,7 +187,10 @@ int a5xx_snapshot(struct msm_gpu *gpu, struct msm_snapshot *snapshot); /* Return true if we are in a preempt state */ static inline bool a5xx_in_preempt(struct a5xx_gpu *a5xx_gpu) { - return !(atomic_read(&a5xx_gpu->preempt_state) == PREEMPT_NONE); + int preempt_state = atomic_read(&a5xx_gpu->preempt_state); + + return !(preempt_state == PREEMPT_NONE || + preempt_state == PREEMPT_ABORT); } int a5xx_counters_init(struct adreno_gpu *adreno_gpu); diff --git a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c index 6ab3ba076c2f..44d4ca35fa09 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c @@ -128,9 +128,20 @@ void a5xx_preempt_trigger(struct msm_gpu *gpu) * one do nothing except to update the wptr to the latest and greatest */ if (!ring || (a5xx_gpu->cur_ring == ring)) { - update_wptr(gpu, ring); + /* + * Its possible that while a preemption request is in progress + * from an irq context, a user context trying to submit might + * fail to update the write pointer, because it determines + * that the preempt state is not PREEMPT_NONE. + * + * Close the race by introducing an intermediate + * state PREEMPT_ABORT to let the submit path + * know that the ringbuffer is not going to change + * and can safely update the write pointer. + */ - /* Set the state back to NONE */ + set_preempt_state(a5xx_gpu, PREEMPT_ABORT); + update_wptr(gpu, a5xx_gpu->cur_ring); set_preempt_state(a5xx_gpu, PREEMPT_NONE); return; }