drm/msm: Enable per cmdstream profiling for the user
If the user provides a profile buffer, identified by the buffer type MSM_SUBMIT_CMD_PROFILE_BUF, the driver records the kernel clock time and the GPU ticks at the time of cmdstream submission, and the GPU records its ticks just before the cmdstream starts executing and right after it finishes executing. Change-Id: Ic6298ec5919b18e976ae089ffb0860b8165ce4f3 Signed-off-by: Sharat Masetty <smasetty@codeaurora.org>
This commit is contained in:
parent
ce87c90172
commit
e84be6486a
5 changed files with 86 additions and 0 deletions
|
@ -133,10 +133,30 @@ static int a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
|
|||
OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
|
||||
OUT_RING(ring, 0x02);
|
||||
|
||||
/* Record the always on counter before command execution */
|
||||
if (submit->profile_buf_iova) {
|
||||
uint64_t gpuaddr = submit->profile_buf_iova +
|
||||
offsetof(struct drm_msm_gem_submit_profile_buffer,
|
||||
ticks_submitted);
|
||||
|
||||
/*
|
||||
* Set bit[30] to make this command a 64 bit write operation.
|
||||
* bits[18-29] specify the number of consecutive registers
|
||||
* to copy; set this field to 2, since we want to copy
|
||||
* data from REG_A5XX_RBBM_ALWAYSON_COUNTER_LO and [HI].
|
||||
*/
|
||||
OUT_PKT7(ring, CP_REG_TO_MEM, 3);
|
||||
OUT_RING(ring, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO |
|
||||
(1 << 30) | (2 << 18));
|
||||
OUT_RING(ring, lower_32_bits(gpuaddr));
|
||||
OUT_RING(ring, upper_32_bits(gpuaddr));
|
||||
}
|
||||
|
||||
/* Submit the commands */
|
||||
for (i = 0; i < submit->nr_cmds; i++) {
|
||||
switch (submit->cmd[i].type) {
|
||||
case MSM_SUBMIT_CMD_IB_TARGET_BUF:
|
||||
case MSM_SUBMIT_CMD_PROFILE_BUF:
|
||||
break;
|
||||
case MSM_SUBMIT_CMD_BUF:
|
||||
OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
|
||||
|
@ -164,6 +184,19 @@ static int a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
|
|||
OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
|
||||
OUT_RING(ring, 0x01);
|
||||
|
||||
/* Record the always on counter after command execution */
|
||||
if (submit->profile_buf_iova) {
|
||||
uint64_t gpuaddr = submit->profile_buf_iova +
|
||||
offsetof(struct drm_msm_gem_submit_profile_buffer,
|
||||
ticks_retired);
|
||||
|
||||
OUT_PKT7(ring, CP_REG_TO_MEM, 3);
|
||||
OUT_RING(ring, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO |
|
||||
(1 << 30) | (2 << 18));
|
||||
OUT_RING(ring, lower_32_bits(gpuaddr));
|
||||
OUT_RING(ring, upper_32_bits(gpuaddr));
|
||||
}
|
||||
|
||||
/* Write the fence to the scratch register */
|
||||
OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
|
||||
OUT_RING(ring, submit->fence);
|
||||
|
@ -193,6 +226,35 @@ static int a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
|
|||
/* Set bit 0 to trigger an interrupt on preempt complete */
|
||||
OUT_RING(ring, 0x01);
|
||||
|
||||
if (submit->profile_buf_iova) {
|
||||
unsigned long flags;
|
||||
uint64_t ktime;
|
||||
struct drm_msm_gem_submit_profile_buffer *profile_buf =
|
||||
submit->profile_buf_vaddr;
|
||||
|
||||
/*
|
||||
* With this profiling, we are trying to create the closest
|
||||
* possible mapping between the CPU time domain(monotonic clock)
|
||||
* and the GPU time domain(ticks). In order to make this
|
||||
* happen, we need to briefly turn off interrupts to make sure
|
||||
* interrupts do not run between collecting these two samples.
|
||||
*/
|
||||
local_irq_save(flags);
|
||||
|
||||
profile_buf->ticks_queued = gpu_read64(gpu,
|
||||
REG_A5XX_RBBM_ALWAYSON_COUNTER_LO,
|
||||
REG_A5XX_RBBM_ALWAYSON_COUNTER_HI);
|
||||
|
||||
ktime = ktime_get_raw_ns();
|
||||
|
||||
local_irq_restore(flags);
|
||||
|
||||
do_div(ktime, NSEC_PER_SEC);
|
||||
|
||||
profile_buf->queue_time = ktime;
|
||||
profile_buf->submit_time = ktime;
|
||||
}
|
||||
|
||||
a5xx_flush(gpu, ring);
|
||||
|
||||
/* Check to see if we need to start preemption */
|
||||
|
|
|
@ -183,6 +183,7 @@ int adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
|
|||
case MSM_SUBMIT_CMD_IB_TARGET_BUF:
|
||||
/* ignore IB-targets */
|
||||
break;
|
||||
case MSM_SUBMIT_CMD_PROFILE_BUF:
|
||||
case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
|
||||
break;
|
||||
case MSM_SUBMIT_CMD_BUF:
|
||||
|
|
|
@ -125,6 +125,8 @@ struct msm_gem_submit {
|
|||
uint32_t fence;
|
||||
int ring;
|
||||
bool valid;
|
||||
uint64_t profile_buf_iova;
|
||||
void *profile_buf_vaddr;
|
||||
unsigned int nr_cmds;
|
||||
unsigned int nr_bos;
|
||||
struct {
|
||||
|
|
|
@ -48,6 +48,9 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev,
|
|||
submit->nr_bos = 0;
|
||||
submit->nr_cmds = 0;
|
||||
|
||||
submit->profile_buf_vaddr = NULL;
|
||||
submit->profile_buf_iova = 0;
|
||||
|
||||
INIT_LIST_HEAD(&submit->bo_list);
|
||||
ww_acquire_init(&submit->ticket, &reservation_ww_class);
|
||||
}
|
||||
|
@ -393,6 +396,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
|
|||
case MSM_SUBMIT_CMD_BUF:
|
||||
case MSM_SUBMIT_CMD_IB_TARGET_BUF:
|
||||
case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
|
||||
case MSM_SUBMIT_CMD_PROFILE_BUF:
|
||||
break;
|
||||
default:
|
||||
DRM_ERROR("invalid type: %08x\n", submit_cmd.type);
|
||||
|
@ -425,6 +429,12 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
|
|||
submit->cmd[i].iova = iova + submit_cmd.submit_offset;
|
||||
submit->cmd[i].idx = submit_cmd.submit_idx;
|
||||
|
||||
if (submit_cmd.type == MSM_SUBMIT_CMD_PROFILE_BUF) {
|
||||
submit->profile_buf_iova = submit->cmd[i].iova;
|
||||
submit->profile_buf_vaddr =
|
||||
msm_gem_vaddr_locked(&msm_obj->base);
|
||||
}
|
||||
|
||||
if (submit->valid)
|
||||
continue;
|
||||
|
||||
|
|
|
@ -152,10 +152,13 @@ struct drm_msm_gem_submit_reloc {
|
|||
* this buffer in the first-level ringbuffer
|
||||
* CTX_RESTORE_BUF - only executed if there has been a GPU context
|
||||
* switch since the last SUBMIT ioctl
|
||||
* PROFILE_BUF - A profiling buffer written to by both GPU and CPU.
|
||||
*/
|
||||
#define MSM_SUBMIT_CMD_BUF 0x0001
|
||||
#define MSM_SUBMIT_CMD_IB_TARGET_BUF 0x0002
|
||||
#define MSM_SUBMIT_CMD_CTX_RESTORE_BUF 0x0003
|
||||
#define MSM_SUBMIT_CMD_PROFILE_BUF 0x0004
|
||||
|
||||
struct drm_msm_gem_submit_cmd {
|
||||
__u32 type; /* in, one of MSM_SUBMIT_CMD_x */
|
||||
__u32 submit_idx; /* in, index of submit_bo cmdstream buffer */
|
||||
|
@ -207,6 +210,14 @@ struct drm_msm_gem_submit {
|
|||
__u64 __user cmds; /* in, ptr to array of submit_cmd's */
|
||||
};
|
||||
|
||||
/*
 * Per-submit profiling record stored in the buffer that userspace passes
 * with type MSM_SUBMIT_CMD_PROFILE_BUF. All fields are outputs.
 *
 * In the a5xx submit path of this change:
 *  - queue_time and submit_time are both set by the CPU to the same value,
 *    ktime_get_raw_ns() divided down to whole seconds;
 *  - ticks_queued is read by the CPU from the RBBM always-on counter in the
 *    same interrupts-disabled section as the clock sample;
 *  - ticks_submitted and ticks_retired are written by the GPU itself, using
 *    CP_REG_TO_MEM copies of the same always-on counter placed before and
 *    after the command buffers in the ring.
 */
struct drm_msm_gem_submit_profile_buffer {
|
||||
__s64 queue_time; /* out, Ringbuffer queue time (seconds) */
|
||||
__s64 submit_time; /* out, Ringbuffer submission time (seconds) */
|
||||
__u64 ticks_queued; /* out, GPU ticks at ringbuffer submission */
|
||||
__u64 ticks_submitted; /* out, GPU ticks before cmdstream execution */
|
||||
__u64 ticks_retired; /* out, GPU ticks after cmdstream execution */
|
||||
};
|
||||
|
||||
/* The normal way to synchronize with the GPU is just to CPU_PREP on
|
||||
* a buffer if you need to access it from the CPU (other cmdstream
|
||||
* submission from same or other contexts, PAGE_FLIP ioctl, etc, all
|
||||
|
|
Loading…
Add table
Reference in a new issue