diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile index 6086c0f9f13c..b77fdd098471 100644 --- a/drivers/gpu/drm/msm/Makefile +++ b/drivers/gpu/drm/msm/Makefile @@ -55,7 +55,8 @@ msm_drm-y += adreno/adreno_device.o \ adreno/a4xx_gpu.o \ adreno/a5xx_gpu.o \ adreno/a5xx_power.o \ - adreno/a5xx_preempt.o + adreno/a5xx_preempt.o \ + adreno/a5xx_snapshot.o endif msm_drm-$(CONFIG_DRM_MSM_MDP4) += mdp/mdp4/mdp4_crtc.o \ @@ -131,6 +132,7 @@ msm_drm-$(CONFIG_DRM_MSM) += \ msm_perf.o \ msm_rd.o \ msm_ringbuffer.o \ - msm_prop.o + msm_prop.o \ + msm_snapshot.o obj-$(CONFIG_DRM_MSM) += msm_drm.o diff --git a/drivers/gpu/drm/msm/adreno/a5xx.xml.h b/drivers/gpu/drm/msm/adreno/a5xx.xml.h index bfee2fd83462..56dad2217289 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx.xml.h +++ b/drivers/gpu/drm/msm/adreno/a5xx.xml.h @@ -155,6 +155,114 @@ enum a5xx_depth_format { DEPTH5_32 = 4, }; +enum a5xx_debugbus { + A5XX_RBBM_DBGBUS_CP = 1, + A5XX_RBBM_DBGBUS_RBBM = 2, + A5XX_RBBM_DBGBUS_VBIF = 3, + A5XX_RBBM_DBGBUS_HLSQ = 4, + A5XX_RBBM_DBGBUS_UCHE = 5, + A5XX_RBBM_DBGBUS_DPM = 6, + A5XX_RBBM_DBGBUS_TESS = 7, + A5XX_RBBM_DBGBUS_PC = 8, + A5XX_RBBM_DBGBUS_VFDP = 9, + A5XX_RBBM_DBGBUS_VPC = 10, + A5XX_RBBM_DBGBUS_TSE = 11, + A5XX_RBBM_DBGBUS_RAS = 12, + A5XX_RBBM_DBGBUS_VSC = 13, + A5XX_RBBM_DBGBUS_COM = 14, + A5XX_RBBM_DBGBUS_DCOM = 15, + A5XX_RBBM_DBGBUS_LRZ = 16, + A5XX_RBBM_DBGBUS_A2D_DSP = 17, + A5XX_RBBM_DBGBUS_CCUFCHE = 18, + A5XX_RBBM_DBGBUS_GPMU = 19, + A5XX_RBBM_DBGBUS_RBP = 20, + A5XX_RBBM_DBGBUS_HM = 21, + A5XX_RBBM_DBGBUS_RBBM_CFG = 22, + A5XX_RBBM_DBGBUS_VBIF_CX = 23, + A5XX_RBBM_DBGBUS_GPC = 29, + A5XX_RBBM_DBGBUS_LARC = 30, + A5XX_RBBM_DBGBUS_HLSQ_SPTP = 31, + A5XX_RBBM_DBGBUS_RB_0 = 32, + A5XX_RBBM_DBGBUS_RB_1 = 33, + A5XX_RBBM_DBGBUS_RB_2 = 34, + A5XX_RBBM_DBGBUS_RB_3 = 35, + A5XX_RBBM_DBGBUS_CCU_0 = 40, + A5XX_RBBM_DBGBUS_CCU_1 = 41, + A5XX_RBBM_DBGBUS_CCU_2 = 42, + A5XX_RBBM_DBGBUS_CCU_3 = 43, + A5XX_RBBM_DBGBUS_A2D_RAS_0 = 48, + A5XX_RBBM_DBGBUS_A2D_RAS_1 = 49, + A5XX_RBBM_DBGBUS_A2D_RAS_2 = 50, + A5XX_RBBM_DBGBUS_A2D_RAS_3 = 51, + A5XX_RBBM_DBGBUS_VFD_0 = 56, + A5XX_RBBM_DBGBUS_VFD_1 = 57, + A5XX_RBBM_DBGBUS_VFD_2 = 58, + A5XX_RBBM_DBGBUS_VFD_3 = 59, + A5XX_RBBM_DBGBUS_SP_0 = 64, + A5XX_RBBM_DBGBUS_SP_1 = 65, + A5XX_RBBM_DBGBUS_SP_2 = 66, + A5XX_RBBM_DBGBUS_SP_3 = 67, + A5XX_RBBM_DBGBUS_TPL1_0 = 72, + A5XX_RBBM_DBGBUS_TPL1_1 = 73, + A5XX_RBBM_DBGBUS_TPL1_2 = 74, + A5XX_RBBM_DBGBUS_TPL1_3 = 75, +}; + +enum a5xx_shader_blocks { + A5XX_TP_W_MEMOBJ = 1, + A5XX_TP_W_SAMPLER = 2, + A5XX_TP_W_MIPMAP_BASE = 3, + A5XX_TP_W_MEMOBJ_TAG = 4, + A5XX_TP_W_SAMPLER_TAG = 5, + A5XX_TP_S_3D_MEMOBJ = 6, + A5XX_TP_S_3D_SAMPLER = 7, + A5XX_TP_S_3D_MEMOBJ_TAG = 8, + A5XX_TP_S_3D_SAMPLER_TAG = 9, + A5XX_TP_S_CS_MEMOBJ = 10, + A5XX_TP_S_CS_SAMPLER = 11, + A5XX_TP_S_CS_MEMOBJ_TAG = 12, + A5XX_TP_S_CS_SAMPLER_TAG = 13, + A5XX_SP_W_INSTR = 14, + A5XX_SP_W_CONST = 15, + A5XX_SP_W_UAV_SIZE = 16, + A5XX_SP_W_CB_SIZE = 17, + A5XX_SP_W_UAV_BASE = 18, + A5XX_SP_W_CB_BASE = 19, + A5XX_SP_W_INST_TAG = 20, + A5XX_SP_W_STATE = 21, + A5XX_SP_S_3D_INSTR = 22, + A5XX_SP_S_3D_CONST = 23, + A5XX_SP_S_3D_CB_BASE = 24, + A5XX_SP_S_3D_CB_SIZE = 25, + A5XX_SP_S_3D_UAV_BASE = 26, + A5XX_SP_S_3D_UAV_SIZE = 27, + A5XX_SP_S_CS_INSTR = 28, + A5XX_SP_S_CS_CONST = 29, + A5XX_SP_S_CS_CB_BASE = 30, + A5XX_SP_S_CS_CB_SIZE = 31, + A5XX_SP_S_CS_UAV_BASE = 32, + A5XX_SP_S_CS_UAV_SIZE = 33, + A5XX_SP_S_3D_INSTR_DIRTY = 34, + A5XX_SP_S_3D_CONST_DIRTY = 35, + A5XX_SP_S_3D_CB_BASE_DIRTY = 36, + A5XX_SP_S_3D_CB_SIZE_DIRTY = 37, + A5XX_SP_S_3D_UAV_BASE_DIRTY = 38, + A5XX_SP_S_3D_UAV_SIZE_DIRTY = 39, + A5XX_SP_S_CS_INSTR_DIRTY = 40, + A5XX_SP_S_CS_CONST_DIRTY = 41, + A5XX_SP_S_CS_CB_BASE_DIRTY = 42, + A5XX_SP_S_CS_CB_SIZE_DIRTY = 43, + A5XX_SP_S_CS_UAV_BASE_DIRTY = 44, + A5XX_SP_S_CS_UAV_SIZE_DIRTY = 45, + A5XX_HLSQ_ICB = 46, + A5XX_HLSQ_ICB_DIRTY = 47, + A5XX_HLSQ_ICB_CB_BASE_DIRTY = 48, + A5XX_SP_POWER_RESTORE_RAM = 64, + A5XX_SP_POWER_RESTORE_RAM_TAG = 65, + A5XX_TP_POWER_RESTORE_RAM = 66, + A5XX_TP_POWER_RESTORE_RAM_TAG = 67, +}; + enum a5xx_tex_filter { A5XX_TEX_NEAREST = 0, A5XX_TEX_LINEAR = 1, @@ -396,6 +504,18 @@ static inline uint32_t A5XX_CP_PROTECT_REG_MASK_LEN(uint32_t val) #define REG_A5XX_CP_POWERCTR_CP_SEL_3 0x00000bbd #define REG_A5XX_RBBM_CFG_DBGBUS_SEL_A 0x00000004 +#define A5XX_RBBM_CFG_DBGBUS_SEL_A_PING_INDEX__MASK 0x000000ff +#define A5XX_RBBM_CFG_DBGBUS_SEL_A_PING_INDEX__SHIFT 0 +static inline uint32_t A5XX_RBBM_CFG_DBGBUS_SEL_A_PING_INDEX(uint32_t val) +{ + return ((val) << A5XX_RBBM_CFG_DBGBUS_SEL_A_PING_INDEX__SHIFT) & A5XX_RBBM_CFG_DBGBUS_SEL_A_PING_INDEX__MASK; +} +#define A5XX_RBBM_CFG_DBGBUS_SEL_A_PING_BLK_SEL__MASK 0x0000ff00 +#define A5XX_RBBM_CFG_DBGBUS_SEL_A_PING_BLK_SEL__SHIFT 8 +static inline uint32_t A5XX_RBBM_CFG_DBGBUS_SEL_A_PING_BLK_SEL(uint32_t val) +{ + return ((val) << A5XX_RBBM_CFG_DBGBUS_SEL_A_PING_BLK_SEL__SHIFT) & A5XX_RBBM_CFG_DBGBUS_SEL_A_PING_BLK_SEL__MASK; +} #define REG_A5XX_RBBM_CFG_DBGBUS_SEL_B 0x00000005 @@ -406,6 +526,12 @@ static inline uint32_t A5XX_CP_PROTECT_REG_MASK_LEN(uint32_t val) #define REG_A5XX_RBBM_CFG_DBGBUS_CNTLT 0x00000008 #define REG_A5XX_RBBM_CFG_DBGBUS_CNTLM 0x00000009 +#define A5XX_RBBM_CFG_DBGBUS_CNTLM_ENABLE__MASK 0x0f000000 +#define A5XX_RBBM_CFG_DBGBUS_CNTLM_ENABLE__SHIFT 24 +static inline uint32_t A5XX_RBBM_CFG_DBGBUS_CNTLM_ENABLE(uint32_t val) +{ + return ((val) << A5XX_RBBM_CFG_DBGBUS_CNTLM_ENABLE__SHIFT) & A5XX_RBBM_CFG_DBGBUS_CNTLM_ENABLE__MASK; +} #define REG_A5XX_RBBM_CFG_DEBBUS_CTLTM_ENABLE_SHIFT 0x00000018 @@ -1413,6 +1539,12 @@ static inline uint32_t A5XX_VSC_BIN_SIZE_Y(uint32_t val) #define REG_A5XX_HLSQ_SPTP_RDSEL 0x00000f08 #define REG_A5XX_HLSQ_DBG_READ_SEL 0x0000bc00 +#define A5XX_HLSQ_DBG_READ_SEL_STATETYPE__MASK 0x0000ff00 +#define A5XX_HLSQ_DBG_READ_SEL_STATETYPE__SHIFT 8 +static inline uint32_t A5XX_HLSQ_DBG_READ_SEL_STATETYPE(uint32_t val) +{ + return ((val) << A5XX_HLSQ_DBG_READ_SEL_STATETYPE__SHIFT) & A5XX_HLSQ_DBG_READ_SEL_STATETYPE__MASK; +} #define REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE 0x0000a000 @@ -1583,6 +1715,8 @@ static inline uint32_t A5XX_VSC_BIN_SIZE_Y(uint32_t val) #define REG_A5XX_VBIF_VERSION 0x00003000 #define REG_A5XX_VBIF_CLKON 0x00003001 +#define A5XX_VBIF_CLKON_FORCE_ON 0x00000001 +#define A5XX_VBIF_CLKON_FORCE_ON_TESTBUS 0x00000002 #define REG_A5XX_VBIF_ABIT_SORT 0x00003028 @@ -1601,14 +1735,27 @@ static inline uint32_t A5XX_VSC_BIN_SIZE_Y(uint32_t val) #define REG_A5XX_VBIF_XIN_HALT_CTRL1 0x00003081 #define REG_A5XX_VBIF_TEST_BUS_OUT_CTRL 0x00003084 +#define A5XX_VBIF_TEST_BUS_OUT_CTRL_TEST_BUS_CTRL_EN 0x00000001 #define REG_A5XX_VBIF_TEST_BUS1_CTRL0 0x00003085 #define REG_A5XX_VBIF_TEST_BUS1_CTRL1 0x00003086 +#define A5XX_VBIF_TEST_BUS1_CTRL1_TEST_BUS1_DATA_SEL__MASK 0x0000000f +#define A5XX_VBIF_TEST_BUS1_CTRL1_TEST_BUS1_DATA_SEL__SHIFT 0 +static inline uint32_t A5XX_VBIF_TEST_BUS1_CTRL1_TEST_BUS1_DATA_SEL(uint32_t val) +{ + return ((val) << A5XX_VBIF_TEST_BUS1_CTRL1_TEST_BUS1_DATA_SEL__SHIFT) & A5XX_VBIF_TEST_BUS1_CTRL1_TEST_BUS1_DATA_SEL__MASK; +} #define REG_A5XX_VBIF_TEST_BUS2_CTRL0 0x00003087 #define REG_A5XX_VBIF_TEST_BUS2_CTRL1 0x00003088 +#define A5XX_VBIF_TEST_BUS2_CTRL1_TEST_BUS2_DATA_SEL__MASK 0x0000001f +#define A5XX_VBIF_TEST_BUS2_CTRL1_TEST_BUS2_DATA_SEL__SHIFT 0 +static inline uint32_t A5XX_VBIF_TEST_BUS2_CTRL1_TEST_BUS2_DATA_SEL(uint32_t val) +{ + return ((val) << A5XX_VBIF_TEST_BUS2_CTRL1_TEST_BUS2_DATA_SEL__SHIFT) & A5XX_VBIF_TEST_BUS2_CTRL1_TEST_BUS2_DATA_SEL__MASK; +} #define REG_A5XX_VBIF_TEST_BUS_OUT 0x0000308c diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index 9933e932679a..a49a7b247547 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -15,9 +15,6 @@ #include "msm_iommu.h" #include "a5xx_gpu.h" -extern bool hang_debug; -static void a5xx_dump(struct msm_gpu *gpu); - static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); @@ -800,8 +797,7 @@ static void a5xx_recover(struct msm_gpu *gpu) { adreno_dump_info(gpu); - if (hang_debug) - a5xx_dump(gpu); + msm_gpu_snapshot(gpu, gpu->snapshot); /* Reset the GPU so it can work again */ gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1); @@ -1112,13 +1108,6 @@ static const u32 a5xx_registers[] = { ~0 }; -static void a5xx_dump(struct msm_gpu *gpu) -{ - dev_info(gpu->dev->dev, "status: %08x\n", - gpu_read(gpu, REG_A5XX_RBBM_STATUS)); - adreno_dump(gpu); -} - static int a5xx_pm_resume(struct msm_gpu *gpu) { int ret; @@ -1225,6 +1214,7 @@ static const struct adreno_gpu_funcs funcs = { #ifdef CONFIG_DEBUG_FS .show = a5xx_show, #endif + .snapshot = a5xx_snapshot, }, .get_timestamp = a5xx_get_timestamp, }; diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h index 366f37545589..3de14fe42a1b 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h @@ -176,6 +176,8 @@ void a5xx_preempt_trigger(struct msm_gpu *gpu); void a5xx_preempt_irq(struct msm_gpu *gpu); void a5xx_preempt_fini(struct msm_gpu *gpu); +int a5xx_snapshot(struct msm_gpu *gpu, struct msm_snapshot *snapshot); + /* Return true if we are in a preempt state */ static inline bool a5xx_in_preempt(struct a5xx_gpu *a5xx_gpu) { diff --git a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c index f8e6bc4dc432..648494c75abc 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c @@ -46,10 +46,6 @@ static void *alloc_kernel_bo(struct drm_device *drm, struct msm_gpu *gpu, if (iova) *iova = _iova; - pr_err("[%ps] buffer size %x, iova [%llx : %llx]\n", - __builtin_return_address(0), size, - _iova, _iova+size-1); - return ptr; out: drm_gem_object_unreference_unlocked(_bo); diff --git a/drivers/gpu/drm/msm/adreno/a5xx_snapshot.c b/drivers/gpu/drm/msm/adreno/a5xx_snapshot.c new file mode 100644 index 000000000000..5a2edb0ea518 --- /dev/null +++ b/drivers/gpu/drm/msm/adreno/a5xx_snapshot.c @@ -0,0 +1,796 @@ +/* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include "msm_gpu.h" +#include "msm_gem.h" +#include "a5xx_gpu.h" +#include "msm_snapshot_api.h" + +#define A5XX_NR_SHADER_BANKS 4 + +/* + * These are a list of the registers that need to be read through the HLSQ + * aperture through the crashdumper. These are not nominally accessible from + * the CPU on a secure platform. + */ +static const struct { + u32 type; + u32 regoffset; + u32 count; +} a5xx_hlsq_aperture_regs[] = { + { 0x35, 0xE00, 0x32 }, /* HSLQ non-context */ + { 0x31, 0x2080, 0x1 }, /* HLSQ 2D context 0 */ + { 0x33, 0x2480, 0x1 }, /* HLSQ 2D context 1 */ + { 0x32, 0xE780, 0x62 }, /* HLSQ 3D context 0 */ + { 0x34, 0xEF80, 0x62 }, /* HLSQ 3D context 1 */ + { 0x3f, 0x0EC0, 0x40 }, /* SP non-context */ + { 0x3d, 0x2040, 0x1 }, /* SP 2D context 0 */ + { 0x3b, 0x2440, 0x1 }, /* SP 2D context 1 */ + { 0x3e, 0xE580, 0x180 }, /* SP 3D context 0 */ + { 0x3c, 0xED80, 0x180 }, /* SP 3D context 1 */ + { 0x3a, 0x0F00, 0x1c }, /* TP non-context */ + { 0x38, 0x2000, 0xa }, /* TP 2D context 0 */ + { 0x36, 0x2400, 0xa }, /* TP 2D context 1 */ + { 0x39, 0xE700, 0x80 }, /* TP 3D context 0 */ + { 0x37, 0xEF00, 0x80 }, /* TP 3D context 1 */ +}; + +/* + * The debugbus registers contain device state that presumably makes + * sense to the hardware designers. 'count' is the number of indexes to read, + * each index value is 64 bits + */ +static const struct { + enum a5xx_debugbus id; + u32 count; +} a5xx_debugbus_blocks[] = { + { A5XX_RBBM_DBGBUS_CP, 0x100, }, + { A5XX_RBBM_DBGBUS_RBBM, 0x100, }, + { A5XX_RBBM_DBGBUS_HLSQ, 0x100, }, + { A5XX_RBBM_DBGBUS_UCHE, 0x100, }, + { A5XX_RBBM_DBGBUS_DPM, 0x100, }, + { A5XX_RBBM_DBGBUS_TESS, 0x100, }, + { A5XX_RBBM_DBGBUS_PC, 0x100, }, + { A5XX_RBBM_DBGBUS_VFDP, 0x100, }, + { A5XX_RBBM_DBGBUS_VPC, 0x100, }, + { A5XX_RBBM_DBGBUS_TSE, 0x100, }, + { A5XX_RBBM_DBGBUS_RAS, 0x100, }, + { A5XX_RBBM_DBGBUS_VSC, 0x100, }, + { A5XX_RBBM_DBGBUS_COM, 0x100, }, + { A5XX_RBBM_DBGBUS_DCOM, 0x100, }, + { A5XX_RBBM_DBGBUS_LRZ, 0x100, }, + { A5XX_RBBM_DBGBUS_A2D_DSP, 0x100, }, + { A5XX_RBBM_DBGBUS_CCUFCHE, 0x100, }, + { A5XX_RBBM_DBGBUS_GPMU, 0x100, }, + { A5XX_RBBM_DBGBUS_RBP, 0x100, }, + { A5XX_RBBM_DBGBUS_HM, 0x100, }, + { A5XX_RBBM_DBGBUS_RBBM_CFG, 0x100, }, + { A5XX_RBBM_DBGBUS_VBIF_CX, 0x100, }, + { A5XX_RBBM_DBGBUS_GPC, 0x100, }, + { A5XX_RBBM_DBGBUS_LARC, 0x100, }, + { A5XX_RBBM_DBGBUS_HLSQ_SPTP, 0x100, }, + { A5XX_RBBM_DBGBUS_RB_0, 0x100, }, + { A5XX_RBBM_DBGBUS_RB_1, 0x100, }, + { A5XX_RBBM_DBGBUS_RB_2, 0x100, }, + { A5XX_RBBM_DBGBUS_RB_3, 0x100, }, + { A5XX_RBBM_DBGBUS_CCU_0, 0x100, }, + { A5XX_RBBM_DBGBUS_CCU_1, 0x100, }, + { A5XX_RBBM_DBGBUS_CCU_2, 0x100, }, + { A5XX_RBBM_DBGBUS_CCU_3, 0x100, }, + { A5XX_RBBM_DBGBUS_A2D_RAS_0, 0x100, }, + { A5XX_RBBM_DBGBUS_A2D_RAS_1, 0x100, }, + { A5XX_RBBM_DBGBUS_A2D_RAS_2, 0x100, }, + { A5XX_RBBM_DBGBUS_A2D_RAS_3, 0x100, }, + { A5XX_RBBM_DBGBUS_VFD_0, 0x100, }, + { A5XX_RBBM_DBGBUS_VFD_1, 0x100, }, + { A5XX_RBBM_DBGBUS_VFD_2, 0x100, }, + { A5XX_RBBM_DBGBUS_VFD_3, 0x100, }, + { A5XX_RBBM_DBGBUS_SP_0, 0x100, }, + { A5XX_RBBM_DBGBUS_SP_1, 0x100, }, + { A5XX_RBBM_DBGBUS_SP_2, 0x100, }, + { A5XX_RBBM_DBGBUS_SP_3, 0x100, }, + { A5XX_RBBM_DBGBUS_TPL1_0, 0x100, }, + { A5XX_RBBM_DBGBUS_TPL1_1, 0x100, }, + { A5XX_RBBM_DBGBUS_TPL1_2, 0x100, }, + { A5XX_RBBM_DBGBUS_TPL1_3, 0x100, }, +}; + +/* + * The shader blocks are read from the HLSQ aperture - each one has its own + * identifier for the aperture read + */ +static const struct { + enum a5xx_shader_blocks id; + u32 size; +} a5xx_shader_blocks[] = { + {A5XX_TP_W_MEMOBJ, 0x200}, + {A5XX_TP_W_MIPMAP_BASE, 0x3C0}, + {A5XX_TP_W_SAMPLER_TAG, 0x40}, + {A5XX_TP_S_3D_SAMPLER, 0x80}, + {A5XX_TP_S_3D_SAMPLER_TAG, 0x20}, + {A5XX_TP_S_CS_SAMPLER, 0x40}, + {A5XX_TP_S_CS_SAMPLER_TAG, 0x10}, + {A5XX_SP_W_CONST, 0x800}, + {A5XX_SP_W_CB_SIZE, 0x30}, + {A5XX_SP_W_CB_BASE, 0xF0}, + {A5XX_SP_W_STATE, 0x1}, + {A5XX_SP_S_3D_CONST, 0x800}, + {A5XX_SP_S_3D_CB_SIZE, 0x28}, + {A5XX_SP_S_3D_UAV_SIZE, 0x80}, + {A5XX_SP_S_CS_CONST, 0x400}, + {A5XX_SP_S_CS_CB_SIZE, 0x8}, + {A5XX_SP_S_CS_UAV_SIZE, 0x80}, + {A5XX_SP_S_3D_CONST_DIRTY, 0x12}, + {A5XX_SP_S_3D_CB_SIZE_DIRTY, 0x1}, + {A5XX_SP_S_3D_UAV_SIZE_DIRTY, 0x2}, + {A5XX_SP_S_CS_CONST_DIRTY, 0xA}, + {A5XX_SP_S_CS_CB_SIZE_DIRTY, 0x1}, + {A5XX_SP_S_CS_UAV_SIZE_DIRTY, 0x2}, + {A5XX_HLSQ_ICB_DIRTY, 0xB}, + {A5XX_SP_POWER_RESTORE_RAM_TAG, 0xA}, + {A5XX_TP_POWER_RESTORE_RAM_TAG, 0xA}, + {A5XX_TP_W_SAMPLER, 0x80}, + {A5XX_TP_W_MEMOBJ_TAG, 0x40}, + {A5XX_TP_S_3D_MEMOBJ, 0x200}, + {A5XX_TP_S_3D_MEMOBJ_TAG, 0x20}, + {A5XX_TP_S_CS_MEMOBJ, 0x100}, + {A5XX_TP_S_CS_MEMOBJ_TAG, 0x10}, + {A5XX_SP_W_INSTR, 0x800}, + {A5XX_SP_W_UAV_SIZE, 0x80}, + {A5XX_SP_W_UAV_BASE, 0x80}, + {A5XX_SP_W_INST_TAG, 0x40}, + {A5XX_SP_S_3D_INSTR, 0x800}, + {A5XX_SP_S_3D_CB_BASE, 0xC8}, + {A5XX_SP_S_3D_UAV_BASE, 0x80}, + {A5XX_SP_S_CS_INSTR, 0x400}, + {A5XX_SP_S_CS_CB_BASE, 0x28}, + {A5XX_SP_S_CS_UAV_BASE, 0x80}, + {A5XX_SP_S_3D_INSTR_DIRTY, 0x1}, + {A5XX_SP_S_3D_CB_BASE_DIRTY, 0x5}, + {A5XX_SP_S_3D_UAV_BASE_DIRTY, 0x2}, + {A5XX_SP_S_CS_INSTR_DIRTY, 0x1}, + {A5XX_SP_S_CS_CB_BASE_DIRTY, 0x1}, + {A5XX_SP_S_CS_UAV_BASE_DIRTY, 0x2}, + {A5XX_HLSQ_ICB, 0x200}, + {A5XX_HLSQ_ICB_CB_BASE_DIRTY, 0x4}, + {A5XX_SP_POWER_RESTORE_RAM, 0x140}, + {A5XX_TP_POWER_RESTORE_RAM, 0x40}, +}; + +/* + * The A5XX architecture has a a built in engine to asynchronously dump + * registers from the GPU. It is used to accelerate the copy of hundreds + * (thousands) of registers and as a safe way to access registers that might + * have secure data in them (if the GPU is in secure, the crashdumper returns + * bogus values for those registers). On a fully secured device the CPU will be + * blocked from accessing those registers directly and so the crashdump is the + * only way that we can access context registers and the shader banks for debug + * purposes. + * + * The downside of the crashdump is that it requires access to GPU accessible + * memory (so the VBIF and the bus and the SMMU need to be up and working) and + * you need enough memory to write the script for the crashdumper and to store + * the data that you are dumping so there is a balancing act between the work to + * set up a crash dumper and the value we get out of it. + */ + +/* + * The crashdump uses a pseudo-script format to read and write registers. Each + * operation is two 64 bit values. + * + * READ: + * [qword 0] [64:00] - The absolute IOVA address target for the register value + * [qword 1] [63:44] - the dword address of the register offset to read + * [15:00] - Number of dwords to read at once + * + * WRITE: + * [qword 0] [31:0] 32 bit value to write to the register + * [qword 1] [63:44] - the dword address of the register offset to write + * [21:21] - set 1 to write + * [15:00] - Number of dwords to write (usually 1) + * + * At the bottom of the script, write quadword zeros to trigger the end. + */ +struct crashdump { + struct drm_gem_object *bo; + void *ptr; + u64 iova; + u32 index; +}; + +#define CRASHDUMP_BO_SIZE (SZ_1M) +#define CRASHDUMP_SCRIPT_SIZE (256 * SZ_1K) +#define CRASHDUMP_DATA_SIZE (CRASHDUMP_BO_SIZE - CRASHDUMP_SCRIPT_SIZE) + +static int crashdump_init(struct msm_gpu *gpu, struct crashdump *crashdump) +{ + struct drm_device *drm = gpu->dev; + int ret = -ENOMEM; + + crashdump->bo = msm_gem_new(drm, CRASHDUMP_BO_SIZE, MSM_BO_UNCACHED); + if (IS_ERR(crashdump->bo)) { + ret = PTR_ERR(crashdump->bo); + crashdump->bo = NULL; + return ret; + } + + crashdump->ptr = msm_gem_vaddr_locked(crashdump->bo); + if (!crashdump->ptr) + goto out; + + ret = msm_gem_get_iova_locked(crashdump->bo, gpu->aspace, + &crashdump->iova); + +out: + if (ret) { + drm_gem_object_unreference(crashdump->bo); + crashdump->bo = NULL; + } + + return ret; +} + +static int crashdump_run(struct msm_gpu *gpu, struct crashdump *crashdump) +{ + if (!crashdump->ptr || !crashdump->index) + return -EINVAL; + + gpu_write(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO, + lower_32_bits(crashdump->iova)); + gpu_write(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_HI, + upper_32_bits(crashdump->iova)); + + gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1); + + return spin_until(gpu_read(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL) & 0x04); +} + +static void crashdump_destroy(struct msm_gpu *gpu, struct crashdump *crashdump) +{ + if (!crashdump->bo) + return; + + if (crashdump->iova) + msm_gem_put_iova(crashdump->bo, gpu->aspace); + + drm_gem_object_unreference(crashdump->bo); + + memset(crashdump, 0, sizeof(*crashdump)); +} + +static inline void CRASHDUMP_SCRIPT_WRITE(struct crashdump *crashdump, + u32 reg, u32 val) +{ + u64 *ptr = crashdump->ptr + crashdump->index; + + if (WARN_ON(crashdump->index + (2 * sizeof(u64)) + >= CRASHDUMP_SCRIPT_SIZE)) + return; + + /* This is the value to write */ + ptr[0] = (u64) val; + + /* + * This triggers a write to the specified register. 1 is the size of + * the write in dwords + */ + ptr[1] = (((u64) reg) << 44) | (1 << 21) | 1; + + crashdump->index += 2 * sizeof(u64); +} + +static inline void CRASHDUMP_SCRIPT_READ(struct crashdump *crashdump, + u32 reg, u32 count, u32 offset) +{ + u64 *ptr = crashdump->ptr + crashdump->index; + + if (WARN_ON(crashdump->index + (2 * sizeof(u64)) + >= CRASHDUMP_SCRIPT_SIZE)) + return; + + if (WARN_ON(offset + (count * sizeof(u32)) >= CRASHDUMP_DATA_SIZE)) + return; + + ptr[0] = (u64) crashdump->iova + CRASHDUMP_SCRIPT_SIZE + offset; + ptr[1] = (((u64) reg) << 44) | count; + + crashdump->index += 2 * sizeof(u64); +} + +static inline void *CRASHDUMP_DATA_PTR(struct crashdump *crashdump, u32 offset) +{ + if (WARN_ON(!crashdump->ptr || offset >= CRASHDUMP_DATA_SIZE)) + return NULL; + + return crashdump->ptr + CRASHDUMP_SCRIPT_SIZE + offset; +} + +static inline u32 CRASHDUMP_DATA_READ(struct crashdump *crashdump, u32 offset) +{ + return *((u32 *) CRASHDUMP_DATA_PTR(crashdump, offset)); +} + +static inline void CRASHDUMP_RESET(struct crashdump *crashdump) +{ + crashdump->index = 0; +} + +static inline void CRASHDUMP_END(struct crashdump *crashdump) +{ + u64 *ptr = crashdump->ptr + crashdump->index; + + if (WARN_ON((crashdump->index + (2 * sizeof(u64))) + >= CRASHDUMP_SCRIPT_SIZE)) + return; + + ptr[0] = 0; + ptr[1] = 0; + + crashdump->index += 2 * sizeof(u64); +} + +static u32 _crashdump_read_hlsq_aperture(struct crashdump *crashdump, + u32 offset, u32 statetype, u32 bank, + u32 count) +{ + CRASHDUMP_SCRIPT_WRITE(crashdump, REG_A5XX_HLSQ_DBG_READ_SEL, + A5XX_HLSQ_DBG_READ_SEL_STATETYPE(statetype) | bank); + + CRASHDUMP_SCRIPT_READ(crashdump, REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE, + count, offset); + + return count * sizeof(u32); +} + +static u32 _copy_registers(struct msm_snapshot *snapshot, + struct crashdump *crashdump, u32 reg, u32 count, + u32 offset) +{ + int i; + u32 *ptr = (u32 *) (crashdump->ptr + CRASHDUMP_SCRIPT_SIZE + offset); + /* + * Write the offset of the first register of the group and the number of + * registers in the group + */ + SNAPSHOT_WRITE_U32(snapshot, ((count << 16) | reg)); + + /* Followed by each register value in the group */ + for (i = 0; i < count; i++) + SNAPSHOT_WRITE_U32(snapshot, ptr[i]); + + return count * sizeof(u32); +} + +/* + * Return the number of registers in each register group from the + * adreno_gpu->rgisters + */ +static inline u32 REG_COUNT(const unsigned int *ptr) +{ + return (ptr[1] - ptr[0]) + 1; +} + +/* + * Capture what registers we can from the CPU in case the crashdumper is + * unavailable or broken. This will omit the SP,TP and HLSQ registers, but + * you'll get everything else and that ain't bad + */ +static void a5xx_snapshot_registers_cpu(struct msm_gpu *gpu, + struct msm_snapshot *snapshot) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct msm_snapshot_regs header; + u32 regcount = 0, groups = 0; + int i; + + /* + * Before we write the section we need to figure out how big our data + * section will be + */ + for (i = 0; adreno_gpu->registers[i] != ~0; i += 2) { + regcount += REG_COUNT(&(adreno_gpu->registers[i])); + groups++; + } + + header.count = groups; + + /* + * We need one dword for each group and then one dword for each register + * value in that group + */ + if (!SNAPSHOT_HEADER(snapshot, header, SNAPSHOT_SECTION_REGS_V2, + regcount + groups)) + return; + + for (i = 0; adreno_gpu->registers[i] != ~0; i += 2) { + u32 count = REG_COUNT(&(adreno_gpu->registers[i])); + u32 reg = adreno_gpu->registers[i]; + int j; + + /* Write the offset and count for the group */ + SNAPSHOT_WRITE_U32(snapshot, (count << 16) | reg); + + /* Write each value in the group */ + for (j = 0; j < count; j++) + SNAPSHOT_WRITE_U32(snapshot, gpu_read(gpu, reg++)); + } +} + +static void a5xx_snapshot_registers(struct msm_gpu *gpu, + struct msm_snapshot *snapshot) +{ + struct msm_snapshot_regs header; + struct crashdump *crashdump = snapshot->priv; + u32 offset = 0, regcount = 0, groups = 0; + int i; + + /* + * First snapshot all the registers that we can from the CPU. Do this + * because the crashdumper has a tendency to "taint" the value of some + * of the registers (because the GPU implements the crashdumper) so we + * only want to use the crash dump facility if we have to + */ + a5xx_snapshot_registers_cpu(gpu, snapshot); + + if (!crashdump) + return; + + CRASHDUMP_RESET(crashdump); + + /* HLSQ and context registers behind the aperture */ + for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) { + u32 count = a5xx_hlsq_aperture_regs[i].count; + + offset += _crashdump_read_hlsq_aperture(crashdump, offset, + a5xx_hlsq_aperture_regs[i].type, 0, count); + regcount += count; + + groups++; + } + + CRASHDUMP_END(crashdump); + + if (crashdump_run(gpu, crashdump)) + return; + + header.count = groups; + + /* + * The size of the data will be one dword for each "group" of registers, + * and then one dword for each of the registers in that group + */ + if (!SNAPSHOT_HEADER(snapshot, header, SNAPSHOT_SECTION_REGS_V2, + groups + regcount)) + return; + + /* Copy the registers to the snapshot */ + for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) + offset += _copy_registers(snapshot, crashdump, + a5xx_hlsq_aperture_regs[i].regoffset, + a5xx_hlsq_aperture_regs[i].count, offset); +} + +static void _a5xx_snapshot_shader_bank(struct msm_snapshot *snapshot, + struct crashdump *crashdump, u32 block, u32 bank, + u32 size, u32 offset) +{ + void *src; + + struct msm_snapshot_shader header = { + .type = block, + .index = bank, + .size = size, + }; + + if (!SNAPSHOT_HEADER(snapshot, header, SNAPSHOT_SECTION_SHADER, size)) + return; + + src = CRASHDUMP_DATA_PTR(crashdump, offset); + + if (src) + SNAPSHOT_MEMCPY(snapshot, src, size * sizeof(u32)); +} + +static void a5xx_snapshot_shader_memory(struct msm_gpu *gpu, + struct msm_snapshot *snapshot) +{ + struct crashdump *crashdump = snapshot->priv; + u32 offset = 0; + int i; + + /* We can only get shader memory through the crashdump */ + if (!crashdump) + return; + + CRASHDUMP_RESET(crashdump); + + /* For each shader block */ + for (i = 0; i < ARRAY_SIZE(a5xx_shader_blocks); i++) { + int j; + + /* For each block, dump 4 banks */ + for (j = 0; j < A5XX_NR_SHADER_BANKS; j++) + offset += _crashdump_read_hlsq_aperture(crashdump, + offset, a5xx_shader_blocks[i].id, j, + a5xx_shader_blocks[i].size); + } + + CRASHDUMP_END(crashdump); + + /* If the crashdump fails we can't get shader memory any other way */ + if (crashdump_run(gpu, crashdump)) + return; + + /* Each bank of each shader gets its own snapshot section */ + for (offset = 0, i = 0; i < ARRAY_SIZE(a5xx_shader_blocks); i++) { + int j; + + for (j = 0; j < A5XX_NR_SHADER_BANKS; j++) { + _a5xx_snapshot_shader_bank(snapshot, crashdump, + a5xx_shader_blocks[i].id, j, + a5xx_shader_blocks[i].size, offset); + offset += a5xx_shader_blocks[i].size * sizeof(u32); + } + } +} + +#define A5XX_NUM_AXI_ARB_BLOCKS 2 +#define A5XX_NUM_XIN_BLOCKS 4 +#define VBIF_DATA_SIZE ((16 * A5XX_NUM_AXI_ARB_BLOCKS) + \ + (18 * A5XX_NUM_XIN_BLOCKS) + (12 * A5XX_NUM_XIN_BLOCKS)) + +static void a5xx_snapshot_debugbus_vbif(struct msm_gpu *gpu, + struct msm_snapshot *snapshot) +{ + int i; + struct msm_snapshot_debugbus header = { + .id = A5XX_RBBM_DBGBUS_VBIF, + .count = VBIF_DATA_SIZE, + }; + + if (!SNAPSHOT_HEADER(snapshot, header, SNAPSHOT_SECTION_DEBUGBUS, + VBIF_DATA_SIZE)) + return; + + gpu_rmw(gpu, REG_A5XX_VBIF_CLKON, A5XX_VBIF_CLKON_FORCE_ON_TESTBUS, + A5XX_VBIF_CLKON_FORCE_ON_TESTBUS); + + gpu_write(gpu, REG_A5XX_VBIF_TEST_BUS1_CTRL0, 0); + gpu_write(gpu, REG_A5XX_VBIF_TEST_BUS_OUT_CTRL, + A5XX_VBIF_TEST_BUS_OUT_CTRL_TEST_BUS_CTRL_EN); + + for (i = 0; i < A5XX_NUM_AXI_ARB_BLOCKS; i++) { + int j; + + gpu_write(gpu, REG_A5XX_VBIF_TEST_BUS2_CTRL0, 1 << (i + 16)); + for (j = 0; j < 16; j++) { + gpu_write(gpu, REG_A5XX_VBIF_TEST_BUS2_CTRL1, + A5XX_VBIF_TEST_BUS2_CTRL1_TEST_BUS2_DATA_SEL(j)); + SNAPSHOT_WRITE_U32(snapshot, gpu_read(gpu, + REG_A5XX_VBIF_TEST_BUS_OUT)); + } + } + + for (i = 0; i < A5XX_NUM_XIN_BLOCKS; i++) { + int j; + + gpu_write(gpu, REG_A5XX_VBIF_TEST_BUS2_CTRL0, 1 << i); + for (j = 0; j < 18; j++) { + gpu_write(gpu, REG_A5XX_VBIF_TEST_BUS2_CTRL1, + A5XX_VBIF_TEST_BUS2_CTRL1_TEST_BUS2_DATA_SEL(j)); + SNAPSHOT_WRITE_U32(snapshot, + gpu_read(gpu, REG_A5XX_VBIF_TEST_BUS_OUT)); + } + } + + for (i = 0; i < A5XX_NUM_XIN_BLOCKS; i++) { + int j; + + gpu_write(gpu, REG_A5XX_VBIF_TEST_BUS1_CTRL0, 1 << i); + for (j = 0; j < 12; j++) { + gpu_write(gpu, REG_A5XX_VBIF_TEST_BUS1_CTRL1, + A5XX_VBIF_TEST_BUS1_CTRL1_TEST_BUS1_DATA_SEL(j)); + SNAPSHOT_WRITE_U32(snapshot, gpu_read(gpu, + REG_A5XX_VBIF_TEST_BUS_OUT)); + } + } + +} + +static void a5xx_snapshot_debugbus_block(struct msm_gpu *gpu, + struct msm_snapshot *snapshot, u32 block, u32 count) +{ + int i; + struct msm_snapshot_debugbus header = { + .id = block, + .count = count * 2, /* Each value is 2 dwords */ + }; + + if (!SNAPSHOT_HEADER(snapshot, header, SNAPSHOT_SECTION_DEBUGBUS, + (count * 2))) + return; + + for (i = 0; i < count; i++) { + u32 reg = A5XX_RBBM_CFG_DBGBUS_SEL_A_PING_INDEX(i) | + A5XX_RBBM_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block); + + gpu_write(gpu, REG_A5XX_RBBM_CFG_DBGBUS_SEL_A, reg); + gpu_write(gpu, REG_A5XX_RBBM_CFG_DBGBUS_SEL_B, reg); + gpu_write(gpu, REG_A5XX_RBBM_CFG_DBGBUS_SEL_C, reg); + gpu_write(gpu, REG_A5XX_RBBM_CFG_DBGBUS_SEL_D, reg); + + /* Each debugbus entry is a quad word */ + SNAPSHOT_WRITE_U32(snapshot, gpu_read(gpu, + REG_A5XX_RBBM_CFG_DBGBUS_TRACE_BUF2)); + SNAPSHOT_WRITE_U32(snapshot, + gpu_read(gpu, REG_A5XX_RBBM_CFG_DBGBUS_TRACE_BUF1)); + } +} + +static void a5xx_snapshot_debugbus(struct msm_gpu *gpu, + struct msm_snapshot *snapshot) +{ + int i; + + gpu_write(gpu, REG_A5XX_RBBM_CFG_DBGBUS_CNTLM, + A5XX_RBBM_CFG_DBGBUS_CNTLM_ENABLE(0xF)); + + for (i = 0; i < ARRAY_SIZE(a5xx_debugbus_blocks); i++) + a5xx_snapshot_debugbus_block(gpu, snapshot, + a5xx_debugbus_blocks[i].id, + a5xx_debugbus_blocks[i].count); + + /* VBIF is special and not in a good way */ + a5xx_snapshot_debugbus_vbif(gpu, snapshot); +} + +static void a5xx_snapshot_cp_merciu(struct msm_gpu *gpu, + struct msm_snapshot *snapshot) +{ + unsigned int i; + struct msm_snapshot_debug header = { + .type = SNAPSHOT_DEBUG_CP_MERCIU, + .size = 64 << 1, /* Data size is 2 dwords per entry */ + }; + + if (!SNAPSHOT_HEADER(snapshot, header, SNAPSHOT_SECTION_DEBUG, 64 << 1)) + return; + + gpu_write(gpu, REG_A5XX_CP_MERCIU_DBG_ADDR, 0); + for (i = 0; i < 64; i++) { + SNAPSHOT_WRITE_U32(snapshot, + gpu_read(gpu, REG_A5XX_CP_MERCIU_DBG_DATA_1)); + SNAPSHOT_WRITE_U32(snapshot, + gpu_read(gpu, REG_A5XX_CP_MERCIU_DBG_DATA_2)); + } +} + +static void a5xx_snapshot_cp_roq(struct msm_gpu *gpu, + struct msm_snapshot *snapshot) +{ + int i; + struct msm_snapshot_debug header = { + .type = SNAPSHOT_DEBUG_CP_ROQ, + .size = 512, + }; + + if (!SNAPSHOT_HEADER(snapshot, header, SNAPSHOT_SECTION_DEBUG, 512)) + return; + + gpu_write(gpu, REG_A5XX_CP_ROQ_DBG_ADDR, 0); + for (i = 0; i < 512; i++) + SNAPSHOT_WRITE_U32(snapshot, + gpu_read(gpu, REG_A5XX_CP_ROQ_DBG_DATA)); +} + +static void a5xx_snapshot_cp_meq(struct msm_gpu *gpu, + struct msm_snapshot *snapshot) +{ + int i; + struct msm_snapshot_debug header = { + .type = SNAPSHOT_DEBUG_CP_MEQ, + .size = 64, + }; + + if (!SNAPSHOT_HEADER(snapshot, header, SNAPSHOT_SECTION_DEBUG, 64)) + return; + + gpu_write(gpu, REG_A5XX_CP_MEQ_DBG_ADDR, 0); + for (i = 0; i < 64; i++) + SNAPSHOT_WRITE_U32(snapshot, + gpu_read(gpu, REG_A5XX_CP_MEQ_DBG_DATA)); +} + +static void a5xx_snapshot_indexed_registers(struct msm_gpu *gpu, + struct msm_snapshot *snapshot, u32 addr, u32 data, + u32 count) +{ + unsigned int i; + struct msm_snapshot_indexed_regs header = { + .index_reg = addr, + .data_reg = data, + .start = 0, + .count = count, + }; + + if (!SNAPSHOT_HEADER(snapshot, header, SNAPSHOT_SECTION_INDEXED_REGS, + count)) + return; + + for (i = 0; i < count; i++) { + gpu_write(gpu, addr, i); + SNAPSHOT_WRITE_U32(snapshot, gpu_read(gpu, data)); + } +} + +int a5xx_snapshot(struct msm_gpu *gpu, struct msm_snapshot *snapshot) +{ + struct crashdump crashdump = { 0 }; + + if (!crashdump_init(gpu, &crashdump)) + snapshot->priv = &crashdump; + + /* To accurately read all registers, disable hardware clock gating */ + a5xx_set_hwcg(gpu, false); + + /* Kick it up to the generic level */ + adreno_snapshot(gpu, snapshot); + + /* Read the GPU registers */ + a5xx_snapshot_registers(gpu, snapshot); + + /* Read the shader memory banks */ + a5xx_snapshot_shader_memory(gpu, snapshot); + + /* Read the debugbus registers */ + a5xx_snapshot_debugbus(gpu, snapshot); + + /* PFP data */ + a5xx_snapshot_indexed_registers(gpu, snapshot, + REG_A5XX_CP_PFP_STAT_ADDR, REG_A5XX_CP_PFP_STAT_DATA, 36); + + /* ME data */ + a5xx_snapshot_indexed_registers(gpu, snapshot, + REG_A5XX_CP_ME_STAT_ADDR, REG_A5XX_CP_ME_STAT_DATA, 29); + + /* DRAW_STATE data */ + a5xx_snapshot_indexed_registers(gpu, snapshot, + REG_A5XX_CP_DRAW_STATE_ADDR, REG_A5XX_CP_DRAW_STATE_DATA, + 256); + + /* ME cache */ + a5xx_snapshot_indexed_registers(gpu, snapshot, + REG_A5XX_CP_ME_UCODE_DBG_ADDR, REG_A5XX_CP_ME_UCODE_DBG_DATA, + 0x53F); + + /* PFP cache */ + a5xx_snapshot_indexed_registers(gpu, snapshot, + REG_A5XX_CP_PFP_UCODE_DBG_ADDR, REG_A5XX_CP_PFP_UCODE_DBG_DATA, + 0x53F); + + /* ME queue */ + a5xx_snapshot_cp_meq(gpu, snapshot); + + /* CP ROQ */ + a5xx_snapshot_cp_roq(gpu, snapshot); + + /* CP MERCIU */ + a5xx_snapshot_cp_merciu(gpu, snapshot); + + crashdump_destroy(gpu, &crashdump); + snapshot->priv = NULL; + + /* Re-enable HWCG */ + a5xx_set_hwcg(gpu, true); + return 0; +} diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 337ed53b7bc8..f1883825354e 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -17,7 +17,9 @@ * this program. If not, see . */ +#include #include "adreno_gpu.h" +#include "msm_snapshot.h" #include "msm_gem.h" #include "msm_mmu.h" @@ -629,3 +631,81 @@ void adreno_gpu_cleanup(struct adreno_gpu *gpu) msm_gem_address_space_put(aspace); } } + +static void adreno_snapshot_os(struct msm_gpu *gpu, + struct msm_snapshot *snapshot) +{ + struct msm_snapshot_linux header; + + memset(&header, 0, sizeof(header)); + + header.osid = SNAPSHOT_OS_LINUX_V3; + strlcpy(header.release, utsname()->release, sizeof(header.release)); + strlcpy(header.version, utsname()->version, sizeof(header.version)); + + header.seconds = get_seconds(); + header.ctxtcount = 0; + + SNAPSHOT_HEADER(snapshot, header, SNAPSHOT_SECTION_OS, 0); +} + +static void adreno_snapshot_ringbuffer(struct msm_gpu *gpu, + struct msm_snapshot *snapshot, struct msm_ringbuffer *ring) +{ + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + struct msm_snapshot_ringbuffer header; + unsigned int i, end = 0; + unsigned int *data = ring->start; + + memset(&header, 0, sizeof(header)); + + /* + * We only want to copy the active contents of each ring, so find the + * last valid entry in the ringbuffer + */ + for (i = 0; i < MSM_GPU_RINGBUFFER_SZ >> 2; i++) { + if (data[i]) + end = i; + } + + /* The dump always starts at 0 */ + header.start = 0; + header.end = end; + + /* This is the number of dwords being dumped */ + header.count = end + 1; + + /* This is the size of the actual ringbuffer */ + header.rbsize = MSM_GPU_RINGBUFFER_SZ >> 2; + + header.id = ring->id; + header.gpuaddr = ring->iova; + header.rptr = get_rptr(adreno_gpu, ring); + header.wptr = get_wptr(ring); + header.timestamp_queued = adreno_submitted_fence(gpu, ring); + header.timestamp_retired = adreno_last_fence(gpu, ring); + + /* Write the header even if the ringbuffer data is empty */ + if (!SNAPSHOT_HEADER(snapshot, header, SNAPSHOT_SECTION_RB_V2, + header.count)) + return; + + SNAPSHOT_MEMCPY(snapshot, ring->start, header.count * sizeof(u32)); +} + +static void adreno_snapshot_ringbuffers(struct msm_gpu *gpu, + struct msm_snapshot *snapshot) +{ + struct msm_ringbuffer *ring; + int i; + + /* Write a new section for each ringbuffer */ + FOR_EACH_RING(gpu, ring, i) + adreno_snapshot_ringbuffer(gpu, snapshot, ring); +} + +void adreno_snapshot(struct msm_gpu *gpu, struct msm_snapshot *snapshot) +{ + adreno_snapshot_os(gpu, snapshot); + adreno_snapshot_ringbuffers(gpu, snapshot); +} diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h index 55602d20a205..30461115281c 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h @@ -233,6 +233,7 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, int nr_rings); void adreno_gpu_cleanup(struct adreno_gpu *gpu); +void adreno_snapshot(struct msm_gpu *gpu, struct msm_snapshot *snapshot); /* ringbuffer helpers (the parts that are adreno specific) */ diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 4d39eb439793..0231ac3f269f 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -837,6 +837,13 @@ static int msm_gpu_show(struct drm_device *dev, struct seq_file *m) return 0; } +static int msm_snapshot_show(struct drm_device *dev, struct seq_file *m) +{ + struct msm_drm_private *priv = dev->dev_private; + + return msm_snapshot_write(priv->gpu, m); +} + static int msm_gem_show(struct drm_device *dev, struct seq_file *m) { struct msm_drm_private *priv = dev->dev_private; @@ -901,11 +908,22 @@ static int show_locked(struct seq_file *m, void *arg) return ret; } +static int show_unlocked(struct seq_file *m, void *arg) +{ + struct drm_info_node *node = (struct drm_info_node *) m->private; + struct drm_device *dev = node->minor->dev; + int (*show)(struct drm_device *dev, struct seq_file *m) = + node->info_ent->data; + + return show(dev, m); +} + static struct drm_info_list msm_debugfs_list[] = { {"gpu", show_locked, 0, msm_gpu_show}, {"gem", show_locked, 0, msm_gem_show}, { "mm", show_locked, 0, msm_mm_show }, { "fb", show_locked, 0, msm_fb_show }, + { "snapshot", show_unlocked, 0, msm_snapshot_show }, }; static int late_init_minor(struct drm_minor *minor) diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 000a369e2537..3fb480f41fde 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -764,6 +764,10 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, bs_init(gpu); + gpu->snapshot = msm_snapshot_new(gpu); + if (IS_ERR(gpu->snapshot)) + gpu->snapshot = NULL; + return 0; fail: @@ -794,4 +798,6 @@ void msm_gpu_cleanup(struct msm_gpu *gpu) msm_ringbuffer_destroy(gpu->rb[i]); } + + msm_snapshot_destroy(gpu, gpu->snapshot); } diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index bab4f9349e64..06dfaabbfcfe 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -24,6 +24,7 @@ #include "msm_drv.h" #include "msm_ringbuffer.h" +#include "msm_snapshot.h" struct msm_gem_submit; struct msm_gpu_perfcntr; @@ -69,6 +70,7 @@ struct msm_gpu_funcs { /* show GPU status in debugfs: */ void (*show)(struct msm_gpu *gpu, struct seq_file *m); #endif + int (*snapshot)(struct msm_gpu *gpu, struct msm_snapshot *snapshot); }; struct msm_gpu { @@ -137,6 +139,8 @@ struct msm_gpu { struct work_struct recover_work; struct list_head submit_list; + + struct msm_snapshot *snapshot; }; /* It turns out that all targets use the same ringbuffer size. */ diff --git a/drivers/gpu/drm/msm/msm_snapshot.c b/drivers/gpu/drm/msm/msm_snapshot.c new file mode 100644 index 000000000000..30f3e5c64ebd --- /dev/null +++ b/drivers/gpu/drm/msm/msm_snapshot.c @@ -0,0 +1,105 @@ +/* Copyright (c) 2016 The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "msm_gpu.h" +#include "msm_gem.h" +#include "msm_snapshot_api.h" + +void msm_snapshot_destroy(struct msm_gpu *gpu, struct msm_snapshot *snapshot) +{ + struct drm_device *dev = gpu->dev; + struct msm_drm_private *priv = dev->dev_private; + struct platform_device *pdev = priv->gpu_pdev; + + if (!snapshot) + return; + + dma_free_coherent(&pdev->dev, SZ_1M, snapshot->ptr, + snapshot->physaddr); + + kfree(snapshot); +} + +struct msm_snapshot *msm_snapshot_new(struct msm_gpu *gpu) +{ + struct drm_device *dev = gpu->dev; + struct msm_drm_private *priv = dev->dev_private; + struct platform_device *pdev = priv->gpu_pdev; + struct msm_snapshot *snapshot; + + snapshot = kzalloc(sizeof(*snapshot), GFP_KERNEL); + if (!snapshot) + return ERR_PTR(-ENOMEM); + + snapshot->ptr = dma_alloc_coherent(&pdev->dev, SZ_1M, + &snapshot->physaddr, GFP_KERNEL); + + if (!snapshot->ptr) { + kfree(snapshot); + return ERR_PTR(-ENOMEM); + } + + seq_buf_init(&snapshot->buf, snapshot->ptr, SZ_1M); + + return snapshot; +} + +int msm_gpu_snapshot(struct msm_gpu *gpu, struct msm_snapshot *snapshot) +{ + int ret; + struct msm_snapshot_header header; + uint64_t val; + + if (!snapshot) + return -ENOMEM; + + /* + * For now, blow away the snapshot and take a new one - the most + * interesting hang is the last one we saw + */ + seq_buf_init(&snapshot->buf, snapshot->ptr, SZ_1M); + + header.magic = SNAPSHOT_MAGIC; + gpu->funcs->get_param(gpu, MSM_PARAM_GPU_ID, &val); + header.gpuid = lower_32_bits(val); + + gpu->funcs->get_param(gpu, MSM_PARAM_CHIP_ID, &val); + header.chipid = lower_32_bits(val); + + seq_buf_putmem(&snapshot->buf, &header, sizeof(header)); + + ret = gpu->funcs->snapshot(gpu, snapshot); + + if (!ret) { + struct msm_snapshot_section_header end; + + end.magic = SNAPSHOT_SECTION_MAGIC; + end.id = SNAPSHOT_SECTION_END; + end.size = sizeof(end); + + seq_buf_putmem(&snapshot->buf, &end, sizeof(end)); + + dev_info(gpu->dev->dev, "GPU snapshot created [0x%pa (%d bytes)]\n", + &snapshot->physaddr, seq_buf_used(&snapshot->buf)); + } + + return ret; +} + +int msm_snapshot_write(struct msm_gpu *gpu, struct seq_file *m) +{ + if (gpu && gpu->snapshot) + seq_write(m, gpu->snapshot->ptr, + seq_buf_used(&gpu->snapshot->buf)); + + return 0; +} diff --git a/drivers/gpu/drm/msm/msm_snapshot.h b/drivers/gpu/drm/msm/msm_snapshot.h new file mode 100644 index 000000000000..247e1358c885 --- /dev/null +++ b/drivers/gpu/drm/msm/msm_snapshot.h @@ -0,0 +1,85 @@ +/* Copyright (c) 2016 The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef MSM_SNAPSHOT_H_ +#define MSM_SNAPSHOT_H_ + +#include +#include +#include "msm_snapshot_api.h" + +struct msm_snapshot { + void *ptr; + struct seq_buf buf; + phys_addr_t physaddr; + uint32_t index; + uint32_t remain; + unsigned long timestamp; + void *priv; +}; + +/* Write a uint32_t value to the next position in the snapshot buffer */ +static inline void SNAPSHOT_WRITE_U32(struct msm_snapshot *snapshot, + uint32_t value) +{ + seq_buf_putmem(&snapshot->buf, &value, sizeof(value)); +} + +/* Copy a block of memory to the next position in the snapshot buffer */ +static inline void SNAPSHOT_MEMCPY(struct msm_snapshot *snapshot, void *src, + uint32_t size) +{ + if (size) + seq_buf_putmem(&snapshot->buf, src, size); +} + +static inline bool _snapshot_header(struct msm_snapshot *snapshot, + struct msm_snapshot_section_header *header, + u32 headsz, u32 datasz, u32 id) +{ + u32 size = headsz + datasz; + + if (seq_buf_buffer_left(&snapshot->buf) <= size) + return false; + + /* Write the section header */ + header->magic = SNAPSHOT_SECTION_MAGIC; + header->id = id; + header->size = headsz + datasz; + + /* Write the section header */ + seq_buf_putmem(&snapshot->buf, header, headsz); + + /* The caller will fill in the data from here */ + return true; +} + +/* SNAPSHOT_HEADER + * _snapshot: pointer to struct msm_snapshot + * _header: Local variable containing the sub-section header + * _id: Section ID to write + * _dword: Size of the data section (in dword) + */ +#define SNAPSHOT_HEADER(_snapshot, _header, _id, _dwords) \ + _snapshot_header((_snapshot), \ + (struct msm_snapshot_section_header *) &(header), \ + sizeof(header), (_dwords) << 2, (_id)) + +struct msm_gpu; + +struct msm_snapshot *msm_snapshot_new(struct msm_gpu *gpu); +void msm_snapshot_destroy(struct msm_gpu *gpu, struct msm_snapshot *snapshot); +int msm_gpu_snapshot(struct msm_gpu *gpu, struct msm_snapshot *snapshot); +int msm_snapshot_write(struct msm_gpu *gpu, struct seq_file *m); + +#endif + diff --git a/drivers/gpu/drm/msm/msm_snapshot_api.h b/drivers/gpu/drm/msm/msm_snapshot_api.h new file mode 100644 index 000000000000..9f0adb9ee784 --- /dev/null +++ b/drivers/gpu/drm/msm/msm_snapshot_api.h @@ -0,0 +1,121 @@ +/* Copyright (c) 2016 The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef MSM_SNAPSHOT_API_H_ +#define MSM_SNAPSHOT_API_H_ + +#include + +/* High word is the magic, low word is the snapshot header version */ +#define SNAPSHOT_MAGIC 0x504D0002 + +struct msm_snapshot_header { + __u32 magic; + __u32 gpuid; + __u32 chipid; +} __packed; + +#define SNAPSHOT_SECTION_MAGIC 0xABCD + +struct msm_snapshot_section_header { + __u16 magic; + __u16 id; + __u32 size; +} __packed; + +/* Section identifiers */ +#define SNAPSHOT_SECTION_OS 0x0101 +#define SNAPSHOT_SECTION_REGS_V2 0x0202 +#define SNAPSHOT_SECTION_RB_V2 0x0302 +#define SNAPSHOT_SECTION_IB_V2 0x0402 +#define SNAPSHOT_SECTION_INDEXED_REGS 0x0501 +#define SNAPSHOT_SECTION_DEBUG 0x0901 +#define SNAPSHOT_SECTION_DEBUGBUS 0x0A01 +#define SNAPSHOT_SECTION_GPU_OBJECT_V2 0x0B02 +#define SNAPSHOT_SECTION_MEMLIST_V2 0x0E02 +#define SNAPSHOT_SECTION_SHADER 0x1201 +#define SNAPSHOT_SECTION_END 0xFFFF + +#define SNAPSHOT_OS_LINUX_V3 0x00000202 + +struct msm_snapshot_linux { + struct msm_snapshot_section_header header; + int osid; + __u32 seconds; + __u32 power_flags; + __u32 power_level; + __u32 power_interval_timeout; + __u32 grpclk; + __u32 busclk; + __u64 ptbase; + __u32 pid; + __u32 current_context; + __u32 ctxtcount; + unsigned char release[32]; + unsigned char version[32]; + unsigned char comm[16]; +} __packed; + +struct msm_snapshot_ringbuffer { + struct msm_snapshot_section_header header; + int start; + int end; + int rbsize; + int wptr; + int rptr; + int count; + __u32 timestamp_queued; + __u32 timestamp_retired; + __u64 gpuaddr; + __u32 id; +} __packed; + +struct msm_snapshot_regs { + struct msm_snapshot_section_header header; + __u32 count; +} __packed; + +struct msm_snapshot_indexed_regs { + struct msm_snapshot_section_header header; + __u32 index_reg; + __u32 data_reg; + __u32 start; + __u32 count; +} __packed; + +#define SNAPSHOT_DEBUG_CP_MEQ 7 +#define SNAPSHOT_DEBUG_CP_PM4_RAM 8 +#define SNAPSHOT_DEBUG_CP_PFP_RAM 9 +#define SNAPSHOT_DEBUG_CP_ROQ 10 +#define SNAPSHOT_DEBUG_SHADER_MEMORY 11 +#define SNAPSHOT_DEBUG_CP_MERCIU 12 + +struct msm_snapshot_debug { + struct msm_snapshot_section_header header; + __u32 type; + __u32 size; +} __packed; + +struct msm_snapshot_debugbus { + struct msm_snapshot_section_header header; + __u32 id; + __u32 count; +} __packed; + +struct msm_snapshot_shader { + struct msm_snapshot_section_header header; + __u32 type; + __u32 index; + __u32 size; +} __packed; + +#endif