diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile
index 6086c0f9f13c..b77fdd098471 100644
--- a/drivers/gpu/drm/msm/Makefile
+++ b/drivers/gpu/drm/msm/Makefile
@@ -55,7 +55,8 @@ msm_drm-y += adreno/adreno_device.o \
 	adreno/a4xx_gpu.o \
 	adreno/a5xx_gpu.o \
 	adreno/a5xx_power.o \
-	adreno/a5xx_preempt.o
+	adreno/a5xx_preempt.o \
+	adreno/a5xx_snapshot.o
 endif
 
 msm_drm-$(CONFIG_DRM_MSM_MDP4) += mdp/mdp4/mdp4_crtc.o \
@@ -131,6 +132,7 @@ msm_drm-$(CONFIG_DRM_MSM) += \
 	msm_perf.o \
 	msm_rd.o \
 	msm_ringbuffer.o \
-	msm_prop.o
+	msm_prop.o \
+	msm_snapshot.o
 
 obj-$(CONFIG_DRM_MSM)	+= msm_drm.o
diff --git a/drivers/gpu/drm/msm/adreno/a5xx.xml.h b/drivers/gpu/drm/msm/adreno/a5xx.xml.h
index bfee2fd83462..56dad2217289 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx.xml.h
+++ b/drivers/gpu/drm/msm/adreno/a5xx.xml.h
@@ -155,6 +155,114 @@ enum a5xx_depth_format {
 	DEPTH5_32 = 4,
 };
 
+enum a5xx_debugbus {
+	A5XX_RBBM_DBGBUS_CP = 1,
+	A5XX_RBBM_DBGBUS_RBBM = 2,
+	A5XX_RBBM_DBGBUS_VBIF = 3,
+	A5XX_RBBM_DBGBUS_HLSQ = 4,
+	A5XX_RBBM_DBGBUS_UCHE = 5,
+	A5XX_RBBM_DBGBUS_DPM = 6,
+	A5XX_RBBM_DBGBUS_TESS = 7,
+	A5XX_RBBM_DBGBUS_PC = 8,
+	A5XX_RBBM_DBGBUS_VFDP = 9,
+	A5XX_RBBM_DBGBUS_VPC = 10,
+	A5XX_RBBM_DBGBUS_TSE = 11,
+	A5XX_RBBM_DBGBUS_RAS = 12,
+	A5XX_RBBM_DBGBUS_VSC = 13,
+	A5XX_RBBM_DBGBUS_COM = 14,
+	A5XX_RBBM_DBGBUS_DCOM = 15,
+	A5XX_RBBM_DBGBUS_LRZ = 16,
+	A5XX_RBBM_DBGBUS_A2D_DSP = 17,
+	A5XX_RBBM_DBGBUS_CCUFCHE = 18,
+	A5XX_RBBM_DBGBUS_GPMU = 19,
+	A5XX_RBBM_DBGBUS_RBP = 20,
+	A5XX_RBBM_DBGBUS_HM = 21,
+	A5XX_RBBM_DBGBUS_RBBM_CFG = 22,
+	A5XX_RBBM_DBGBUS_VBIF_CX = 23,
+	A5XX_RBBM_DBGBUS_GPC = 29,
+	A5XX_RBBM_DBGBUS_LARC = 30,
+	A5XX_RBBM_DBGBUS_HLSQ_SPTP = 31,
+	A5XX_RBBM_DBGBUS_RB_0 = 32,
+	A5XX_RBBM_DBGBUS_RB_1 = 33,
+	A5XX_RBBM_DBGBUS_RB_2 = 34,
+	A5XX_RBBM_DBGBUS_RB_3 = 35,
+	A5XX_RBBM_DBGBUS_CCU_0 = 40,
+	A5XX_RBBM_DBGBUS_CCU_1 = 41,
+	A5XX_RBBM_DBGBUS_CCU_2 = 42,
+	A5XX_RBBM_DBGBUS_CCU_3 = 43,
+	A5XX_RBBM_DBGBUS_A2D_RAS_0 = 48,
+	A5XX_RBBM_DBGBUS_A2D_RAS_1 = 49,
+	A5XX_RBBM_DBGBUS_A2D_RAS_2 = 50,
+	A5XX_RBBM_DBGBUS_A2D_RAS_3 = 51,
+	A5XX_RBBM_DBGBUS_VFD_0 = 56,
+	A5XX_RBBM_DBGBUS_VFD_1 = 57,
+	A5XX_RBBM_DBGBUS_VFD_2 = 58,
+	A5XX_RBBM_DBGBUS_VFD_3 = 59,
+	A5XX_RBBM_DBGBUS_SP_0 = 64,
+	A5XX_RBBM_DBGBUS_SP_1 = 65,
+	A5XX_RBBM_DBGBUS_SP_2 = 66,
+	A5XX_RBBM_DBGBUS_SP_3 = 67,
+	A5XX_RBBM_DBGBUS_TPL1_0 = 72,
+	A5XX_RBBM_DBGBUS_TPL1_1 = 73,
+	A5XX_RBBM_DBGBUS_TPL1_2 = 74,
+	A5XX_RBBM_DBGBUS_TPL1_3 = 75,
+};
+
+enum a5xx_shader_blocks {
+	A5XX_TP_W_MEMOBJ = 1,
+	A5XX_TP_W_SAMPLER = 2,
+	A5XX_TP_W_MIPMAP_BASE = 3,
+	A5XX_TP_W_MEMOBJ_TAG = 4,
+	A5XX_TP_W_SAMPLER_TAG = 5,
+	A5XX_TP_S_3D_MEMOBJ = 6,
+	A5XX_TP_S_3D_SAMPLER = 7,
+	A5XX_TP_S_3D_MEMOBJ_TAG = 8,
+	A5XX_TP_S_3D_SAMPLER_TAG = 9,
+	A5XX_TP_S_CS_MEMOBJ = 10,
+	A5XX_TP_S_CS_SAMPLER = 11,
+	A5XX_TP_S_CS_MEMOBJ_TAG = 12,
+	A5XX_TP_S_CS_SAMPLER_TAG = 13,
+	A5XX_SP_W_INSTR = 14,
+	A5XX_SP_W_CONST = 15,
+	A5XX_SP_W_UAV_SIZE = 16,
+	A5XX_SP_W_CB_SIZE = 17,
+	A5XX_SP_W_UAV_BASE = 18,
+	A5XX_SP_W_CB_BASE = 19,
+	A5XX_SP_W_INST_TAG = 20,
+	A5XX_SP_W_STATE = 21,
+	A5XX_SP_S_3D_INSTR = 22,
+	A5XX_SP_S_3D_CONST = 23,
+	A5XX_SP_S_3D_CB_BASE = 24,
+	A5XX_SP_S_3D_CB_SIZE = 25,
+	A5XX_SP_S_3D_UAV_BASE = 26,
+	A5XX_SP_S_3D_UAV_SIZE = 27,
+	A5XX_SP_S_CS_INSTR = 28,
+	A5XX_SP_S_CS_CONST = 29,
+	A5XX_SP_S_CS_CB_BASE = 30,
+	A5XX_SP_S_CS_CB_SIZE = 31,
+	A5XX_SP_S_CS_UAV_BASE = 32,
+	A5XX_SP_S_CS_UAV_SIZE = 33,
+	A5XX_SP_S_3D_INSTR_DIRTY = 34,
+	A5XX_SP_S_3D_CONST_DIRTY = 35,
+	A5XX_SP_S_3D_CB_BASE_DIRTY = 36,
+	A5XX_SP_S_3D_CB_SIZE_DIRTY = 37,
+	A5XX_SP_S_3D_UAV_BASE_DIRTY = 38,
+	A5XX_SP_S_3D_UAV_SIZE_DIRTY = 39,
+	A5XX_SP_S_CS_INSTR_DIRTY = 40,
+	A5XX_SP_S_CS_CONST_DIRTY = 41,
+	A5XX_SP_S_CS_CB_BASE_DIRTY = 42,
+	A5XX_SP_S_CS_CB_SIZE_DIRTY = 43,
+	A5XX_SP_S_CS_UAV_BASE_DIRTY = 44,
+	A5XX_SP_S_CS_UAV_SIZE_DIRTY = 45,
+	A5XX_HLSQ_ICB = 46,
+	A5XX_HLSQ_ICB_DIRTY = 47,
+	A5XX_HLSQ_ICB_CB_BASE_DIRTY = 48,
+	A5XX_SP_POWER_RESTORE_RAM = 64,
+	A5XX_SP_POWER_RESTORE_RAM_TAG = 65,
+	A5XX_TP_POWER_RESTORE_RAM = 66,
+	A5XX_TP_POWER_RESTORE_RAM_TAG = 67,
+};
+
 enum a5xx_tex_filter {
 	A5XX_TEX_NEAREST = 0,
 	A5XX_TEX_LINEAR = 1,
@@ -396,6 +504,18 @@ static inline uint32_t A5XX_CP_PROTECT_REG_MASK_LEN(uint32_t val)
 #define REG_A5XX_CP_POWERCTR_CP_SEL_3				0x00000bbd
 
 #define REG_A5XX_RBBM_CFG_DBGBUS_SEL_A				0x00000004
+#define A5XX_RBBM_CFG_DBGBUS_SEL_A_PING_INDEX__MASK		0x000000ff
+#define A5XX_RBBM_CFG_DBGBUS_SEL_A_PING_INDEX__SHIFT		0
+static inline uint32_t A5XX_RBBM_CFG_DBGBUS_SEL_A_PING_INDEX(uint32_t val)
+{
+	return ((val) << A5XX_RBBM_CFG_DBGBUS_SEL_A_PING_INDEX__SHIFT) & A5XX_RBBM_CFG_DBGBUS_SEL_A_PING_INDEX__MASK;
+}
+#define A5XX_RBBM_CFG_DBGBUS_SEL_A_PING_BLK_SEL__MASK		0x0000ff00
+#define A5XX_RBBM_CFG_DBGBUS_SEL_A_PING_BLK_SEL__SHIFT		8
+static inline uint32_t A5XX_RBBM_CFG_DBGBUS_SEL_A_PING_BLK_SEL(uint32_t val)
+{
+	return ((val) << A5XX_RBBM_CFG_DBGBUS_SEL_A_PING_BLK_SEL__SHIFT) & A5XX_RBBM_CFG_DBGBUS_SEL_A_PING_BLK_SEL__MASK;
+}
 
 #define REG_A5XX_RBBM_CFG_DBGBUS_SEL_B				0x00000005
 
@@ -406,6 +526,12 @@ static inline uint32_t A5XX_CP_PROTECT_REG_MASK_LEN(uint32_t val)
 #define REG_A5XX_RBBM_CFG_DBGBUS_CNTLT				0x00000008
 
 #define REG_A5XX_RBBM_CFG_DBGBUS_CNTLM				0x00000009
+#define A5XX_RBBM_CFG_DBGBUS_CNTLM_ENABLE__MASK			0x0f000000
+#define A5XX_RBBM_CFG_DBGBUS_CNTLM_ENABLE__SHIFT		24
+static inline uint32_t A5XX_RBBM_CFG_DBGBUS_CNTLM_ENABLE(uint32_t val)
+{
+	return ((val) << A5XX_RBBM_CFG_DBGBUS_CNTLM_ENABLE__SHIFT) & A5XX_RBBM_CFG_DBGBUS_CNTLM_ENABLE__MASK;
+}
 
 #define REG_A5XX_RBBM_CFG_DEBBUS_CTLTM_ENABLE_SHIFT		0x00000018
 
@@ -1413,6 +1539,12 @@ static inline uint32_t A5XX_VSC_BIN_SIZE_Y(uint32_t val)
 #define REG_A5XX_HLSQ_SPTP_RDSEL				0x00000f08
 
 #define REG_A5XX_HLSQ_DBG_READ_SEL				0x0000bc00
+#define A5XX_HLSQ_DBG_READ_SEL_STATETYPE__MASK			0x0000ff00
+#define A5XX_HLSQ_DBG_READ_SEL_STATETYPE__SHIFT			8
+static inline uint32_t A5XX_HLSQ_DBG_READ_SEL_STATETYPE(uint32_t val)
+{
+	return ((val) << A5XX_HLSQ_DBG_READ_SEL_STATETYPE__SHIFT) & A5XX_HLSQ_DBG_READ_SEL_STATETYPE__MASK;
+}
 
 #define REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE			0x0000a000
 
@@ -1583,6 +1715,8 @@ static inline uint32_t A5XX_VSC_BIN_SIZE_Y(uint32_t val)
 #define REG_A5XX_VBIF_VERSION					0x00003000
 
 #define REG_A5XX_VBIF_CLKON					0x00003001
+#define A5XX_VBIF_CLKON_FORCE_ON				0x00000001
+#define A5XX_VBIF_CLKON_FORCE_ON_TESTBUS			0x00000002
 
 #define REG_A5XX_VBIF_ABIT_SORT					0x00003028
 
@@ -1601,14 +1735,27 @@ static inline uint32_t A5XX_VSC_BIN_SIZE_Y(uint32_t val)
 #define REG_A5XX_VBIF_XIN_HALT_CTRL1				0x00003081
 
 #define REG_A5XX_VBIF_TEST_BUS_OUT_CTRL				0x00003084
+#define A5XX_VBIF_TEST_BUS_OUT_CTRL_TEST_BUS_CTRL_EN		0x00000001
 
 #define REG_A5XX_VBIF_TEST_BUS1_CTRL0				0x00003085
 
 #define REG_A5XX_VBIF_TEST_BUS1_CTRL1				0x00003086
+#define A5XX_VBIF_TEST_BUS1_CTRL1_TEST_BUS1_DATA_SEL__MASK	0x0000000f
+#define A5XX_VBIF_TEST_BUS1_CTRL1_TEST_BUS1_DATA_SEL__SHIFT	0
+static inline uint32_t A5XX_VBIF_TEST_BUS1_CTRL1_TEST_BUS1_DATA_SEL(uint32_t val)
+{
+	return ((val) << A5XX_VBIF_TEST_BUS1_CTRL1_TEST_BUS1_DATA_SEL__SHIFT) & A5XX_VBIF_TEST_BUS1_CTRL1_TEST_BUS1_DATA_SEL__MASK;
+}
 
 #define REG_A5XX_VBIF_TEST_BUS2_CTRL0				0x00003087
 
 #define REG_A5XX_VBIF_TEST_BUS2_CTRL1				0x00003088
+#define A5XX_VBIF_TEST_BUS2_CTRL1_TEST_BUS2_DATA_SEL__MASK	0x0000001f
+#define A5XX_VBIF_TEST_BUS2_CTRL1_TEST_BUS2_DATA_SEL__SHIFT	0
+static inline uint32_t A5XX_VBIF_TEST_BUS2_CTRL1_TEST_BUS2_DATA_SEL(uint32_t val)
+{
+	return ((val) << A5XX_VBIF_TEST_BUS2_CTRL1_TEST_BUS2_DATA_SEL__SHIFT) & A5XX_VBIF_TEST_BUS2_CTRL1_TEST_BUS2_DATA_SEL__MASK;
+}
 
 #define REG_A5XX_VBIF_TEST_BUS_OUT				0x0000308c
 
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index 9933e932679a..a49a7b247547 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -15,9 +15,6 @@
 #include "msm_iommu.h"
 #include "a5xx_gpu.h"
 
-extern bool hang_debug;
-static void a5xx_dump(struct msm_gpu *gpu);
-
 static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
 {
 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
@@ -800,8 +797,7 @@ static void a5xx_recover(struct msm_gpu *gpu)
 {
 	adreno_dump_info(gpu);
 
-	if (hang_debug)
-		a5xx_dump(gpu);
+	msm_gpu_snapshot(gpu, gpu->snapshot);
 
 	/* Reset the GPU so it can work again */
 	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1);
@@ -1112,13 +1108,6 @@ static const u32 a5xx_registers[] = {
 	~0
 };
 
-static void a5xx_dump(struct msm_gpu *gpu)
-{
-	dev_info(gpu->dev->dev, "status:   %08x\n",
-		gpu_read(gpu, REG_A5XX_RBBM_STATUS));
-	adreno_dump(gpu);
-}
-
 static int a5xx_pm_resume(struct msm_gpu *gpu)
 {
 	int ret;
@@ -1225,6 +1214,7 @@ static const struct adreno_gpu_funcs funcs = {
 #ifdef CONFIG_DEBUG_FS
 		.show = a5xx_show,
 #endif
+		.snapshot = a5xx_snapshot,
 	},
 	.get_timestamp = a5xx_get_timestamp,
 };
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h
index 366f37545589..3de14fe42a1b 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h
@@ -176,6 +176,8 @@ void a5xx_preempt_trigger(struct msm_gpu *gpu);
 void a5xx_preempt_irq(struct msm_gpu *gpu);
 void a5xx_preempt_fini(struct msm_gpu *gpu);
 
+int a5xx_snapshot(struct msm_gpu *gpu, struct msm_snapshot *snapshot);
+
 /* Return true if we are in a preempt state */
 static inline bool a5xx_in_preempt(struct a5xx_gpu *a5xx_gpu)
 {
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c
index f8e6bc4dc432..648494c75abc 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c
@@ -46,10 +46,6 @@ static void *alloc_kernel_bo(struct drm_device *drm, struct msm_gpu *gpu,
 	if (iova)
 		*iova = _iova;
 
-	pr_err("[%ps] buffer size %x, iova [%llx : %llx]\n",
-		__builtin_return_address(0), size,
-		_iova, _iova+size-1);
-
 	return ptr;
 out:
 	drm_gem_object_unreference_unlocked(_bo);
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_snapshot.c b/drivers/gpu/drm/msm/adreno/a5xx_snapshot.c
new file mode 100644
index 000000000000..5a2edb0ea518
--- /dev/null
+++ b/drivers/gpu/drm/msm/adreno/a5xx_snapshot.c
@@ -0,0 +1,796 @@
+/* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include "msm_gpu.h"
+#include "msm_gem.h"
+#include "a5xx_gpu.h"
+#include "msm_snapshot_api.h"
+
+#define A5XX_NR_SHADER_BANKS 4
+
+/*
+ * These are a list of the registers that need to be read through the HLSQ
+ * aperture through the crashdumper.  These are not nominally accessible from
+ * the CPU on a secure platform.
+ */
+static const struct {
+	u32 type;	/* statetype selector for HLSQ_DBG_READ_SEL */
+	u32 regoffset;	/* first register offset recorded in the snapshot */
+	u32 count;	/* number of dwords (registers) in the group */
+} a5xx_hlsq_aperture_regs[] = {
+	{ 0x35, 0xE00, 0x32 },   /* HLSQ non-context */
+	{ 0x31, 0x2080, 0x1 },   /* HLSQ 2D context 0 */
+	{ 0x33, 0x2480, 0x1 },   /* HLSQ 2D context 1 */
+	{ 0x32, 0xE780, 0x62 },  /* HLSQ 3D context 0 */
+	{ 0x34, 0xEF80, 0x62 },  /* HLSQ 3D context 1 */
+	{ 0x3f, 0x0EC0, 0x40 },  /* SP non-context */
+	{ 0x3d, 0x2040, 0x1 },   /* SP 2D context 0 */
+	{ 0x3b, 0x2440, 0x1 },   /* SP 2D context 1 */
+	{ 0x3e, 0xE580, 0x180 }, /* SP 3D context 0 */
+	{ 0x3c, 0xED80, 0x180 }, /* SP 3D context 1 */
+	{ 0x3a, 0x0F00, 0x1c },  /* TP non-context */
+	{ 0x38, 0x2000, 0xa },   /* TP 2D context 0 */
+	{ 0x36, 0x2400, 0xa },   /* TP 2D context 1 */
+	{ 0x39, 0xE700, 0x80 },  /* TP 3D context 0 */
+	{ 0x37, 0xEF00, 0x80 },  /* TP 3D context 1 */
+};
+
+/*
+ * The debugbus registers contain device state that presumably makes
+ * sense to the hardware designers. 'count' is the number of indexes to read,
+ * each index value is 64 bits
+ */
+static const struct {
+	enum a5xx_debugbus id;
+	u32 count;
+} a5xx_debugbus_blocks[] = {
+	{  A5XX_RBBM_DBGBUS_CP, 0x100, },
+	{  A5XX_RBBM_DBGBUS_RBBM, 0x100, },
+	{  A5XX_RBBM_DBGBUS_HLSQ, 0x100, },
+	{  A5XX_RBBM_DBGBUS_UCHE, 0x100, },
+	{  A5XX_RBBM_DBGBUS_DPM, 0x100, },
+	{  A5XX_RBBM_DBGBUS_TESS, 0x100, },
+	{  A5XX_RBBM_DBGBUS_PC, 0x100, },
+	{  A5XX_RBBM_DBGBUS_VFDP, 0x100, },
+	{  A5XX_RBBM_DBGBUS_VPC, 0x100, },
+	{  A5XX_RBBM_DBGBUS_TSE, 0x100, },
+	{  A5XX_RBBM_DBGBUS_RAS, 0x100, },
+	{  A5XX_RBBM_DBGBUS_VSC, 0x100, },
+	{  A5XX_RBBM_DBGBUS_COM, 0x100, },
+	{  A5XX_RBBM_DBGBUS_DCOM, 0x100, },
+	{  A5XX_RBBM_DBGBUS_LRZ, 0x100, },
+	{  A5XX_RBBM_DBGBUS_A2D_DSP, 0x100, },
+	{  A5XX_RBBM_DBGBUS_CCUFCHE, 0x100, },
+	{  A5XX_RBBM_DBGBUS_GPMU, 0x100, },
+	{  A5XX_RBBM_DBGBUS_RBP, 0x100, },
+	{  A5XX_RBBM_DBGBUS_HM, 0x100, },
+	{  A5XX_RBBM_DBGBUS_RBBM_CFG, 0x100, },
+	{  A5XX_RBBM_DBGBUS_VBIF_CX, 0x100, },
+	{  A5XX_RBBM_DBGBUS_GPC, 0x100, },
+	{  A5XX_RBBM_DBGBUS_LARC, 0x100, },
+	{  A5XX_RBBM_DBGBUS_HLSQ_SPTP, 0x100, },
+	{  A5XX_RBBM_DBGBUS_RB_0, 0x100, },
+	{  A5XX_RBBM_DBGBUS_RB_1, 0x100, },
+	{  A5XX_RBBM_DBGBUS_RB_2, 0x100, },
+	{  A5XX_RBBM_DBGBUS_RB_3, 0x100, },
+	{  A5XX_RBBM_DBGBUS_CCU_0, 0x100, },
+	{  A5XX_RBBM_DBGBUS_CCU_1, 0x100, },
+	{  A5XX_RBBM_DBGBUS_CCU_2, 0x100, },
+	{  A5XX_RBBM_DBGBUS_CCU_3, 0x100, },
+	{  A5XX_RBBM_DBGBUS_A2D_RAS_0, 0x100, },
+	{  A5XX_RBBM_DBGBUS_A2D_RAS_1, 0x100, },
+	{  A5XX_RBBM_DBGBUS_A2D_RAS_2, 0x100, },
+	{  A5XX_RBBM_DBGBUS_A2D_RAS_3, 0x100, },
+	{  A5XX_RBBM_DBGBUS_VFD_0, 0x100, },
+	{  A5XX_RBBM_DBGBUS_VFD_1, 0x100, },
+	{  A5XX_RBBM_DBGBUS_VFD_2, 0x100, },
+	{  A5XX_RBBM_DBGBUS_VFD_3, 0x100, },
+	{  A5XX_RBBM_DBGBUS_SP_0, 0x100, },
+	{  A5XX_RBBM_DBGBUS_SP_1, 0x100, },
+	{  A5XX_RBBM_DBGBUS_SP_2, 0x100, },
+	{  A5XX_RBBM_DBGBUS_SP_3, 0x100, },
+	{  A5XX_RBBM_DBGBUS_TPL1_0, 0x100, },
+	{  A5XX_RBBM_DBGBUS_TPL1_1, 0x100, },
+	{  A5XX_RBBM_DBGBUS_TPL1_2, 0x100, },
+	{  A5XX_RBBM_DBGBUS_TPL1_3, 0x100, },
+};
+
+/*
+ * The shader blocks are read from the HLSQ aperture - each one has its own
+ * identifier for the aperture read
+ */
+static const struct {
+	enum a5xx_shader_blocks id;
+	u32 size;
+} a5xx_shader_blocks[] = {
+	{A5XX_TP_W_MEMOBJ,              0x200},
+	{A5XX_TP_W_MIPMAP_BASE,         0x3C0},
+	{A5XX_TP_W_SAMPLER_TAG,          0x40},
+	{A5XX_TP_S_3D_SAMPLER,           0x80},
+	{A5XX_TP_S_3D_SAMPLER_TAG,       0x20},
+	{A5XX_TP_S_CS_SAMPLER,           0x40},
+	{A5XX_TP_S_CS_SAMPLER_TAG,       0x10},
+	{A5XX_SP_W_CONST,               0x800},
+	{A5XX_SP_W_CB_SIZE,              0x30},
+	{A5XX_SP_W_CB_BASE,              0xF0},
+	{A5XX_SP_W_STATE,                 0x1},
+	{A5XX_SP_S_3D_CONST,            0x800},
+	{A5XX_SP_S_3D_CB_SIZE,           0x28},
+	{A5XX_SP_S_3D_UAV_SIZE,          0x80},
+	{A5XX_SP_S_CS_CONST,            0x400},
+	{A5XX_SP_S_CS_CB_SIZE,            0x8},
+	{A5XX_SP_S_CS_UAV_SIZE,          0x80},
+	{A5XX_SP_S_3D_CONST_DIRTY,       0x12},
+	{A5XX_SP_S_3D_CB_SIZE_DIRTY,      0x1},
+	{A5XX_SP_S_3D_UAV_SIZE_DIRTY,     0x2},
+	{A5XX_SP_S_CS_CONST_DIRTY,        0xA},
+	{A5XX_SP_S_CS_CB_SIZE_DIRTY,      0x1},
+	{A5XX_SP_S_CS_UAV_SIZE_DIRTY,     0x2},
+	{A5XX_HLSQ_ICB_DIRTY,             0xB},
+	{A5XX_SP_POWER_RESTORE_RAM_TAG,   0xA},
+	{A5XX_TP_POWER_RESTORE_RAM_TAG,   0xA},
+	{A5XX_TP_W_SAMPLER,              0x80},
+	{A5XX_TP_W_MEMOBJ_TAG,           0x40},
+	{A5XX_TP_S_3D_MEMOBJ,           0x200},
+	{A5XX_TP_S_3D_MEMOBJ_TAG,        0x20},
+	{A5XX_TP_S_CS_MEMOBJ,           0x100},
+	{A5XX_TP_S_CS_MEMOBJ_TAG,        0x10},
+	{A5XX_SP_W_INSTR,               0x800},
+	{A5XX_SP_W_UAV_SIZE,             0x80},
+	{A5XX_SP_W_UAV_BASE,             0x80},
+	{A5XX_SP_W_INST_TAG,             0x40},
+	{A5XX_SP_S_3D_INSTR,            0x800},
+	{A5XX_SP_S_3D_CB_BASE,           0xC8},
+	{A5XX_SP_S_3D_UAV_BASE,          0x80},
+	{A5XX_SP_S_CS_INSTR,            0x400},
+	{A5XX_SP_S_CS_CB_BASE,           0x28},
+	{A5XX_SP_S_CS_UAV_BASE,          0x80},
+	{A5XX_SP_S_3D_INSTR_DIRTY,        0x1},
+	{A5XX_SP_S_3D_CB_BASE_DIRTY,      0x5},
+	{A5XX_SP_S_3D_UAV_BASE_DIRTY,     0x2},
+	{A5XX_SP_S_CS_INSTR_DIRTY,        0x1},
+	{A5XX_SP_S_CS_CB_BASE_DIRTY,      0x1},
+	{A5XX_SP_S_CS_UAV_BASE_DIRTY,     0x2},
+	{A5XX_HLSQ_ICB,                 0x200},
+	{A5XX_HLSQ_ICB_CB_BASE_DIRTY,     0x4},
+	{A5XX_SP_POWER_RESTORE_RAM,     0x140},
+	{A5XX_TP_POWER_RESTORE_RAM,      0x40},
+};
+
+/*
+ * The A5XX architecture has a built-in engine to asynchronously dump
+ * registers from the GPU. It is used to accelerate the copy of hundreds
+ * (thousands) of registers and as a safe way to access registers that might
+ * have secure data in them (if the GPU is in secure, the crashdumper returns
+ * bogus values for those registers). On a fully secured device the CPU will be
+ * blocked from accessing those registers directly and so the crashdump is the
+ * only way that we can access context registers and the shader banks for debug
+ * purposes.
+ *
+ * The downside of the crashdump is that it requires access to GPU accessible
+ * memory (so the VBIF and the bus and the SMMU need to be up and working) and
+ * you need enough memory to write the script for the crashdumper and to store
+ * the data that you are dumping so there is a balancing act between the work to
+ * set up a crash dumper and the value we get out of it.
+ */
+
+/*
+ * The crashdump uses a pseudo-script format to read and write registers.  Each
+ * operation is two 64 bit values.
+ *
+ * READ:
+ *  [qword 0] [63:00] - The absolute IOVA address target for the register value
+ *  [qword 1] [63:44] - the dword address of the register offset to read
+ *            [15:00] - Number of dwords to read at once
+ *
+ * WRITE:
+ *  [qword 0] [31:0] 32 bit value to write to the register
+ *  [qword 1] [63:44] - the dword address of the register offset to write
+ *            [21:21] - set 1 to write
+ *            [15:00] - Number of dwords to write (usually 1)
+ *
+ * At the bottom of the script, write quadword zeros to trigger the end.
+ */
+struct crashdump {
+	struct drm_gem_object *bo;	/* buffer holding script + dump data */
+	void *ptr;			/* CPU address of the buffer */
+	u64 iova;			/* GPU address of the buffer */
+	u32 index;			/* byte offset of next script entry */
+};
+
+#define CRASHDUMP_BO_SIZE (SZ_1M)
+#define CRASHDUMP_SCRIPT_SIZE (256 * SZ_1K)
+#define CRASHDUMP_DATA_SIZE (CRASHDUMP_BO_SIZE - CRASHDUMP_SCRIPT_SIZE)
+
+static int crashdump_init(struct msm_gpu *gpu, struct crashdump *crashdump)
+{
+	struct drm_device *drm = gpu->dev;
+	int ret = -ENOMEM;
+
+	crashdump->bo = msm_gem_new(drm, CRASHDUMP_BO_SIZE, MSM_BO_UNCACHED);
+	if (IS_ERR(crashdump->bo)) {
+		ret = PTR_ERR(crashdump->bo);
+		crashdump->bo = NULL;
+		return ret;
+	}
+	/* Reject an ERR_PTR as well as NULL before anyone dereferences it */
+	crashdump->ptr = msm_gem_vaddr_locked(crashdump->bo);
+	if (IS_ERR_OR_NULL(crashdump->ptr))
+		goto out;
+
+	ret = msm_gem_get_iova_locked(crashdump->bo, gpu->aspace,
+		&crashdump->iova);
+	/* On any failure drop the buffer so that crashdump->bo ends up NULL */
+out:
+	if (ret) {
+		drm_gem_object_unreference(crashdump->bo);
+		crashdump->bo = NULL;
+	}
+
+	return ret;
+}
+
+static int crashdump_run(struct msm_gpu *gpu, struct crashdump *crashdump)
+{
+	if (!crashdump->ptr || !crashdump->index)
+		return -EINVAL;
+	/* Point the CP at the script, which sits at the start of the buffer */
+	gpu_write(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO,
+		lower_32_bits(crashdump->iova));
+	gpu_write(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_HI,
+		upper_32_bits(crashdump->iova));
+	/* Write 1 to the control register, then poll until bit 2 is set */
+	gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1);
+
+	return spin_until(gpu_read(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL) & 0x04);
+}
+
+static void crashdump_destroy(struct msm_gpu *gpu, struct crashdump *crashdump)
+{
+	if (!crashdump->bo)
+		return;
+	/* Only release the iova if one was recorded for this buffer */
+	if (crashdump->iova)
+		msm_gem_put_iova(crashdump->bo, gpu->aspace);
+
+	drm_gem_object_unreference(crashdump->bo);
+	/* Zero everything so a repeated call returns at the bo check above */
+	memset(crashdump, 0, sizeof(*crashdump));
+}
+
+static inline void CRASHDUMP_SCRIPT_WRITE(struct crashdump *crashdump,
+		u32 reg, u32 val)
+{
+	u64 *ptr = crashdump->ptr + crashdump->index;
+
+	if (WARN_ON(crashdump->index + (2 * sizeof(u64))
+		>= CRASHDUMP_SCRIPT_SIZE))
+		return;
+
+	/* This is the value to write */
+	ptr[0] = (u64) val;
+
+	/*
+	 * This triggers a write to the specified register.  1 is the size of
+	 * the write in dwords
+	 */
+	ptr[1] = (((u64) reg) << 44) | (1 << 21) | 1;
+
+	crashdump->index += 2 * sizeof(u64);
+}
+
+static inline void CRASHDUMP_SCRIPT_READ(struct crashdump *crashdump,
+		u32 reg, u32 count, u32 offset)
+{
+	u64 *ptr = crashdump->ptr + crashdump->index;
+
+	if (WARN_ON(crashdump->index + (2 * sizeof(u64))
+		>= CRASHDUMP_SCRIPT_SIZE))
+		return;
+	/* The destination range must fit in the data part of the buffer */
+	if (WARN_ON(offset + (count * sizeof(u32)) >= CRASHDUMP_DATA_SIZE))
+		return;
+	/* qword 0: target IOVA; qword 1: register in [63:44] + dword count */
+	ptr[0] = (u64) crashdump->iova + CRASHDUMP_SCRIPT_SIZE + offset;
+	ptr[1] = (((u64) reg) << 44) | count;
+
+	crashdump->index += 2 * sizeof(u64);
+}
+
+static inline void *CRASHDUMP_DATA_PTR(struct crashdump *crashdump, u32 offset)
+{
+	if (WARN_ON(!crashdump->ptr || offset >= CRASHDUMP_DATA_SIZE))
+		return NULL;
+	return crashdump->ptr + CRASHDUMP_SCRIPT_SIZE + offset;
+}
+
+static inline u32 CRASHDUMP_DATA_READ(struct crashdump *crashdump, u32 offset)
+{
+	u32 *data = CRASHDUMP_DATA_PTR(crashdump, offset);
+	return data ? *data : 0;	/* rejected offset reads as 0, no oops */
+}
+
+static inline void CRASHDUMP_RESET(struct crashdump *crashdump)
+{
+	crashdump->index = 0;
+}
+
+static inline void CRASHDUMP_END(struct crashdump *crashdump)
+{
+	u64 *ptr = crashdump->ptr + crashdump->index;
+
+	if (WARN_ON((crashdump->index + (2 * sizeof(u64)))
+		>= CRASHDUMP_SCRIPT_SIZE))
+		return;
+	/* An all-zero entry (two zero qwords) marks the end of the script */
+	ptr[0] = 0;
+	ptr[1] = 0;
+
+	crashdump->index += 2 * sizeof(u64);
+}
+
+static u32 _crashdump_read_hlsq_aperture(struct crashdump *crashdump,
+		u32 offset, u32 statetype, u32 bank,
+		u32 count)
+{
+	CRASHDUMP_SCRIPT_WRITE(crashdump, REG_A5XX_HLSQ_DBG_READ_SEL,
+		A5XX_HLSQ_DBG_READ_SEL_STATETYPE(statetype) | bank);
+	/* Then read 'count' dwords through the aperture to data 'offset' */
+	CRASHDUMP_SCRIPT_READ(crashdump, REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE,
+		count, offset);
+	/* Bytes used in the data area, so callers can advance 'offset' */
+	return count * sizeof(u32);
+}
+
+static u32 _copy_registers(struct msm_snapshot *snapshot,
+		struct crashdump *crashdump, u32 reg, u32 count,
+		u32 offset)
+{
+	u32 *values = crashdump->ptr + CRASHDUMP_SCRIPT_SIZE + offset;
+	u32 group = (count << 16) | reg;
+	u32 n;
+
+	/* Group header: (count << 16) | offset of the group's first register */
+	SNAPSHOT_WRITE_U32(snapshot, group);
+
+	/* Then one dword per register, taken from the crashdump data area */
+	for (n = 0; n < count; n++)
+		SNAPSHOT_WRITE_U32(snapshot, values[n]);
+
+	/* Report how many bytes of the data area this group used */
+	return count * sizeof(u32);
+}
+
+/*
+ * Return the number of registers in a { first, last } pair taken from the
+ * adreno_gpu->registers list (the range is inclusive)
+ */
+static inline u32 REG_COUNT(const unsigned int *ptr)
+{
+	return (ptr[1] - ptr[0]) + 1;
+}
+
+/*
+ * Capture what registers we can from the CPU in case the crashdumper is
+ * unavailable or broken.  This will omit the SP,TP and HLSQ registers, but
+ * you'll get everything else and that ain't bad
+ */
+static void a5xx_snapshot_registers_cpu(struct msm_gpu *gpu,
+		struct msm_snapshot *snapshot)
+{
+	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+	struct msm_snapshot_regs header;
+	u32 regcount = 0, groups = 0;
+	int i;
+
+	/*
+	 * Before we write the section we need to figure out how big our data
+	 * section will be
+	 */
+	for (i = 0; adreno_gpu->registers[i] != ~0; i += 2) {
+		regcount += REG_COUNT(&(adreno_gpu->registers[i]));
+		groups++;
+	}
+
+	header.count = groups;
+
+	/*
+	 * We need one dword for each group and then one dword for each register
+	 * value in that group
+	 */
+	if (!SNAPSHOT_HEADER(snapshot, header, SNAPSHOT_SECTION_REGS_V2,
+		regcount + groups))
+		return;
+
+	for (i = 0; adreno_gpu->registers[i] != ~0; i += 2) {
+		u32 count = REG_COUNT(&(adreno_gpu->registers[i]));
+		u32 reg = adreno_gpu->registers[i];
+		int j;
+
+		/* Write the offset and count for the group */
+		SNAPSHOT_WRITE_U32(snapshot, (count << 16) | reg);
+
+		/* Write each value in the group */
+		for (j = 0; j < count; j++)
+			SNAPSHOT_WRITE_U32(snapshot, gpu_read(gpu, reg++));
+	}
+}
+
+static void a5xx_snapshot_registers(struct msm_gpu *gpu,
+		struct msm_snapshot *snapshot)
+{
+	struct msm_snapshot_regs header;
+	struct crashdump *crashdump = snapshot->priv;
+	u32 offset = 0, regcount = 0, groups = 0;
+	int i;
+
+	/*
+	 * First snapshot all the registers that we can from the CPU.  Do this
+	 * because the crashdumper has a tendency to "taint" the value of some
+	 * of the registers (because the GPU implements the crashdumper) so we
+	 * only want to use the crash dump facility if we have to
+	 */
+	a5xx_snapshot_registers_cpu(gpu, snapshot);
+	/* Without a crashdump buffer the CPU pass above is all we can do */
+	if (!crashdump)
+		return;
+
+	CRASHDUMP_RESET(crashdump);
+
+	/* HLSQ and context registers behind the aperture */
+	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
+		u32 count = a5xx_hlsq_aperture_regs[i].count;
+
+		offset += _crashdump_read_hlsq_aperture(crashdump, offset,
+			a5xx_hlsq_aperture_regs[i].type, 0, count);
+		regcount += count;
+
+		groups++;
+	}
+
+	CRASHDUMP_END(crashdump);
+	/* Skip the section if the crashdumper could not be run to completion */
+	if (crashdump_run(gpu, crashdump))
+		return;
+
+	header.count = groups;
+
+	/*
+	 * The size of the data will be one dword for each "group" of registers,
+	 * and then one dword for each of the registers in that group
+	 */
+	if (!SNAPSHOT_HEADER(snapshot, header, SNAPSHOT_SECTION_REGS_V2,
+		groups + regcount))
+		return;
+
+	/* Copy the registers out, rewinding to the start of the dumped data */
+	for (i = 0, offset = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++)
+		offset += _copy_registers(snapshot, crashdump,
+			a5xx_hlsq_aperture_regs[i].regoffset,
+			a5xx_hlsq_aperture_regs[i].count, offset);
+}
+
+static void _a5xx_snapshot_shader_bank(struct msm_snapshot *snapshot,
+		struct crashdump *crashdump, u32 block, u32 bank,
+		u32 size, u32 offset)
+{
+	void *src;
+
+	struct msm_snapshot_shader header = {
+		.type = block,
+		.index = bank,
+		.size = size,
+	};
+
+	if (!SNAPSHOT_HEADER(snapshot, header, SNAPSHOT_SECTION_SHADER, size))
+		return;
+
+	src = CRASHDUMP_DATA_PTR(crashdump, offset);
+
+	if (src)
+		SNAPSHOT_MEMCPY(snapshot, src, size * sizeof(u32));
+}
+
+static void a5xx_snapshot_shader_memory(struct msm_gpu *gpu,
+		struct msm_snapshot *snapshot)
+{
+	struct crashdump *crashdump = snapshot->priv;
+	u32 offset = 0;
+	int i;
+
+	/* We can only get shader memory through the crashdump */
+	if (!crashdump)
+		return;
+
+	CRASHDUMP_RESET(crashdump);
+
+	/* For each shader block */
+	for (i = 0; i < ARRAY_SIZE(a5xx_shader_blocks); i++) {
+		int j;
+
+		/* For each block, dump 4 banks */
+		for (j = 0; j < A5XX_NR_SHADER_BANKS; j++)
+			offset += _crashdump_read_hlsq_aperture(crashdump,
+				offset, a5xx_shader_blocks[i].id, j,
+				a5xx_shader_blocks[i].size);
+	}
+
+	CRASHDUMP_END(crashdump);
+
+	/* If the crashdump fails we can't get shader memory any other way */
+	if (crashdump_run(gpu, crashdump))
+		return;
+
+	/* Each bank of each shader gets its own snapshot section */
+	for (offset = 0, i = 0; i < ARRAY_SIZE(a5xx_shader_blocks); i++) {
+		int j;
+
+		for (j = 0; j < A5XX_NR_SHADER_BANKS; j++) {
+			_a5xx_snapshot_shader_bank(snapshot, crashdump,
+				a5xx_shader_blocks[i].id, j,
+				a5xx_shader_blocks[i].size, offset);
+			offset += a5xx_shader_blocks[i].size * sizeof(u32);
+		}
+	}
+}
+
+#define A5XX_NUM_AXI_ARB_BLOCKS 2
+#define A5XX_NUM_XIN_BLOCKS     4
+#define VBIF_DATA_SIZE ((16 * A5XX_NUM_AXI_ARB_BLOCKS) + \
+	(18 * A5XX_NUM_XIN_BLOCKS) + (12 * A5XX_NUM_XIN_BLOCKS))
+
+static void a5xx_snapshot_debugbus_vbif(struct msm_gpu *gpu,
+		struct msm_snapshot *snapshot)
+{
+	int i;
+	struct msm_snapshot_debugbus header = {
+		.id = A5XX_RBBM_DBGBUS_VBIF,
+		.count = VBIF_DATA_SIZE,
+	};
+
+	if (!SNAPSHOT_HEADER(snapshot, header, SNAPSHOT_SECTION_DEBUGBUS,
+		VBIF_DATA_SIZE))
+		return;
+
+	gpu_rmw(gpu, REG_A5XX_VBIF_CLKON, A5XX_VBIF_CLKON_FORCE_ON_TESTBUS,
+		A5XX_VBIF_CLKON_FORCE_ON_TESTBUS);
+
+	gpu_write(gpu, REG_A5XX_VBIF_TEST_BUS1_CTRL0, 0);
+	gpu_write(gpu, REG_A5XX_VBIF_TEST_BUS_OUT_CTRL,
+		A5XX_VBIF_TEST_BUS_OUT_CTRL_TEST_BUS_CTRL_EN);
+
+	for (i = 0; i < A5XX_NUM_AXI_ARB_BLOCKS; i++) {
+		int j;
+
+		gpu_write(gpu, REG_A5XX_VBIF_TEST_BUS2_CTRL0, 1 << (i + 16));
+		for (j = 0; j < 16; j++) {
+			gpu_write(gpu, REG_A5XX_VBIF_TEST_BUS2_CTRL1,
+			A5XX_VBIF_TEST_BUS2_CTRL1_TEST_BUS2_DATA_SEL(j));
+			SNAPSHOT_WRITE_U32(snapshot, gpu_read(gpu,
+				REG_A5XX_VBIF_TEST_BUS_OUT));
+		}
+	}
+
+	for (i = 0; i < A5XX_NUM_XIN_BLOCKS; i++) {
+		int j;
+
+		gpu_write(gpu, REG_A5XX_VBIF_TEST_BUS2_CTRL0, 1 << i);
+		for (j = 0; j < 18; j++) {
+			gpu_write(gpu, REG_A5XX_VBIF_TEST_BUS2_CTRL1,
+			A5XX_VBIF_TEST_BUS2_CTRL1_TEST_BUS2_DATA_SEL(j));
+			SNAPSHOT_WRITE_U32(snapshot,
+				gpu_read(gpu, REG_A5XX_VBIF_TEST_BUS_OUT));
+		}
+	}
+
+	for (i = 0; i < A5XX_NUM_XIN_BLOCKS; i++) {
+		int j;
+
+		gpu_write(gpu, REG_A5XX_VBIF_TEST_BUS1_CTRL0, 1 << i);
+		for (j = 0; j < 12; j++) {
+			gpu_write(gpu, REG_A5XX_VBIF_TEST_BUS1_CTRL1,
+			A5XX_VBIF_TEST_BUS1_CTRL1_TEST_BUS1_DATA_SEL(j));
+			SNAPSHOT_WRITE_U32(snapshot, gpu_read(gpu,
+				REG_A5XX_VBIF_TEST_BUS_OUT));
+		}
+	}
+
+}
+
+static void a5xx_snapshot_debugbus_block(struct msm_gpu *gpu,
+		struct msm_snapshot *snapshot, u32 block, u32 count)
+{
+	int i;
+	struct msm_snapshot_debugbus header = {
+		.id = block,
+		.count = count * 2, /* Each value is 2 dwords */
+	};
+
+	if (!SNAPSHOT_HEADER(snapshot, header, SNAPSHOT_SECTION_DEBUGBUS,
+		(count * 2)))
+		return;
+
+	for (i = 0; i < count; i++) {
+		u32 reg = A5XX_RBBM_CFG_DBGBUS_SEL_A_PING_INDEX(i) |
+			A5XX_RBBM_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block);
+
+		gpu_write(gpu, REG_A5XX_RBBM_CFG_DBGBUS_SEL_A, reg);
+		gpu_write(gpu, REG_A5XX_RBBM_CFG_DBGBUS_SEL_B, reg);
+		gpu_write(gpu, REG_A5XX_RBBM_CFG_DBGBUS_SEL_C, reg);
+		gpu_write(gpu, REG_A5XX_RBBM_CFG_DBGBUS_SEL_D, reg);
+
+		/* Each debugbus entry is a quad word */
+		SNAPSHOT_WRITE_U32(snapshot, gpu_read(gpu,
+			REG_A5XX_RBBM_CFG_DBGBUS_TRACE_BUF2));
+		SNAPSHOT_WRITE_U32(snapshot,
+			gpu_read(gpu, REG_A5XX_RBBM_CFG_DBGBUS_TRACE_BUF1));
+	}
+}
+
+static void a5xx_snapshot_debugbus(struct msm_gpu *gpu,
+		struct msm_snapshot *snapshot)
+{
+	int i;
+
+	gpu_write(gpu, REG_A5XX_RBBM_CFG_DBGBUS_CNTLM,
+		A5XX_RBBM_CFG_DBGBUS_CNTLM_ENABLE(0xF));
+
+	for (i = 0; i < ARRAY_SIZE(a5xx_debugbus_blocks); i++)
+		a5xx_snapshot_debugbus_block(gpu, snapshot,
+			a5xx_debugbus_blocks[i].id,
+			a5xx_debugbus_blocks[i].count);
+
+	/* VBIF is special and not in a good way */
+	a5xx_snapshot_debugbus_vbif(gpu, snapshot);
+}
+
+static void a5xx_snapshot_cp_merciu(struct msm_gpu *gpu,
+		struct msm_snapshot *snapshot)
+{
+	unsigned int i;
+	struct msm_snapshot_debug header = {
+		.type = SNAPSHOT_DEBUG_CP_MERCIU,
+		.size = 64 << 1, /* Data size is 2 dwords per entry */
+	};
+
+	if (!SNAPSHOT_HEADER(snapshot, header, SNAPSHOT_SECTION_DEBUG, 64 << 1))
+		return;
+
+	gpu_write(gpu, REG_A5XX_CP_MERCIU_DBG_ADDR, 0);
+	for (i = 0; i < 64; i++) {
+		SNAPSHOT_WRITE_U32(snapshot,
+			gpu_read(gpu, REG_A5XX_CP_MERCIU_DBG_DATA_1));
+		SNAPSHOT_WRITE_U32(snapshot,
+			gpu_read(gpu, REG_A5XX_CP_MERCIU_DBG_DATA_2));
+	}
+}
+
+static void a5xx_snapshot_cp_roq(struct msm_gpu *gpu,
+		struct msm_snapshot *snapshot)
+{
+	int i;
+	struct msm_snapshot_debug header = {
+		.type = SNAPSHOT_DEBUG_CP_ROQ,
+		.size = 512,
+	};
+
+	if (!SNAPSHOT_HEADER(snapshot, header, SNAPSHOT_SECTION_DEBUG, 512))
+		return;
+
+	gpu_write(gpu, REG_A5XX_CP_ROQ_DBG_ADDR, 0);
+	for (i = 0; i < 512; i++)
+		SNAPSHOT_WRITE_U32(snapshot,
+			gpu_read(gpu, REG_A5XX_CP_ROQ_DBG_DATA));
+}
+
+static void a5xx_snapshot_cp_meq(struct msm_gpu *gpu,
+		struct msm_snapshot *snapshot)
+{
+	int i;
+	struct msm_snapshot_debug header = {
+		.type = SNAPSHOT_DEBUG_CP_MEQ,
+		.size = 64,
+	};
+
+	if (!SNAPSHOT_HEADER(snapshot, header, SNAPSHOT_SECTION_DEBUG, 64))
+		return;
+
+	gpu_write(gpu, REG_A5XX_CP_MEQ_DBG_ADDR, 0);
+	for (i = 0; i < 64; i++)
+		SNAPSHOT_WRITE_U32(snapshot,
+			gpu_read(gpu, REG_A5XX_CP_MEQ_DBG_DATA));
+}
+
+static void a5xx_snapshot_indexed_registers(struct msm_gpu *gpu,
+		struct msm_snapshot *snapshot, u32 addr, u32 data,
+		u32 count)
+{
+	unsigned int i;
+	struct msm_snapshot_indexed_regs header = {
+		.index_reg = addr,
+		.data_reg = data,
+		.start = 0,
+		.count = count,
+	};
+
+	if (!SNAPSHOT_HEADER(snapshot, header, SNAPSHOT_SECTION_INDEXED_REGS,
+		count))
+		return;
+
+	for (i = 0; i < count; i++) {
+		gpu_write(gpu, addr, i);
+		SNAPSHOT_WRITE_U32(snapshot, gpu_read(gpu, data));
+	}
+}
+
+int a5xx_snapshot(struct msm_gpu *gpu, struct msm_snapshot *snapshot)
+{
+	struct crashdump crashdump = { 0 };
+
+	if (!crashdump_init(gpu, &crashdump))
+		snapshot->priv = &crashdump;
+
+	/* To accurately read all registers, disable hardware clock gating */
+	a5xx_set_hwcg(gpu, false);
+
+	/* Kick it up to the generic level */
+	adreno_snapshot(gpu, snapshot);
+
+	/* Read the GPU registers */
+	a5xx_snapshot_registers(gpu, snapshot);
+
+	/* Read the shader memory banks */
+	a5xx_snapshot_shader_memory(gpu, snapshot);
+
+	/* Read the debugbus registers */
+	a5xx_snapshot_debugbus(gpu, snapshot);
+
+	/* PFP data */
+	a5xx_snapshot_indexed_registers(gpu, snapshot,
+		REG_A5XX_CP_PFP_STAT_ADDR, REG_A5XX_CP_PFP_STAT_DATA, 36);
+
+	/* ME data */
+	a5xx_snapshot_indexed_registers(gpu, snapshot,
+		REG_A5XX_CP_ME_STAT_ADDR, REG_A5XX_CP_ME_STAT_DATA, 29);
+
+	/* DRAW_STATE data */
+	a5xx_snapshot_indexed_registers(gpu, snapshot,
+		REG_A5XX_CP_DRAW_STATE_ADDR, REG_A5XX_CP_DRAW_STATE_DATA,
+		256);
+
+	/* ME cache */
+	a5xx_snapshot_indexed_registers(gpu, snapshot,
+		REG_A5XX_CP_ME_UCODE_DBG_ADDR, REG_A5XX_CP_ME_UCODE_DBG_DATA,
+		0x53F);
+
+	/* PFP cache */
+	a5xx_snapshot_indexed_registers(gpu, snapshot,
+		REG_A5XX_CP_PFP_UCODE_DBG_ADDR, REG_A5XX_CP_PFP_UCODE_DBG_DATA,
+		0x53F);
+
+	/* ME queue */
+	a5xx_snapshot_cp_meq(gpu, snapshot);
+
+	/* CP ROQ */
+	a5xx_snapshot_cp_roq(gpu, snapshot);
+
+	/* CP MERCIU */
+	a5xx_snapshot_cp_merciu(gpu, snapshot);
+
+	crashdump_destroy(gpu, &crashdump);
+	snapshot->priv = NULL;
+
+	/* Re-enable HWCG */
+	a5xx_set_hwcg(gpu, true);
+	return 0;
+}
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index 337ed53b7bc8..f1883825354e 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -17,7 +17,9 @@
  * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <linux/utsname.h>
 #include "adreno_gpu.h"
+#include "msm_snapshot.h"
 #include "msm_gem.h"
 #include "msm_mmu.h"
 
@@ -629,3 +631,81 @@ void adreno_gpu_cleanup(struct adreno_gpu *gpu)
 		msm_gem_address_space_put(aspace);
 	}
 }
+
+static void adreno_snapshot_os(struct msm_gpu *gpu,
+		struct msm_snapshot *snapshot)
+{
+	/* Anything not set explicitly below is reported as zero */
+	struct msm_snapshot_linux header = {
+		.osid = SNAPSHOT_OS_LINUX_V3,
+		.seconds = get_seconds(),
+	};
+	struct new_utsname *uts = utsname();
+
+	/* Identify the running kernel; strlcpy() truncates to fit */
+	strlcpy(header.release, uts->release, sizeof(header.release));
+	strlcpy(header.version, uts->version, sizeof(header.version));
+
+	SNAPSHOT_HEADER(snapshot, header, SNAPSHOT_SECTION_OS, 0);
+}
+
+static void adreno_snapshot_ringbuffer(struct msm_gpu *gpu,
+		struct msm_snapshot *snapshot, struct msm_ringbuffer *ring)
+{
+	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+	const unsigned int rb_dwords = MSM_GPU_RINGBUFFER_SZ >> 2;
+	struct msm_snapshot_ringbuffer header;
+	unsigned int *dwords = ring->start;
+	unsigned int last = 0, pos;
+
+	memset(&header, 0, sizeof(header));
+
+	/*
+	 * Dump only the used part of the ring: [0, highest non-zero dword].
+	 * An all-zero ring leaves 'last' at 0, so one dword is still dumped.
+	 */
+	for (pos = rb_dwords; pos-- > 0; ) {
+		if (dwords[pos]) {
+			last = pos;
+			break;
+		}
+	}
+
+	/* Identify the ring and record its state at the time of the dump */
+	header.id = ring->id;
+	header.gpuaddr = ring->iova;
+	header.rptr = get_rptr(adreno_gpu, ring);
+	header.wptr = get_wptr(ring);
+	header.timestamp_queued = adreno_submitted_fence(gpu, ring);
+	header.timestamp_retired = adreno_last_fence(gpu, ring);
+
+	/* The dump covers dwords [0, last] out of rb_dwords in the ring */
+	header.start = 0;
+	header.end = last;
+	header.count = last + 1;
+	header.rbsize = rb_dwords;
+
+	/* Bail out if the header and payload do not fit in the buffer */
+	if (!SNAPSHOT_HEADER(snapshot, header, SNAPSHOT_SECTION_RB_V2,
+		header.count))
+		return;
+
+	SNAPSHOT_MEMCPY(snapshot, ring->start, header.count * sizeof(u32));
+}
+
+static void adreno_snapshot_ringbuffers(struct msm_gpu *gpu,
+		struct msm_snapshot *snapshot)
+{
+	int index;
+	struct msm_ringbuffer *ring;
+
+	/* Every ring visited by FOR_EACH_RING gets a section of its own */
+	FOR_EACH_RING(gpu, ring, index)
+		adreno_snapshot_ringbuffer(gpu, snapshot, ring);
+}
+
+void adreno_snapshot(struct msm_gpu *gpu, struct msm_snapshot *snapshot)
+{
+	adreno_snapshot_os(gpu, snapshot);	/* SNAPSHOT_SECTION_OS */
+	adreno_snapshot_ringbuffers(gpu, snapshot); /* RB_V2 section per ring */
+}
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
index 55602d20a205..30461115281c 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
@@ -233,6 +233,7 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
 		int nr_rings);
 void adreno_gpu_cleanup(struct adreno_gpu *gpu);
 
+void adreno_snapshot(struct msm_gpu *gpu, struct msm_snapshot *snapshot);
 
 /* ringbuffer helpers (the parts that are adreno specific) */
 
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index 4d39eb439793..0231ac3f269f 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -837,6 +837,13 @@ static int msm_gpu_show(struct drm_device *dev, struct seq_file *m)
 	return 0;
 }
 
+static int msm_snapshot_show(struct drm_device *dev, struct seq_file *m)
+{
+	struct msm_drm_private *msm_priv = dev->dev_private;
+
+	return msm_snapshot_write(msm_priv->gpu, m); /* NULL gpu is handled */
+}
+
 static int msm_gem_show(struct drm_device *dev, struct seq_file *m)
 {
 	struct msm_drm_private *priv = dev->dev_private;
@@ -901,11 +908,22 @@ static int show_locked(struct seq_file *m, void *arg)
 	return ret;
 }
 
+static int show_unlocked(struct seq_file *m, void *arg)
+{
+	struct drm_info_node *info = m->private;
+	int (*handler)(struct drm_device *, struct seq_file *);
+
+	/* The drm_info_list entry carries the real show callback in ->data */
+	handler = info->info_ent->data;
+	return handler(info->minor->dev, m);
+}
+
 static struct drm_info_list msm_debugfs_list[] = {
 		{"gpu", show_locked, 0, msm_gpu_show},
 		{"gem", show_locked, 0, msm_gem_show},
 		{ "mm", show_locked, 0, msm_mm_show },
 		{ "fb", show_locked, 0, msm_fb_show },
+		{ "snapshot", show_unlocked, 0, msm_snapshot_show },
 };
 
 static int late_init_minor(struct drm_minor *minor)
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 000a369e2537..3fb480f41fde 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -764,6 +764,10 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
 
 	bs_init(gpu);
 
+	gpu->snapshot = msm_snapshot_new(gpu);
+	if (IS_ERR(gpu->snapshot))
+		gpu->snapshot = NULL;
+
 	return 0;
 
 fail:
@@ -794,4 +798,6 @@ void msm_gpu_cleanup(struct msm_gpu *gpu)
 
 		msm_ringbuffer_destroy(gpu->rb[i]);
 	}
+
+	msm_snapshot_destroy(gpu, gpu->snapshot);
 }
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index bab4f9349e64..06dfaabbfcfe 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -24,6 +24,7 @@
 
 #include "msm_drv.h"
 #include "msm_ringbuffer.h"
+#include "msm_snapshot.h"
 
 struct msm_gem_submit;
 struct msm_gpu_perfcntr;
@@ -69,6 +70,7 @@ struct msm_gpu_funcs {
 	/* show GPU status in debugfs: */
 	void (*show)(struct msm_gpu *gpu, struct seq_file *m);
 #endif
+	int (*snapshot)(struct msm_gpu *gpu, struct msm_snapshot *snapshot);
 };
 
 struct msm_gpu {
@@ -137,6 +139,8 @@ struct msm_gpu {
 	struct work_struct recover_work;
 
 	struct list_head submit_list;
+
+	struct msm_snapshot *snapshot;
 };
 
 /* It turns out that all targets use the same ringbuffer size. */
diff --git a/drivers/gpu/drm/msm/msm_snapshot.c b/drivers/gpu/drm/msm/msm_snapshot.c
new file mode 100644
index 000000000000..30f3e5c64ebd
--- /dev/null
+++ b/drivers/gpu/drm/msm/msm_snapshot.c
@@ -0,0 +1,105 @@
+/* Copyright (c) 2016 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include "msm_gpu.h"
+#include "msm_gem.h"
+#include "msm_snapshot_api.h"
+
+void msm_snapshot_destroy(struct msm_gpu *gpu, struct msm_snapshot *snapshot)
+{
+	struct msm_drm_private *priv = gpu->dev->dev_private;
+	struct device *dma_dev = &priv->gpu_pdev->dev;
+
+	/* A NULL snapshot means there is nothing to release */
+	if (!snapshot)
+		return;
+
+	/* Same device and size that msm_snapshot_new() allocated with */
+	dma_free_coherent(dma_dev, SZ_1M, snapshot->ptr, snapshot->physaddr);
+
+	kfree(snapshot);
+}
+
+struct msm_snapshot *msm_snapshot_new(struct msm_gpu *gpu)
+{
+	struct msm_drm_private *priv = gpu->dev->dev_private;
+	struct device *dma_dev = &priv->gpu_pdev->dev;
+	struct msm_snapshot *snap;
+
+	snap = kzalloc(sizeof(*snap), GFP_KERNEL);
+	if (!snap)
+		return ERR_PTR(-ENOMEM);
+
+	/* NOTE(review): DMA API wants dma_addr_t *, physaddr is phys_addr_t */
+	snap->ptr = dma_alloc_coherent(dma_dev, SZ_1M, &snap->physaddr,
+		GFP_KERNEL);
+	if (!snap->ptr) {
+		kfree(snap);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	/* Start with an empty buffer; msm_gpu_snapshot() resets it per dump */
+	seq_buf_init(&snap->buf, snap->ptr, SZ_1M);
+
+	return snap;
+}
+
+/* Replace the stored snapshot with a fresh one; returns 0 on success */
+int msm_gpu_snapshot(struct msm_gpu *gpu, struct msm_snapshot *snapshot)
+{
+	struct msm_snapshot_section_header end;
+	struct msm_snapshot_header header;
+	uint64_t val = 0;
+	int ret;
+
+	if (!snapshot)
+		return -ENOMEM;
+
+	/* Not every target implements ->snapshot; keep the old dump if so */
+	if (!gpu->funcs->snapshot)
+		return -EOPNOTSUPP;
+
+	/* Only the latest hang is kept: start over with an empty buffer */
+	seq_buf_init(&snapshot->buf, snapshot->ptr, SZ_1M);
+
+	/* File header: magic plus the GPU and chip IDs (low 32 bits) */
+	header.magic = SNAPSHOT_MAGIC;
+	gpu->funcs->get_param(gpu, MSM_PARAM_GPU_ID, &val);
+	header.gpuid = lower_32_bits(val);
+	gpu->funcs->get_param(gpu, MSM_PARAM_CHIP_ID, &val);
+	header.chipid = lower_32_bits(val);
+	seq_buf_putmem(&snapshot->buf, &header, sizeof(header));
+
+	ret = gpu->funcs->snapshot(gpu, snapshot);
+	if (ret)
+		return ret;
+
+	/* Terminate the section list with an END section of header size */
+	end.magic = SNAPSHOT_SECTION_MAGIC;
+	end.id = SNAPSHOT_SECTION_END;
+	end.size = sizeof(end);
+	seq_buf_putmem(&snapshot->buf, &end, sizeof(end));
+
+	/* %pa prints its own "0x" prefix; seq_buf_used() is unsigned */
+	dev_info(gpu->dev->dev, "GPU snapshot created [%pa (%u bytes)]\n",
+		&snapshot->physaddr, seq_buf_used(&snapshot->buf));
+	return 0;
+}
+
+int msm_snapshot_write(struct msm_gpu *gpu, struct seq_file *m)
+{
+	/* No GPU or no snapshot buffer: emit nothing and report success */
+	if (!gpu || !gpu->snapshot)
+		return 0;
+	seq_write(m, gpu->snapshot->ptr, seq_buf_used(&gpu->snapshot->buf));
+	return 0;
+}
diff --git a/drivers/gpu/drm/msm/msm_snapshot.h b/drivers/gpu/drm/msm/msm_snapshot.h
new file mode 100644
index 000000000000..247e1358c885
--- /dev/null
+++ b/drivers/gpu/drm/msm/msm_snapshot.h
@@ -0,0 +1,85 @@
+/* Copyright (c) 2016 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef MSM_SNAPSHOT_H_
+#define MSM_SNAPSHOT_H_
+
+#include <linux/string.h>
+#include <linux/seq_buf.h>
+#include "msm_snapshot_api.h"
+
+struct msm_snapshot {
+	void *ptr;		/* SZ_1M buffer from dma_alloc_coherent() */
+	struct seq_buf buf;	/* write cursor over ptr */
+	phys_addr_t physaddr;	/* address handed back by the DMA allocation */
+	uint32_t index;		/* NOTE(review): no user in the visible code */
+	uint32_t remain;	/* NOTE(review): no user in the visible code */
+	unsigned long timestamp; /* NOTE(review): no user in the visible code */
+	void *priv;		/* target data, valid only while dumping */
+};
+
+/* Append one uint32_t value at the current end of the snapshot buffer */
+static inline void SNAPSHOT_WRITE_U32(struct msm_snapshot *snapshot,
+		uint32_t value)
+{
+	seq_buf_putmem(&snapshot->buf, &value, sizeof(value));
+}
+
+/* Append 'size' bytes from 'src' to the snapshot; a zero size is a no-op */
+static inline void SNAPSHOT_MEMCPY(struct msm_snapshot *snapshot,
+		const void *src, uint32_t size)
+{
+	if (size)
+		seq_buf_putmem(&snapshot->buf, src, size);
+}
+
+static inline bool _snapshot_header(struct msm_snapshot *snapshot,
+		struct msm_snapshot_section_header *header,
+		u32 headsz, u32 datasz, u32 id)
+{
+	const u32 total = headsz + datasz;
+
+	/* Skip the section unless header plus payload fits with room left */
+	if (seq_buf_buffer_left(&snapshot->buf) <= total)
+		return false;
+
+	/* Fill in the common section header fields */
+	header->magic = SNAPSHOT_SECTION_MAGIC;
+	header->id = id;
+	header->size = total;
+
+	/* Emit the type-specific header; the caller appends the payload */
+	seq_buf_putmem(&snapshot->buf, header, headsz);
+
+	return true;
+}
+
+/* SNAPSHOT_HEADER
+ * _snapshot: pointer to struct msm_snapshot
+ * _header: Local variable containing the sub-section header
+ * _id: Section ID to write
+ * _dwords: Size of the data section (in dwords)
+ */
+#define SNAPSHOT_HEADER(_snapshot, _header, _id, _dwords) \
+	_snapshot_header((_snapshot), \
+		(struct msm_snapshot_section_header *) &(_header), \
+		sizeof(_header), (_dwords) << 2, (_id))
+
+struct msm_gpu;
+
+struct msm_snapshot *msm_snapshot_new(struct msm_gpu *gpu);
+void msm_snapshot_destroy(struct msm_gpu *gpu, struct msm_snapshot *snapshot);
+int msm_gpu_snapshot(struct msm_gpu *gpu, struct msm_snapshot *snapshot);
+int msm_snapshot_write(struct msm_gpu *gpu, struct seq_file *m);
+
+#endif
+
diff --git a/drivers/gpu/drm/msm/msm_snapshot_api.h b/drivers/gpu/drm/msm/msm_snapshot_api.h
new file mode 100644
index 000000000000..9f0adb9ee784
--- /dev/null
+++ b/drivers/gpu/drm/msm/msm_snapshot_api.h
@@ -0,0 +1,121 @@
+/* Copyright (c) 2016 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef MSM_SNAPSHOT_API_H_
+#define MSM_SNAPSHOT_API_H_
+
+#include <linux/types.h>
+
+/* High word is the magic, low word is the snapshot header version */
+#define SNAPSHOT_MAGIC 0x504D0002
+
+struct msm_snapshot_header {
+	__u32 magic;	/* SNAPSHOT_MAGIC */
+	__u32 gpuid;	/* low 32 bits of MSM_PARAM_GPU_ID */
+	__u32 chipid;	/* low 32 bits of MSM_PARAM_CHIP_ID */
+} __packed;
+
+#define SNAPSHOT_SECTION_MAGIC 0xABCD
+
+struct msm_snapshot_section_header {
+	__u16 magic;	/* SNAPSHOT_SECTION_MAGIC */
+	__u16 id;	/* one of the SNAPSHOT_SECTION_* identifiers */
+	__u32 size;	/* bytes in the section: type header plus data */
+} __packed;
+
+/* Section identifiers */
+#define SNAPSHOT_SECTION_OS		0x0101
+#define SNAPSHOT_SECTION_REGS_V2	0x0202
+#define SNAPSHOT_SECTION_RB_V2		0x0302
+#define SNAPSHOT_SECTION_IB_V2		0x0402
+#define SNAPSHOT_SECTION_INDEXED_REGS	0x0501
+#define SNAPSHOT_SECTION_DEBUG		0x0901
+#define SNAPSHOT_SECTION_DEBUGBUS	0x0A01
+#define SNAPSHOT_SECTION_GPU_OBJECT_V2	0x0B02
+#define SNAPSHOT_SECTION_MEMLIST_V2	0x0E02
+#define SNAPSHOT_SECTION_SHADER		0x1201
+#define SNAPSHOT_SECTION_END		0xFFFF
+
+#define SNAPSHOT_OS_LINUX_V3          0x00000202
+
+struct msm_snapshot_linux {
+	struct msm_snapshot_section_header header;
+	int osid;	/* SNAPSHOT_OS_LINUX_V3 */
+	__u32 seconds;	/* get_seconds() when the section is written */
+	__u32 power_flags;	/* power_flags..current_context are left 0 */
+	__u32 power_level;
+	__u32 power_interval_timeout;
+	__u32 grpclk;
+	__u32 busclk;
+	__u64 ptbase;
+	__u32 pid;
+	__u32 current_context;
+	__u32 ctxtcount;	/* always 0 in adreno_snapshot_os() */
+	unsigned char release[32];	/* utsname()->release, truncated */
+	unsigned char version[32];	/* utsname()->version, truncated */
+	unsigned char comm[16];	/* left zeroed by adreno_snapshot_os() */
+} __packed;
+
+struct msm_snapshot_ringbuffer {
+	struct msm_snapshot_section_header header;
+	int start;	/* first dword dumped; always 0 */
+	int end;	/* index of the last non-zero dword in the ring */
+	int rbsize;	/* size of the whole ring, in dwords */
+	int wptr;	/* get_wptr() at dump time */
+	int rptr;	/* get_rptr() at dump time */
+	int count;	/* dwords that follow the header: end + 1 */
+	__u32 timestamp_queued;	/* adreno_submitted_fence() for the ring */
+	__u32 timestamp_retired;	/* adreno_last_fence() for the ring */
+	__u64 gpuaddr;	/* ring->iova */
+	__u32 id;	/* ring->id */
+} __packed;
+
+struct msm_snapshot_regs {
+	struct msm_snapshot_section_header header;
+	__u32 count;
+} __packed;
+
+struct msm_snapshot_indexed_regs {
+	struct msm_snapshot_section_header header;
+	__u32 index_reg;
+	__u32 data_reg;
+	__u32 start;
+	__u32 count;
+} __packed;
+
+#define SNAPSHOT_DEBUG_CP_MEQ		7
+#define SNAPSHOT_DEBUG_CP_PM4_RAM	8
+#define SNAPSHOT_DEBUG_CP_PFP_RAM	9
+#define SNAPSHOT_DEBUG_CP_ROQ		10
+#define SNAPSHOT_DEBUG_SHADER_MEMORY	11
+#define SNAPSHOT_DEBUG_CP_MERCIU	12
+
+struct msm_snapshot_debug {
+	struct msm_snapshot_section_header header;
+	__u32 type;
+	__u32 size;
+} __packed;
+
+struct msm_snapshot_debugbus {
+	struct msm_snapshot_section_header header;
+	__u32 id;
+	__u32 count;
+} __packed;
+
+struct msm_snapshot_shader {
+	struct msm_snapshot_section_header header;
+	__u32 type;
+	__u32 index;
+	__u32 size;
+} __packed;
+
+#endif