This moves the module area to right before the vmalloc area, and moves the kernel image to the base of the vmalloc area. This is an intermediate step towards implementing KASLR, which allows the kernel image to be located anywhere in the vmalloc area. Since other subsystems such as hibernate may still need to refer to the kernel text or data segments via their linears addresses, both are mapped in the linear region as well. The linear alias of the text region is mapped read-only/non-executable to prevent inadvertent modification or execution. Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com> (cherry picked from commit f9040773b7bbbd9e98eb6184a263512a7cfc133f) Signed-off-by: Alex Shi <alex.shi@linaro.org>
363 lines
7.8 KiB
C
363 lines
7.8 KiB
C
/*
|
|
* Copyright (c) 2014, The Linux Foundation. All rights reserved.
|
|
* Debug helper to dump the current kernel pagetables of the system
|
|
* so that we can see what the various memory ranges are set to.
|
|
*
|
|
* Derived from x86 and arm implementation:
|
|
* (C) Copyright 2008 Intel Corporation
|
|
*
|
|
* Author: Arjan van de Ven <arjan@linux.intel.com>
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* as published by the Free Software Foundation; version 2
|
|
* of the License.
|
|
*/
|
|
#include <linux/debugfs.h>
|
|
#include <linux/errno.h>
|
|
#include <linux/fs.h>
|
|
#include <linux/io.h>
|
|
#include <linux/init.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/seq_file.h>
|
|
|
|
#include <asm/fixmap.h>
|
|
#include <asm/memory.h>
|
|
#include <asm/pgtable.h>
|
|
#include <asm/pgtable-hwdef.h>
|
|
|
|
#define LOWEST_ADDR (UL(0xffffffffffffffff) << VA_BITS)
|
|
|
|
struct addr_marker {
|
|
unsigned long start_address;
|
|
const char *name;
|
|
};
|
|
|
|
enum address_markers_idx {
|
|
MODULES_START_NR = 0,
|
|
MODULES_END_NR,
|
|
VMALLOC_START_NR,
|
|
VMALLOC_END_NR,
|
|
#ifdef CONFIG_SPARSEMEM_VMEMMAP
|
|
VMEMMAP_START_NR,
|
|
VMEMMAP_END_NR,
|
|
#endif
|
|
FIXADDR_START_NR,
|
|
FIXADDR_END_NR,
|
|
PCI_START_NR,
|
|
PCI_END_NR,
|
|
KERNEL_SPACE_NR,
|
|
};
|
|
|
|
static struct addr_marker address_markers[] = {
|
|
{ MODULES_VADDR, "Modules start" },
|
|
{ MODULES_END, "Modules end" },
|
|
{ VMALLOC_START, "vmalloc() Area" },
|
|
{ VMALLOC_END, "vmalloc() End" },
|
|
#ifdef CONFIG_SPARSEMEM_VMEMMAP
|
|
{ 0, "vmemmap start" },
|
|
{ 0, "vmemmap end" },
|
|
#endif
|
|
{ FIXADDR_START, "Fixmap start" },
|
|
{ FIXADDR_TOP, "Fixmap end" },
|
|
{ PCI_IO_START, "PCI I/O start" },
|
|
{ PCI_IO_END, "PCI I/O end" },
|
|
{ PAGE_OFFSET, "Linear Mapping" },
|
|
{ -1, NULL },
|
|
};
|
|
|
|
/*
|
|
* The page dumper groups page table entries of the same type into a single
|
|
* description. It uses pg_state to track the range information while
|
|
* iterating over the pte entries. When the continuity is broken it then
|
|
* dumps out a description of the range.
|
|
*/
|
|
struct pg_state {
|
|
struct seq_file *seq;
|
|
const struct addr_marker *marker;
|
|
unsigned long start_address;
|
|
unsigned level;
|
|
u64 current_prot;
|
|
};
|
|
|
|
struct prot_bits {
|
|
u64 mask;
|
|
u64 val;
|
|
const char *set;
|
|
const char *clear;
|
|
};
|
|
|
|
static const struct prot_bits pte_bits[] = {
|
|
{
|
|
.mask = PTE_VALID,
|
|
.val = PTE_VALID,
|
|
.set = " ",
|
|
.clear = "F",
|
|
}, {
|
|
.mask = PTE_USER,
|
|
.val = PTE_USER,
|
|
.set = "USR",
|
|
.clear = " ",
|
|
}, {
|
|
.mask = PTE_RDONLY,
|
|
.val = PTE_RDONLY,
|
|
.set = "ro",
|
|
.clear = "RW",
|
|
}, {
|
|
.mask = PTE_PXN,
|
|
.val = PTE_PXN,
|
|
.set = "NX",
|
|
.clear = "x ",
|
|
}, {
|
|
.mask = PTE_SHARED,
|
|
.val = PTE_SHARED,
|
|
.set = "SHD",
|
|
.clear = " ",
|
|
}, {
|
|
.mask = PTE_AF,
|
|
.val = PTE_AF,
|
|
.set = "AF",
|
|
.clear = " ",
|
|
}, {
|
|
.mask = PTE_NG,
|
|
.val = PTE_NG,
|
|
.set = "NG",
|
|
.clear = " ",
|
|
}, {
|
|
.mask = PTE_CONT,
|
|
.val = PTE_CONT,
|
|
.set = "CON",
|
|
.clear = " ",
|
|
}, {
|
|
.mask = PTE_TABLE_BIT,
|
|
.val = PTE_TABLE_BIT,
|
|
.set = " ",
|
|
.clear = "BLK",
|
|
}, {
|
|
.mask = PTE_UXN,
|
|
.val = PTE_UXN,
|
|
.set = "UXN",
|
|
}, {
|
|
.mask = PTE_ATTRINDX_MASK,
|
|
.val = PTE_ATTRINDX(MT_DEVICE_nGnRnE),
|
|
.set = "DEVICE/nGnRnE",
|
|
}, {
|
|
.mask = PTE_ATTRINDX_MASK,
|
|
.val = PTE_ATTRINDX(MT_DEVICE_nGnRE),
|
|
.set = "DEVICE/nGnRE",
|
|
}, {
|
|
.mask = PTE_ATTRINDX_MASK,
|
|
.val = PTE_ATTRINDX(MT_DEVICE_GRE),
|
|
.set = "DEVICE/GRE",
|
|
}, {
|
|
.mask = PTE_ATTRINDX_MASK,
|
|
.val = PTE_ATTRINDX(MT_NORMAL_NC),
|
|
.set = "MEM/NORMAL-NC",
|
|
}, {
|
|
.mask = PTE_ATTRINDX_MASK,
|
|
.val = PTE_ATTRINDX(MT_NORMAL),
|
|
.set = "MEM/NORMAL",
|
|
}
|
|
};
|
|
|
|
struct pg_level {
|
|
const struct prot_bits *bits;
|
|
size_t num;
|
|
u64 mask;
|
|
};
|
|
|
|
static struct pg_level pg_level[] = {
|
|
{
|
|
}, { /* pgd */
|
|
.bits = pte_bits,
|
|
.num = ARRAY_SIZE(pte_bits),
|
|
}, { /* pud */
|
|
.bits = pte_bits,
|
|
.num = ARRAY_SIZE(pte_bits),
|
|
}, { /* pmd */
|
|
.bits = pte_bits,
|
|
.num = ARRAY_SIZE(pte_bits),
|
|
}, { /* pte */
|
|
.bits = pte_bits,
|
|
.num = ARRAY_SIZE(pte_bits),
|
|
},
|
|
};
|
|
|
|
static void dump_prot(struct pg_state *st, const struct prot_bits *bits,
|
|
size_t num)
|
|
{
|
|
unsigned i;
|
|
|
|
for (i = 0; i < num; i++, bits++) {
|
|
const char *s;
|
|
|
|
if ((st->current_prot & bits->mask) == bits->val)
|
|
s = bits->set;
|
|
else
|
|
s = bits->clear;
|
|
|
|
if (s)
|
|
seq_printf(st->seq, " %s", s);
|
|
}
|
|
}
|
|
|
|
static void note_page(struct pg_state *st, unsigned long addr, unsigned level,
|
|
u64 val)
|
|
{
|
|
static const char units[] = "KMGTPE";
|
|
u64 prot = val & pg_level[level].mask;
|
|
|
|
if (!st->level) {
|
|
st->level = level;
|
|
st->current_prot = prot;
|
|
st->start_address = addr;
|
|
seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
|
|
} else if (prot != st->current_prot || level != st->level ||
|
|
addr >= st->marker[1].start_address) {
|
|
const char *unit = units;
|
|
unsigned long delta;
|
|
|
|
if (st->current_prot) {
|
|
seq_printf(st->seq, "0x%016lx-0x%016lx ",
|
|
st->start_address, addr);
|
|
|
|
delta = (addr - st->start_address) >> 10;
|
|
while (!(delta & 1023) && unit[1]) {
|
|
delta >>= 10;
|
|
unit++;
|
|
}
|
|
seq_printf(st->seq, "%9lu%c", delta, *unit);
|
|
if (pg_level[st->level].bits)
|
|
dump_prot(st, pg_level[st->level].bits,
|
|
pg_level[st->level].num);
|
|
seq_puts(st->seq, "\n");
|
|
}
|
|
|
|
if (addr >= st->marker[1].start_address) {
|
|
st->marker++;
|
|
seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
|
|
}
|
|
|
|
st->start_address = addr;
|
|
st->current_prot = prot;
|
|
st->level = level;
|
|
}
|
|
|
|
if (addr >= st->marker[1].start_address) {
|
|
st->marker++;
|
|
seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
|
|
}
|
|
|
|
}
|
|
|
|
static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start)
|
|
{
|
|
pte_t *pte = pte_offset_kernel(pmd, 0);
|
|
unsigned long addr;
|
|
unsigned i;
|
|
|
|
for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
|
|
addr = start + i * PAGE_SIZE;
|
|
note_page(st, addr, 4, pte_val(*pte));
|
|
}
|
|
}
|
|
|
|
static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start)
|
|
{
|
|
pmd_t *pmd = pmd_offset(pud, 0);
|
|
unsigned long addr;
|
|
unsigned i;
|
|
|
|
for (i = 0; i < PTRS_PER_PMD; i++, pmd++) {
|
|
addr = start + i * PMD_SIZE;
|
|
if (pmd_none(*pmd) || pmd_sect(*pmd)) {
|
|
note_page(st, addr, 3, pmd_val(*pmd));
|
|
} else {
|
|
BUG_ON(pmd_bad(*pmd));
|
|
walk_pte(st, pmd, addr);
|
|
}
|
|
}
|
|
}
|
|
|
|
static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start)
|
|
{
|
|
pud_t *pud = pud_offset(pgd, 0);
|
|
unsigned long addr;
|
|
unsigned i;
|
|
|
|
for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
|
|
addr = start + i * PUD_SIZE;
|
|
if (pud_none(*pud) || pud_sect(*pud)) {
|
|
note_page(st, addr, 2, pud_val(*pud));
|
|
} else {
|
|
BUG_ON(pud_bad(*pud));
|
|
walk_pmd(st, pud, addr);
|
|
}
|
|
}
|
|
}
|
|
|
|
static void walk_pgd(struct pg_state *st, struct mm_struct *mm, unsigned long start)
|
|
{
|
|
pgd_t *pgd = pgd_offset(mm, 0UL);
|
|
unsigned i;
|
|
unsigned long addr;
|
|
|
|
for (i = 0; i < PTRS_PER_PGD; i++, pgd++) {
|
|
addr = start + i * PGDIR_SIZE;
|
|
if (pgd_none(*pgd)) {
|
|
note_page(st, addr, 1, pgd_val(*pgd));
|
|
} else {
|
|
BUG_ON(pgd_bad(*pgd));
|
|
walk_pud(st, pgd, addr);
|
|
}
|
|
}
|
|
}
|
|
|
|
static int ptdump_show(struct seq_file *m, void *v)
|
|
{
|
|
struct pg_state st = {
|
|
.seq = m,
|
|
.marker = address_markers,
|
|
};
|
|
|
|
walk_pgd(&st, &init_mm, LOWEST_ADDR);
|
|
|
|
note_page(&st, 0, 0, 0);
|
|
return 0;
|
|
}
|
|
|
|
static int ptdump_open(struct inode *inode, struct file *file)
|
|
{
|
|
return single_open(file, ptdump_show, NULL);
|
|
}
|
|
|
|
static const struct file_operations ptdump_fops = {
|
|
.open = ptdump_open,
|
|
.read = seq_read,
|
|
.llseek = seq_lseek,
|
|
.release = single_release,
|
|
};
|
|
|
|
static int ptdump_init(void)
|
|
{
|
|
struct dentry *pe;
|
|
unsigned i, j;
|
|
|
|
for (i = 0; i < ARRAY_SIZE(pg_level); i++)
|
|
if (pg_level[i].bits)
|
|
for (j = 0; j < pg_level[i].num; j++)
|
|
pg_level[i].mask |= pg_level[i].bits[j].mask;
|
|
|
|
#ifdef CONFIG_SPARSEMEM_VMEMMAP
|
|
address_markers[VMEMMAP_START_NR].start_address =
|
|
(unsigned long)virt_to_page(PAGE_OFFSET);
|
|
address_markers[VMEMMAP_END_NR].start_address =
|
|
(unsigned long)virt_to_page(high_memory);
|
|
#endif
|
|
|
|
pe = debugfs_create_file("kernel_page_tables", 0400, NULL, NULL,
|
|
&ptdump_fops);
|
|
return pe ? 0 : -ENOMEM;
|
|
}
|
|
device_initcall(ptdump_init);
|