iommu/io-pgtable-arm: Don't leave dangling table entries

Currently, when all of the 4K PTEs beneath a 2M table entry are
unmapped, that 2M table entry is left intact, even though it doesn't
point to any valid 4K mappings anymore.  This results in a warning if a
subsequent block mapping lands on top of the dangling table entry, since
we require empty page table entries when we map.  It also causes the
page to which the stomped-on table was pointing to be leaked.  Fix
this by keeping track of how many entries are currently mapped beneath a
table.  When the map count goes to zero (in unmap), free up the page the
table is pointing at and zero out the table entry.

Change-Id: I470e6ffb2206a09fe7c24253e3fd64a744337a7f
Signed-off-by: Mitchel Humpherys <mitchelh@codeaurora.org>
This commit is contained in:
Mitchel Humpherys 2015-07-14 16:41:29 -07:00 committed by David Keitel
parent c84f867d94
commit d1f111a6f9

View file

@ -164,7 +164,7 @@
/* IOPTE accessors */ /* IOPTE accessors */
#define iopte_deref(pte, d) \ #define iopte_deref(pte, d) \
(__va((pte) & ((1ULL << ARM_LPAE_MAX_ADDR_BITS) - 1) \ (__va(iopte_val(pte) & ((1ULL << ARM_LPAE_MAX_ADDR_BITS) - 1) \
& ~((1ULL << (d)->pg_shift) - 1))) & ~((1ULL << (d)->pg_shift) - 1)))
#define iopte_type(pte,l) \ #define iopte_type(pte,l) \
@ -196,12 +196,82 @@ struct arm_lpae_io_pgtable {
typedef u64 arm_lpae_iopte; typedef u64 arm_lpae_iopte;
/*
* We'll use some ignored bits in table entries to keep track of the number
* of page mappings beneath the table. The maximum number of entries
* beneath any table mapping in armv8 is 8192 (which is possible at the
* 2nd- and 3rd-level when using a 64K granule size). The bits at our
* disposal are:
*
* 4k granule: [58..52], [11..2]
* 64k granule: [58..52], [15..2]
*
* [58..52], [11..2] provide enough bits for tracking table mappings at any
* level for any granule, so we'll use those.
*/
#define BOTTOM_IGNORED_MASK 0x3ff
#define BOTTOM_IGNORED_SHIFT 2
#define BOTTOM_IGNORED_NUM_BITS 10
#define TOP_IGNORED_MASK 0x7fULL
#define TOP_IGNORED_SHIFT 52
#define IOPTE_RESERVED_MASK ((BOTTOM_IGNORED_MASK << BOTTOM_IGNORED_SHIFT) | \
(TOP_IGNORED_MASK << TOP_IGNORED_SHIFT))
/*
 * Return the hardware-meaningful part of a table PTE, i.e. with the
 * software-reserved (ignored) count bits masked off.
 */
static arm_lpae_iopte iopte_val(arm_lpae_iopte table_pte)
{
	arm_lpae_iopte hw_pte = table_pte;

	hw_pte &= ~IOPTE_RESERVED_MASK;
	return hw_pte;
}
/*
 * Extract the low field of the per-table mapping count, stored in the
 * bottom ignored bits ([11..2]) of a table PTE.
 */
static arm_lpae_iopte _iopte_bottom_ignored_val(arm_lpae_iopte table_pte)
{
	arm_lpae_iopte shifted = table_pte >> BOTTOM_IGNORED_SHIFT;

	return shifted & BOTTOM_IGNORED_MASK;
}
/*
 * Extract the high field of the per-table mapping count, stored in the
 * top ignored bits ([58..52]) of a table PTE.
 */
static arm_lpae_iopte _iopte_top_ignored_val(arm_lpae_iopte table_pte)
{
	arm_lpae_iopte shifted = table_pte >> TOP_IGNORED_SHIFT;

	return shifted & TOP_IGNORED_MASK;
}
/*
 * Reassemble the number of live mappings beneath this table entry from
 * the two ignored-bit fields of the PTE.
 */
static int iopte_tblcnt(arm_lpae_iopte table_pte)
{
	int lo = _iopte_bottom_ignored_val(table_pte);
	int hi = _iopte_top_ignored_val(table_pte);

	return (hi << BOTTOM_IGNORED_NUM_BITS) | lo;
}
/*
 * Store @val as the mapping count in the ignored bits of the table PTE
 * at @table_pte.  The low BOTTOM_IGNORED_NUM_BITS of @val land in PTE
 * bits [11..2]; the remaining bits land in PTE bits [58..52].
 */
static void iopte_tblcnt_set(arm_lpae_iopte *table_pte, int val)
{
/* iopte_val() clears any previously stored count bits first. */
arm_lpae_iopte pte = iopte_val(*table_pte);
pte |= ((val & BOTTOM_IGNORED_MASK) << BOTTOM_IGNORED_SHIFT) |
(((val & (TOP_IGNORED_MASK << BOTTOM_IGNORED_NUM_BITS))
>> BOTTOM_IGNORED_NUM_BITS) << TOP_IGNORED_SHIFT);
*table_pte = pte;
}
/* Decrement the mapping count of the table PTE at @table_ptep by @cnt. */
static void iopte_tblcnt_sub(arm_lpae_iopte *table_ptep, int cnt)
{
	int new_cnt = iopte_tblcnt(*table_ptep) - cnt;

	iopte_tblcnt_set(table_ptep, new_cnt);
}
/* Increment the mapping count of the table PTE at @table_ptep by @cnt. */
static void iopte_tblcnt_add(arm_lpae_iopte *table_ptep, int cnt)
{
	int new_cnt = iopte_tblcnt(*table_ptep) + cnt;

	iopte_tblcnt_set(table_ptep, new_cnt);
}
static bool suppress_map_failures; static bool suppress_map_failures;
static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data, static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
unsigned long iova, phys_addr_t paddr, unsigned long iova, phys_addr_t paddr,
arm_lpae_iopte prot, int lvl, arm_lpae_iopte prot, int lvl,
arm_lpae_iopte *ptep) arm_lpae_iopte *ptep, arm_lpae_iopte *prev_ptep)
{ {
arm_lpae_iopte pte = prot; arm_lpae_iopte pte = prot;
@ -224,12 +294,17 @@ static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
*ptep = pte; *ptep = pte;
data->iop.cfg.tlb->flush_pgtable(ptep, sizeof(*ptep), data->iop.cookie); data->iop.cfg.tlb->flush_pgtable(ptep, sizeof(*ptep), data->iop.cookie);
if (prev_ptep)
iopte_tblcnt_add(prev_ptep, 1);
return 0; return 0;
} }
static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova, static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
phys_addr_t paddr, size_t size, arm_lpae_iopte prot, phys_addr_t paddr, size_t size, arm_lpae_iopte prot,
int lvl, arm_lpae_iopte *ptep) int lvl, arm_lpae_iopte *ptep,
arm_lpae_iopte *prev_ptep)
{ {
arm_lpae_iopte *cptep, pte; arm_lpae_iopte *cptep, pte;
void *cookie = data->iop.cookie; void *cookie = data->iop.cookie;
@ -240,7 +315,8 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
/* If we can install a leaf entry at this level, then do so */ /* If we can install a leaf entry at this level, then do so */
if (size == block_size && (size & data->iop.cfg.pgsize_bitmap)) if (size == block_size && (size & data->iop.cfg.pgsize_bitmap))
return arm_lpae_init_pte(data, iova, paddr, prot, lvl, ptep); return arm_lpae_init_pte(data, iova, paddr, prot, lvl, ptep,
prev_ptep);
/* We can't allocate tables at the final level */ /* We can't allocate tables at the final level */
if (WARN_ON(lvl >= ARM_LPAE_MAX_LEVELS - 1)) if (WARN_ON(lvl >= ARM_LPAE_MAX_LEVELS - 1))
@ -267,7 +343,8 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
} }
/* Rinse, repeat */ /* Rinse, repeat */
return __arm_lpae_map(data, iova, paddr, size, prot, lvl + 1, cptep); return __arm_lpae_map(data, iova, paddr, size, prot, lvl + 1, cptep,
ptep);
} }
static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data, static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data,
@ -323,7 +400,7 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
return 0; return 0;
prot = arm_lpae_prot_to_pte(data, iommu_prot); prot = arm_lpae_prot_to_pte(data, iommu_prot);
return __arm_lpae_map(data, iova, paddr, size, prot, lvl, ptep); return __arm_lpae_map(data, iova, paddr, size, prot, lvl, ptep, NULL);
} }
static int arm_lpae_map_sg(struct io_pgtable_ops *ops, unsigned long iova, static int arm_lpae_map_sg(struct io_pgtable_ops *ops, unsigned long iova,
@ -365,7 +442,7 @@ static int arm_lpae_map_sg(struct io_pgtable_ops *ops, unsigned long iova,
size_t pgsize = iommu_pgsize( size_t pgsize = iommu_pgsize(
data->iop.cfg.pgsize_bitmap, iova | phys, size); data->iop.cfg.pgsize_bitmap, iova | phys, size);
ret = __arm_lpae_map(data, iova, phys, pgsize, prot, ret = __arm_lpae_map(data, iova, phys, pgsize, prot,
lvl, ptep); lvl, ptep, NULL);
if (ret) if (ret)
goto out_err; goto out_err;
@ -428,7 +505,8 @@ static void arm_lpae_free_pgtable(struct io_pgtable *iop)
static int arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data, static int arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
unsigned long iova, size_t size, unsigned long iova, size_t size,
arm_lpae_iopte prot, int lvl, arm_lpae_iopte prot, int lvl,
arm_lpae_iopte *ptep, size_t blk_size) arm_lpae_iopte *ptep,
arm_lpae_iopte *prev_ptep, size_t blk_size)
{ {
unsigned long blk_start, blk_end; unsigned long blk_start, blk_end;
phys_addr_t blk_paddr; phys_addr_t blk_paddr;
@ -450,7 +528,7 @@ static int arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
/* __arm_lpae_map expects a pointer to the start of the table */ /* __arm_lpae_map expects a pointer to the start of the table */
tablep = &table - ARM_LPAE_LVL_IDX(blk_start, lvl, data); tablep = &table - ARM_LPAE_LVL_IDX(blk_start, lvl, data);
if (__arm_lpae_map(data, blk_start, blk_paddr, size, prot, lvl, if (__arm_lpae_map(data, blk_start, blk_paddr, size, prot, lvl,
tablep) < 0) { tablep, prev_ptep) < 0) {
if (table) { if (table) {
/* Free the table we allocated */ /* Free the table we allocated */
tablep = iopte_deref(table, data); tablep = iopte_deref(table, data);
@ -467,7 +545,7 @@ static int arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
unsigned long iova, size_t size, int lvl, unsigned long iova, size_t size, int lvl,
arm_lpae_iopte *ptep) arm_lpae_iopte *ptep, arm_lpae_iopte *prev_ptep)
{ {
arm_lpae_iopte pte; arm_lpae_iopte pte;
const struct iommu_gather_ops *tlb = data->iop.cfg.tlb; const struct iommu_gather_ops *tlb = data->iop.cfg.tlb;
@ -495,6 +573,7 @@ static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
return size; return size;
} else if ((lvl == ARM_LPAE_MAX_LEVELS - 2) && !iopte_leaf(pte, lvl)) { } else if ((lvl == ARM_LPAE_MAX_LEVELS - 2) && !iopte_leaf(pte, lvl)) {
arm_lpae_iopte *table = iopte_deref(pte, data); arm_lpae_iopte *table = iopte_deref(pte, data);
arm_lpae_iopte *table_base = table;
int tl_offset = ARM_LPAE_LVL_IDX(iova, lvl + 1, data); int tl_offset = ARM_LPAE_LVL_IDX(iova, lvl + 1, data);
int entry_size = (1 << data->pg_shift); int entry_size = (1 << data->pg_shift);
int max_entries = ARM_LPAE_BLOCK_SIZE(lvl, data) / entry_size; int max_entries = ARM_LPAE_BLOCK_SIZE(lvl, data) / entry_size;
@ -514,6 +593,15 @@ static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
memset(table, 0, table_len); memset(table, 0, table_len);
tlb->flush_pgtable(table, table_len, cookie); tlb->flush_pgtable(table, table_len, cookie);
iopte_tblcnt_sub(ptep, entries);
if (!iopte_tblcnt(*ptep)) {
/* no valid mappings left under this table. free it. */
*ptep = 0;
tlb->flush_pgtable(ptep, sizeof(*ptep), cookie);
io_pgtable_free_pages_exact(
table_base, max_entries * sizeof(*table_base));
}
return entries * entry_size; return entries * entry_size;
} else if (iopte_leaf(pte, lvl)) { } else if (iopte_leaf(pte, lvl)) {
/* /*
@ -522,12 +610,14 @@ static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
*/ */
return arm_lpae_split_blk_unmap(data, iova, size, return arm_lpae_split_blk_unmap(data, iova, size,
iopte_prot(pte), lvl, ptep, iopte_prot(pte), lvl, ptep,
prev_ptep,
blk_size); blk_size);
} }
/* Keep on walkin' */ /* Keep on walkin' */
prev_ptep = ptep;
ptep = iopte_deref(pte, data); ptep = iopte_deref(pte, data);
return __arm_lpae_unmap(data, iova, size, lvl + 1, ptep); return __arm_lpae_unmap(data, iova, size, lvl + 1, ptep, prev_ptep);
} }
static size_t arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova, static size_t arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova,
@ -546,7 +636,8 @@ static size_t arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova,
size_to_unmap = remaining < SZ_2M size_to_unmap = remaining < SZ_2M
? remaining ? remaining
: iommu_pgsize(data->iop.cfg.pgsize_bitmap, iova, size); : iommu_pgsize(data->iop.cfg.pgsize_bitmap, iova, size);
ret = __arm_lpae_unmap(data, iova, size_to_unmap, lvl, ptep); ret = __arm_lpae_unmap(data, iova, size_to_unmap, lvl, ptep,
NULL);
if (ret == 0) if (ret == 0)
break; break;
unmapped += ret; unmapped += ret;