arm64: Hot-remove implementation for arm64
- arch_remove_memory interface
- kernel page tables cleanup
- vmemmap_free implementation for arm64

Change-Id: I8945b6b91ed7012abc1478de266302427ebeb639
Signed-off-by: Andrea Reale <ar@linux.vnet.ibm.com>
Signed-off-by: Maciej Bielski <m.bielski@virtualopensystems.com>
Patch-mainline: linux-kernel @ 11 Apr 2017, 18:25
Signed-off-by: Srivatsa Vaddagiri <vatsa@codeaurora.org>
Signed-off-by: Arun KS <arunks@codeaurora.org>
commit efdbaef3e1 (parent 9bade51dc7)
5 changed files with 431 additions and 6 deletions
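Context for readers (not part of the patch): arch_remove_memory() is the arch hook at the bottom of the generic hot-remove path. Removal is normally driven from user space by first offlining a memory block through the stable sysfs ABI; the physical removal that eventually reaches arch_remove_memory() is then triggered by the platform (for example an ACPI eject). A minimal user-space sketch of the offlining step, with a made-up block number "memory42":

/*
 * Illustration only, not part of this patch: offline one memory block
 * via sysfs. Requires root; the block number is hypothetical.
 */
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	const char *path = "/sys/devices/system/memory/memory42/state";
	FILE *f = fopen(path, "w");

	if (!f) {
		perror("fopen");
		return EXIT_FAILURE;
	}
	/* Generic MM migrates/frees the pages; no arch code runs yet. */
	if (fputs("offline", f) == EOF)
		perror("fputs");
	fclose(f);
	return EXIT_SUCCESS;
}

Once the range is gone, core MM's remove_memory() path ends up in the arch_remove_memory() added below, which drops the section's struct pages (__remove_pages) and then tears down the linear-map page tables (remove_pagetable with direct == true).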
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -656,6 +656,9 @@ config ARCH_ENABLE_MEMORY_HOTPLUG
 	depends on !NUMA
 	def_bool y
 
+config ARCH_ENABLE_MEMORY_HOTREMOVE
+	def_bool y
+
 # The GPIO number here must be sorted by descending number. In case of
 # a multiplatform kernel, we just want the highest value required by the
 # selected platforms.
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -37,6 +37,10 @@ extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
 extern void *fixmap_remap_fdt(phys_addr_t dt_phys);
 #ifdef CONFIG_MEMORY_HOTPLUG
 extern void hotplug_paging(phys_addr_t start, phys_addr_t size);
+#ifdef CONFIG_MEMORY_HOTREMOVE
+extern void remove_pagetable(unsigned long start,
+		unsigned long end, bool direct);
+#endif
 #endif
 
 #endif
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -461,6 +461,11 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd)
 	return pmd_val(pmd) & PHYS_MASK & (s32)PAGE_MASK;
 }
 
+static inline unsigned long pmd_page_vaddr(pmd_t pmd)
+{
+	return (unsigned long) __va(pmd_page_paddr(pmd));
+}
+
 /* Find an entry in the third-level page table. */
 #define pte_index(addr)		(((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
 
@@ -512,6 +517,11 @@ static inline phys_addr_t pud_page_paddr(pud_t pud)
 	return pud_val(pud) & PHYS_MASK & (s32)PAGE_MASK;
 }
 
+static inline unsigned long pud_page_vaddr(pud_t pud)
+{
+	return (unsigned long) __va(pud_page_paddr(pud));
+}
+
 /* Find an entry in the second-level page table. */
 #define pmd_index(addr)		(((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
 
@@ -564,6 +574,11 @@ static inline phys_addr_t pgd_page_paddr(pgd_t pgd)
 	return pgd_val(pgd) & PHYS_MASK & (s32)PAGE_MASK;
 }
 
+static inline unsigned long pgd_page_vaddr(pgd_t pgd)
+{
+	return (unsigned long) __va(pgd_page_paddr(pgd));
+}
+
 /* Find an entry in the first-level page table. */
 #define pud_index(addr)		(((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
 
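The pmd/pud/pgd_page_vaddr() helpers added above turn a table entry into the linear-map virtual address of the next-level table it points to; the remove path in arch/arm64/mm/mmu.c walks the page tables through them (see free_pte_table() later in this commit). A user-space sketch of the arithmetic, with simplified stand-in constants (the real __va() also accounts for PHYS_OFFSET, assumed zero here):

/* Illustration only: stand-in constants, not the kernel's values. */
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_MASK	(~((1UL << PAGE_SHIFT) - 1))
#define PHYS_MASK	((1UL << 48) - 1)		/* 48-bit PA space */
#define PAGE_OFFSET	0xffff800000000000UL		/* linear-map base */

static unsigned long pmd_page_paddr(uint64_t pmd)
{
	/* Strip attribute bits; keep the table's physical address. */
	return pmd & PHYS_MASK & PAGE_MASK;
}

static unsigned long pmd_page_vaddr(uint64_t pmd)
{
	/* __va(): physical address to linear-map virtual address. */
	return pmd_page_paddr(pmd) + PAGE_OFFSET;
}

int main(void)
{
	uint64_t pmd = 0x00000000893fd003UL;	/* fake table descriptor */

	printf("table paddr %#lx -> vaddr %#lx\n",
	       pmd_page_paddr(pmd), pmd_page_vaddr(pmd));
	return 0;
}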
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -505,7 +505,6 @@ int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
 	unsigned long nr_pages = size >> PAGE_SHIFT;
 	unsigned long end_pfn = start_pfn + nr_pages;
 	unsigned long max_sparsemem_pfn = 1UL << (MAX_PHYSMEM_BITS-PAGE_SHIFT);
-	unsigned long pfn;
 	int ret;
 
 	if (end_pfn > max_sparsemem_pfn) {
@@ -578,5 +577,34 @@ int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
 
 	return ret;
 }
-#endif
 
+#ifdef CONFIG_MEMORY_HOTREMOVE
+static void kernel_physical_mapping_remove(unsigned long start,
+		unsigned long end)
+{
+	start = (unsigned long)__va(start);
+	end = (unsigned long)__va(end);
+
+	remove_pagetable(start, end, true);
+
+}
+
+int arch_remove_memory(u64 start, u64 size)
+{
+	unsigned long start_pfn = start >> PAGE_SHIFT;
+	unsigned long nr_pages = size >> PAGE_SHIFT;
+	struct page *page = pfn_to_page(start_pfn);
+	struct zone *zone;
+	int ret = 0;
+
+	zone = page_zone(page);
+	ret = __remove_pages(zone, start_pfn, nr_pages);
+	WARN_ON_ONCE(ret);
+
+	kernel_physical_mapping_remove(start, start + size);
+
+	return ret;
+}
+
+#endif /* CONFIG_MEMORY_HOTREMOVE */
+#endif /* CONFIG_MEMORY_HOTPLUG */
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -1,4 +1,3 @@
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 /*
  * Based on arch/arm/mm/mmu.c
  *
@@ -131,7 +130,6 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
 		phys_addr_t pte_phys;
 		BUG_ON(!pgtable_alloc);
 		pte_phys = pgtable_alloc();
-		pr_debug("Allocating PTE at %pK\n", __va(pte_phys));
 		pte = pte_set_fixmap(pte_phys);
 		if (pmd_sect(*pmd))
 			split_pmd(pmd, pte);
@@ -196,7 +194,6 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end,
 		phys_addr_t pmd_phys;
 		BUG_ON(!pgtable_alloc);
 		pmd_phys = pgtable_alloc();
-		pr_debug("Allocating PMD at %pK\n", __va(pmd_phys));
 		pmd = pmd_set_fixmap(pmd_phys);
 		if (pud_sect(*pud)) {
 			/*
@@ -265,7 +262,6 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
 		phys_addr_t pud_phys;
 		BUG_ON(!pgtable_alloc);
 		pud_phys = pgtable_alloc();
-		pr_debug("Allocating PUD at %pK\n", __va(pud_phys));
 		__pgd_populate(pgd, pud_phys, PUD_TYPE_TABLE);
 	}
 	BUG_ON(pgd_bad(*pgd));
@@ -648,8 +644,384 @@ void hotplug_paging(phys_addr_t start, phys_addr_t size)
 	__free_pages(pg, 0);
 }
 
+#ifdef CONFIG_MEMORY_HOTREMOVE
+#define PAGE_INUSE 0xFD
+
+static void free_pagetable(struct page *page, int order, bool direct)
+{
+	unsigned long magic;
+	unsigned int nr_pages = 1 << order;
+
+	/* bootmem page has reserved flag */
+	if (PageReserved(page)) {
+		__ClearPageReserved(page);
+
+		magic = (unsigned long)page->lru.next;
+		if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) {
+			while (nr_pages--)
+				put_page_bootmem(page++);
+		} else {
+			while (nr_pages--)
+				free_reserved_page(page++);
+		}
+	} else {
+		/*
+		 * Only direct pagetable allocations (those made via
+		 * hotplug) call pgtable_page_ctor; vmemmap pgtable
+		 * allocations don't.
+		 */
+		if (direct)
+			pgtable_page_dtor(page);
+
+		free_pages((unsigned long)page_address(page), order);
+	}
+}
+
+static void free_pte_table(pmd_t *pmd, bool direct)
+{
+	pte_t *pte_start, *pte;
+	struct page *page;
+	int i;
+
+	pte_start = (pte_t *) pmd_page_vaddr(*pmd);
+	/* Check if there is no valid entry left in the PTE table */
+	for (i = 0; i < PTRS_PER_PTE; i++) {
+		pte = pte_start + i;
+		if (!pte_none(*pte))
+			return;
+	}
+
+	page = pmd_page(*pmd);
+
+	free_pagetable(page, 0, direct);
+
+	/*
+	 * This spinlock could only be taken in __pte_alloc_kernel
+	 * in mm/memory.c and nowhere else (for arm64). Not sure if
+	 * the function above can be called concurrently. In doubt,
+	 * I am leaving it here for now, but it probably can be removed.
+	 */
+	spin_lock(&init_mm.page_table_lock);
+	pmd_clear(pmd);
+	spin_unlock(&init_mm.page_table_lock);
+}
+
+static void free_pmd_table(pud_t *pud, bool direct)
+{
+	pmd_t *pmd_start, *pmd;
+	struct page *page;
+	int i;
+
+	pmd_start = (pmd_t *) pud_page_vaddr(*pud);
+	/* Check if there is no valid entry left in the PMD table */
+	for (i = 0; i < PTRS_PER_PMD; i++) {
+		pmd = pmd_start + i;
+		if (!pmd_none(*pmd))
+			return;
+	}
+
+	page = pud_page(*pud);
+
+	free_pagetable(page, 0, direct);
+
+	/*
+	 * This spinlock could only be taken in __pte_alloc_kernel
+	 * in mm/memory.c and nowhere else (for arm64). Not sure if
+	 * the function above can be called concurrently. In doubt,
+	 * I am leaving it here for now, but it probably can be removed.
+	 */
+	spin_lock(&init_mm.page_table_lock);
+	pud_clear(pud);
+	spin_unlock(&init_mm.page_table_lock);
+}
+
+/*
+ * When the PUD is folded on the PGD (three levels of paging),
+ * there's no need to free PUDs.
+ */
+#if CONFIG_PGTABLE_LEVELS > 3
+static void free_pud_table(pgd_t *pgd, bool direct)
+{
+	pud_t *pud_start, *pud;
+	struct page *page;
+	int i;
+
+	pud_start = (pud_t *) pgd_page_vaddr(*pgd);
+	/* Check if there is no valid entry left in the PUD table */
+	for (i = 0; i < PTRS_PER_PUD; i++) {
+		pud = pud_start + i;
+		if (!pud_none(*pud))
+			return;
+	}
+
+	page = pgd_page(*pgd);
+
+	free_pagetable(page, 0, direct);
+
+	/*
+	 * This spinlock could only be taken in __pte_alloc_kernel
+	 * in mm/memory.c and nowhere else (for arm64). Not sure if
+	 * the function above can be called concurrently. In doubt,
+	 * I am leaving it here for now, but it probably can be removed.
+	 */
+	spin_lock(&init_mm.page_table_lock);
+	pgd_clear(pgd);
+	spin_unlock(&init_mm.page_table_lock);
+}
+#endif
+
+static void remove_pte_table(pte_t *pte, unsigned long addr,
+	unsigned long end, bool direct)
+{
+	unsigned long next;
+	void *page_addr;
+
+	for (; addr < end; addr = next, pte++) {
+		next = (addr + PAGE_SIZE) & PAGE_MASK;
+		if (next > end)
+			next = end;
+
+		if (!pte_present(*pte))
+			continue;
+
+		if (PAGE_ALIGNED(addr) && PAGE_ALIGNED(next)) {
+			/*
+			 * Do not free direct mapping pages since they were
+			 * freed when offlining, or simply not in use.
+			 */
+			if (!direct)
+				free_pagetable(pte_page(*pte), 0, direct);
+
+			/*
+			 * This spinlock could only be taken in
+			 * __pte_alloc_kernel in mm/memory.c and nowhere
+			 * else (for arm64). Not sure if the function above
+			 * can be called concurrently. In doubt, I am
+			 * leaving it here for now, but it probably can be
+			 * removed.
+			 */
+			spin_lock(&init_mm.page_table_lock);
+			pte_clear(&init_mm, addr, pte);
+			spin_unlock(&init_mm.page_table_lock);
+		} else {
+			/*
+			 * If we are here, we are freeing vmemmap pages since
+			 * direct mapped memory ranges to be freed are aligned.
+			 *
+			 * If we are not removing the whole page, it means
+			 * other page structs in this page are being used and
+			 * we cannot remove them. So fill the unused page
+			 * structs with 0xFD, and remove the page when it is
+			 * wholly filled with 0xFD.
+			 */
+			memset((void *)addr, PAGE_INUSE, next - addr);
+
+			page_addr = page_address(pte_page(*pte));
+			if (!memchr_inv(page_addr, PAGE_INUSE, PAGE_SIZE)) {
+				free_pagetable(pte_page(*pte), 0, direct);
+
+				/*
+				 * This spinlock could only be taken in
+				 * __pte_alloc_kernel in mm/memory.c and
+				 * nowhere else (for arm64). Not sure if the
+				 * function above can be called concurrently.
+				 * In doubt, I am leaving it here for now,
+				 * but it probably can be removed.
+				 */
+				spin_lock(&init_mm.page_table_lock);
+				pte_clear(&init_mm, addr, pte);
+				spin_unlock(&init_mm.page_table_lock);
+			}
+		}
+	}
+
+	/*
+	 * I am adding this flush here in symmetry to the x86 code.
+	 * Why do I need to call it here and not in remove_p[mu]d?
+	 */
+	flush_tlb_all();
+}
+
+static void remove_pmd_table(pmd_t *pmd, unsigned long addr,
+	unsigned long end, bool direct)
+{
+	unsigned long next;
+	void *page_addr;
+	pte_t *pte;
+
+	for (; addr < end; addr = next, pmd++) {
+		next = pmd_addr_end(addr, end);
+
+		if (!pmd_present(*pmd))
+			continue;
+
+		/* Check if we are using 2MB section mappings. */
+		if (pmd_sect(*pmd)) {
+			if (PAGE_ALIGNED(addr) && PAGE_ALIGNED(next)) {
+				if (!direct) {
+					free_pagetable(pmd_page(*pmd),
+						get_order(PMD_SIZE), direct);
+				}
+				/*
+				 * This spinlock could only be taken in
+				 * __pte_alloc_kernel in mm/memory.c and
+				 * nowhere else (for arm64). Not sure if the
+				 * function above can be called concurrently.
+				 * In doubt, I am leaving it here for now,
+				 * but it probably can be removed.
+				 */
+				spin_lock(&init_mm.page_table_lock);
+				pmd_clear(pmd);
+				spin_unlock(&init_mm.page_table_lock);
+			} else {
+				/* If here, we are freeing vmemmap pages. */
+				memset((void *)addr, PAGE_INUSE, next - addr);
+
+				page_addr = page_address(pmd_page(*pmd));
+				if (!memchr_inv(page_addr, PAGE_INUSE,
+						PMD_SIZE)) {
+					free_pagetable(pmd_page(*pmd),
+						get_order(PMD_SIZE), direct);
+
+					/*
+					 * This spinlock could only be taken
+					 * in __pte_alloc_kernel in
+					 * mm/memory.c and nowhere else (for
+					 * arm64). Not sure if the function
+					 * above can be called concurrently.
+					 * In doubt, I am leaving it here for
+					 * now, but it probably can be
+					 * removed.
+					 */
+					spin_lock(&init_mm.page_table_lock);
+					pmd_clear(pmd);
+					spin_unlock(&init_mm.page_table_lock);
+				}
+			}
+			continue;
+		}
+
+		BUG_ON(!pmd_table(*pmd));
+
+		pte = pte_offset_map(pmd, addr);
+		remove_pte_table(pte, addr, next, direct);
+		free_pte_table(pmd, direct);
+	}
+}
+
+static void remove_pud_table(pud_t *pud, unsigned long addr,
+	unsigned long end, bool direct)
+{
+	unsigned long next;
+	pmd_t *pmd;
+	void *page_addr;
+
+	for (; addr < end; addr = next, pud++) {
+		next = pud_addr_end(addr, end);
+		if (!pud_present(*pud))
+			continue;
+		/*
+		 * If we are using 4K granules, check if we are using
+		 * 1GB section mapping.
+		 */
+		if (pud_sect(*pud)) {
+			if (PAGE_ALIGNED(addr) && PAGE_ALIGNED(next)) {
+				if (!direct) {
+					free_pagetable(pud_page(*pud),
+						get_order(PUD_SIZE), direct);
+				}
+
+				/*
+				 * This spinlock could only be taken in
+				 * __pte_alloc_kernel in mm/memory.c and
+				 * nowhere else (for arm64). Not sure if the
+				 * function above can be called concurrently.
+				 * In doubt, I am leaving it here for now,
+				 * but it probably can be removed.
+				 */
+				spin_lock(&init_mm.page_table_lock);
+				pud_clear(pud);
+				spin_unlock(&init_mm.page_table_lock);
+			} else {
+				/* If here, we are freeing vmemmap pages. */
+				memset((void *)addr, PAGE_INUSE, next - addr);
+
+				page_addr = page_address(pud_page(*pud));
+				if (!memchr_inv(page_addr, PAGE_INUSE,
+						PUD_SIZE)) {
+
+					free_pagetable(pud_page(*pud),
+						get_order(PUD_SIZE), direct);
+
+					/*
+					 * This spinlock could only be taken
+					 * in __pte_alloc_kernel in
+					 * mm/memory.c and nowhere else (for
+					 * arm64). Not sure if the function
+					 * above can be called concurrently.
+					 * In doubt, I am leaving it here for
+					 * now, but it probably can be
+					 * removed.
+					 */
+					spin_lock(&init_mm.page_table_lock);
+					pud_clear(pud);
+					spin_unlock(&init_mm.page_table_lock);
+				}
+			}
+			continue;
+		}
+
+		BUG_ON(!pud_table(*pud));
+
+		pmd = pmd_offset(pud, addr);
+		remove_pmd_table(pmd, addr, next, direct);
+		free_pmd_table(pud, direct);
+	}
+}
+
+void remove_pagetable(unsigned long start, unsigned long end, bool direct)
+{
+	unsigned long next;
+	unsigned long addr;
+	pgd_t *pgd;
+	pud_t *pud;
+
+	for (addr = start; addr < end; addr = next) {
+		next = pgd_addr_end(addr, end);
+
+		pgd = pgd_offset_k(addr);
+		if (pgd_none(*pgd))
+			continue;
+
+		pud = pud_offset(pgd, addr);
+		remove_pud_table(pud, addr, next, direct);
+		/*
+		 * When the PUD is folded on the PGD (three levels of paging),
+		 * the PMD page was already cleared in free_pmd_table
+		 * and the corresponding PGD==PUD entry reset there.
+		 */
+#if CONFIG_PGTABLE_LEVELS > 3
+		free_pud_table(pgd, direct);
+#endif
+	}
+
+	flush_tlb_all();
+}
+
+
+#endif /* CONFIG_MEMORY_HOTREMOVE */
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
 /*
  * Check whether a kernel address is valid (derived from arch/x86/).
  */
@@ -731,6 +1103,9 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
 #endif	/* CONFIG_ARM64_64K_PAGES */
 void vmemmap_free(unsigned long start, unsigned long end)
 {
+#ifdef CONFIG_MEMORY_HOTREMOVE
+	remove_pagetable(start, end, false);
+#endif
 }
 #endif	/* CONFIG_SPARSEMEM_VMEMMAP */
 
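The 0xFD bookkeeping explained in the remove_pte_table() comment above deserves a concrete illustration: a vmemmap page may back page structs from several sections, so partially freed ranges are filled with PAGE_INUSE and the backing page is released only when memchr_inv() finds nothing else in it. A stand-alone sketch, where my_memchr_inv() is a local stand-in for the kernel's memchr_inv():

/* Illustration only, not part of this patch. */
#include <stdio.h>
#include <string.h>

#define MY_PAGE_SIZE	4096
#define PAGE_INUSE	0xFD

static void *my_memchr_inv(const void *s, int c, size_t n)
{
	const unsigned char *p = s;
	size_t i;

	for (i = 0; i < n; i++)
		if (p[i] != (unsigned char)c)
			return (void *)&p[i];
	return NULL;
}

int main(void)
{
	static unsigned char page[MY_PAGE_SIZE];	/* "in use": zeroes */

	/* Half of the page's page structs are freed: not enough. */
	memset(page, PAGE_INUSE, MY_PAGE_SIZE / 2);
	printf("half filled:  can free? %s\n",
	       my_memchr_inv(page, PAGE_INUSE, MY_PAGE_SIZE) ? "no" : "yes");

	/* The other half goes too: the whole page can be freed. */
	memset(page + MY_PAGE_SIZE / 2, PAGE_INUSE, MY_PAGE_SIZE / 2);
	printf("fully filled: can free? %s\n",
	       my_memchr_inv(page, PAGE_INUSE, MY_PAGE_SIZE) ? "no" : "yes");
	return 0;
}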