/*
 * Re-map IO memory to kernel address space so that we can access it.
 * This is needed for high PCI addresses that aren't mapped in the
 * 640k-1MB IO memory area on PC's
 *
 * (C) Copyright 1995 1996 Linus Torvalds
 */
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/io.h>
#include <linux/export.h>
#include <asm/cacheflush.h>
#include <asm/pgtable.h>
|
2015-04-14 15:47:20 -07:00
|
|
|
#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
/* Capability flags, resolved once at boot by ioremap_huge_init(). */
static int __read_mostly ioremap_pud_capable;
static int __read_mostly ioremap_pmd_capable;
static int __read_mostly ioremap_huge_disabled;

/* "nohugeiomap" on the kernel command line forces 4K-only ioremap mappings. */
static int __init set_nohugeiomap(char *str)
{
	ioremap_huge_disabled = 1;
	return 0;
}
early_param("nohugeiomap", set_nohugeiomap);

/*
 * Ask the architecture once, at boot, whether it can install PUD- and
 * PMD-sized ioremap mappings.  Skipped entirely when the user passed
 * "nohugeiomap", leaving both capability flags zero.
 */
void __init ioremap_huge_init(void)
{
	if (!ioremap_huge_disabled) {
		if (arch_ioremap_pud_supported())
			ioremap_pud_capable = 1;
		if (arch_ioremap_pmd_supported())
			ioremap_pmd_capable = 1;
	}
}

static inline int ioremap_pud_enabled(void)
{
	return ioremap_pud_capable;
}

static inline int ioremap_pmd_enabled(void)
{
	return ioremap_pmd_capable;
}

#else	/* !CONFIG_HAVE_ARCH_HUGE_VMAP */
static inline int ioremap_pud_enabled(void) { return 0; }
static inline int ioremap_pmd_enabled(void) { return 0; }
#endif	/* CONFIG_HAVE_ARCH_HUGE_VMAP */
|
|
|
|
|
2006-09-30 23:29:12 -07:00
|
|
|
/*
 * Populate the PTEs covering [addr, end) under @pmd with mappings of the
 * physical range starting at @phys_addr using protection bits @prot.
 *
 * Unlike the pmd/pud level helpers, @phys_addr here is the physical address
 * corresponding to @addr itself (callers pass phys_addr + addr).
 *
 * Returns 0 on success, -ENOMEM if a PTE page could not be allocated.
 */
static int ioremap_pte_range(pmd_t *pmd, unsigned long addr,
		unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
{
	pte_t *pte;
	u64 pfn;

	pfn = phys_addr >> PAGE_SHIFT;
	pte = pte_alloc_kernel(pmd, addr);
	if (!pte)
		return -ENOMEM;
	do {
		/* The range being ioremapped must not already be mapped. */
		BUG_ON(!pte_none(*pte));
		set_pte_at(&init_mm, addr, pte, pfn_pte(pfn, prot));
		pfn++;
	} while (pte++, addr += PAGE_SIZE, addr != end);
	return 0;
}
|
|
|
|
|
|
|
|
/*
 * Populate the PMDs covering [addr, end) under @pud, mapping the physical
 * range starting at @phys_addr with protection @prot.
 *
 * When the architecture supports it and a PMD-sized chunk is both virtually
 * and physically aligned, install a huge PMD mapping instead of descending
 * to PTEs.  pmd_free_pte_page() must first clear the PMD and free any stale
 * PTE page left behind by a previous smaller mapping of the same range,
 * otherwise the old PTE page would be leaked (and stale TLB entries could
 * cause faults on some architectures).
 *
 * Returns 0 on success, -ENOMEM on page table allocation failure.
 */
static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,
		unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
{
	pmd_t *pmd;
	unsigned long next;

	/* Bias so that (phys_addr + addr) yields the physical address. */
	phys_addr -= addr;
	pmd = pmd_alloc(&init_mm, pud, addr);
	if (!pmd)
		return -ENOMEM;
	do {
		next = pmd_addr_end(addr, end);

		if (ioremap_pmd_enabled() &&
		    ((next - addr) == PMD_SIZE) &&
		    IS_ALIGNED(phys_addr + addr, PMD_SIZE) &&
		    pmd_free_pte_page(pmd, addr)) {
			if (pmd_set_huge(pmd, phys_addr + addr, prot))
				continue;
			/* Huge mapping refused: fall back to PTE mappings. */
		}

		if (ioremap_pte_range(pmd, addr, next, phys_addr + addr, prot))
			return -ENOMEM;
	} while (pmd++, addr = next, addr != end);
	return 0;
}
|
|
|
|
|
|
|
|
/*
 * Populate the PUDs covering [addr, end) under @pgd, mapping the physical
 * range starting at @phys_addr with protection @prot.
 *
 * Mirrors ioremap_pmd_range() one level up: when a PUD-sized chunk is
 * suitably aligned and the architecture supports it, install a huge PUD
 * mapping, after pud_free_pmd_page() has cleared the entry and freed any
 * lower-level page table pages left over from a previous mapping.
 *
 * Returns 0 on success, -ENOMEM on page table allocation failure.
 */
static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr,
		unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
{
	pud_t *pud;
	unsigned long next;

	/* Bias so that (phys_addr + addr) yields the physical address. */
	phys_addr -= addr;
	pud = pud_alloc(&init_mm, pgd, addr);
	if (!pud)
		return -ENOMEM;
	do {
		next = pud_addr_end(addr, end);

		if (ioremap_pud_enabled() &&
		    ((next - addr) == PUD_SIZE) &&
		    IS_ALIGNED(phys_addr + addr, PUD_SIZE) &&
		    pud_free_pmd_page(pud, addr)) {
			if (pud_set_huge(pud, phys_addr + addr, prot))
				continue;
			/* Huge mapping refused: fall back to PMD level. */
		}

		if (ioremap_pmd_range(pud, addr, next, phys_addr + addr, prot))
			return -ENOMEM;
	} while (pud++, addr = next, addr != end);
	return 0;
}
|
|
|
|
|
|
|
|
/*
 * ioremap_page_range - map a physical range into the kernel page tables
 * @addr:	start of the virtual range to map
 * @end:	end of the virtual range (exclusive)
 * @phys_addr:	physical address to map @addr to
 * @prot:	page protection bits for the mapping
 *
 * Walks init_mm's page tables from the PGD down, delegating each level to
 * the helpers above, then flushes the cache over the new virtual range.
 *
 * Returns 0 on success or a negative errno (-ENOMEM) on allocation failure;
 * on failure, entries created so far are left in place for the caller's
 * unmap path to tear down.
 */
int ioremap_page_range(unsigned long addr,
		       unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
{
	pgd_t *pgd;
	unsigned long start;
	unsigned long next;
	int err;

	BUG_ON(addr >= end);

	start = addr;
	/* Bias so that (phys_addr + addr) yields the physical address. */
	phys_addr -= addr;
	pgd = pgd_offset_k(addr);
	do {
		next = pgd_addr_end(addr, end);
		err = ioremap_pud_range(pgd, addr, next, phys_addr+addr, prot);
		if (err)
			break;
	} while (pgd++, addr = next, addr != end);

	flush_cache_vmap(start, end);

	return err;
}
EXPORT_SYMBOL_GPL(ioremap_page_range);