kaiser: do not set _PAGE_NX on pgd_none
native_pgd_clear() uses native_set_pgd(), so native_set_pgd() must avoid setting the _PAGE_NX bit on an otherwise pgd_none() entry: usually that just generated a warning on exit, but sometimes it caused more mysterious and damaging failures (our production machines could not complete booting).

The original fix to this just avoided adding _PAGE_NX to an empty entry; but eventually more problems surfaced with kexec, and EFI mapping is expected to be a problem too. So now instead change native_set_pgd() to update shadow only if _PAGE_USER:

A few places (kernel/machine_kexec_64.c, platform/efi/efi_64.c for sure) use set_pgd() to set up a temporary internal virtual address space, with physical pages remapped at what Kaiser regards as userspace addresses: Kaiser then assumes a shadow pgd follows, which it will try to corrupt.

This appears to be responsible for the recent kexec and kdump failures; though it's unclear how those did not manifest as a problem before. Ah, the shadow pgd will only be assumed to "follow" if the requested pgd is on an even-numbered page: so I suppose it was going wrong 50% of the time all along.

What we need is a flag to set_pgd(), to tell it we're dealing with userspace. Er, isn't that what the pgd's _PAGE_USER bit is saying? Add a test for that. But we cannot do the same for pgd_clear() (which may be called to clear corrupted entries - set aside the question of "corrupt in which pgd?" until later), so there just rely on pgd_clear() not being called in the problematic cases - with a WARN_ON_ONCE() which should fire half the time if it is.

But this is getting too big for an inline function: move it into arch/x86/mm/kaiser.c (which then demands a boot/compressed mod); and de-void and de-space native_get_shadow/normal_pgd() while here.

Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
This commit is contained in:
parent
bed9bb7f3e
commit
edde73205b
3 changed files with 56 additions and 38 deletions
|
@ -9,6 +9,7 @@
|
|||
*/
|
||||
#undef CONFIG_PARAVIRT
|
||||
#undef CONFIG_PARAVIRT_SPINLOCKS
|
||||
#undef CONFIG_KAISER
|
||||
#undef CONFIG_KASAN
|
||||
|
||||
#include <linux/linkage.h>
|
||||
|
|
|
@ -107,61 +107,36 @@ static inline void native_pud_clear(pud_t *pud)
|
|||
}
|
||||
|
||||
#ifdef CONFIG_KAISER
|
||||
static inline pgd_t * native_get_shadow_pgd(pgd_t *pgdp)
|
||||
extern pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd);
|
||||
|
||||
/*
 * Translate a pointer to a pgd entry into a pointer to the matching entry
 * of the shadow pgd.
 *
 * The translation is purely arithmetic: the PAGE_SIZE bit of the address
 * is forced on, which selects the same offset within the odd page of an
 * even/odd page pair.  No check is made that a shadow pgd really exists
 * at that address; callers are responsible for only using this on pgds
 * that have one.
 */
static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp)
{
	unsigned long addr = (unsigned long)pgdp;

	return (pgd_t *)(addr | (unsigned long)PAGE_SIZE);
}
|
||||
|
||||
static inline pgd_t * native_get_normal_pgd(pgd_t *pgdp)
|
||||
/*
 * Translate a pointer to a pgd entry (normal or shadow) into a pointer to
 * the matching entry of the normal pgd.
 *
 * The translation is purely arithmetic: the PAGE_SIZE bit of the address
 * is cleared, which selects the same offset within the even page of an
 * even/odd page pair.  A pointer that already refers to the normal pgd is
 * returned unchanged.
 */
static inline pgd_t *native_get_normal_pgd(pgd_t *pgdp)
{
	unsigned long addr = (unsigned long)pgdp;

	return (pgd_t *)(addr & ~(unsigned long)PAGE_SIZE);
}
|
||||
#else
|
||||
static inline pgd_t * native_get_shadow_pgd(pgd_t *pgdp)
|
||||
/*
 * Stub used when CONFIG_KAISER is not set: nothing is mirrored anywhere
 * and @pgdp is not touched.  The value the caller should store in the pgd
 * slot is simply @pgd, returned unmodified.
 */
static inline pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd)
{
	return pgd;
}
|
||||
/*
 * Stub used when CONFIG_KAISER is not set.
 *
 * NOTE(review): the BUILD_BUG_ON(1) is presumably here so that any call
 * which survives into a !CONFIG_KAISER build is caught at build time;
 * the NULL return only satisfies the function signature.
 */
static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp)
{
	BUILD_BUG_ON(1);
	return NULL;
}
|
||||
static inline pgd_t * native_get_normal_pgd(pgd_t *pgdp)
|
||||
/*
 * Stub used when CONFIG_KAISER is not set: the pointer passed in is handed
 * back as-is, with no address arithmetic applied.
 */
static inline pgd_t *native_get_normal_pgd(pgd_t *pgdp)
{
	return pgdp;
}
|
||||
#endif /* CONFIG_KAISER */
|
||||
|
||||
/*
|
||||
* Page table pages are page-aligned. The lower half of the top
|
||||
* level is used for userspace and the top half for the kernel.
|
||||
* This returns true for user pages that need to get copied into
|
||||
* both the user and kernel copies of the page tables, and false
|
||||
* for kernel pages that should only be in the kernel copy.
|
||||
*/
|
||||
static inline bool is_userspace_pgd(void *__ptr)
|
||||
{
|
||||
unsigned long ptr = (unsigned long)__ptr;
|
||||
|
||||
return ((ptr % PAGE_SIZE) < (PAGE_SIZE / 2));
|
||||
}
|
||||
|
||||
/*
 * Store @pgd into the pgd slot at @pgdp.
 *
 * The value actually written is whatever kaiser_set_shadow_pgd() returns.
 * With CONFIG_KAISER that helper decides whether the shadow pgd must be
 * updated as well and whether _PAGE_NX has to be added to the kernel copy;
 * without CONFIG_KAISER it returns @pgd unchanged, so this is a plain
 * store.
 *
 * The decision is deliberately not open-coded here.  native_pgd_clear()
 * goes through this function, so blindly OR-ing in _PAGE_NX would turn an
 * entry that is meant to be pgd_none() into a non-empty one; and blindly
 * mirroring into the shadow pgd corrupts memory for users such as kexec
 * and EFI, which build temporary low mappings in pgds that have no shadow
 * page behind them.
 */
static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
{
	*pgdp = kaiser_set_shadow_pgd(pgdp, pgd);
}
|
||||
|
||||
static inline void native_pgd_clear(pgd_t *pgd)
|
||||
|
|
|
@ -303,4 +303,46 @@ void kaiser_remove_mapping(unsigned long start, unsigned long size)
|
|||
unmap_pud_range_nofree(pgd, addr, end);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Page table pages are page-aligned. The lower half of the top
|
||||
* level is used for userspace and the top half for the kernel.
|
||||
* This returns true for user pages that need to get copied into
|
||||
* both the user and kernel copies of the page tables, and false
|
||||
* for kernel pages that should only be in the kernel copy.
|
||||
*/
|
||||
static inline bool is_userspace_pgd(pgd_t *pgdp)
|
||||
{
|
||||
return ((unsigned long)pgdp % PAGE_SIZE) < (PAGE_SIZE / 2);
|
||||
}
|
||||
|
||||
/*
 * Work out what native_set_pgd() should store at @pgdp, updating the
 * shadow pgd on the way when that is appropriate.
 *
 * @pgdp: slot in the normal pgd that is about to be written
 * @pgd:  value the caller wants to install
 *
 * Returns the value to store at @pgdp: @pgd itself, with _PAGE_NX added
 * when a user entry has been mirrored into the shadow pgd.
 */
pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd)
{
	/*
	 * Only entries carrying _PAGE_USER are candidates for the shadow
	 * pgd.  Testing the bit filters out kexec and EFI, which set up
	 * temporary low mappings that must not be mirrored.
	 */
	if (pgd.pgd & _PAGE_USER) {
		if (!is_userspace_pgd(pgdp))
			return pgd;

		native_get_shadow_pgd(pgdp)->pgd = pgd.pgd;
		/*
		 * The kernel copy of a user mapping is made non-executable,
		 * so that userspace crashes instead of running should we
		 * ever get out to it while still on the kernel CR3.
		 */
		pgd.pgd |= _PAGE_NX;
		return pgd;
	}

	/* A non-empty entry without _PAGE_USER is never mirrored. */
	if (pgd.pgd)
		return pgd;

	/*
	 * Empty entry, i.e. pgd_clear().  There is no _PAGE_USER bit to
	 * consult here, and pgd_clear() is even used on corrupted entries,
	 * so we simply rely on kexec, EFI and the like never calling it.
	 * Warn once, and leave the shadow alone, if @pgdp has the PAGE_SIZE
	 * bit set in its address.
	 */
	if (WARN_ON_ONCE((unsigned long)pgdp & PAGE_SIZE))
		return pgd;

	if (is_userspace_pgd(pgdp))
		native_get_shadow_pgd(pgdp)->pgd = pgd.pgd;

	return pgd;
}
|
||||
#endif /* CONFIG_KAISER */
|
||||
|
|
Loading…
Add table
Reference in a new issue