android_kernel_oneplus_msm8998/arch/x86/include/asm/mmu_context.h
Andy Lutomirski 937dad078f x86/mm: Give each mm TLB flush generation a unique ID
commit f39681ed0f48498b80455095376f11535feea332 upstream.

This adds two new variables to mmu_context_t: ctx_id and tlb_gen.
ctx_id uniquely identifies the mm_struct and will never be reused.
For a given mm_struct (and hence ctx_id), tlb_gen is a monotonic
count of the number of times that a TLB flush has been requested.
The pair (ctx_id, tlb_gen) can be used as an identifier for TLB
flush actions and will be used in subsequent patches to reliably
determine whether all needed TLB flushes have occurred on a given
CPU.

This patch is split out for ease of review.  By itself, it has no
real effect other than creating and updating the new variables.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Reviewed-by: Nadav Amit <nadav.amit@gmail.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/413a91c24dab3ed0caa5f4e4d017d87b0857f920.1498751203.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa@csail.mit.edu>
Reviewed-by: Matt Helsley (VMware) <matt.helsley@gmail.com>
Reviewed-by: Alexey Makhalov <amakhalov@vmware.com>
Reviewed-by: Bo Gan <ganb@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2018-07-25 10:18:21 +02:00

200 lines
5.1 KiB
C

#ifndef _ASM_X86_MMU_CONTEXT_H
#define _ASM_X86_MMU_CONTEXT_H
#include <asm/desc.h>
#include <linux/atomic.h>
#include <linux/mm_types.h>
#include <trace/events/tlb.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/paravirt.h>
#include <asm/mpx.h>
extern atomic64_t last_mm_ctx_id;
#ifndef CONFIG_PARAVIRT
static inline void paravirt_activate_mm(struct mm_struct *prev,
struct mm_struct *next)
{
}
#endif /* !CONFIG_PARAVIRT */
#ifdef CONFIG_PERF_EVENTS
extern struct static_key rdpmc_always_available;
static inline void load_mm_cr4(struct mm_struct *mm)
{
if (static_key_false(&rdpmc_always_available) ||
atomic_read(&mm->context.perf_rdpmc_allowed))
cr4_set_bits(X86_CR4_PCE);
else
cr4_clear_bits(X86_CR4_PCE);
}
#else
static inline void load_mm_cr4(struct mm_struct *mm) {}
#endif
#ifdef CONFIG_MODIFY_LDT_SYSCALL
/*
* ldt_structs can be allocated, used, and freed, but they are never
* modified while live.
*/
struct ldt_struct {
/*
* Xen requires page-aligned LDTs with special permissions. This is
* needed to prevent us from installing evil descriptors such as
* call gates. On native, we could merge the ldt_struct and LDT
* allocations, but it's not worth trying to optimize.
*/
struct desc_struct *entries;
int size;
};
/*
* Used for LDT copy/destruction.
*/
int init_new_context_ldt(struct task_struct *tsk, struct mm_struct *mm);
void destroy_context_ldt(struct mm_struct *mm);
#else /* CONFIG_MODIFY_LDT_SYSCALL */
static inline int init_new_context_ldt(struct task_struct *tsk,
struct mm_struct *mm)
{
return 0;
}
static inline void destroy_context_ldt(struct mm_struct *mm) {}
#endif
static inline void load_mm_ldt(struct mm_struct *mm)
{
#ifdef CONFIG_MODIFY_LDT_SYSCALL
struct ldt_struct *ldt;
/* lockless_dereference synchronizes with smp_store_release */
ldt = lockless_dereference(mm->context.ldt);
/*
* Any change to mm->context.ldt is followed by an IPI to all
* CPUs with the mm active. The LDT will not be freed until
* after the IPI is handled by all such CPUs. This means that,
* if the ldt_struct changes before we return, the values we see
* will be safe, and the new values will be loaded before we run
* any user code.
*
* NB: don't try to convert this to use RCU without extreme care.
* We would still need IRQs off, because we don't want to change
* the local LDT after an IPI loaded a newer value than the one
* that we can see.
*/
if (unlikely(ldt))
set_ldt(ldt->entries, ldt->size);
else
clear_LDT();
#else
clear_LDT();
#endif
DEBUG_LOCKS_WARN_ON(preemptible());
}
static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
{
if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
this_cpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
}
static inline int init_new_context(struct task_struct *tsk,
struct mm_struct *mm)
{
mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id);
init_new_context_ldt(tsk, mm);
return 0;
}
static inline void destroy_context(struct mm_struct *mm)
{
destroy_context_ldt(mm);
}
extern void switch_mm(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk);
extern void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk);
#define switch_mm_irqs_off switch_mm_irqs_off
#define activate_mm(prev, next) \
do { \
paravirt_activate_mm((prev), (next)); \
switch_mm((prev), (next), NULL); \
} while (0);
#ifdef CONFIG_X86_32
#define deactivate_mm(tsk, mm) \
do { \
lazy_load_gs(0); \
} while (0)
#else
#define deactivate_mm(tsk, mm) \
do { \
load_gs_index(0); \
loadsegment(fs, 0); \
} while (0)
#endif
static inline void arch_dup_mmap(struct mm_struct *oldmm,
struct mm_struct *mm)
{
paravirt_arch_dup_mmap(oldmm, mm);
}
static inline void arch_exit_mmap(struct mm_struct *mm)
{
paravirt_arch_exit_mmap(mm);
}
#ifdef CONFIG_X86_64
static inline bool is_64bit_mm(struct mm_struct *mm)
{
return !config_enabled(CONFIG_IA32_EMULATION) ||
!(mm->context.ia32_compat == TIF_IA32);
}
#else
static inline bool is_64bit_mm(struct mm_struct *mm)
{
return false;
}
#endif
static inline void arch_bprm_mm_init(struct mm_struct *mm,
struct vm_area_struct *vma)
{
mpx_mm_init(mm);
}
static inline void arch_unmap(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
/*
* mpx_notify_unmap() goes and reads a rarely-hot
* cacheline in the mm_struct. That can be expensive
* enough to be seen in profiles.
*
* The mpx_notify_unmap() call and its contents have been
* observed to affect munmap() performance on hardware
* where MPX is not present.
*
* The unlikely() optimizes for the fast case: no MPX
* in the CPU, or no MPX use in the process. Even if
* we get this wrong (in the unlikely event that MPX
* is widely enabled on some system) the overhead of
* MPX itself (reading bounds tables) is expected to
* overwhelm the overhead of getting this unlikely()
* consistently wrong.
*/
if (unlikely(cpu_feature_enabled(X86_FEATURE_MPX)))
mpx_notify_unmap(mm, vma, start, end);
}
#endif /* _ASM_X86_MMU_CONTEXT_H */