android_kernel_oneplus_msm8998/arch/x86/kernel/process.c
Greg Kroah-Hartman 4b2d6badbc This is the 4.4.144 stable release
-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEEZH8oZUiU471FcZm+ONu9yGCSaT4FAltYMlwACgkQONu9yGCS
 aT5ZmxAAjAWUndXt7fTUyHgxkoG61sEkdX4jcsp6NFwQMudU0UHx4/kcZE+HdMjL
 VU8BZtdUg+jMLXM4erVBpQRKY9YHIPi8nWMTm1UjduMCxVD6dVL1HU6/RXl1cYIx
 rf/opYOimqT9lYCeffmd9ai2zEEJKSt7/avddcJY4qHiqLan27gbUdAq2H26aM/5
 LUzAaSBzhq3VYo9Q5zv03b1+tORAxh2BIffZjGEFe8SQQl1o63WqwV4RxEhV/Bjt
 hBgl/6B/+EHtQnYnbnoOT/an9Ma15ik4/z3vVv6yRLNK+hS5T31OKcYCsUrjp6O+
 TQVaVLWWmn/VpIHAMkrhBs9Xxg5GmRziF77AkzyC506tK268M2+IoY77ursVl1YK
 STaOwUcLUlKLbl5OADqMpYtNU9ybkP+MmgDZsIEXz9UiCZM721fL5Au2PHuzaYOD
 2nE2EQb04It4k9GN8FStv2KPIiKUCEXi9MlNsHGPs6Mc+fliIigoKPhpU5JG+sxR
 eJgPMNv4OWhwXWTd1wf0Gy5X+i0lQlwlGgIHFfSB8vzArJ0Y/yuPj2a6xhQshOza
 Ivq7JudHvxYxhDSWYoCKgtTgzMdSBbJ3xjOoUUHy4ryamYeyaMvgFjsaCTMr0dsw
 76BkgNTbpsip+I77a9h4Ozlk5QE7h61EsqjmZBkGVqLYjrUQ/IU=
 =X4tZ
 -----END PGP SIGNATURE-----

Merge 4.4.144 into android-4.4

Changes in 4.4.144
	KVM/Eventfd: Avoid crash when assign and deassign specific eventfd in parallel.
	x86/MCE: Remove min interval polling limitation
	fat: fix memory allocation failure handling of match_strdup()
	ALSA: rawmidi: Change resized buffers atomically
	ARC: Fix CONFIG_SWAP
	ARC: mm: allow mprotect to make stack mappings executable
	mm: memcg: fix use after free in mem_cgroup_iter()
	ipv4: Return EINVAL when ping_group_range sysctl doesn't map to user ns
	ipv6: fix useless rol32 call on hash
	lib/rhashtable: consider param->min_size when setting initial table size
	net/ipv4: Set oif in fib_compute_spec_dst
	net: phy: fix flag masking in __set_phy_supported
	ptp: fix missing break in switch
	tg3: Add higher cpu clock for 5762.
	net: Don't copy pfmemalloc flag in __copy_skb_header()
	skbuff: Unconditionally copy pfmemalloc in __skb_clone()
	xhci: Fix perceived dead host due to runtime suspend race with event handler
	x86/paravirt: Make native_save_fl() extern inline
	x86/cpufeatures: Add CPUID_7_EDX CPUID leaf
	x86/cpufeatures: Add Intel feature bits for Speculation Control
	x86/cpufeatures: Add AMD feature bits for Speculation Control
	x86/msr: Add definitions for new speculation control MSRs
	x86/pti: Do not enable PTI on CPUs which are not vulnerable to Meltdown
	x86/cpufeature: Blacklist SPEC_CTRL/PRED_CMD on early Spectre v2 microcodes
	x86/speculation: Add basic IBPB (Indirect Branch Prediction Barrier) support
	x86/cpufeatures: Clean up Spectre v2 related CPUID flags
	x86/cpuid: Fix up "virtual" IBRS/IBPB/STIBP feature bits on Intel
	x86/pti: Mark constant arrays as __initconst
	x86/asm/entry/32: Simplify pushes of zeroed pt_regs->REGs
	x86/entry/64/compat: Clear registers for compat syscalls, to reduce speculation attack surface
	x86/speculation: Update Speculation Control microcode blacklist
	x86/speculation: Correct Speculation Control microcode blacklist again
	x86/speculation: Clean up various Spectre related details
	x86/speculation: Fix up array_index_nospec_mask() asm constraint
	x86/speculation: Add <asm/msr-index.h> dependency
	x86/xen: Zero MSR_IA32_SPEC_CTRL before suspend
	x86/mm: Factor out LDT init from context init
	x86/mm: Give each mm TLB flush generation a unique ID
	x86/speculation: Use Indirect Branch Prediction Barrier in context switch
	x86/spectre_v2: Don't check microcode versions when running under hypervisors
	x86/speculation: Use IBRS if available before calling into firmware
	x86/speculation: Move firmware_restrict_branch_speculation_*() from C to CPP
	x86/speculation: Remove Skylake C2 from Speculation Control microcode blacklist
	selftest/seccomp: Fix the flag name SECCOMP_FILTER_FLAG_TSYNC
	selftest/seccomp: Fix the seccomp(2) signature
	xen: set cpu capabilities from xen_start_kernel()
	x86/amd: don't set X86_BUG_SYSRET_SS_ATTRS when running under Xen
	x86/nospec: Simplify alternative_msr_write()
	x86/bugs: Concentrate bug detection into a separate function
	x86/bugs: Concentrate bug reporting into a separate function
	x86/bugs: Read SPEC_CTRL MSR during boot and re-use reserved bits
	x86/bugs, KVM: Support the combination of guest and host IBRS
	x86/cpu: Rename Merrifield2 to Moorefield
	x86/cpu/intel: Add Knights Mill to Intel family
	x86/bugs: Expose /sys/../spec_store_bypass
	x86/cpufeatures: Add X86_FEATURE_RDS
	x86/bugs: Provide boot parameters for the spec_store_bypass_disable mitigation
	x86/bugs/intel: Set proper CPU features and setup RDS
	x86/bugs: Whitelist allowed SPEC_CTRL MSR values
	x86/bugs/AMD: Add support to disable RDS on Fam[15, 16, 17]h if requested
	x86/speculation: Create spec-ctrl.h to avoid include hell
	prctl: Add speculation control prctls
	x86/process: Optimize TIF checks in __switch_to_xtra()
	x86/process: Correct and optimize TIF_BLOCKSTEP switch
	x86/process: Optimize TIF_NOTSC switch
	x86/process: Allow runtime control of Speculative Store Bypass
	x86/speculation: Add prctl for Speculative Store Bypass mitigation
	nospec: Allow getting/setting on non-current task
	proc: Provide details on speculation flaw mitigations
	seccomp: Enable speculation flaw mitigations
	prctl: Add force disable speculation
	seccomp: Use PR_SPEC_FORCE_DISABLE
	seccomp: Add filter flag to opt-out of SSB mitigation
	seccomp: Move speculation migitation control to arch code
	x86/speculation: Make "seccomp" the default mode for Speculative Store Bypass
	x86/bugs: Rename _RDS to _SSBD
	proc: Use underscores for SSBD in 'status'
	Documentation/spec_ctrl: Do some minor cleanups
	x86/bugs: Fix __ssb_select_mitigation() return type
	x86/bugs: Make cpu_show_common() static
	x86/bugs: Fix the parameters alignment and missing void
	x86/cpu: Make alternative_msr_write work for 32-bit code
	x86/speculation: Use synthetic bits for IBRS/IBPB/STIBP
	x86/cpufeatures: Disentangle MSR_SPEC_CTRL enumeration from IBRS
	x86/cpufeatures: Disentangle SSBD enumeration
	x86/cpu/AMD: Fix erratum 1076 (CPB bit)
	x86/cpufeatures: Add FEATURE_ZEN
	x86/speculation: Handle HT correctly on AMD
	x86/bugs, KVM: Extend speculation control for VIRT_SPEC_CTRL
	x86/speculation: Add virtualized speculative store bypass disable support
	x86/speculation: Rework speculative_store_bypass_update()
	x86/bugs: Unify x86_spec_ctrl_{set_guest, restore_host}
	x86/bugs: Expose x86_spec_ctrl_base directly
	x86/bugs: Remove x86_spec_ctrl_set()
	x86/bugs: Rework spec_ctrl base and mask logic
	x86/speculation, KVM: Implement support for VIRT_SPEC_CTRL/LS_CFG
	x86/bugs: Rename SSBD_NO to SSB_NO
	x86/xen: Add call of speculative_store_bypass_ht_init() to PV paths
	x86/cpu: Re-apply forced caps every time CPU caps are re-read
	block: do not use interruptible wait anywhere
	clk: tegra: Fix PLL_U post divider and initial rate on Tegra30
	ubi: Introduce vol_ignored()
	ubi: Rework Fastmap attach base code
	ubi: Be more paranoid while seaching for the most recent Fastmap
	ubi: Fix races around ubi_refill_pools()
	ubi: Fix Fastmap's update_vol()
	ubi: fastmap: Erase outdated anchor PEBs during attach
	Linux 4.4.144

Change-Id: Ia3e9b2b7bc653cba68b76878d34f8fcbbc007a13
Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
2018-07-31 20:18:19 +02:00

694 lines
16 KiB
C

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/prctl.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/pm.h>
#include <linux/tick.h>
#include <linux/random.h>
#include <linux/user-return-notifier.h>
#include <linux/dmi.h>
#include <linux/utsname.h>
#include <linux/stackprotector.h>
#include <linux/tick.h>
#include <linux/cpuidle.h>
#include <trace/events/power.h>
#include <linux/hw_breakpoint.h>
#include <asm/cpu.h>
#include <asm/apic.h>
#include <asm/syscalls.h>
#include <asm/idle.h>
#include <asm/uaccess.h>
#include <asm/mwait.h>
#include <asm/fpu/internal.h>
#include <asm/debugreg.h>
#include <asm/nmi.h>
#include <asm/tlbflush.h>
#include <asm/mce.h>
#include <asm/vm86.h>
#include <asm/spec-ctrl.h>
/*
* per-CPU TSS segments. Threads are completely 'soft' on Linux,
* no more per-task TSS's. The TSS size is kept cacheline-aligned
* so they are allowed to end up in the .data..cacheline_aligned
* section. Since TSS's are completely CPU-local, we want them
* on exact cacheline boundaries, to eliminate cacheline ping-pong.
*/
__visible DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(struct tss_struct, cpu_tss) = {
.x86_tss = {
.sp0 = TOP_OF_INIT_STACK,
#ifdef CONFIG_X86_32
.ss0 = __KERNEL_DS,
.ss1 = __KERNEL_CS,
.io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
#endif
},
#ifdef CONFIG_X86_32
/*
* Note that the .io_bitmap member must be extra-big. This is because
* the CPU will access an additional byte beyond the end of the IO
* permission bitmap. The extra byte must be all 1 bits, and must
* be within the limit.
*/
.io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 },
#endif
};
EXPORT_PER_CPU_SYMBOL(cpu_tss);
#ifdef CONFIG_X86_64
static DEFINE_PER_CPU(unsigned char, is_idle);
#endif
/*
* this gets called so that we can store lazy state into memory and copy the
* current task into the new thread.
*/
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
memcpy(dst, src, arch_task_struct_size);
#ifdef CONFIG_VM86
dst->thread.vm86 = NULL;
#endif
return fpu__copy(&dst->thread.fpu, &src->thread.fpu);
}
/*
* Free current thread data structures etc..
*/
void exit_thread(struct task_struct *tsk)
{
struct thread_struct *t = &tsk->thread;
unsigned long *bp = t->io_bitmap_ptr;
struct fpu *fpu = &t->fpu;
if (bp) {
struct tss_struct *tss = &per_cpu(cpu_tss, get_cpu());
t->io_bitmap_ptr = NULL;
clear_thread_flag(TIF_IO_BITMAP);
/*
* Careful, clear this in the TSS too:
*/
memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
t->io_bitmap_max = 0;
put_cpu();
kfree(bp);
}
free_vm86(t);
fpu__drop(fpu);
}
void flush_thread(void)
{
struct task_struct *tsk = current;
flush_ptrace_hw_breakpoint(tsk);
memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
fpu__clear(&tsk->thread.fpu);
}
void disable_TSC(void)
{
preempt_disable();
if (!test_and_set_thread_flag(TIF_NOTSC))
/*
* Must flip the CPU state synchronously with
* TIF_NOTSC in the current running context.
*/
cr4_set_bits(X86_CR4_TSD);
preempt_enable();
}
static void enable_TSC(void)
{
preempt_disable();
if (test_and_clear_thread_flag(TIF_NOTSC))
/*
* Must flip the CPU state synchronously with
* TIF_NOTSC in the current running context.
*/
cr4_clear_bits(X86_CR4_TSD);
preempt_enable();
}
int get_tsc_mode(unsigned long adr)
{
unsigned int val;
if (test_thread_flag(TIF_NOTSC))
val = PR_TSC_SIGSEGV;
else
val = PR_TSC_ENABLE;
return put_user(val, (unsigned int __user *)adr);
}
int set_tsc_mode(unsigned int val)
{
if (val == PR_TSC_SIGSEGV)
disable_TSC();
else if (val == PR_TSC_ENABLE)
enable_TSC();
else
return -EINVAL;
return 0;
}
static inline void switch_to_bitmap(struct tss_struct *tss,
struct thread_struct *prev,
struct thread_struct *next,
unsigned long tifp, unsigned long tifn)
{
if (tifn & _TIF_IO_BITMAP) {
/*
* Copy the relevant range of the IO bitmap.
* Normally this is 128 bytes or less:
*/
memcpy(tss->io_bitmap, next->io_bitmap_ptr,
max(prev->io_bitmap_max, next->io_bitmap_max));
} else if (tifp & _TIF_IO_BITMAP) {
/*
* Clear any possible leftover bits:
*/
memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
}
}
#ifdef CONFIG_SMP
struct ssb_state {
struct ssb_state *shared_state;
raw_spinlock_t lock;
unsigned int disable_state;
unsigned long local_state;
};
#define LSTATE_SSB 0
static DEFINE_PER_CPU(struct ssb_state, ssb_state);
void speculative_store_bypass_ht_init(void)
{
struct ssb_state *st = this_cpu_ptr(&ssb_state);
unsigned int this_cpu = smp_processor_id();
unsigned int cpu;
st->local_state = 0;
/*
* Shared state setup happens once on the first bringup
* of the CPU. It's not destroyed on CPU hotunplug.
*/
if (st->shared_state)
return;
raw_spin_lock_init(&st->lock);
/*
* Go over HT siblings and check whether one of them has set up the
* shared state pointer already.
*/
for_each_cpu(cpu, topology_sibling_cpumask(this_cpu)) {
if (cpu == this_cpu)
continue;
if (!per_cpu(ssb_state, cpu).shared_state)
continue;
/* Link it to the state of the sibling: */
st->shared_state = per_cpu(ssb_state, cpu).shared_state;
return;
}
/*
* First HT sibling to come up on the core. Link shared state of
* the first HT sibling to itself. The siblings on the same core
* which come up later will see the shared state pointer and link
* themself to the state of this CPU.
*/
st->shared_state = st;
}
/*
* Logic is: First HT sibling enables SSBD for both siblings in the core
* and last sibling to disable it, disables it for the whole core. This how
* MSR_SPEC_CTRL works in "hardware":
*
* CORE_SPEC_CTRL = THREAD0_SPEC_CTRL | THREAD1_SPEC_CTRL
*/
static __always_inline void amd_set_core_ssb_state(unsigned long tifn)
{
struct ssb_state *st = this_cpu_ptr(&ssb_state);
u64 msr = x86_amd_ls_cfg_base;
if (!static_cpu_has(X86_FEATURE_ZEN)) {
msr |= ssbd_tif_to_amd_ls_cfg(tifn);
wrmsrl(MSR_AMD64_LS_CFG, msr);
return;
}
if (tifn & _TIF_SSBD) {
/*
* Since this can race with prctl(), block reentry on the
* same CPU.
*/
if (__test_and_set_bit(LSTATE_SSB, &st->local_state))
return;
msr |= x86_amd_ls_cfg_ssbd_mask;
raw_spin_lock(&st->shared_state->lock);
/* First sibling enables SSBD: */
if (!st->shared_state->disable_state)
wrmsrl(MSR_AMD64_LS_CFG, msr);
st->shared_state->disable_state++;
raw_spin_unlock(&st->shared_state->lock);
} else {
if (!__test_and_clear_bit(LSTATE_SSB, &st->local_state))
return;
raw_spin_lock(&st->shared_state->lock);
st->shared_state->disable_state--;
if (!st->shared_state->disable_state)
wrmsrl(MSR_AMD64_LS_CFG, msr);
raw_spin_unlock(&st->shared_state->lock);
}
}
#else
static __always_inline void amd_set_core_ssb_state(unsigned long tifn)
{
u64 msr = x86_amd_ls_cfg_base | ssbd_tif_to_amd_ls_cfg(tifn);
wrmsrl(MSR_AMD64_LS_CFG, msr);
}
#endif
static __always_inline void amd_set_ssb_virt_state(unsigned long tifn)
{
/*
* SSBD has the same definition in SPEC_CTRL and VIRT_SPEC_CTRL,
* so ssbd_tif_to_spec_ctrl() just works.
*/
wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, ssbd_tif_to_spec_ctrl(tifn));
}
static __always_inline void intel_set_ssb_state(unsigned long tifn)
{
u64 msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn);
wrmsrl(MSR_IA32_SPEC_CTRL, msr);
}
static __always_inline void __speculative_store_bypass_update(unsigned long tifn)
{
if (static_cpu_has(X86_FEATURE_VIRT_SSBD))
amd_set_ssb_virt_state(tifn);
else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD))
amd_set_core_ssb_state(tifn);
else
intel_set_ssb_state(tifn);
}
void speculative_store_bypass_update(unsigned long tif)
{
preempt_disable();
__speculative_store_bypass_update(tif);
preempt_enable();
}
void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
struct tss_struct *tss)
{
struct thread_struct *prev, *next;
unsigned long tifp, tifn;
prev = &prev_p->thread;
next = &next_p->thread;
tifn = READ_ONCE(task_thread_info(next_p)->flags);
tifp = READ_ONCE(task_thread_info(prev_p)->flags);
switch_to_bitmap(tss, prev, next, tifp, tifn);
propagate_user_return_notify(prev_p, next_p);
if ((tifp & _TIF_BLOCKSTEP || tifn & _TIF_BLOCKSTEP) &&
arch_has_block_step()) {
unsigned long debugctl, msk;
rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
debugctl &= ~DEBUGCTLMSR_BTF;
msk = tifn & _TIF_BLOCKSTEP;
debugctl |= (msk >> TIF_BLOCKSTEP) << DEBUGCTLMSR_BTF_SHIFT;
wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
}
if ((tifp ^ tifn) & _TIF_NOTSC)
cr4_toggle_bits(X86_CR4_TSD);
if ((tifp ^ tifn) & _TIF_SSBD)
__speculative_store_bypass_update(tifn);
}
/*
* Idle related variables and functions
*/
unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE;
EXPORT_SYMBOL(boot_option_idle_override);
static void (*x86_idle)(void);
#ifndef CONFIG_SMP
static inline void play_dead(void)
{
BUG();
}
#endif
#ifdef CONFIG_X86_64
void enter_idle(void)
{
this_cpu_write(is_idle, 1);
idle_notifier_call_chain(IDLE_START);
}
static void __exit_idle(void)
{
if (x86_test_and_clear_bit_percpu(0, is_idle) == 0)
return;
idle_notifier_call_chain(IDLE_END);
}
/* Called from interrupts to signify idle end */
void exit_idle(void)
{
/* idle loop has pid 0 */
if (current->pid)
return;
__exit_idle();
}
#endif
void arch_cpu_idle_enter(void)
{
local_touch_nmi();
enter_idle();
}
void arch_cpu_idle_exit(void)
{
__exit_idle();
}
void arch_cpu_idle_dead(void)
{
play_dead();
}
/*
* Called from the generic idle code.
*/
void arch_cpu_idle(void)
{
x86_idle();
}
/*
* We use this if we don't have any better idle routine..
*/
void default_idle(void)
{
trace_cpu_idle_rcuidle(1, smp_processor_id());
safe_halt();
trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
}
#ifdef CONFIG_APM_MODULE
EXPORT_SYMBOL(default_idle);
#endif
#ifdef CONFIG_XEN
bool xen_set_default_idle(void)
{
bool ret = !!x86_idle;
x86_idle = default_idle;
return ret;
}
#endif
void stop_this_cpu(void *dummy)
{
local_irq_disable();
/*
* Remove this CPU:
*/
set_cpu_online(smp_processor_id(), false);
disable_local_APIC();
mcheck_cpu_clear(this_cpu_ptr(&cpu_info));
for (;;)
halt();
}
bool amd_e400_c1e_detected;
EXPORT_SYMBOL(amd_e400_c1e_detected);
static cpumask_var_t amd_e400_c1e_mask;
void amd_e400_remove_cpu(int cpu)
{
if (amd_e400_c1e_mask != NULL)
cpumask_clear_cpu(cpu, amd_e400_c1e_mask);
}
/*
* AMD Erratum 400 aware idle routine. We check for C1E active in the interrupt
* pending message MSR. If we detect C1E, then we handle it the same
* way as C3 power states (local apic timer and TSC stop)
*/
static void amd_e400_idle(void)
{
if (!amd_e400_c1e_detected) {
u32 lo, hi;
rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi);
if (lo & K8_INTP_C1E_ACTIVE_MASK) {
amd_e400_c1e_detected = true;
if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
mark_tsc_unstable("TSC halt in AMD C1E");
pr_info("System has AMD C1E enabled\n");
}
}
if (amd_e400_c1e_detected) {
int cpu = smp_processor_id();
if (!cpumask_test_cpu(cpu, amd_e400_c1e_mask)) {
cpumask_set_cpu(cpu, amd_e400_c1e_mask);
/* Force broadcast so ACPI can not interfere. */
tick_broadcast_force();
pr_info("Switch to broadcast mode on CPU%d\n", cpu);
}
tick_broadcast_enter();
default_idle();
/*
* The switch back from broadcast mode needs to be
* called with interrupts disabled.
*/
local_irq_disable();
tick_broadcast_exit();
local_irq_enable();
} else
default_idle();
}
/*
* Intel Core2 and older machines prefer MWAIT over HALT for C1.
* We can't rely on cpuidle installing MWAIT, because it will not load
* on systems that support only C1 -- so the boot default must be MWAIT.
*
* Some AMD machines are the opposite, they depend on using HALT.
*
* So for default C1, which is used during boot until cpuidle loads,
* use MWAIT-C1 on Intel HW that has it, else use HALT.
*/
static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c)
{
if (c->x86_vendor != X86_VENDOR_INTEL)
return 0;
if (!cpu_has(c, X86_FEATURE_MWAIT))
return 0;
return 1;
}
/*
* MONITOR/MWAIT with no hints, used for default C1 state. This invokes MWAIT
* with interrupts enabled and no flags, which is backwards compatible with the
* original MWAIT implementation.
*/
static void mwait_idle(void)
{
if (!current_set_polling_and_test()) {
trace_cpu_idle_rcuidle(1, smp_processor_id());
if (this_cpu_has(X86_BUG_CLFLUSH_MONITOR)) {
smp_mb(); /* quirk */
clflush((void *)&current_thread_info()->flags);
smp_mb(); /* quirk */
}
__monitor((void *)&current_thread_info()->flags, 0, 0);
if (!need_resched())
__sti_mwait(0, 0);
else
local_irq_enable();
trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
} else {
local_irq_enable();
}
__current_clr_polling();
}
void select_idle_routine(const struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
if (boot_option_idle_override == IDLE_POLL && smp_num_siblings > 1)
pr_warn_once("WARNING: polling idle and HT enabled, performance may degrade\n");
#endif
if (x86_idle || boot_option_idle_override == IDLE_POLL)
return;
if (cpu_has_bug(c, X86_BUG_AMD_APIC_C1E)) {
/* E400: APIC timer interrupt does not wake up CPU from C1e */
pr_info("using AMD E400 aware idle routine\n");
x86_idle = amd_e400_idle;
} else if (prefer_mwait_c1_over_halt(c)) {
pr_info("using mwait in idle threads\n");
x86_idle = mwait_idle;
} else
x86_idle = default_idle;
}
void __init init_amd_e400_c1e_mask(void)
{
/* If we're using amd_e400_idle, we need to allocate amd_e400_c1e_mask. */
if (x86_idle == amd_e400_idle)
zalloc_cpumask_var(&amd_e400_c1e_mask, GFP_KERNEL);
}
static int __init idle_setup(char *str)
{
if (!str)
return -EINVAL;
if (!strcmp(str, "poll")) {
pr_info("using polling idle threads\n");
boot_option_idle_override = IDLE_POLL;
cpu_idle_poll_ctrl(true);
} else if (!strcmp(str, "halt")) {
/*
* When the boot option of idle=halt is added, halt is
* forced to be used for CPU idle. In such case CPU C2/C3
* won't be used again.
* To continue to load the CPU idle driver, don't touch
* the boot_option_idle_override.
*/
x86_idle = default_idle;
boot_option_idle_override = IDLE_HALT;
} else if (!strcmp(str, "nomwait")) {
/*
* If the boot option of "idle=nomwait" is added,
* it means that mwait will be disabled for CPU C2/C3
* states. In such case it won't touch the variable
* of boot_option_idle_override.
*/
boot_option_idle_override = IDLE_NOMWAIT;
} else
return -1;
return 0;
}
early_param("idle", idle_setup);
unsigned long arch_align_stack(unsigned long sp)
{
if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
sp -= get_random_int() % 8192;
return sp & ~0xf;
}
unsigned long arch_randomize_brk(struct mm_struct *mm)
{
unsigned long range_end = mm->brk + 0x02000000;
return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
}
/*
* Called from fs/proc with a reference on @p to find the function
* which called into schedule(). This needs to be done carefully
* because the task might wake up and we might look at a stack
* changing under us.
*/
unsigned long get_wchan(struct task_struct *p)
{
unsigned long start, bottom, top, sp, fp, ip;
int count = 0;
if (!p || p == current || p->state == TASK_RUNNING)
return 0;
start = (unsigned long)task_stack_page(p);
if (!start)
return 0;
/*
* Layout of the stack page:
*
* ----------- topmax = start + THREAD_SIZE - sizeof(unsigned long)
* PADDING
* ----------- top = topmax - TOP_OF_KERNEL_STACK_PADDING
* stack
* ----------- bottom = start + sizeof(thread_info)
* thread_info
* ----------- start
*
* The tasks stack pointer points at the location where the
* framepointer is stored. The data on the stack is:
* ... IP FP ... IP FP
*
* We need to read FP and IP, so we need to adjust the upper
* bound by another unsigned long.
*/
top = start + THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING;
top -= 2 * sizeof(unsigned long);
bottom = start + sizeof(struct thread_info);
sp = READ_ONCE(p->thread.sp);
if (sp < bottom || sp > top)
return 0;
fp = READ_ONCE_NOCHECK(*(unsigned long *)sp);
do {
if (fp < bottom || fp > top)
return 0;
ip = READ_ONCE_NOCHECK(*(unsigned long *)(fp + sizeof(unsigned long)));
if (!in_sched_functions(ip))
return ip;
fp = READ_ONCE_NOCHECK(*(unsigned long *)fp);
} while (count++ < 16 && p->state != TASK_RUNNING);
return 0;
}