-----BEGIN PGP SIGNATURE----- iQIzBAABCAAdFiEEZH8oZUiU471FcZm+ONu9yGCSaT4FAltYMlwACgkQONu9yGCS aT5ZmxAAjAWUndXt7fTUyHgxkoG61sEkdX4jcsp6NFwQMudU0UHx4/kcZE+HdMjL VU8BZtdUg+jMLXM4erVBpQRKY9YHIPi8nWMTm1UjduMCxVD6dVL1HU6/RXl1cYIx rf/opYOimqT9lYCeffmd9ai2zEEJKSt7/avddcJY4qHiqLan27gbUdAq2H26aM/5 LUzAaSBzhq3VYo9Q5zv03b1+tORAxh2BIffZjGEFe8SQQl1o63WqwV4RxEhV/Bjt hBgl/6B/+EHtQnYnbnoOT/an9Ma15ik4/z3vVv6yRLNK+hS5T31OKcYCsUrjp6O+ TQVaVLWWmn/VpIHAMkrhBs9Xxg5GmRziF77AkzyC506tK268M2+IoY77ursVl1YK STaOwUcLUlKLbl5OADqMpYtNU9ybkP+MmgDZsIEXz9UiCZM721fL5Au2PHuzaYOD 2nE2EQb04It4k9GN8FStv2KPIiKUCEXi9MlNsHGPs6Mc+fliIigoKPhpU5JG+sxR eJgPMNv4OWhwXWTd1wf0Gy5X+i0lQlwlGgIHFfSB8vzArJ0Y/yuPj2a6xhQshOza Ivq7JudHvxYxhDSWYoCKgtTgzMdSBbJ3xjOoUUHy4ryamYeyaMvgFjsaCTMr0dsw 76BkgNTbpsip+I77a9h4Ozlk5QE7h61EsqjmZBkGVqLYjrUQ/IU= =X4tZ -----END PGP SIGNATURE----- Merge 4.4.144 into android-4.4 Changes in 4.4.144 KVM/Eventfd: Avoid crash when assign and deassign specific eventfd in parallel. x86/MCE: Remove min interval polling limitation fat: fix memory allocation failure handling of match_strdup() ALSA: rawmidi: Change resized buffers atomically ARC: Fix CONFIG_SWAP ARC: mm: allow mprotect to make stack mappings executable mm: memcg: fix use after free in mem_cgroup_iter() ipv4: Return EINVAL when ping_group_range sysctl doesn't map to user ns ipv6: fix useless rol32 call on hash lib/rhashtable: consider param->min_size when setting initial table size net/ipv4: Set oif in fib_compute_spec_dst net: phy: fix flag masking in __set_phy_supported ptp: fix missing break in switch tg3: Add higher cpu clock for 5762. 
net: Don't copy pfmemalloc flag in __copy_skb_header() skbuff: Unconditionally copy pfmemalloc in __skb_clone() xhci: Fix perceived dead host due to runtime suspend race with event handler x86/paravirt: Make native_save_fl() extern inline x86/cpufeatures: Add CPUID_7_EDX CPUID leaf x86/cpufeatures: Add Intel feature bits for Speculation Control x86/cpufeatures: Add AMD feature bits for Speculation Control x86/msr: Add definitions for new speculation control MSRs x86/pti: Do not enable PTI on CPUs which are not vulnerable to Meltdown x86/cpufeature: Blacklist SPEC_CTRL/PRED_CMD on early Spectre v2 microcodes x86/speculation: Add basic IBPB (Indirect Branch Prediction Barrier) support x86/cpufeatures: Clean up Spectre v2 related CPUID flags x86/cpuid: Fix up "virtual" IBRS/IBPB/STIBP feature bits on Intel x86/pti: Mark constant arrays as __initconst x86/asm/entry/32: Simplify pushes of zeroed pt_regs->REGs x86/entry/64/compat: Clear registers for compat syscalls, to reduce speculation attack surface x86/speculation: Update Speculation Control microcode blacklist x86/speculation: Correct Speculation Control microcode blacklist again x86/speculation: Clean up various Spectre related details x86/speculation: Fix up array_index_nospec_mask() asm constraint x86/speculation: Add <asm/msr-index.h> dependency x86/xen: Zero MSR_IA32_SPEC_CTRL before suspend x86/mm: Factor out LDT init from context init x86/mm: Give each mm TLB flush generation a unique ID x86/speculation: Use Indirect Branch Prediction Barrier in context switch x86/spectre_v2: Don't check microcode versions when running under hypervisors x86/speculation: Use IBRS if available before calling into firmware x86/speculation: Move firmware_restrict_branch_speculation_*() from C to CPP x86/speculation: Remove Skylake C2 from Speculation Control microcode blacklist selftest/seccomp: Fix the flag name SECCOMP_FILTER_FLAG_TSYNC selftest/seccomp: Fix the seccomp(2) signature xen: set cpu capabilities from 
xen_start_kernel() x86/amd: don't set X86_BUG_SYSRET_SS_ATTRS when running under Xen x86/nospec: Simplify alternative_msr_write() x86/bugs: Concentrate bug detection into a separate function x86/bugs: Concentrate bug reporting into a separate function x86/bugs: Read SPEC_CTRL MSR during boot and re-use reserved bits x86/bugs, KVM: Support the combination of guest and host IBRS x86/cpu: Rename Merrifield2 to Moorefield x86/cpu/intel: Add Knights Mill to Intel family x86/bugs: Expose /sys/../spec_store_bypass x86/cpufeatures: Add X86_FEATURE_RDS x86/bugs: Provide boot parameters for the spec_store_bypass_disable mitigation x86/bugs/intel: Set proper CPU features and setup RDS x86/bugs: Whitelist allowed SPEC_CTRL MSR values x86/bugs/AMD: Add support to disable RDS on Fam[15, 16, 17]h if requested x86/speculation: Create spec-ctrl.h to avoid include hell prctl: Add speculation control prctls x86/process: Optimize TIF checks in __switch_to_xtra() x86/process: Correct and optimize TIF_BLOCKSTEP switch x86/process: Optimize TIF_NOTSC switch x86/process: Allow runtime control of Speculative Store Bypass x86/speculation: Add prctl for Speculative Store Bypass mitigation nospec: Allow getting/setting on non-current task proc: Provide details on speculation flaw mitigations seccomp: Enable speculation flaw mitigations prctl: Add force disable speculation seccomp: Use PR_SPEC_FORCE_DISABLE seccomp: Add filter flag to opt-out of SSB mitigation seccomp: Move speculation migitation control to arch code x86/speculation: Make "seccomp" the default mode for Speculative Store Bypass x86/bugs: Rename _RDS to _SSBD proc: Use underscores for SSBD in 'status' Documentation/spec_ctrl: Do some minor cleanups x86/bugs: Fix __ssb_select_mitigation() return type x86/bugs: Make cpu_show_common() static x86/bugs: Fix the parameters alignment and missing void x86/cpu: Make alternative_msr_write work for 32-bit code x86/speculation: Use synthetic bits for IBRS/IBPB/STIBP x86/cpufeatures: 
Disentangle MSR_SPEC_CTRL enumeration from IBRS x86/cpufeatures: Disentangle SSBD enumeration x86/cpu/AMD: Fix erratum 1076 (CPB bit) x86/cpufeatures: Add FEATURE_ZEN x86/speculation: Handle HT correctly on AMD x86/bugs, KVM: Extend speculation control for VIRT_SPEC_CTRL x86/speculation: Add virtualized speculative store bypass disable support x86/speculation: Rework speculative_store_bypass_update() x86/bugs: Unify x86_spec_ctrl_{set_guest, restore_host} x86/bugs: Expose x86_spec_ctrl_base directly x86/bugs: Remove x86_spec_ctrl_set() x86/bugs: Rework spec_ctrl base and mask logic x86/speculation, KVM: Implement support for VIRT_SPEC_CTRL/LS_CFG x86/bugs: Rename SSBD_NO to SSB_NO x86/xen: Add call of speculative_store_bypass_ht_init() to PV paths x86/cpu: Re-apply forced caps every time CPU caps are re-read block: do not use interruptible wait anywhere clk: tegra: Fix PLL_U post divider and initial rate on Tegra30 ubi: Introduce vol_ignored() ubi: Rework Fastmap attach base code ubi: Be more paranoid while seaching for the most recent Fastmap ubi: Fix races around ubi_refill_pools() ubi: Fix Fastmap's update_vol() ubi: fastmap: Erase outdated anchor PEBs during attach Linux 4.4.144 Change-Id: Ia3e9b2b7bc653cba68b76878d34f8fcbbc007a13 Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
694 lines
16 KiB
C
694 lines
16 KiB
C
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
|
|
|
#include <linux/errno.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/smp.h>
|
|
#include <linux/prctl.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/module.h>
|
|
#include <linux/pm.h>
|
|
#include <linux/tick.h>
|
|
#include <linux/random.h>
|
|
#include <linux/user-return-notifier.h>
|
|
#include <linux/dmi.h>
|
|
#include <linux/utsname.h>
|
|
#include <linux/stackprotector.h>
|
|
#include <linux/tick.h>
|
|
#include <linux/cpuidle.h>
|
|
#include <trace/events/power.h>
|
|
#include <linux/hw_breakpoint.h>
|
|
#include <asm/cpu.h>
|
|
#include <asm/apic.h>
|
|
#include <asm/syscalls.h>
|
|
#include <asm/idle.h>
|
|
#include <asm/uaccess.h>
|
|
#include <asm/mwait.h>
|
|
#include <asm/fpu/internal.h>
|
|
#include <asm/debugreg.h>
|
|
#include <asm/nmi.h>
|
|
#include <asm/tlbflush.h>
|
|
#include <asm/mce.h>
|
|
#include <asm/vm86.h>
|
|
#include <asm/spec-ctrl.h>
|
|
|
|
/*
|
|
* per-CPU TSS segments. Threads are completely 'soft' on Linux,
|
|
* no more per-task TSS's. The TSS size is kept cacheline-aligned
|
|
* so they are allowed to end up in the .data..cacheline_aligned
|
|
* section. Since TSS's are completely CPU-local, we want them
|
|
* on exact cacheline boundaries, to eliminate cacheline ping-pong.
|
|
*/
|
|
__visible DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(struct tss_struct, cpu_tss) = {
|
|
.x86_tss = {
|
|
.sp0 = TOP_OF_INIT_STACK,
|
|
#ifdef CONFIG_X86_32
|
|
.ss0 = __KERNEL_DS,
|
|
.ss1 = __KERNEL_CS,
|
|
.io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
|
|
#endif
|
|
},
|
|
#ifdef CONFIG_X86_32
|
|
/*
|
|
* Note that the .io_bitmap member must be extra-big. This is because
|
|
* the CPU will access an additional byte beyond the end of the IO
|
|
* permission bitmap. The extra byte must be all 1 bits, and must
|
|
* be within the limit.
|
|
*/
|
|
.io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 },
|
|
#endif
|
|
};
|
|
EXPORT_PER_CPU_SYMBOL(cpu_tss);
|
|
|
|
#ifdef CONFIG_X86_64
/*
 * Per-CPU idle marker: enter_idle() writes 1 to it and __exit_idle()
 * tests-and-clears bit 0 of it.
 */
static DEFINE_PER_CPU(unsigned char, is_idle);
#endif
|
|
|
|
/*
|
|
* this gets called so that we can store lazy state into memory and copy the
|
|
* current task into the new thread.
|
|
*/
|
|
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
|
|
{
|
|
memcpy(dst, src, arch_task_struct_size);
|
|
#ifdef CONFIG_VM86
|
|
dst->thread.vm86 = NULL;
|
|
#endif
|
|
|
|
return fpu__copy(&dst->thread.fpu, &src->thread.fpu);
|
|
}
|
|
|
|
/*
|
|
* Free current thread data structures etc..
|
|
*/
|
|
void exit_thread(struct task_struct *tsk)
|
|
{
|
|
struct thread_struct *t = &tsk->thread;
|
|
unsigned long *bp = t->io_bitmap_ptr;
|
|
struct fpu *fpu = &t->fpu;
|
|
|
|
if (bp) {
|
|
struct tss_struct *tss = &per_cpu(cpu_tss, get_cpu());
|
|
|
|
t->io_bitmap_ptr = NULL;
|
|
clear_thread_flag(TIF_IO_BITMAP);
|
|
/*
|
|
* Careful, clear this in the TSS too:
|
|
*/
|
|
memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
|
|
t->io_bitmap_max = 0;
|
|
put_cpu();
|
|
kfree(bp);
|
|
}
|
|
|
|
free_vm86(t);
|
|
|
|
fpu__drop(fpu);
|
|
}
|
|
|
|
void flush_thread(void)
|
|
{
|
|
struct task_struct *tsk = current;
|
|
|
|
flush_ptrace_hw_breakpoint(tsk);
|
|
memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
|
|
|
|
fpu__clear(&tsk->thread.fpu);
|
|
}
|
|
|
|
void disable_TSC(void)
|
|
{
|
|
preempt_disable();
|
|
if (!test_and_set_thread_flag(TIF_NOTSC))
|
|
/*
|
|
* Must flip the CPU state synchronously with
|
|
* TIF_NOTSC in the current running context.
|
|
*/
|
|
cr4_set_bits(X86_CR4_TSD);
|
|
preempt_enable();
|
|
}
|
|
|
|
static void enable_TSC(void)
|
|
{
|
|
preempt_disable();
|
|
if (test_and_clear_thread_flag(TIF_NOTSC))
|
|
/*
|
|
* Must flip the CPU state synchronously with
|
|
* TIF_NOTSC in the current running context.
|
|
*/
|
|
cr4_clear_bits(X86_CR4_TSD);
|
|
preempt_enable();
|
|
}
|
|
|
|
int get_tsc_mode(unsigned long adr)
|
|
{
|
|
unsigned int val;
|
|
|
|
if (test_thread_flag(TIF_NOTSC))
|
|
val = PR_TSC_SIGSEGV;
|
|
else
|
|
val = PR_TSC_ENABLE;
|
|
|
|
return put_user(val, (unsigned int __user *)adr);
|
|
}
|
|
|
|
int set_tsc_mode(unsigned int val)
|
|
{
|
|
if (val == PR_TSC_SIGSEGV)
|
|
disable_TSC();
|
|
else if (val == PR_TSC_ENABLE)
|
|
enable_TSC();
|
|
else
|
|
return -EINVAL;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static inline void switch_to_bitmap(struct tss_struct *tss,
|
|
struct thread_struct *prev,
|
|
struct thread_struct *next,
|
|
unsigned long tifp, unsigned long tifn)
|
|
{
|
|
if (tifn & _TIF_IO_BITMAP) {
|
|
/*
|
|
* Copy the relevant range of the IO bitmap.
|
|
* Normally this is 128 bytes or less:
|
|
*/
|
|
memcpy(tss->io_bitmap, next->io_bitmap_ptr,
|
|
max(prev->io_bitmap_max, next->io_bitmap_max));
|
|
} else if (tifp & _TIF_IO_BITMAP) {
|
|
/*
|
|
* Clear any possible leftover bits:
|
|
*/
|
|
memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
|
|
}
|
|
}
|
|
|
|
#ifdef CONFIG_SMP
|
|
|
|
struct ssb_state {
|
|
struct ssb_state *shared_state;
|
|
raw_spinlock_t lock;
|
|
unsigned int disable_state;
|
|
unsigned long local_state;
|
|
};
|
|
|
|
#define LSTATE_SSB 0
|
|
|
|
static DEFINE_PER_CPU(struct ssb_state, ssb_state);
|
|
|
|
void speculative_store_bypass_ht_init(void)
|
|
{
|
|
struct ssb_state *st = this_cpu_ptr(&ssb_state);
|
|
unsigned int this_cpu = smp_processor_id();
|
|
unsigned int cpu;
|
|
|
|
st->local_state = 0;
|
|
|
|
/*
|
|
* Shared state setup happens once on the first bringup
|
|
* of the CPU. It's not destroyed on CPU hotunplug.
|
|
*/
|
|
if (st->shared_state)
|
|
return;
|
|
|
|
raw_spin_lock_init(&st->lock);
|
|
|
|
/*
|
|
* Go over HT siblings and check whether one of them has set up the
|
|
* shared state pointer already.
|
|
*/
|
|
for_each_cpu(cpu, topology_sibling_cpumask(this_cpu)) {
|
|
if (cpu == this_cpu)
|
|
continue;
|
|
|
|
if (!per_cpu(ssb_state, cpu).shared_state)
|
|
continue;
|
|
|
|
/* Link it to the state of the sibling: */
|
|
st->shared_state = per_cpu(ssb_state, cpu).shared_state;
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* First HT sibling to come up on the core. Link shared state of
|
|
* the first HT sibling to itself. The siblings on the same core
|
|
* which come up later will see the shared state pointer and link
|
|
* themself to the state of this CPU.
|
|
*/
|
|
st->shared_state = st;
|
|
}
|
|
|
|
/*
|
|
* Logic is: First HT sibling enables SSBD for both siblings in the core
|
|
* and last sibling to disable it, disables it for the whole core. This how
|
|
* MSR_SPEC_CTRL works in "hardware":
|
|
*
|
|
* CORE_SPEC_CTRL = THREAD0_SPEC_CTRL | THREAD1_SPEC_CTRL
|
|
*/
|
|
static __always_inline void amd_set_core_ssb_state(unsigned long tifn)
|
|
{
|
|
struct ssb_state *st = this_cpu_ptr(&ssb_state);
|
|
u64 msr = x86_amd_ls_cfg_base;
|
|
|
|
if (!static_cpu_has(X86_FEATURE_ZEN)) {
|
|
msr |= ssbd_tif_to_amd_ls_cfg(tifn);
|
|
wrmsrl(MSR_AMD64_LS_CFG, msr);
|
|
return;
|
|
}
|
|
|
|
if (tifn & _TIF_SSBD) {
|
|
/*
|
|
* Since this can race with prctl(), block reentry on the
|
|
* same CPU.
|
|
*/
|
|
if (__test_and_set_bit(LSTATE_SSB, &st->local_state))
|
|
return;
|
|
|
|
msr |= x86_amd_ls_cfg_ssbd_mask;
|
|
|
|
raw_spin_lock(&st->shared_state->lock);
|
|
/* First sibling enables SSBD: */
|
|
if (!st->shared_state->disable_state)
|
|
wrmsrl(MSR_AMD64_LS_CFG, msr);
|
|
st->shared_state->disable_state++;
|
|
raw_spin_unlock(&st->shared_state->lock);
|
|
} else {
|
|
if (!__test_and_clear_bit(LSTATE_SSB, &st->local_state))
|
|
return;
|
|
|
|
raw_spin_lock(&st->shared_state->lock);
|
|
st->shared_state->disable_state--;
|
|
if (!st->shared_state->disable_state)
|
|
wrmsrl(MSR_AMD64_LS_CFG, msr);
|
|
raw_spin_unlock(&st->shared_state->lock);
|
|
}
|
|
}
|
|
#else
|
|
static __always_inline void amd_set_core_ssb_state(unsigned long tifn)
|
|
{
|
|
u64 msr = x86_amd_ls_cfg_base | ssbd_tif_to_amd_ls_cfg(tifn);
|
|
|
|
wrmsrl(MSR_AMD64_LS_CFG, msr);
|
|
}
|
|
#endif
|
|
|
|
static __always_inline void amd_set_ssb_virt_state(unsigned long tifn)
|
|
{
|
|
/*
|
|
* SSBD has the same definition in SPEC_CTRL and VIRT_SPEC_CTRL,
|
|
* so ssbd_tif_to_spec_ctrl() just works.
|
|
*/
|
|
wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, ssbd_tif_to_spec_ctrl(tifn));
|
|
}
|
|
|
|
static __always_inline void intel_set_ssb_state(unsigned long tifn)
|
|
{
|
|
u64 msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn);
|
|
|
|
wrmsrl(MSR_IA32_SPEC_CTRL, msr);
|
|
}
|
|
|
|
static __always_inline void __speculative_store_bypass_update(unsigned long tifn)
|
|
{
|
|
if (static_cpu_has(X86_FEATURE_VIRT_SSBD))
|
|
amd_set_ssb_virt_state(tifn);
|
|
else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD))
|
|
amd_set_core_ssb_state(tifn);
|
|
else
|
|
intel_set_ssb_state(tifn);
|
|
}
|
|
|
|
/*
 * Preemption-safe wrapper: runs __speculative_store_bypass_update() for
 * @tif with preemption disabled.
 */
void speculative_store_bypass_update(unsigned long tif)
{
	preempt_disable();

	__speculative_store_bypass_update(tif);

	preempt_enable();
}
|
|
|
|
void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
|
|
struct tss_struct *tss)
|
|
{
|
|
struct thread_struct *prev, *next;
|
|
unsigned long tifp, tifn;
|
|
|
|
prev = &prev_p->thread;
|
|
next = &next_p->thread;
|
|
|
|
tifn = READ_ONCE(task_thread_info(next_p)->flags);
|
|
tifp = READ_ONCE(task_thread_info(prev_p)->flags);
|
|
switch_to_bitmap(tss, prev, next, tifp, tifn);
|
|
|
|
propagate_user_return_notify(prev_p, next_p);
|
|
|
|
if ((tifp & _TIF_BLOCKSTEP || tifn & _TIF_BLOCKSTEP) &&
|
|
arch_has_block_step()) {
|
|
unsigned long debugctl, msk;
|
|
|
|
rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
|
|
debugctl &= ~DEBUGCTLMSR_BTF;
|
|
msk = tifn & _TIF_BLOCKSTEP;
|
|
debugctl |= (msk >> TIF_BLOCKSTEP) << DEBUGCTLMSR_BTF_SHIFT;
|
|
wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
|
|
}
|
|
|
|
if ((tifp ^ tifn) & _TIF_NOTSC)
|
|
cr4_toggle_bits(X86_CR4_TSD);
|
|
|
|
if ((tifp ^ tifn) & _TIF_SSBD)
|
|
__speculative_store_bypass_update(tifn);
|
|
}
|
|
|
|
/*
|
|
* Idle related variables and functions
|
|
*/
|
|
unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE;
|
|
EXPORT_SYMBOL(boot_option_idle_override);
|
|
|
|
static void (*x86_idle)(void);
|
|
|
|
#ifndef CONFIG_SMP
|
|
/* Stub for builds without CONFIG_SMP: reaching it triggers BUG(). */
static inline void play_dead(void)
{
	BUG();
}
|
|
#endif
|
|
|
|
#ifdef CONFIG_X86_64
|
|
void enter_idle(void)
|
|
{
|
|
this_cpu_write(is_idle, 1);
|
|
idle_notifier_call_chain(IDLE_START);
|
|
}
|
|
|
|
static void __exit_idle(void)
|
|
{
|
|
if (x86_test_and_clear_bit_percpu(0, is_idle) == 0)
|
|
return;
|
|
idle_notifier_call_chain(IDLE_END);
|
|
}
|
|
|
|
/* Called from interrupts to signify idle end */
|
|
void exit_idle(void)
|
|
{
|
|
/* idle loop has pid 0 */
|
|
if (current->pid)
|
|
return;
|
|
__exit_idle();
|
|
}
|
|
#endif
|
|
|
|
/* Idle entry hook: calls local_touch_nmi(), then enter_idle(). */
void arch_cpu_idle_enter(void)
{
	local_touch_nmi();

	enter_idle();
}
|
|
|
|
/* Idle exit hook: forwards to __exit_idle(). */
void arch_cpu_idle_exit(void)
{
	__exit_idle();
}
|
|
|
|
/* Dead-CPU hook: forwards to play_dead(). */
void arch_cpu_idle_dead(void)
{
	play_dead();
}
|
|
|
|
/*
 * Called from the generic idle code; dispatches to whichever routine the
 * x86_idle pointer currently holds.
 */
void arch_cpu_idle(void)
{
	x86_idle();
}
|
|
|
|
/*
|
|
* We use this if we don't have any better idle routine..
|
|
*/
|
|
void default_idle(void)
|
|
{
|
|
trace_cpu_idle_rcuidle(1, smp_processor_id());
|
|
safe_halt();
|
|
trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
|
|
}
|
|
#ifdef CONFIG_APM_MODULE
|
|
EXPORT_SYMBOL(default_idle);
|
|
#endif
|
|
|
|
#ifdef CONFIG_XEN
|
|
bool xen_set_default_idle(void)
|
|
{
|
|
bool ret = !!x86_idle;
|
|
|
|
x86_idle = default_idle;
|
|
|
|
return ret;
|
|
}
|
|
#endif
|
|
void stop_this_cpu(void *dummy)
|
|
{
|
|
local_irq_disable();
|
|
/*
|
|
* Remove this CPU:
|
|
*/
|
|
set_cpu_online(smp_processor_id(), false);
|
|
disable_local_APIC();
|
|
mcheck_cpu_clear(this_cpu_ptr(&cpu_info));
|
|
|
|
for (;;)
|
|
halt();
|
|
}
|
|
|
|
bool amd_e400_c1e_detected;
|
|
EXPORT_SYMBOL(amd_e400_c1e_detected);
|
|
|
|
static cpumask_var_t amd_e400_c1e_mask;
|
|
|
|
void amd_e400_remove_cpu(int cpu)
|
|
{
|
|
if (amd_e400_c1e_mask != NULL)
|
|
cpumask_clear_cpu(cpu, amd_e400_c1e_mask);
|
|
}
|
|
|
|
/*
|
|
* AMD Erratum 400 aware idle routine. We check for C1E active in the interrupt
|
|
* pending message MSR. If we detect C1E, then we handle it the same
|
|
* way as C3 power states (local apic timer and TSC stop)
|
|
*/
|
|
static void amd_e400_idle(void)
|
|
{
|
|
if (!amd_e400_c1e_detected) {
|
|
u32 lo, hi;
|
|
|
|
rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi);
|
|
|
|
if (lo & K8_INTP_C1E_ACTIVE_MASK) {
|
|
amd_e400_c1e_detected = true;
|
|
if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
|
|
mark_tsc_unstable("TSC halt in AMD C1E");
|
|
pr_info("System has AMD C1E enabled\n");
|
|
}
|
|
}
|
|
|
|
if (amd_e400_c1e_detected) {
|
|
int cpu = smp_processor_id();
|
|
|
|
if (!cpumask_test_cpu(cpu, amd_e400_c1e_mask)) {
|
|
cpumask_set_cpu(cpu, amd_e400_c1e_mask);
|
|
/* Force broadcast so ACPI can not interfere. */
|
|
tick_broadcast_force();
|
|
pr_info("Switch to broadcast mode on CPU%d\n", cpu);
|
|
}
|
|
tick_broadcast_enter();
|
|
|
|
default_idle();
|
|
|
|
/*
|
|
* The switch back from broadcast mode needs to be
|
|
* called with interrupts disabled.
|
|
*/
|
|
local_irq_disable();
|
|
tick_broadcast_exit();
|
|
local_irq_enable();
|
|
} else
|
|
default_idle();
|
|
}
|
|
|
|
/*
|
|
* Intel Core2 and older machines prefer MWAIT over HALT for C1.
|
|
* We can't rely on cpuidle installing MWAIT, because it will not load
|
|
* on systems that support only C1 -- so the boot default must be MWAIT.
|
|
*
|
|
* Some AMD machines are the opposite, they depend on using HALT.
|
|
*
|
|
* So for default C1, which is used during boot until cpuidle loads,
|
|
* use MWAIT-C1 on Intel HW that has it, else use HALT.
|
|
*/
|
|
static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c)
|
|
{
|
|
if (c->x86_vendor != X86_VENDOR_INTEL)
|
|
return 0;
|
|
|
|
if (!cpu_has(c, X86_FEATURE_MWAIT))
|
|
return 0;
|
|
|
|
return 1;
|
|
}
|
|
|
|
/*
|
|
* MONITOR/MWAIT with no hints, used for default C1 state. This invokes MWAIT
|
|
* with interrupts enabled and no flags, which is backwards compatible with the
|
|
* original MWAIT implementation.
|
|
*/
|
|
static void mwait_idle(void)
|
|
{
|
|
if (!current_set_polling_and_test()) {
|
|
trace_cpu_idle_rcuidle(1, smp_processor_id());
|
|
if (this_cpu_has(X86_BUG_CLFLUSH_MONITOR)) {
|
|
smp_mb(); /* quirk */
|
|
clflush((void *)¤t_thread_info()->flags);
|
|
smp_mb(); /* quirk */
|
|
}
|
|
|
|
__monitor((void *)¤t_thread_info()->flags, 0, 0);
|
|
if (!need_resched())
|
|
__sti_mwait(0, 0);
|
|
else
|
|
local_irq_enable();
|
|
trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
|
|
} else {
|
|
local_irq_enable();
|
|
}
|
|
__current_clr_polling();
|
|
}
|
|
|
|
void select_idle_routine(const struct cpuinfo_x86 *c)
|
|
{
|
|
#ifdef CONFIG_SMP
|
|
if (boot_option_idle_override == IDLE_POLL && smp_num_siblings > 1)
|
|
pr_warn_once("WARNING: polling idle and HT enabled, performance may degrade\n");
|
|
#endif
|
|
if (x86_idle || boot_option_idle_override == IDLE_POLL)
|
|
return;
|
|
|
|
if (cpu_has_bug(c, X86_BUG_AMD_APIC_C1E)) {
|
|
/* E400: APIC timer interrupt does not wake up CPU from C1e */
|
|
pr_info("using AMD E400 aware idle routine\n");
|
|
x86_idle = amd_e400_idle;
|
|
} else if (prefer_mwait_c1_over_halt(c)) {
|
|
pr_info("using mwait in idle threads\n");
|
|
x86_idle = mwait_idle;
|
|
} else
|
|
x86_idle = default_idle;
|
|
}
|
|
|
|
/*
 * Allocate amd_e400_c1e_mask (zeroed), but only when amd_e400_idle is the
 * selected idle routine, since that is the routine which uses the mask.
 */
void __init init_amd_e400_c1e_mask(void)
{
	if (x86_idle != amd_e400_idle)
		return;

	zalloc_cpumask_var(&amd_e400_c1e_mask, GFP_KERNEL);
}
|
|
|
|
static int __init idle_setup(char *str)
|
|
{
|
|
if (!str)
|
|
return -EINVAL;
|
|
|
|
if (!strcmp(str, "poll")) {
|
|
pr_info("using polling idle threads\n");
|
|
boot_option_idle_override = IDLE_POLL;
|
|
cpu_idle_poll_ctrl(true);
|
|
} else if (!strcmp(str, "halt")) {
|
|
/*
|
|
* When the boot option of idle=halt is added, halt is
|
|
* forced to be used for CPU idle. In such case CPU C2/C3
|
|
* won't be used again.
|
|
* To continue to load the CPU idle driver, don't touch
|
|
* the boot_option_idle_override.
|
|
*/
|
|
x86_idle = default_idle;
|
|
boot_option_idle_override = IDLE_HALT;
|
|
} else if (!strcmp(str, "nomwait")) {
|
|
/*
|
|
* If the boot option of "idle=nomwait" is added,
|
|
* it means that mwait will be disabled for CPU C2/C3
|
|
* states. In such case it won't touch the variable
|
|
* of boot_option_idle_override.
|
|
*/
|
|
boot_option_idle_override = IDLE_NOMWAIT;
|
|
} else
|
|
return -1;
|
|
|
|
return 0;
|
|
}
|
|
early_param("idle", idle_setup);
|
|
|
|
unsigned long arch_align_stack(unsigned long sp)
|
|
{
|
|
if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
|
|
sp -= get_random_int() % 8192;
|
|
return sp & ~0xf;
|
|
}
|
|
|
|
unsigned long arch_randomize_brk(struct mm_struct *mm)
|
|
{
|
|
unsigned long range_end = mm->brk + 0x02000000;
|
|
return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
|
|
}
|
|
|
|
/*
|
|
* Called from fs/proc with a reference on @p to find the function
|
|
* which called into schedule(). This needs to be done carefully
|
|
* because the task might wake up and we might look at a stack
|
|
* changing under us.
|
|
*/
|
|
unsigned long get_wchan(struct task_struct *p)
|
|
{
|
|
unsigned long start, bottom, top, sp, fp, ip;
|
|
int count = 0;
|
|
|
|
if (!p || p == current || p->state == TASK_RUNNING)
|
|
return 0;
|
|
|
|
start = (unsigned long)task_stack_page(p);
|
|
if (!start)
|
|
return 0;
|
|
|
|
/*
|
|
* Layout of the stack page:
|
|
*
|
|
* ----------- topmax = start + THREAD_SIZE - sizeof(unsigned long)
|
|
* PADDING
|
|
* ----------- top = topmax - TOP_OF_KERNEL_STACK_PADDING
|
|
* stack
|
|
* ----------- bottom = start + sizeof(thread_info)
|
|
* thread_info
|
|
* ----------- start
|
|
*
|
|
* The tasks stack pointer points at the location where the
|
|
* framepointer is stored. The data on the stack is:
|
|
* ... IP FP ... IP FP
|
|
*
|
|
* We need to read FP and IP, so we need to adjust the upper
|
|
* bound by another unsigned long.
|
|
*/
|
|
top = start + THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING;
|
|
top -= 2 * sizeof(unsigned long);
|
|
bottom = start + sizeof(struct thread_info);
|
|
|
|
sp = READ_ONCE(p->thread.sp);
|
|
if (sp < bottom || sp > top)
|
|
return 0;
|
|
|
|
fp = READ_ONCE_NOCHECK(*(unsigned long *)sp);
|
|
do {
|
|
if (fp < bottom || fp > top)
|
|
return 0;
|
|
ip = READ_ONCE_NOCHECK(*(unsigned long *)(fp + sizeof(unsigned long)));
|
|
if (!in_sched_functions(ip))
|
|
return ip;
|
|
fp = READ_ONCE_NOCHECK(*(unsigned long *)fp);
|
|
} while (count++ < 16 && p->state != TASK_RUNNING);
|
|
return 0;
|
|
}
|