x86/entry/32: Re-implement SYSENTER using the new C path
Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Link: http://lkml.kernel.org/r/5b99659e8be70f3dd10cd8970a5c90293d9ad9a7.1444091585.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
parent
150ac78d63
commit
5f310f739b
3 changed files with 51 additions and 100 deletions
|
@ -363,7 +363,7 @@ __visible void do_int80_syscall_32(struct pt_regs *regs)
|
||||||
syscall_return_slowpath(regs);
|
syscall_return_slowpath(regs);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Returns 0 to return using IRET or 1 to return using SYSRETL. */
|
/* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */
|
||||||
__visible long do_fast_syscall_32(struct pt_regs *regs)
|
__visible long do_fast_syscall_32(struct pt_regs *regs)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
|
@ -417,7 +417,20 @@ __visible long do_fast_syscall_32(struct pt_regs *regs)
|
||||||
regs->ip == landing_pad &&
|
regs->ip == landing_pad &&
|
||||||
(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF)) == 0;
|
(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF)) == 0;
|
||||||
#else
|
#else
|
||||||
return 0;
|
/*
|
||||||
|
* Opportunistic SYSEXIT: if possible, try to return using SYSEXIT.
|
||||||
|
*
|
||||||
|
* Unlike 64-bit opportunistic SYSRET, we can't check that CX == IP,
|
||||||
|
* because the ECX fixup above will ensure that this is essentially
|
||||||
|
* never the case.
|
||||||
|
*
|
||||||
|
* We don't allow syscalls at all from VM86 mode, but we still
|
||||||
|
* need to check VM, because we might be returning from sys_vm86.
|
||||||
|
*/
|
||||||
|
return static_cpu_has(X86_FEATURE_SEP) &&
|
||||||
|
regs->cs == __USER_CS && regs->ss == __USER_DS &&
|
||||||
|
regs->ip == landing_pad &&
|
||||||
|
(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF | X86_EFLAGS_VM)) == 0;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -287,76 +287,47 @@ need_resched:
|
||||||
END(resume_kernel)
|
END(resume_kernel)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
|
||||||
* SYSENTER_RETURN points to after the SYSENTER instruction
|
|
||||||
* in the vsyscall page. See vsyscall-sysentry.S, which defines
|
|
||||||
* the symbol.
|
|
||||||
*/
|
|
||||||
|
|
||||||
# SYSENTER call handler stub
|
# SYSENTER call handler stub
|
||||||
ENTRY(entry_SYSENTER_32)
|
ENTRY(entry_SYSENTER_32)
|
||||||
movl TSS_sysenter_sp0(%esp), %esp
|
movl TSS_sysenter_sp0(%esp), %esp
|
||||||
sysenter_past_esp:
|
sysenter_past_esp:
|
||||||
|
pushl $__USER_DS /* pt_regs->ss */
|
||||||
|
pushl %ecx /* pt_regs->cx */
|
||||||
|
pushfl /* pt_regs->flags (except IF = 0) */
|
||||||
|
orl $X86_EFLAGS_IF, (%esp) /* Fix IF */
|
||||||
|
pushl $__USER_CS /* pt_regs->cs */
|
||||||
|
pushl $0 /* pt_regs->ip = 0 (placeholder) */
|
||||||
|
pushl %eax /* pt_regs->orig_ax */
|
||||||
|
SAVE_ALL pt_regs_ax=$-ENOSYS /* save rest */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Interrupts are disabled here, but we can't trace it until
|
* User mode is traced as though IRQs are on, and SYSENTER
|
||||||
* enough kernel state is set up that TRACE_IRQS_OFF can be called - but
|
* turned them off.
|
||||||
* we immediately enable interrupts at that point anyway.
|
|
||||||
*/
|
*/
|
||||||
pushl $__USER_DS
|
|
||||||
pushl %ebp
|
|
||||||
pushfl
|
|
||||||
orl $X86_EFLAGS_IF, (%esp)
|
|
||||||
pushl $__USER_CS
|
|
||||||
/*
|
|
||||||
* Push current_thread_info()->sysenter_return to the stack.
|
|
||||||
* A tiny bit of offset fixup is necessary: TI_sysenter_return
|
|
||||||
* is relative to thread_info, which is at the bottom of the
|
|
||||||
* kernel stack page. 4*4 means the 4 words pushed above;
|
|
||||||
* TOP_OF_KERNEL_STACK_PADDING takes us to the top of the stack;
|
|
||||||
* and THREAD_SIZE takes us to the bottom.
|
|
||||||
*/
|
|
||||||
pushl ((TI_sysenter_return) - THREAD_SIZE + TOP_OF_KERNEL_STACK_PADDING + 4*4)(%esp)
|
|
||||||
|
|
||||||
pushl %eax
|
|
||||||
SAVE_ALL
|
|
||||||
ENABLE_INTERRUPTS(CLBR_NONE)
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Load the potential sixth argument from user stack.
|
|
||||||
* Careful about security.
|
|
||||||
*/
|
|
||||||
cmpl $__PAGE_OFFSET-3, %ebp
|
|
||||||
jae syscall_fault
|
|
||||||
ASM_STAC
|
|
||||||
1: movl (%ebp), %ebp
|
|
||||||
ASM_CLAC
|
|
||||||
movl %ebp, PT_EBP(%esp)
|
|
||||||
_ASM_EXTABLE(1b, syscall_fault)
|
|
||||||
|
|
||||||
GET_THREAD_INFO(%ebp)
|
|
||||||
|
|
||||||
testl $_TIF_WORK_SYSCALL_ENTRY, TI_flags(%ebp)
|
|
||||||
jnz syscall_trace_entry
|
|
||||||
sysenter_do_call:
|
|
||||||
cmpl $(NR_syscalls), %eax
|
|
||||||
jae sysenter_badsys
|
|
||||||
call *sys_call_table(, %eax, 4)
|
|
||||||
sysenter_after_call:
|
|
||||||
movl %eax, PT_EAX(%esp)
|
|
||||||
LOCKDEP_SYS_EXIT
|
|
||||||
DISABLE_INTERRUPTS(CLBR_ANY)
|
|
||||||
TRACE_IRQS_OFF
|
TRACE_IRQS_OFF
|
||||||
movl TI_flags(%ebp), %ecx
|
|
||||||
testl $_TIF_ALLWORK_MASK, %ecx
|
movl %esp, %eax
|
||||||
jnz syscall_exit_work_irqs_off
|
call do_fast_syscall_32
|
||||||
sysenter_exit:
|
testl %eax, %eax
|
||||||
/* if something modifies registers it must also disable sysexit */
|
jz .Lsyscall_32_done
|
||||||
movl PT_EIP(%esp), %edx
|
|
||||||
movl PT_OLDESP(%esp), %ecx
|
/* Opportunistic SYSEXIT */
|
||||||
xorl %ebp, %ebp
|
TRACE_IRQS_ON /* User mode traces as IRQs on. */
|
||||||
TRACE_IRQS_ON
|
movl PT_EIP(%esp), %edx /* pt_regs->ip */
|
||||||
|
movl PT_OLDESP(%esp), %ecx /* pt_regs->sp */
|
||||||
|
popl %ebx /* pt_regs->bx */
|
||||||
|
addl $2*4, %esp /* skip pt_regs->cx and pt_regs->dx */
|
||||||
|
popl %esi /* pt_regs->si */
|
||||||
|
popl %edi /* pt_regs->di */
|
||||||
|
popl %ebp /* pt_regs->bp */
|
||||||
|
popl %eax /* pt_regs->ax */
|
||||||
1: mov PT_FS(%esp), %fs
|
1: mov PT_FS(%esp), %fs
|
||||||
PTGS_TO_GS
|
PTGS_TO_GS
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Return back to the vDSO, which will pop ecx and edx.
|
||||||
|
* Don't bother with DS and ES (they already contain __USER_DS).
|
||||||
|
*/
|
||||||
ENABLE_INTERRUPTS_SYSEXIT
|
ENABLE_INTERRUPTS_SYSEXIT
|
||||||
|
|
||||||
.pushsection .fixup, "ax"
|
.pushsection .fixup, "ax"
|
||||||
|
@ -371,7 +342,7 @@ ENDPROC(entry_SYSENTER_32)
|
||||||
ENTRY(entry_INT80_32)
|
ENTRY(entry_INT80_32)
|
||||||
ASM_CLAC
|
ASM_CLAC
|
||||||
pushl %eax /* pt_regs->orig_ax */
|
pushl %eax /* pt_regs->orig_ax */
|
||||||
SAVE_ALL pt_regs_ax=$-ENOSYS /* save rest, load -ENOSYS into ax */
|
SAVE_ALL pt_regs_ax=$-ENOSYS /* save rest */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* User mode is traced as though IRQs are on, and the interrupt gate
|
* User mode is traced as though IRQs are on, and the interrupt gate
|
||||||
|
@ -381,6 +352,7 @@ ENTRY(entry_INT80_32)
|
||||||
|
|
||||||
movl %esp, %eax
|
movl %esp, %eax
|
||||||
call do_int80_syscall_32
|
call do_int80_syscall_32
|
||||||
|
.Lsyscall_32_done:
|
||||||
|
|
||||||
restore_all:
|
restore_all:
|
||||||
TRACE_IRQS_IRET
|
TRACE_IRQS_IRET
|
||||||
|
@ -457,42 +429,6 @@ ldt_ss:
|
||||||
#endif
|
#endif
|
||||||
ENDPROC(entry_INT80_32)
|
ENDPROC(entry_INT80_32)
|
||||||
|
|
||||||
# perform syscall entry tracing
|
|
||||||
ALIGN
|
|
||||||
syscall_trace_entry:
|
|
||||||
movl $-ENOSYS, PT_EAX(%esp)
|
|
||||||
movl %esp, %eax
|
|
||||||
call syscall_trace_enter
|
|
||||||
/* What it returned is what we'll actually use. */
|
|
||||||
cmpl $(NR_syscalls), %eax
|
|
||||||
jnae syscall_call
|
|
||||||
jmp syscall_exit
|
|
||||||
END(syscall_trace_entry)
|
|
||||||
|
|
||||||
# perform syscall exit tracing
|
|
||||||
ALIGN
|
|
||||||
syscall_exit_work_irqs_off:
|
|
||||||
TRACE_IRQS_ON
|
|
||||||
ENABLE_INTERRUPTS(CLBR_ANY)
|
|
||||||
|
|
||||||
syscall_exit_work:
|
|
||||||
movl %esp, %eax
|
|
||||||
call syscall_return_slowpath
|
|
||||||
jmp restore_all
|
|
||||||
END(syscall_exit_work)
|
|
||||||
|
|
||||||
syscall_fault:
|
|
||||||
ASM_CLAC
|
|
||||||
GET_THREAD_INFO(%ebp)
|
|
||||||
movl $-EFAULT, PT_EAX(%esp)
|
|
||||||
jmp resume_userspace
|
|
||||||
END(syscall_fault)
|
|
||||||
|
|
||||||
sysenter_badsys:
|
|
||||||
movl $-ENOSYS, %eax
|
|
||||||
jmp sysenter_after_call
|
|
||||||
END(sysenter_badsys)
|
|
||||||
|
|
||||||
.macro FIXUP_ESPFIX_STACK
|
.macro FIXUP_ESPFIX_STACK
|
||||||
/*
|
/*
|
||||||
* Switch back from the ESPFIX stack to the normal zero-based stack
|
* Switch back from the ESPFIX stack to the normal zero-based stack
|
||||||
|
|
|
@ -34,6 +34,8 @@ __kernel_vsyscall:
|
||||||
/* If SYSENTER (Intel) or SYSCALL32 (AMD) is available, use it. */
|
/* If SYSENTER (Intel) or SYSCALL32 (AMD) is available, use it. */
|
||||||
ALTERNATIVE_2 "", "sysenter", X86_FEATURE_SYSENTER32, \
|
ALTERNATIVE_2 "", "sysenter", X86_FEATURE_SYSENTER32, \
|
||||||
"syscall", X86_FEATURE_SYSCALL32
|
"syscall", X86_FEATURE_SYSCALL32
|
||||||
|
#else
|
||||||
|
ALTERNATIVE "", "sysenter", X86_FEATURE_SEP
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Enter using int $0x80 */
|
/* Enter using int $0x80 */
|
||||||
|
|
Loading…
Add table
Reference in a new issue