FROMLIST: [PATCH v5 10/12] arm64: vdso: replace gettimeofday.S with global vgettimeofday.c
(cherry picked from url https://patchwork.kernel.org/patch/10044501/)

Take an effort from the previous 9 patches to recode the arm64 vdso code from assembler to C previously submitted by Andrew Pinski <apinski@cavium.com>, rework it for use in both arm and arm64, overlapping any optimizations for each architecture. But instead of landing it in arm64, land the result into lib/vdso and unify both implementations to simplify future maintenance.

apinski@cavium.com makes the following claims in the original patch:

This allows the compiler to optimize the divide by 1000 and remove the other divides. On ThunderX, gettimeofday improves by 32%. On ThunderX 2, gettimeofday improves by 18%.

Note I noticed a bug in the old (arm64) implementation of __kernel_clock_getres; it was checking only the lower 32 bits of the pointer; this would work for most cases but could fail in a few.

<end of claim>

Signed-off-by: Mark Salyzyn <salyzyn@android.com>
Cc: James Morse <james.morse@arm.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Dmitry Safonov <dsafonov@virtuozzo.com>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Laura Abbott <labbott@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Andy Gross <andy.gross@linaro.org>
Cc: Kevin Brodsky <kevin.brodsky@arm.com>
Cc: Andrew Pinski <apinski@cavium.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Bug: 63737556
Bug: 20045882
Change-Id: I71ff27ff5bfa323354fda6867b01ec908d8d6cbd
This commit is contained in:
parent
e5016f919d
commit
b9c53b6ff8
6 changed files with 106 additions and 44 deletions
|
@ -20,16 +20,31 @@
|
|||
|
||||
#ifndef __ASSEMBLY__
|
||||
|
||||
/*
 * Default types for selected vdso_data fields. Each typedef below is emitted
 * only when its guard macro is not yet defined; the guard is then defined so
 * the typedef is not repeated.
 * NOTE(review): presumably this lets other code pre-define a guard to supply
 * its own type -- confirm against the users of these guards.
 */
/*
 * NOTE(review): this guard is named ..._SEC_T but the type it protects is
 * vdso_wtm_clock_nsec_t (nsec) -- confirm the naming is intended.
 */
#ifndef _VDSO_WTM_CLOCK_SEC_T
|
||||
#define _VDSO_WTM_CLOCK_SEC_T
|
||||
typedef __u64 vdso_wtm_clock_nsec_t;
|
||||
#endif
|
||||
|
||||
#ifndef _VDSO_XTIME_CLOCK_SEC_T
|
||||
#define _VDSO_XTIME_CLOCK_SEC_T
|
||||
typedef __u64 vdso_xtime_clock_sec_t;
|
||||
#endif
|
||||
|
||||
#ifndef _VDSO_RAW_TIME_SEC_T
|
||||
#define _VDSO_RAW_TIME_SEC_T
|
||||
typedef __u64 vdso_raw_time_sec_t;
|
||||
#endif
|
||||
|
||||
struct vdso_data {
|
||||
__u64 cs_cycle_last; /* Timebase at clocksource init */
|
||||
__u64 raw_time_sec; /* Raw time */
|
||||
vdso_raw_time_sec_t raw_time_sec; /* Raw time */
|
||||
__u64 raw_time_nsec;
|
||||
__u64 xtime_clock_sec; /* Kernel time */
|
||||
__u64 xtime_clock_nsec;
|
||||
vdso_xtime_clock_sec_t xtime_clock_sec; /* Kernel time */
|
||||
__u64 xtime_clock_snsec;
|
||||
__u64 xtime_coarse_sec; /* Coarse time */
|
||||
__u64 xtime_coarse_nsec;
|
||||
__u64 wtm_clock_sec; /* Wall to monotonic time */
|
||||
__u64 wtm_clock_nsec;
|
||||
vdso_wtm_clock_nsec_t wtm_clock_nsec;
|
||||
__u32 tb_seq_count; /* Timebase sequence counter */
|
||||
/* cs_* members must be adjacent and in this order (ldp accesses) */
|
||||
__u32 cs_mono_mult; /* NTP-adjusted clocksource multiplier */
|
||||
|
|
|
@ -96,40 +96,6 @@ int main(void)
|
|||
DEFINE(DMA_TO_DEVICE, DMA_TO_DEVICE);
|
||||
DEFINE(DMA_FROM_DEVICE, DMA_FROM_DEVICE);
|
||||
BLANK();
|
||||
DEFINE(CLOCK_REALTIME, CLOCK_REALTIME);
|
||||
DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC);
|
||||
DEFINE(CLOCK_MONOTONIC_RAW, CLOCK_MONOTONIC_RAW);
|
||||
DEFINE(CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC);
|
||||
DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE);
|
||||
DEFINE(CLOCK_MONOTONIC_COARSE,CLOCK_MONOTONIC_COARSE);
|
||||
DEFINE(CLOCK_COARSE_RES, LOW_RES_NSEC);
|
||||
DEFINE(NSEC_PER_SEC, NSEC_PER_SEC);
|
||||
BLANK();
|
||||
DEFINE(VDSO_CS_CYCLE_LAST, offsetof(struct vdso_data, cs_cycle_last));
|
||||
DEFINE(VDSO_RAW_TIME_SEC, offsetof(struct vdso_data, raw_time_sec));
|
||||
DEFINE(VDSO_RAW_TIME_NSEC, offsetof(struct vdso_data, raw_time_nsec));
|
||||
DEFINE(VDSO_XTIME_CLK_SEC, offsetof(struct vdso_data, xtime_clock_sec));
|
||||
DEFINE(VDSO_XTIME_CLK_NSEC, offsetof(struct vdso_data, xtime_clock_nsec));
|
||||
DEFINE(VDSO_XTIME_CRS_SEC, offsetof(struct vdso_data, xtime_coarse_sec));
|
||||
DEFINE(VDSO_XTIME_CRS_NSEC, offsetof(struct vdso_data, xtime_coarse_nsec));
|
||||
DEFINE(VDSO_WTM_CLK_SEC, offsetof(struct vdso_data, wtm_clock_sec));
|
||||
DEFINE(VDSO_WTM_CLK_NSEC, offsetof(struct vdso_data, wtm_clock_nsec));
|
||||
DEFINE(VDSO_TB_SEQ_COUNT, offsetof(struct vdso_data, tb_seq_count));
|
||||
DEFINE(VDSO_CS_MONO_MULT, offsetof(struct vdso_data, cs_mono_mult));
|
||||
DEFINE(VDSO_CS_RAW_MULT, offsetof(struct vdso_data, cs_raw_mult));
|
||||
DEFINE(VDSO_CS_SHIFT, offsetof(struct vdso_data, cs_shift));
|
||||
DEFINE(VDSO_TZ_MINWEST, offsetof(struct vdso_data, tz_minuteswest));
|
||||
DEFINE(VDSO_TZ_DSTTIME, offsetof(struct vdso_data, tz_dsttime));
|
||||
DEFINE(VDSO_USE_SYSCALL, offsetof(struct vdso_data, use_syscall));
|
||||
BLANK();
|
||||
DEFINE(TVAL_TV_SEC, offsetof(struct timeval, tv_sec));
|
||||
DEFINE(TVAL_TV_USEC, offsetof(struct timeval, tv_usec));
|
||||
DEFINE(TSPEC_TV_SEC, offsetof(struct timespec, tv_sec));
|
||||
DEFINE(TSPEC_TV_NSEC, offsetof(struct timespec, tv_nsec));
|
||||
BLANK();
|
||||
DEFINE(TZ_MINWEST, offsetof(struct timezone, tz_minuteswest));
|
||||
DEFINE(TZ_DSTTIME, offsetof(struct timezone, tz_dsttime));
|
||||
BLANK();
|
||||
#ifdef CONFIG_THREAD_INFO_IN_TASK
|
||||
DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack));
|
||||
DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task));
|
||||
|
|
|
@ -261,7 +261,7 @@ void update_vsyscall(struct timekeeper *tk)
|
|||
vdso_data->raw_time_sec = tk->raw_sec;
|
||||
vdso_data->raw_time_nsec = tk->tkr_raw.xtime_nsec;
|
||||
vdso_data->xtime_clock_sec = tk->xtime_sec;
|
||||
vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec;
|
||||
vdso_data->xtime_clock_snsec = tk->tkr_mono.xtime_nsec;
|
||||
vdso_data->cs_mono_mult = tk->tkr_mono.mult;
|
||||
vdso_data->cs_raw_mult = tk->tkr_raw.mult;
|
||||
/* tkr_mono.shift == tkr_raw.shift */
|
||||
|
|
|
@ -5,18 +5,32 @@
|
|||
# Heavily based on the vDSO Makefiles for other archs.
|
||||
#
|
||||
|
||||
obj-vdso := gettimeofday.o note.o sigreturn.o
|
||||
obj-vdso-s := note.o sigreturn.o
|
||||
obj-vdso-c := vgettimeofday.o
|
||||
|
||||
# Build rules
|
||||
targets := $(obj-vdso) vdso.so vdso.so.dbg
|
||||
obj-vdso := $(addprefix $(obj)/, $(obj-vdso))
|
||||
targets := $(obj-vdso-s) $(obj-vdso-c) vdso.so vdso.so.dbg
|
||||
obj-vdso-s := $(addprefix $(obj)/, $(obj-vdso-s))
|
||||
obj-vdso-c := $(addprefix $(obj)/, $(obj-vdso-c))
|
||||
obj-vdso := $(obj-vdso-c) $(obj-vdso-s)
|
||||
|
||||
ccflags-y := -shared -fno-common -fno-builtin
|
||||
ccflags-y := -shared -fno-common -fno-builtin -fno-stack-protector
|
||||
ccflags-y += -DDISABLE_BRANCH_PROFILING
|
||||
ccflags-y += -nostdlib -Wl,-soname=linux-vdso.so.1 \
|
||||
$(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
|
||||
|
||||
# Force -O2 to avoid libgcc dependencies
|
||||
CFLAGS_REMOVE_vgettimeofday.o = -pg -Os
|
||||
CFLAGS_vgettimeofday.o = -O2 -fPIC
|
||||
ifneq ($(cc-name),clang)
|
||||
CFLAGS_vgettimeofday.o += -mcmodel=tiny
|
||||
endif
|
||||
|
||||
# Disable gcov profiling for VDSO code
|
||||
GCOV_PROFILE := n
|
||||
KASAN_SANITIZE := n
|
||||
UBSAN_SANITIZE := n
|
||||
KCOV_INSTRUMENT := n
|
||||
|
||||
# Workaround for bare-metal (ELF) toolchains that neglect to pass -shared
|
||||
# down to collect2, resulting in silent corruption of the vDSO image.
|
||||
|
@ -49,12 +63,17 @@ include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE
|
|||
$(call if_changed,vdsosym)
|
||||
|
||||
# Assembly rules for the .S files
|
||||
$(obj-vdso): %.o: %.S FORCE
|
||||
$(obj-vdso-s): %.o: %.S FORCE
|
||||
$(call if_changed_dep,vdsoas)
|
||||
|
||||
$(obj-vdso-c): %.o: %.c FORCE
|
||||
$(call if_changed_dep,vdsocc)
|
||||
|
||||
# Actual build commands
|
||||
quiet_cmd_vdsold = VDSOL $@
|
||||
cmd_vdsold = $(CC) $(c_flags) -Wl,-n -Wl,-T $^ -o $@
|
||||
quiet_cmd_vdsocc = VDSOC $@
|
||||
cmd_vdsocc = ${CC} $(c_flags) -c -o $@ $<
|
||||
quiet_cmd_vdsoas = VDSOA $@
|
||||
cmd_vdsoas = $(CC) $(a_flags) -c -o $@ $<
|
||||
|
||||
|
|
59
arch/arm64/kernel/vdso/datapage.h
Normal file
59
arch/arm64/kernel/vdso/datapage.h
Normal file
|
@ -0,0 +1,59 @@
|
|||
/*
|
||||
* Userspace implementations of __get_datapage
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef __VDSO_DATAPAGE_H
|
||||
#define __VDSO_DATAPAGE_H
|
||||
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/types.h>
|
||||
#include <asm/vdso_datapage.h>
|
||||
|
||||
/*
|
||||
* We use the hidden visibility to prevent the compiler from generating a GOT
|
||||
* relocation. Not only is going through a GOT useless (the entry couldn't and
|
||||
* mustn't be overridden by another library), it does not even work: the linker
|
||||
* cannot generate an absolute address to the data page.
|
||||
*
|
||||
* With the hidden visibility, the compiler simply generates a PC-relative
|
||||
* relocation (R_ARM_REL32 on 32-bit arm; presumably an ADR/ADRP-based R_AARCH64_* relocation on arm64 -- TODO confirm), and this is what we need.
|
||||
*/
|
||||
extern const struct vdso_data _vdso_data __attribute__((visibility("hidden")));
|
||||
|
||||
/*
 * __get_datapage() - return the address of the vDSO data page.
 *
 * Takes no arguments. Returns a pointer to _vdso_data that has been passed
 * through an empty asm statement; the comment in the body explains why a
 * plain `&_vdso_data` is not used.
 */
static inline const struct vdso_data *__get_datapage(void)
|
||||
{
|
||||
const struct vdso_data *ret;
|
||||
/*
|
||||
* This simply puts &_vdso_data into ret. The reason why we don't use
|
||||
* `ret = &_vdso_data` is that the compiler tends to optimise this in a
|
||||
* very suboptimal way: instead of keeping &_vdso_data in a register,
|
||||
* it goes through a relocation almost every time _vdso_data must be
|
||||
* accessed (even in subfunctions). This is both time and space
|
||||
* consuming: each relocation uses a word in the code section, and it
|
||||
* has to be loaded at runtime.
|
||||
*
|
||||
* This trick hides the assignment from the compiler. Since it cannot
|
||||
* track where the pointer comes from, it will only use one relocation
|
||||
* where __get_datapage() is called, and then keep the result in a
|
||||
* register.
|
||||
*/
|
||||
/*
 * The template is empty, so the asm itself emits no instruction; the "0"
 * matching constraint places the input (&_vdso_data) in the same register
 * as the output operand (ret).
 */
asm("" : "=r"(ret) : "0"(&_vdso_data));
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* We can only guarantee 56 bits of precision. */
|
||||
#define ARCH_CLOCK_FIXED_MASK GENMASK_ULL(55, 0)
|
||||
|
||||
#endif /* __VDSO_DATAPAGE_H */
|
3
arch/arm64/kernel/vdso/vgettimeofday.c
Normal file
3
arch/arm64/kernel/vdso/vgettimeofday.c
Normal file
|
@ -0,0 +1,3 @@
|
|||
#include "compiler.h"
|
||||
#include "datapage.h"
|
||||
#include "../../../../lib/vdso/vgettimeofday.c"
|
Loading…
Add table
Reference in a new issue