FROMLIST: [PATCH v5 10/12] arm64: vdso: replace gettimeofday.S with global vgettimeofday.c

(cherry picked from url https://patchwork.kernel.org/patch/10044501/)

Take an effort from the previous 9 patches to recode the arm64 vdso
code from assembler to C previously submitted by
Andrew Pinski <apinski@cavium.com>, rework it for use in both arm and
arm64, overlapping any optimizations for each architecture. But
instead of landing it in arm64, land the result into lib/vdso and
unify both implementations to simplify future maintenance.

apinski@cavium.com makes the following claims in the original patch:

This allows the compiler to optimize the divide by 1000 and remove
the other divides.

On ThunderX, gettimeofday improves by 32%.  On ThunderX 2,
gettimeofday improves by 18%.

Note: I noticed a bug in the old (arm64) implementation of
__kernel_clock_getres: it was checking only the lower 32 bits of the
pointer. This would work in most cases but could fail in a few.

<end of claim>

Signed-off-by: Mark Salyzyn <salyzyn@android.com>
Cc: James Morse <james.morse@arm.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Dmitry Safonov <dsafonov@virtuozzo.com>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Laura Abbott <labbott@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Andy Gross <andy.gross@linaro.org>
Cc: Kevin Brodsky <kevin.brodsky@arm.com>
Cc: Andrew Pinski <apinski@cavium.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Bug: 63737556
Bug: 20045882
Change-Id: I71ff27ff5bfa323354fda6867b01ec908d8d6cbd
This commit is contained in:
Mark Salyzyn 2017-08-04 09:58:40 -07:00 committed by Timi
parent e5016f919d
commit b9c53b6ff8
6 changed files with 106 additions and 44 deletions

View file

@ -20,16 +20,31 @@
#ifndef __ASSEMBLY__
/*
 * Types used for the wtm_clock_nsec, xtime_clock_sec and raw_time_sec
 * members of struct vdso_data. Each typedef sits behind its own #ifndef
 * guard, so it is only emitted when the guard macro has not already been
 * defined; in that case the type defaults to __u64.
 */
/*
 * NOTE(review): the guard macro is named ..._SEC_T while the typedef it
 * protects is ..._nsec_t -- confirm the naming mismatch is intentional.
 */
#ifndef _VDSO_WTM_CLOCK_SEC_T
#define _VDSO_WTM_CLOCK_SEC_T
typedef __u64 vdso_wtm_clock_nsec_t;
#endif
/* Type of the xtime_clock_sec member. */
#ifndef _VDSO_XTIME_CLOCK_SEC_T
#define _VDSO_XTIME_CLOCK_SEC_T
typedef __u64 vdso_xtime_clock_sec_t;
#endif
/* Type of the raw_time_sec member. */
#ifndef _VDSO_RAW_TIME_SEC_T
#define _VDSO_RAW_TIME_SEC_T
typedef __u64 vdso_raw_time_sec_t;
#endif
struct vdso_data {
__u64 cs_cycle_last; /* Timebase at clocksource init */
__u64 raw_time_sec; /* Raw time */
vdso_raw_time_sec_t raw_time_sec; /* Raw time */
__u64 raw_time_nsec;
__u64 xtime_clock_sec; /* Kernel time */
__u64 xtime_clock_nsec;
vdso_xtime_clock_sec_t xtime_clock_sec; /* Kernel time */
__u64 xtime_clock_snsec;
__u64 xtime_coarse_sec; /* Coarse time */
__u64 xtime_coarse_nsec;
__u64 wtm_clock_sec; /* Wall to monotonic time */
__u64 wtm_clock_nsec;
vdso_wtm_clock_nsec_t wtm_clock_nsec;
__u32 tb_seq_count; /* Timebase sequence counter */
/* cs_* members must be adjacent and in this order (ldp accesses) */
__u32 cs_mono_mult; /* NTP-adjusted clocksource multiplier */

View file

@ -96,40 +96,6 @@ int main(void)
DEFINE(DMA_TO_DEVICE, DMA_TO_DEVICE);
DEFINE(DMA_FROM_DEVICE, DMA_FROM_DEVICE);
BLANK();
DEFINE(CLOCK_REALTIME, CLOCK_REALTIME);
DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC);
DEFINE(CLOCK_MONOTONIC_RAW, CLOCK_MONOTONIC_RAW);
DEFINE(CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC);
DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE);
DEFINE(CLOCK_MONOTONIC_COARSE,CLOCK_MONOTONIC_COARSE);
DEFINE(CLOCK_COARSE_RES, LOW_RES_NSEC);
DEFINE(NSEC_PER_SEC, NSEC_PER_SEC);
BLANK();
DEFINE(VDSO_CS_CYCLE_LAST, offsetof(struct vdso_data, cs_cycle_last));
DEFINE(VDSO_RAW_TIME_SEC, offsetof(struct vdso_data, raw_time_sec));
DEFINE(VDSO_RAW_TIME_NSEC, offsetof(struct vdso_data, raw_time_nsec));
DEFINE(VDSO_XTIME_CLK_SEC, offsetof(struct vdso_data, xtime_clock_sec));
DEFINE(VDSO_XTIME_CLK_NSEC, offsetof(struct vdso_data, xtime_clock_nsec));
DEFINE(VDSO_XTIME_CRS_SEC, offsetof(struct vdso_data, xtime_coarse_sec));
DEFINE(VDSO_XTIME_CRS_NSEC, offsetof(struct vdso_data, xtime_coarse_nsec));
DEFINE(VDSO_WTM_CLK_SEC, offsetof(struct vdso_data, wtm_clock_sec));
DEFINE(VDSO_WTM_CLK_NSEC, offsetof(struct vdso_data, wtm_clock_nsec));
DEFINE(VDSO_TB_SEQ_COUNT, offsetof(struct vdso_data, tb_seq_count));
DEFINE(VDSO_CS_MONO_MULT, offsetof(struct vdso_data, cs_mono_mult));
DEFINE(VDSO_CS_RAW_MULT, offsetof(struct vdso_data, cs_raw_mult));
DEFINE(VDSO_CS_SHIFT, offsetof(struct vdso_data, cs_shift));
DEFINE(VDSO_TZ_MINWEST, offsetof(struct vdso_data, tz_minuteswest));
DEFINE(VDSO_TZ_DSTTIME, offsetof(struct vdso_data, tz_dsttime));
DEFINE(VDSO_USE_SYSCALL, offsetof(struct vdso_data, use_syscall));
BLANK();
DEFINE(TVAL_TV_SEC, offsetof(struct timeval, tv_sec));
DEFINE(TVAL_TV_USEC, offsetof(struct timeval, tv_usec));
DEFINE(TSPEC_TV_SEC, offsetof(struct timespec, tv_sec));
DEFINE(TSPEC_TV_NSEC, offsetof(struct timespec, tv_nsec));
BLANK();
DEFINE(TZ_MINWEST, offsetof(struct timezone, tz_minuteswest));
DEFINE(TZ_DSTTIME, offsetof(struct timezone, tz_dsttime));
BLANK();
#ifdef CONFIG_THREAD_INFO_IN_TASK
DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack));
DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task));

View file

@ -261,7 +261,7 @@ void update_vsyscall(struct timekeeper *tk)
vdso_data->raw_time_sec = tk->raw_sec;
vdso_data->raw_time_nsec = tk->tkr_raw.xtime_nsec;
vdso_data->xtime_clock_sec = tk->xtime_sec;
vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec;
vdso_data->xtime_clock_snsec = tk->tkr_mono.xtime_nsec;
vdso_data->cs_mono_mult = tk->tkr_mono.mult;
vdso_data->cs_raw_mult = tk->tkr_raw.mult;
/* tkr_mono.shift == tkr_raw.shift */

View file

@ -5,18 +5,32 @@
# Heavily based on the vDSO Makefiles for other archs.
#
obj-vdso := gettimeofday.o note.o sigreturn.o
obj-vdso-s := note.o sigreturn.o
obj-vdso-c := vgettimeofday.o
# Build rules
targets := $(obj-vdso) vdso.so vdso.so.dbg
obj-vdso := $(addprefix $(obj)/, $(obj-vdso))
targets := $(obj-vdso-s) $(obj-vdso-c) vdso.so vdso.so.dbg
obj-vdso-s := $(addprefix $(obj)/, $(obj-vdso-s))
obj-vdso-c := $(addprefix $(obj)/, $(obj-vdso-c))
obj-vdso := $(obj-vdso-c) $(obj-vdso-s)
ccflags-y := -shared -fno-common -fno-builtin
ccflags-y := -shared -fno-common -fno-builtin -fno-stack-protector
ccflags-y += -DDISABLE_BRANCH_PROFILING
ccflags-y += -nostdlib -Wl,-soname=linux-vdso.so.1 \
$(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
# Force -O2 to avoid libgcc dependencies
CFLAGS_REMOVE_vgettimeofday.o = -pg -Os
CFLAGS_vgettimeofday.o = -O2 -fPIC
ifneq ($(cc-name),clang)
CFLAGS_vgettimeofday.o += -mcmodel=tiny
endif
# Disable gcov profiling for VDSO code
GCOV_PROFILE := n
KASAN_SANITIZE := n
UBSAN_SANITIZE := n
KCOV_INSTRUMENT := n
# Workaround for bare-metal (ELF) toolchains that neglect to pass -shared
# down to collect2, resulting in silent corruption of the vDSO image.
@ -49,12 +63,17 @@ include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE
$(call if_changed,vdsosym)
# Assembly rules for the .S files
$(obj-vdso): %.o: %.S FORCE
$(obj-vdso-s): %.o: %.S FORCE
$(call if_changed_dep,vdsoas)
$(obj-vdso-c): %.o: %.c FORCE
$(call if_changed_dep,vdsocc)
# Actual build commands
quiet_cmd_vdsold = VDSOL $@
cmd_vdsold = $(CC) $(c_flags) -Wl,-n -Wl,-T $^ -o $@
quiet_cmd_vdsocc = VDSOC $@
cmd_vdsocc = ${CC} $(c_flags) -c -o $@ $<
quiet_cmd_vdsoas = VDSOA $@
cmd_vdsoas = $(CC) $(a_flags) -c -o $@ $<

View file

@ -0,0 +1,59 @@
/*
* Userspace implementations of __get_datapage
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __VDSO_DATAPAGE_H
#define __VDSO_DATAPAGE_H
#include <linux/bitops.h>
#include <linux/types.h>
#include <asm/vdso_datapage.h>
/*
* We use the hidden visibility to prevent the compiler from generating a GOT
* relocation. Not only is going through a GOT useless (the entry couldn't and
* mustn't be overridden by another library), it does not even work: the linker
* cannot generate an absolute address to the data page.
*
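* With the hidden visibility, the compiler simply generates a PC-relative
* relocation, and this is what we need. (R_ARM_REL32, the name originally
* cited here, is the 32-bit ARM relocation; NOTE(review): confirm which
* relocation type is actually emitted for this build.)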
*/
extern const struct vdso_data _vdso_data __attribute__((visibility("hidden")));
static inline const struct vdso_data *__get_datapage(void)
{
	const struct vdso_data *datapage;

	/*
	 * Route &_vdso_data into datapage through an empty asm statement
	 * instead of writing a plain `datapage = &_vdso_data`.
	 *
	 * With the plain assignment the compiler tends to produce poor code:
	 * rather than holding &_vdso_data in a register, it re-materialises
	 * the address through a relocation nearly every time _vdso_data is
	 * touched (including in subfunctions). That costs both space and
	 * time, since every relocation takes a word in the code section and
	 * must be loaded at runtime.
	 *
	 * The empty asm makes the origin of the pointer opaque to the
	 * compiler, so a single relocation is used at the point where
	 * __get_datapage() is called and the result then stays in a
	 * register.
	 */
	asm("" : "=r"(datapage) : "0"(&_vdso_data));

	return datapage;
}
/*
 * We can only guarantee 56 bits of precision, hence the mask built from
 * GENMASK_ULL(55, 0).
 */
#define ARCH_CLOCK_FIXED_MASK GENMASK_ULL(55, 0)
#endif /* __VDSO_DATAPAGE_H */

View file

@ -0,0 +1,3 @@
#include "compiler.h"
#include "datapage.h"
#include "../../../../lib/vdso/vgettimeofday.c"