x86-64: Don't generate cmov in vread_tsc
vread_tsc checks whether rdtsc returns something less than cycle_last, which is an extremely predictable branch. GCC likes to generate a cmov anyway, which is several cycles slower than a predicted branch. This saves a couple of nanoseconds. Signed-off-by: Andy Lutomirski <luto@mit.edu> Cc: Andi Kleen <andi@firstfloor.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: "David S. Miller" <davem@davemloft.net> Cc: Eric Dumazet <eric.dumazet@gmail.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Borislav Petkov <bp@amd64.org> Link: http://lkml.kernel.org/r/%3C561280649519de41352fcb620684dfb22bad6bac.1306156808.git.luto%40mit.edu%3E Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
This commit is contained in:
parent
057e6a8c66
commit
3729db5ca2
1 changed file with 16 additions and 2 deletions
|
@ -767,6 +767,7 @@ static cycle_t read_tsc(struct clocksource *cs)
|
||||||
static cycle_t __vsyscall_fn vread_tsc(void)
|
static cycle_t __vsyscall_fn vread_tsc(void)
|
||||||
{
|
{
|
||||||
cycle_t ret;
|
cycle_t ret;
|
||||||
|
u64 last;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Empirically, a fence (of type that depends on the CPU)
|
* Empirically, a fence (of type that depends on the CPU)
|
||||||
|
@ -778,8 +779,21 @@ static cycle_t __vsyscall_fn vread_tsc(void)
|
||||||
rdtsc_barrier();
|
rdtsc_barrier();
|
||||||
ret = (cycle_t)vget_cycles();
|
ret = (cycle_t)vget_cycles();
|
||||||
|
|
||||||
return ret >= VVAR(vsyscall_gtod_data).clock.cycle_last ?
|
last = VVAR(vsyscall_gtod_data).clock.cycle_last;
|
||||||
ret : VVAR(vsyscall_gtod_data).clock.cycle_last;
|
|
||||||
|
if (likely(ret >= last))
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* GCC likes to generate cmov here, but this branch is extremely
|
||||||
|
* predictable (it's just a function of time and the likely is
|
||||||
|
* very likely) and there's a data dependence, so force GCC
|
||||||
|
* to generate a branch instead. I don't barrier() because
|
||||||
|
* we don't actually need a barrier, and if this function
|
||||||
|
* ever gets inlined it will generate worse code.
|
||||||
|
*/
|
||||||
|
asm volatile ("");
|
||||||
|
return last;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue