[PATCH] i386 and x86_64 TSC set_cyc2ns_scale imprecision
I just found out that some precision is unnecessarily lost in the arch/i386/kernel/timers/timer_tsc.c:set_cyc2ns_scale function. It uses a cpu_mhz parameter when it could use cpu_khz. In the specific case of an Intel P4 running at 3001.171 MHz, the truncation to 3001 MHz leads to an imprecision of 19 microseconds per second: this is very sad for a timer with nearly nanosecond accuracy. Fix the x86_64 architecture too. Cc: george anzinger <george@mvista.com> Cc: john stultz <johnstul@us.ibm.com> Cc: Andi Kleen <ak@muc.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
This commit is contained in:
parent
bfd51626cb
commit
dacb16b1a0
3 changed files with 28 additions and 18 deletions
|
@ -30,23 +30,28 @@ static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
|
||||||
* basic equation:
|
* basic equation:
|
||||||
* ns = cycles / (freq / ns_per_sec)
|
* ns = cycles / (freq / ns_per_sec)
|
||||||
* ns = cycles * (ns_per_sec / freq)
|
* ns = cycles * (ns_per_sec / freq)
|
||||||
* ns = cycles * (10^9 / (cpu_mhz * 10^6))
|
* ns = cycles * (10^9 / (cpu_khz * 10^3))
|
||||||
* ns = cycles * (10^3 / cpu_mhz)
|
* ns = cycles * (10^6 / cpu_khz)
|
||||||
*
|
*
|
||||||
* Then we use scaling math (suggested by george@mvista.com) to get:
|
* Then we use scaling math (suggested by george@mvista.com) to get:
|
||||||
* ns = cycles * (10^3 * SC / cpu_mhz) / SC
|
* ns = cycles * (10^6 * SC / cpu_khz) / SC
|
||||||
* ns = cycles * cyc2ns_scale / SC
|
* ns = cycles * cyc2ns_scale / SC
|
||||||
*
|
*
|
||||||
* And since SC is a constant power of two, we can convert the div
|
* And since SC is a constant power of two, we can convert the div
|
||||||
* into a shift.
|
* into a shift.
|
||||||
|
*
|
||||||
|
* We can use a khz divisor instead of mhz to keep better precision, since
|
||||||
|
* cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
|
||||||
|
* (mathieu.desnoyers@polymtl.ca)
|
||||||
|
*
|
||||||
* -johnstul@us.ibm.com "math is hard, lets go shopping!"
|
* -johnstul@us.ibm.com "math is hard, lets go shopping!"
|
||||||
*/
|
*/
|
||||||
static unsigned long cyc2ns_scale;
|
static unsigned long cyc2ns_scale;
|
||||||
#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
|
#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
|
||||||
|
|
||||||
static inline void set_cyc2ns_scale(unsigned long cpu_mhz)
|
static inline void set_cyc2ns_scale(unsigned long cpu_khz)
|
||||||
{
|
{
|
||||||
cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz;
|
cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline unsigned long long cycles_2_ns(unsigned long long cyc)
|
static inline unsigned long long cycles_2_ns(unsigned long long cyc)
|
||||||
|
@ -163,7 +168,7 @@ static int __init init_hpet(char* override)
|
||||||
printk("Detected %u.%03u MHz processor.\n",
|
printk("Detected %u.%03u MHz processor.\n",
|
||||||
cpu_khz / 1000, cpu_khz % 1000);
|
cpu_khz / 1000, cpu_khz % 1000);
|
||||||
}
|
}
|
||||||
set_cyc2ns_scale(cpu_khz/1000);
|
set_cyc2ns_scale(cpu_khz);
|
||||||
}
|
}
|
||||||
/* set this only when cpu_has_tsc */
|
/* set this only when cpu_has_tsc */
|
||||||
timer_hpet.read_timer = read_timer_tsc;
|
timer_hpet.read_timer = read_timer_tsc;
|
||||||
|
|
|
@ -49,23 +49,28 @@ static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
|
||||||
* basic equation:
|
* basic equation:
|
||||||
* ns = cycles / (freq / ns_per_sec)
|
* ns = cycles / (freq / ns_per_sec)
|
||||||
* ns = cycles * (ns_per_sec / freq)
|
* ns = cycles * (ns_per_sec / freq)
|
||||||
* ns = cycles * (10^9 / (cpu_mhz * 10^6))
|
* ns = cycles * (10^9 / (cpu_khz * 10^3))
|
||||||
* ns = cycles * (10^3 / cpu_mhz)
|
* ns = cycles * (10^6 / cpu_khz)
|
||||||
*
|
*
|
||||||
* Then we use scaling math (suggested by george@mvista.com) to get:
|
* Then we use scaling math (suggested by george@mvista.com) to get:
|
||||||
* ns = cycles * (10^3 * SC / cpu_mhz) / SC
|
* ns = cycles * (10^6 * SC / cpu_khz) / SC
|
||||||
* ns = cycles * cyc2ns_scale / SC
|
* ns = cycles * cyc2ns_scale / SC
|
||||||
*
|
*
|
||||||
* And since SC is a constant power of two, we can convert the div
|
* And since SC is a constant power of two, we can convert the div
|
||||||
* into a shift.
|
* into a shift.
|
||||||
|
*
|
||||||
|
* We can use a khz divisor instead of mhz to keep better precision, since
|
||||||
|
* cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
|
||||||
|
* (mathieu.desnoyers@polymtl.ca)
|
||||||
|
*
|
||||||
* -johnstul@us.ibm.com "math is hard, lets go shopping!"
|
* -johnstul@us.ibm.com "math is hard, lets go shopping!"
|
||||||
*/
|
*/
|
||||||
static unsigned long cyc2ns_scale;
|
static unsigned long cyc2ns_scale;
|
||||||
#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
|
#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
|
||||||
|
|
||||||
static inline void set_cyc2ns_scale(unsigned long cpu_mhz)
|
static inline void set_cyc2ns_scale(unsigned long cpu_khz)
|
||||||
{
|
{
|
||||||
cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz;
|
cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline unsigned long long cycles_2_ns(unsigned long long cyc)
|
static inline unsigned long long cycles_2_ns(unsigned long long cyc)
|
||||||
|
@ -286,7 +291,7 @@ time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
|
||||||
if (use_tsc) {
|
if (use_tsc) {
|
||||||
if (!(freq->flags & CPUFREQ_CONST_LOOPS)) {
|
if (!(freq->flags & CPUFREQ_CONST_LOOPS)) {
|
||||||
fast_gettimeoffset_quotient = cpufreq_scale(fast_gettimeoffset_ref, freq->new, ref_freq);
|
fast_gettimeoffset_quotient = cpufreq_scale(fast_gettimeoffset_ref, freq->new, ref_freq);
|
||||||
set_cyc2ns_scale(cpu_khz/1000);
|
set_cyc2ns_scale(cpu_khz);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -536,7 +541,7 @@ static int __init init_tsc(char* override)
|
||||||
printk("Detected %u.%03u MHz processor.\n",
|
printk("Detected %u.%03u MHz processor.\n",
|
||||||
cpu_khz / 1000, cpu_khz % 1000);
|
cpu_khz / 1000, cpu_khz % 1000);
|
||||||
}
|
}
|
||||||
set_cyc2ns_scale(cpu_khz/1000);
|
set_cyc2ns_scale(cpu_khz);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -481,9 +481,9 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
|
||||||
static unsigned int cyc2ns_scale;
|
static unsigned int cyc2ns_scale;
|
||||||
#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
|
#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
|
||||||
|
|
||||||
static inline void set_cyc2ns_scale(unsigned long cpu_mhz)
|
static inline void set_cyc2ns_scale(unsigned long cpu_khz)
|
||||||
{
|
{
|
||||||
cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz;
|
cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline unsigned long long cycles_2_ns(unsigned long long cyc)
|
static inline unsigned long long cycles_2_ns(unsigned long long cyc)
|
||||||
|
@ -655,7 +655,7 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
|
||||||
vxtime.tsc_quot = (1000L << 32) / cpu_khz;
|
vxtime.tsc_quot = (1000L << 32) / cpu_khz;
|
||||||
}
|
}
|
||||||
|
|
||||||
set_cyc2ns_scale(cpu_khz_ref / 1000);
|
set_cyc2ns_scale(cpu_khz_ref);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -939,7 +939,7 @@ void __init time_init(void)
|
||||||
rdtscll_sync(&vxtime.last_tsc);
|
rdtscll_sync(&vxtime.last_tsc);
|
||||||
setup_irq(0, &irq0);
|
setup_irq(0, &irq0);
|
||||||
|
|
||||||
set_cyc2ns_scale(cpu_khz / 1000);
|
set_cyc2ns_scale(cpu_khz);
|
||||||
|
|
||||||
#ifndef CONFIG_SMP
|
#ifndef CONFIG_SMP
|
||||||
time_init_gtod();
|
time_init_gtod();
|
||||||
|
|
Loading…
Add table
Reference in a new issue