sched: use ktime instead of sched_clock for load tracking

At present, the HMP scheduler uses sched_clock to set up its window
boundary so that it is aligned with the timer interrupt, ensuring the
timer interrupt fires after the window rollover.  However, this alignment
doesn't last, because the timer interrupt rearms the next timer based on
time measured by ktime, which isn't coupled to sched_clock.

Convert sched_clock to ktime to avoid the wallclock discrepancy between
the scheduler and the timer, so that the scheduler's window boundary is
always aligned with the timer.

CRs-fixed: 933330
Change-Id: I4108819a4382f725b3ce6075eb46aab0cf670b7e
[joonwoop@codeaurora.org: fixed minor conflict in include/linux/tick.h
 and kernel/sched/core.c.  omitted fixes for kernel/sched/qhmp_core.c]
Signed-off-by: Joonwoo Park <joonwoop@codeaurora.org>
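
To make the failure mode concrete, below is a minimal user-space sketch of
the drift described above.  It is illustrative only: the helper names, the
drift values, and the 10 ms window are invented stand-ins for
sched_clock()/ktime_get_ns() and the HMP window size, not kernel APIs.

```c
/*
 * Illustrative sketch only -- not part of the patch.  Models how a window
 * boundary tracked on the sched_clock base and a tick armed on the ktime
 * base drift out of alignment.  All names and numbers are made up.
 */
#include <stdint.h>
#include <stdio.h>

#define WINDOW_NS (10ULL * 1000 * 1000)   /* 10 ms scheduler window */
#define TICK_NS   (10ULL * 1000 * 1000)   /* 10 ms tick period */

/* Stand-ins for the two kernel time bases; 'drift' is the offset between
 * them, which nothing bounds once the clocks are decoupled. */
static uint64_t read_ktime_ns(uint64_t real_ns)            { return real_ns; }
static uint64_t read_sched_clock_ns(uint64_t real_ns, int64_t drift)
{
	return real_ns + drift;
}

int main(void)
{
	/* Window boundary set on the sched_clock base at time zero ... */
	uint64_t window_end = read_sched_clock_ns(0, 0) + WINDOW_NS;
	/* ... while the timer subsystem arms the next tick on ktime. */
	uint64_t next_tick = read_ktime_ns(0) + TICK_NS;
	int64_t drift;

	/* Once sched_clock lags ktime, the tick fires before the window
	 * has rolled over on the sched_clock base -- the ordering the
	 * pre-patch alignment was supposed to guarantee is broken. */
	for (drift = 0; drift >= -200000; drift -= 100000) {
		uint64_t sc_at_tick = read_sched_clock_ns(next_tick, drift);

		printf("drift=%7lld ns: tick fires %s window rollover\n",
		       (long long)drift,
		       sc_at_tick >= window_end ? "after" : "BEFORE");
	}
	return 0;
}
```

With the patch applied, both the window boundary and the tick are derived
from the same ktime base, so the ordering holds regardless of how
sched_clock behaves.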
commit 0498f793e8 (parent d2d5734fec)
Authored by Joonwoo Park on 2015-11-24 14:33:26 -08:00; committed by David Keitel.
5 changed files with 80 additions and 46 deletions

include/linux/sched.h

@@ -2406,6 +2406,7 @@ extern u64 local_clock(void);
extern u64 running_clock(void);
extern u64 sched_clock_cpu(int cpu);
extern u64 sched_ktime_clock(void);
extern void sched_clock_init(void);
extern int sched_clock_initialized(void);

include/linux/tick.h

@@ -27,7 +27,7 @@ static inline void tick_handover_do_timer(void) { }
static inline void tick_cleanup_dead_cpu(int cpu) { }
#endif /* !CONFIG_GENERIC_CLOCKEVENTS */
extern u64 jiffy_to_sched_clock(u64 *now, u64 *jiffy_sched_clock);
extern u64 jiffy_to_ktime_ns(u64 *now, u64 *jiffy_ktime_ns);
#if defined(CONFIG_GENERIC_CLOCKEVENTS) && defined(CONFIG_SUSPEND)
extern void tick_freeze(void);

include/trace/events/sched.h

@@ -138,7 +138,8 @@ TRACE_EVENT(sched_task_load,
__entry->need_idle = need_idle;
__entry->best_cpu = best_cpu;
__entry->latency = p->state == TASK_WAKING ?
sched_clock() - p->ravg.mark_start : 0;
sched_ktime_clock() -
p->ravg.mark_start : 0;
),
TP_printk("%d (%s): demand=%u boost=%d reason=%d sync=%d need_idle=%d best_cpu=%d latency=%llu",

kernel/sched/core.c

@@ -75,6 +75,7 @@
#include <linux/context_tracking.h>
#include <linux/compiler.h>
#include <linux/cpufreq.h>
#include <linux/syscore_ops.h>
#include <asm/switch_to.h>
#include <asm/tlb.h>
@@ -813,6 +814,41 @@ void sched_set_cluster_dstate(const cpumask_t *cluster_cpus, int dstate,
#endif /* CONFIG_SMP */
#ifdef CONFIG_SCHED_HMP
static ktime_t ktime_last;
static bool sched_ktime_suspended;
u64 sched_ktime_clock(void)
{
if (unlikely(sched_ktime_suspended))
return ktime_to_ns(ktime_last);
return ktime_get_ns();
}
static void sched_resume(void)
{
sched_ktime_suspended = false;
}
static int sched_suspend(void)
{
ktime_last = ktime_get();
sched_ktime_suspended = true;
return 0;
}
static struct syscore_ops sched_syscore_ops = {
.resume = sched_resume,
.suspend = sched_suspend
};
static int __init sched_init_ops(void)
{
register_syscore_ops(&sched_syscore_ops);
return 0;
}
late_initcall(sched_init_ops);
static inline void clear_ed_task(struct task_struct *p, struct rq *rq)
{
if (p == rq->ed_task)
@@ -824,6 +860,11 @@ static inline void set_task_last_wake(struct task_struct *p, u64 wallclock)
p->last_wake_ts = wallclock;
}
#else
u64 sched_ktime_clock(void)
{
return 0;
}
static inline void clear_ed_task(struct task_struct *p, struct rq *rq) {}
static inline void set_task_last_wake(struct task_struct *p, u64 wallclock) {}
#endif
@@ -2067,16 +2108,20 @@ void sched_account_irqtime(int cpu, struct task_struct *curr,
{
struct rq *rq = cpu_rq(cpu);
unsigned long flags, nr_windows;
u64 cur_jiffies_ts, now;
u64 cur_jiffies_ts;
raw_spin_lock_irqsave(&rq->lock, flags);
now = sched_clock();
delta += (now - wallclock);
/*
* cputime (wallclock) uses sched_clock so use the same here for
* consistency.
*/
delta += sched_clock() - wallclock;
cur_jiffies_ts = get_jiffies_64();
if (is_idle_task(curr))
update_task_ravg(curr, rq, IRQ_UPDATE, now, delta);
update_task_ravg(curr, rq, IRQ_UPDATE, sched_ktime_clock(),
delta);
nr_windows = cur_jiffies_ts - rq->irqload_ts;
@@ -2141,14 +2186,15 @@ static void reset_task_stats(struct task_struct *p)
static inline void mark_task_starting(struct task_struct *p)
{
u64 wallclock;
struct rq *rq = task_rq(p);
u64 wallclock = sched_clock();
if (!rq->window_start || sched_disable_window_stats) {
reset_task_stats(p);
return;
}
wallclock = sched_ktime_clock();
p->ravg.mark_start = p->last_wake_ts = wallclock;
}
@@ -2157,12 +2203,11 @@ static inline void set_window_start(struct rq *rq)
int cpu = cpu_of(rq);
struct rq *sync_rq = cpu_rq(sync_cpu);
if (rq->window_start || !sched_enable_hmp ||
!sched_clock_initialized() || !sched_clock_cpu(cpu))
if (rq->window_start || !sched_enable_hmp)
return;
if (cpu == sync_cpu) {
rq->window_start = sched_clock();
rq->window_start = sched_ktime_clock();
} else {
raw_spin_unlock(&rq->lock);
double_rq_lock(rq, sync_rq);
@@ -2215,7 +2260,7 @@ void sched_exit(struct task_struct *p)
raw_spin_lock_irqsave(&rq->lock, flags);
/* rq->curr == p */
wallclock = sched_clock();
wallclock = sched_ktime_clock();
update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
dequeue_task(rq, p, 0);
reset_task_stats(p);
@@ -2274,7 +2319,7 @@ void reset_all_window_stats(u64 window_start, unsigned int window_size)
{
int cpu;
unsigned long flags;
u64 start_ts = sched_clock();
u64 start_ts = sched_ktime_clock();
int reason = WINDOW_CHANGE;
unsigned int old = 0, new = 0;
@@ -2348,7 +2393,7 @@ void reset_all_window_stats(u64 window_start, unsigned int window_size)
local_irq_restore(flags);
trace_sched_reset_all_window_stats(window_start, window_size,
sched_clock() - start_ts, reason, old, new);
sched_ktime_clock() - start_ts, reason, old, new);
}
#ifdef CONFIG_SCHED_FREQ_INPUT
@@ -2389,7 +2434,8 @@ void sched_get_cpus_busy(struct sched_load *busy,
for_each_cpu(cpu, query_cpus) {
rq = cpu_rq(cpu);
update_task_ravg(rq->curr, rq, TASK_UPDATE, sched_clock(), 0);
update_task_ravg(rq->curr, rq, TASK_UPDATE,
sched_ktime_clock(), 0);
load[i] = rq->old_busy_time = rq->prev_runnable_sum;
nload[i] = rq->nt_prev_runnable_sum;
/*
@@ -2473,7 +2519,7 @@ void sched_set_io_is_busy(int val)
int sched_set_window(u64 window_start, unsigned int window_size)
{
u64 now, cur_jiffies, jiffy_sched_clock;
u64 now, cur_jiffies, jiffy_ktime_ns;
s64 ws;
unsigned long flags;
@@ -2483,23 +2529,25 @@ int sched_set_window(u64 window_start, unsigned int window_size)
mutex_lock(&policy_mutex);
/* Get a consistent view of sched_clock, jiffies, and the time
* since the last jiffy (based on last_jiffies_update). */
/*
* Get a consistent view of ktime, jiffies, and the time
* since the last jiffy (based on last_jiffies_update).
*/
local_irq_save(flags);
cur_jiffies = jiffy_to_sched_clock(&now, &jiffy_sched_clock);
cur_jiffies = jiffy_to_ktime_ns(&now, &jiffy_ktime_ns);
local_irq_restore(flags);
/* translate window_start from jiffies to nanoseconds */
ws = (window_start - cur_jiffies); /* jiffy difference */
ws *= TICK_NSEC;
ws += jiffy_sched_clock;
ws += jiffy_ktime_ns;
/* roll back calculated window start so that it is in
* the past (window stats must have a current window) */
while (ws > now)
ws -= (window_size * TICK_NSEC);
BUG_ON(sched_clock() < ws);
BUG_ON(sched_ktime_clock() < ws);
reset_all_window_stats(ws, window_size);
@@ -2532,7 +2580,7 @@ static void fixup_busy_time(struct task_struct *p, int new_cpu)
if (sched_disable_window_stats)
goto done;
wallclock = sched_clock();
wallclock = sched_ktime_clock();
update_task_ravg(task_rq(p)->curr, task_rq(p),
TASK_UPDATE,
@@ -2857,7 +2905,8 @@ static int cpufreq_notifier_trans(struct notifier_block *nb,
for_each_cpu(i, &cpu_rq(cpu)->freq_domain_cpumask) {
struct rq *rq = cpu_rq(i);
raw_spin_lock_irqsave(&rq->lock, flags);
update_task_ravg(rq->curr, rq, TASK_UPDATE, sched_clock(), 0);
update_task_ravg(rq->curr, rq, TASK_UPDATE,
sched_ktime_clock(), 0);
rq->cur_freq = new_freq;
raw_spin_unlock_irqrestore(&rq->lock, flags);
}
@@ -3956,7 +4005,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
rq = cpu_rq(task_cpu(p));
raw_spin_lock(&rq->lock);
wallclock = sched_clock();
wallclock = sched_ktime_clock();
update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
heavy_task = heavy_task_wakeup(p, rq, TASK_WAKE);
update_task_ravg(p, rq, TASK_WAKE, wallclock, 0);
@@ -4058,7 +4107,7 @@ static void try_to_wake_up_local(struct task_struct *p)
trace_sched_waking(p);
if (!task_on_rq_queued(p)) {
u64 wallclock = sched_clock();
u64 wallclock = sched_ktime_clock();
update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
update_task_ravg(p, rq, TASK_WAKE, wallclock, 0);
@@ -4936,7 +4985,7 @@ void scheduler_tick(void)
curr->sched_class->task_tick(rq, curr, 0);
update_cpu_load_active(rq);
calc_global_load_tick(rq);
wallclock = sched_clock();
wallclock = sched_ktime_clock();
update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
early_notif = early_detection_notify(rq, wallclock);
raw_spin_unlock(&rq->lock);
@@ -5245,7 +5294,7 @@ static void __sched notrace __schedule(bool preempt)
update_rq_clock(rq);
next = pick_next_task(rq, prev);
wallclock = sched_clock();
wallclock = sched_ktime_clock();
update_task_ravg(prev, rq, PUT_PREV_TASK, wallclock, 0);
update_task_ravg(next, rq, PICK_NEXT_TASK, wallclock, 0);
clear_tsk_need_resched(prev);

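As a follow-up to the sched_set_window() hunk above, here is a hedged,
standalone restatement of its jiffies-to-nanoseconds window-start
arithmetic.  The tick length, the jiffy counts, and the function name are
invented for illustration; in the kernel the inputs come from
get_jiffies_64() and jiffy_to_ktime_ns(), read together under the jiffies
seqlock.

```c
/*
 * Illustrative restatement of the window-start arithmetic in the
 * sched_set_window() hunk above -- not kernel code.  Inputs are invented.
 */
#include <stdint.h>
#include <stdio.h>

#define TICK_NSEC 10000000LL	/* assume HZ=100: 10 ms per jiffy */

static uint64_t window_start_ns(uint64_t window_start_jiffies,
				uint64_t cur_jiffies,
				uint64_t jiffy_ktime_ns, /* ktime at last jiffy update */
				uint64_t now_ns,         /* current ktime */
				uint64_t window_size_jiffies)
{
	/* Translate the requested window start from jiffies to ktime ns,
	 * anchored at the ktime value of the most recent jiffy update. */
	int64_t ws = (int64_t)(window_start_jiffies - cur_jiffies) * TICK_NSEC;

	ws += (int64_t)jiffy_ktime_ns;

	/* Roll the start back until it lies in the past: the window
	 * statistics always need a current, already-started window. */
	while (ws > (int64_t)now_ns)
		ws -= (int64_t)window_size_jiffies * TICK_NSEC;

	return (uint64_t)ws;
}

int main(void)
{
	/* Requested start 3 jiffies ahead, window of 2 jiffies. */
	uint64_t ws = window_start_ns(1003, 1000,
				      5000000000ULL,	/* ktime at jiffy 1000 */
				      5004000000ULL,	/* "now" in ktime ns   */
				      2);

	printf("window start = %llu ns\n", (unsigned long long)ws);
	return 0;
}
```

Because the anchor is now ktime_to_ns(last_jiffies_update) rather than a
value converted from another clock, the SCHED_CLOCK_MARGIN fudge removed in
the tick-sched.c hunk below is no longer needed.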
kernel/time/tick-sched.c

@@ -46,32 +46,15 @@ static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched);
*/
static ktime_t last_jiffies_update;
/*
* Conversion from ktime to sched_clock is error prone. Use this
* as a safetly margin when calculating the sched_clock value at
* a particular jiffy as last_jiffies_update uses ktime.
*/
#define SCHED_CLOCK_MARGIN 100000
static u64 ns_since_jiffy(void)
{
ktime_t delta;
delta = ktime_sub(ktime_get(), last_jiffies_update);
return ktime_to_ns(delta);
}
u64 jiffy_to_sched_clock(u64 *now, u64 *jiffy_sched_clock)
u64 jiffy_to_ktime_ns(u64 *now, u64 *jiffy_ktime_ns)
{
u64 cur_jiffies;
unsigned long seq;
do {
seq = read_seqbegin(&jiffies_lock);
*now = sched_clock();
*jiffy_sched_clock = *now -
(ns_since_jiffy() + SCHED_CLOCK_MARGIN);
*now = ktime_get_ns();
*jiffy_ktime_ns = ktime_to_ns(last_jiffies_update);
cur_jiffies = get_jiffies_64();
} while (read_seqretry(&jiffies_lock, seq));