sched: backport cpufreq hooks from 4.9-rc4

The scheduler cpufreq hooks are required by the schedutil cpufreq
governor.

Change-Id: Ied6c46262bb33b7e81bbb3d3d2761124e0c676b7
Signed-off-by: Steve Muckle <smuckle@linaro.org>
[trivial cherry-picking fixes]
Signed-off-by: Juri Lelli <juri.lelli@arm.com>
Signed-off-by: Chris Redpath <chris.redpath@arm.com>
This commit is contained in:
Steve Muckle 2016-11-11 14:04:43 -08:00 committed by Andres Oportus
parent 5df19f969e
commit f02702dcf2
7 changed files with 191 additions and 7 deletions

View file

@ -3291,4 +3291,21 @@ static inline unsigned long rlimit_max(unsigned int limit)
return task_rlimit_max(current, limit);
}
#define SCHED_CPUFREQ_RT (1U << 0)
#define SCHED_CPUFREQ_DL (1U << 1)
#define SCHED_CPUFREQ_IOWAIT (1U << 2)
#define SCHED_CPUFREQ_RT_DL (SCHED_CPUFREQ_RT | SCHED_CPUFREQ_DL)
#ifdef CONFIG_CPU_FREQ
struct update_util_data {
void (*func)(struct update_util_data *data, u64 time, unsigned int flags);
};
void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data,
void (*func)(struct update_util_data *data, u64 time,
unsigned int flags));
void cpufreq_remove_update_util_hook(int cpu);
#endif /* CONFIG_CPU_FREQ */
#endif

View file

@ -21,4 +21,5 @@ obj-$(CONFIG_SCHEDSTATS) += stats.o
obj-$(CONFIG_SCHED_DEBUG) += debug.o
obj-$(CONFIG_SCHED_TUNE) += tune.o
obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o
obj-$(CONFIG_CPU_FREQ) += cpufreq.o
obj-$(CONFIG_CPU_FREQ_GOV_SCHED) += cpufreq_sched.o

63
kernel/sched/cpufreq.c Normal file
View file

@ -0,0 +1,63 @@
/*
* Scheduler code and data structures related to cpufreq.
*
* Copyright (C) 2016, Intel Corporation
* Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include "sched.h"
DEFINE_PER_CPU(struct update_util_data *, cpufreq_update_util_data);
/**
* cpufreq_add_update_util_hook - Populate the CPU's update_util_data pointer.
* @cpu: The CPU to set the pointer for.
* @data: New pointer value.
* @func: Callback function to set for the CPU.
*
* Set and publish the update_util_data pointer for the given CPU.
*
* The update_util_data pointer of @cpu is set to @data and the callback
* function pointer in the target struct update_util_data is set to @func.
* That function will be called by cpufreq_update_util() from RCU-sched
* read-side critical sections, so it must not sleep. @data will always be
* passed to it as the first argument which allows the function to get to the
* target update_util_data structure and its container.
*
* The update_util_data pointer of @cpu must be NULL when this function is
* called or it will WARN() and return with no effect.
*/
void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data,
void (*func)(struct update_util_data *data, u64 time,
unsigned int flags))
{
if (WARN_ON(!data || !func))
return;
if (WARN_ON(per_cpu(cpufreq_update_util_data, cpu)))
return;
data->func = func;
rcu_assign_pointer(per_cpu(cpufreq_update_util_data, cpu), data);
}
EXPORT_SYMBOL_GPL(cpufreq_add_update_util_hook);
/**
* cpufreq_remove_update_util_hook - Clear the CPU's update_util_data pointer.
* @cpu: The CPU to clear the pointer for.
*
* Clear the update_util_data pointer for the given CPU.
*
* Callers must use RCU-sched callbacks to free any memory that might be
* accessed via the old update_util_data pointer or invoke synchronize_sched()
* right after this function to avoid use-after-free.
*/
void cpufreq_remove_update_util_hook(int cpu)
{
rcu_assign_pointer(per_cpu(cpufreq_update_util_data, cpu), NULL);
}
EXPORT_SYMBOL_GPL(cpufreq_remove_update_util_hook);

View file

@ -753,6 +753,9 @@ static void update_curr_dl(struct rq *rq)
if (unlikely((s64)delta_exec <= 0))
return;
/* kick cpufreq (see the comment in kernel/sched/sched.h). */
cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_DL);
schedstat_set(curr->se.statistics.exec_max,
max(curr->se.statistics.exec_max, delta_exec));

View file

@ -2711,6 +2711,29 @@ static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force)
static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force) {}
#endif /* CONFIG_FAIR_GROUP_SCHED */
static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq)
{
if (&this_rq()->cfs == cfs_rq) {
/*
* There are a few boundary cases this might miss but it should
* get called often enough that that should (hopefully) not be
* a real problem -- added to that it only calls on the local
* CPU, so if we enqueue remotely we'll miss an update, but
* the next tick/schedule should update.
*
* It will not get called when we go idle, because the idle
* thread is a different class (!fair), nor will the utilization
* number include things like RT tasks.
*
* As is, the util number is not freq-invariant (we'd have to
* implement arch_scale_freq_capacity() for that).
*
* See cpu_util().
*/
cpufreq_update_util(rq_of(cfs_rq), 0);
}
}
static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq);
/*
@ -2731,10 +2754,11 @@ static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq);
} while (0)
/* Group cfs_rq's load_avg is used for task_h_load and update_cfs_share */
static inline int update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
static inline int update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq,
bool update_freq)
{
struct sched_avg *sa = &cfs_rq->avg;
int decayed, removed = 0;
int decayed, removed = 0, removed_util = 0;
if (atomic_long_read(&cfs_rq->removed_load_avg)) {
s64 r = atomic_long_xchg(&cfs_rq->removed_load_avg, 0);
@ -2747,6 +2771,7 @@ static inline int update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
long r = atomic_long_xchg(&cfs_rq->removed_util_avg, 0);
sub_positive(&sa->util_avg, r);
sub_positive(&sa->util_sum, r * LOAD_AVG_MAX);
removed_util = 1;
}
decayed = __update_load_avg(now, cpu_of(rq_of(cfs_rq)), sa,
@ -2761,6 +2786,9 @@ static inline int update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
if (cfs_rq == &rq_of(cfs_rq)->cfs)
trace_sched_load_avg_cpu(cpu_of(rq_of(cfs_rq)), cfs_rq);
if (update_freq && (decayed || removed_util))
cfs_rq_util_change(cfs_rq);
return decayed || removed;
}
@ -2779,7 +2807,7 @@ static inline void update_load_avg(struct sched_entity *se, int update_tg)
se->on_rq * scale_load_down(se->load.weight),
cfs_rq->curr == se, NULL);
if (update_cfs_rq_load_avg(now, cfs_rq) && update_tg)
if (update_cfs_rq_load_avg(now, cfs_rq, true) && update_tg)
update_tg_load_avg(cfs_rq, 0);
if (entity_is_task(se))
@ -2811,6 +2839,8 @@ skip_aging:
cfs_rq->avg.load_sum += se->avg.load_sum;
cfs_rq->avg.util_avg += se->avg.util_avg;
cfs_rq->avg.util_sum += se->avg.util_sum;
cfs_rq_util_change(cfs_rq);
}
static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
@ -2823,6 +2853,8 @@ static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
sub_positive(&cfs_rq->avg.load_sum, se->avg.load_sum);
sub_positive(&cfs_rq->avg.util_avg, se->avg.util_avg);
sub_positive(&cfs_rq->avg.util_sum, se->avg.util_sum);
cfs_rq_util_change(cfs_rq);
}
/* Add the load generated by se into cfs_rq's load average */
@ -2840,7 +2872,7 @@ enqueue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
cfs_rq->curr == se, NULL);
}
decayed = update_cfs_rq_load_avg(now, cfs_rq);
decayed = update_cfs_rq_load_avg(now, cfs_rq, !migrated);
cfs_rq->runnable_load_avg += sa->load_avg;
cfs_rq->runnable_load_sum += sa->load_sum;
@ -2940,7 +2972,11 @@ static int idle_balance(struct rq *this_rq);
#else /* CONFIG_SMP */
static inline void update_load_avg(struct sched_entity *se, int update_tg) {}
static inline void update_load_avg(struct sched_entity *se, int update_tg)
{
cpufreq_update_util(rq_of(cfs_rq_of(se)), 0);
}
static inline void
enqueue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) {}
static inline void
@ -4257,6 +4293,14 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
int task_wakeup = flags & ENQUEUE_WAKEUP;
#endif
/*
* If in_iowait is set, the code below may not trigger any cpufreq
* utilization updates, so do it here explicitly with the IOWAIT flag
* passed.
*/
if (p->in_iowait)
cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_IOWAIT);
for_each_sched_entity(se) {
if (se->on_rq)
break;
@ -6923,7 +6967,8 @@ static void update_blocked_averages(int cpu)
if (throttled_hierarchy(cfs_rq))
continue;
if (update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq))
if (update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq,
true))
update_tg_load_avg(cfs_rq, 0);
}
raw_spin_unlock_irqrestore(&rq->lock, flags);
@ -6984,7 +7029,7 @@ static inline void update_blocked_averages(int cpu)
raw_spin_lock_irqsave(&rq->lock, flags);
update_rq_clock(rq);
update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq);
update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq, true);
raw_spin_unlock_irqrestore(&rq->lock, flags);
}

View file

@ -1002,6 +1002,9 @@ static void update_curr_rt(struct rq *rq)
if (unlikely((s64)delta_exec <= 0))
return;
/* Kick cpufreq (see the comment in kernel/sched/sched.h). */
cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_RT);
schedstat_set(curr->se.statistics.exec_max,
max(curr->se.statistics.exec_max, delta_exec));

View file

@ -2038,3 +2038,55 @@ static inline void account_reset_rq(struct rq *rq)
rq->prev_steal_time_rq = 0;
#endif
}
#ifdef CONFIG_CPU_FREQ
DECLARE_PER_CPU(struct update_util_data *, cpufreq_update_util_data);
/**
* cpufreq_update_util - Take a note about CPU utilization changes.
* @rq: Runqueue to carry out the update for.
* @flags: Update reason flags.
*
* This function is called by the scheduler on the CPU whose utilization is
* being updated.
*
* It can only be called from RCU-sched read-side critical sections.
*
* The way cpufreq is currently arranged requires it to evaluate the CPU
* performance state (frequency/voltage) on a regular basis to prevent it from
* being stuck in a completely inadequate performance level for too long.
* That is not guaranteed to happen if the updates are only triggered from CFS,
* though, because they may not be coming in if RT or deadline tasks are active
* all the time (or there are RT and DL tasks only).
*
* As a workaround for that issue, this function is called by the RT and DL
* sched classes to trigger extra cpufreq updates to prevent it from stalling,
* but that really is a band-aid. Going forward it should be replaced with
* solutions targeted more specifically at RT and DL tasks.
*/
static inline void cpufreq_update_util(struct rq *rq, unsigned int flags)
{
struct update_util_data *data;
data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data));
if (data)
data->func(data, rq_clock(rq), flags);
}
static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags)
{
if (cpu_of(rq) == smp_processor_id())
cpufreq_update_util(rq, flags);
}
#else
static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags) {}
#endif /* CONFIG_CPU_FREQ */
#ifdef arch_scale_freq_capacity
#ifndef arch_scale_freq_invariant
#define arch_scale_freq_invariant() (true)
#endif
#else /* arch_scale_freq_capacity */
#define arch_scale_freq_invariant() (false)
#endif