Merge "sched/hmp: Enhance co-location and scheduler boost features"
commit 54e5bae2ed
11 changed files with 800 additions and 385 deletions
|
@@ -53,6 +53,8 @@ extern unsigned int sysctl_sched_spill_nr_run;
 extern unsigned int sysctl_sched_spill_load_pct;
 extern unsigned int sysctl_sched_upmigrate_pct;
 extern unsigned int sysctl_sched_downmigrate_pct;
+extern unsigned int sysctl_sched_group_upmigrate_pct;
+extern unsigned int sysctl_sched_group_downmigrate_pct;
 extern unsigned int sysctl_early_detection_duration;
 extern unsigned int sysctl_sched_boost;
 extern unsigned int sysctl_sched_small_wakee_task_load_pct;
|
@@ -133,6 +133,7 @@ TRACE_EVENT(sched_task_load,
         __field( u32, flags )
         __field( int, best_cpu )
         __field( u64, latency )
+        __field( int, grp_id )
     ),

     TP_fast_assign(
@@ -148,12 +149,13 @@ TRACE_EVENT(sched_task_load,
         __entry->latency = p->state == TASK_WAKING ?
                     sched_ktime_clock() -
                     p->ravg.mark_start : 0;
+        __entry->grp_id = p->grp ? p->grp->id : 0;
     ),

-    TP_printk("%d (%s): demand=%u boost=%d reason=%d sync=%d need_idle=%d flags=%x best_cpu=%d latency=%llu",
+    TP_printk("%d (%s): demand=%u boost=%d reason=%d sync=%d need_idle=%d flags=%x grp=%d best_cpu=%d latency=%llu",
         __entry->pid, __entry->comm, __entry->demand,
         __entry->boost, __entry->reason, __entry->sync,
-        __entry->need_idle, __entry->flags,
+        __entry->need_idle, __entry->flags, __entry->grp_id,
         __entry->best_cpu, __entry->latency)
 );

@@ -164,9 +166,12 @@ TRACE_EVENT(sched_set_preferred_cluster,
     TP_ARGS(grp, total_demand),

     TP_STRUCT__entry(
         __field( int, id )
         __field( u64, demand )
         __field( int, cluster_first_cpu )
+        __array( char, comm, TASK_COMM_LEN )
+        __field( pid_t, pid )
+        __field(unsigned int, task_demand )
     ),

     TP_fast_assign(
@@ -245,19 +250,19 @@ DEFINE_EVENT(sched_cpu_load, sched_cpu_load_cgroup,

 TRACE_EVENT(sched_set_boost,

-    TP_PROTO(int ref_count),
+    TP_PROTO(int type),

-    TP_ARGS(ref_count),
+    TP_ARGS(type),

     TP_STRUCT__entry(
-        __field(unsigned int, ref_count )
+        __field(int, type )
     ),

     TP_fast_assign(
-        __entry->ref_count = ref_count;
+        __entry->type = type;
     ),

-    TP_printk("ref_count=%d", __entry->ref_count)
+    TP_printk("type %d", __entry->type)
 );

 #if defined(CREATE_TRACE_POINTS) && defined(CONFIG_SCHED_HMP)
|
@@ -15,7 +15,7 @@ obj-y += core.o loadavg.o clock.o cputime.o
 obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o
 obj-y += wait.o completion.o idle.o sched_avg.o
 obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o
-obj-$(CONFIG_SCHED_HMP) += hmp.o
+obj-$(CONFIG_SCHED_HMP) += hmp.o boost.o
 obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o
 obj-$(CONFIG_SCHEDSTATS) += stats.o
 obj-$(CONFIG_SCHED_DEBUG) += debug.o
|
kernel/sched/boost.c (new file, 226 lines)
|
@@ -0,0 +1,226 @@
/* Copyright (c) 2012-2016, The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 */

#include "sched.h"
#include <linux/of.h>
#include <linux/sched/core_ctl.h>
#include <trace/events/sched.h>

/*
 * Scheduler boost is a mechanism to temporarily place tasks on CPUs
 * with higher capacity than those where a task would have normally
 * ended up with their load characteristics. Any entity enabling
 * boost is responsible for disabling it as well.
 */

unsigned int sysctl_sched_boost;
static enum sched_boost_policy boost_policy;
static enum sched_boost_policy boost_policy_dt = SCHED_BOOST_NONE;
static DEFINE_MUTEX(boost_mutex);
static unsigned int freq_aggr_threshold_backup;

static inline void boost_kick(int cpu)
{
    struct rq *rq = cpu_rq(cpu);

    if (!test_and_set_bit(BOOST_KICK, &rq->hmp_flags))
        smp_send_reschedule(cpu);
}

static void boost_kick_cpus(void)
{
    int i;
    struct cpumask kick_mask;

    if (boost_policy != SCHED_BOOST_ON_BIG)
        return;

    cpumask_andnot(&kick_mask, cpu_online_mask, cpu_isolated_mask);

    for_each_cpu(i, &kick_mask) {
        if (cpu_capacity(i) != max_capacity)
            boost_kick(i);
    }
}

int got_boost_kick(void)
{
    int cpu = smp_processor_id();
    struct rq *rq = cpu_rq(cpu);

    return test_bit(BOOST_KICK, &rq->hmp_flags);
}

void clear_boost_kick(int cpu)
{
    struct rq *rq = cpu_rq(cpu);

    clear_bit(BOOST_KICK, &rq->hmp_flags);
}

/*
 * Scheduler boost type and boost policy might at first seem unrelated,
 * however, there exists a connection between them that will allow us
 * to use them interchangeably during placement decisions. We'll explain
 * the connection here in one possible way so that the implications are
 * clear when looking at placement policies.
 *
 * When policy = SCHED_BOOST_NONE, type is either none or RESTRAINED
 * When policy = SCHED_BOOST_ON_ALL or SCHED_BOOST_ON_BIG, type can
 * neither be none nor RESTRAINED.
 */
static void set_boost_policy(int type)
{
    if (type == SCHED_BOOST_NONE || type == RESTRAINED_BOOST) {
        boost_policy = SCHED_BOOST_NONE;
        return;
    }

    if (boost_policy_dt) {
        boost_policy = boost_policy_dt;
        return;
    }

    if (min_possible_efficiency != max_possible_efficiency) {
        boost_policy = SCHED_BOOST_ON_BIG;
        return;
    }

    boost_policy = SCHED_BOOST_ON_ALL;
}

enum sched_boost_policy sched_boost_policy(void)
{
    return boost_policy;
}

static bool verify_boost_params(int old_val, int new_val)
{
    /*
     * Boost can only be turned on or off. There is no possiblity of
     * switching from one boost type to another or to set the same
     * kind of boost several times.
     */
    return !(!!old_val == !!new_val);
}

static void _sched_set_boost(int old_val, int type)
{
    switch (type) {
    case NO_BOOST:
        if (old_val == FULL_THROTTLE_BOOST)
            core_ctl_set_boost(false);
        else if (old_val == CONSERVATIVE_BOOST)
            restore_cgroup_boost_settings();
        else
            update_freq_aggregate_threshold(
                freq_aggr_threshold_backup);
        break;

    case FULL_THROTTLE_BOOST:
        core_ctl_set_boost(true);
        boost_kick_cpus();
        break;

    case CONSERVATIVE_BOOST:
        update_cgroup_boost_settings();
        boost_kick_cpus();
        break;

    case RESTRAINED_BOOST:
        freq_aggr_threshold_backup =
            update_freq_aggregate_threshold(1);
        break;

    default:
        WARN_ON(1);
        return;
    }

    set_boost_policy(type);
    sysctl_sched_boost = type;
    trace_sched_set_boost(type);
}

void sched_boost_parse_dt(void)
{
    struct device_node *sn;
    const char *boost_policy;

    if (!sched_enable_hmp)
        return;

    sn = of_find_node_by_path("/sched-hmp");
    if (!sn)
        return;

    if (!of_property_read_string(sn, "boost-policy", &boost_policy)) {
        if (!strcmp(boost_policy, "boost-on-big"))
            boost_policy_dt = SCHED_BOOST_ON_BIG;
        else if (!strcmp(boost_policy, "boost-on-all"))
            boost_policy_dt = SCHED_BOOST_ON_ALL;
    }
}

int sched_set_boost(int type)
{
    int ret = 0;

    if (!sched_enable_hmp)
        return -EINVAL;

    mutex_lock(&boost_mutex);

    if (verify_boost_params(sysctl_sched_boost, type))
        _sched_set_boost(sysctl_sched_boost, type);
    else
        ret = -EINVAL;

    mutex_unlock(&boost_mutex);
    return ret;
}

int sched_boost_handler(struct ctl_table *table, int write,
        void __user *buffer, size_t *lenp,
        loff_t *ppos)
{
    int ret;
    unsigned int *data = (unsigned int *)table->data;
    unsigned int old_val;

    if (!sched_enable_hmp)
        return -EINVAL;

    mutex_lock(&boost_mutex);

    old_val = *data;
    ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);

    if (ret || !write)
        goto done;

    if (verify_boost_params(old_val, *data)) {
        _sched_set_boost(old_val, *data);
    } else {
        *data = old_val;
        ret = -EINVAL;
    }

done:
    mutex_unlock(&boost_mutex);
    return ret;
}

int sched_boost(void)
{
    return sysctl_sched_boost;
}
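The handler above only accepts transitions between "no boost" (0) and one of the three boost types; verify_boost_params() rejects switching directly from one active type to another or re-setting the same type. A minimal user-space sketch of that contract follows. It assumes the knob is exposed as /proc/sys/kernel/sched_boost; the path and the error handling are illustrative and are not spelled out in this diff.

/*
 * Illustrative only: exercises the on/off contract enforced by
 * verify_boost_params().  Assumes the sysctl is reachable at
 * /proc/sys/kernel/sched_boost.
 */
#include <stdio.h>

static int write_sched_boost(int type)
{
    FILE *f = fopen("/proc/sys/kernel/sched_boost", "w");

    if (!f)
        return -1;
    if (fprintf(f, "%d\n", type) < 0) {
        fclose(f);
        return -1;
    }
    /* An -EINVAL from the handler surfaces as a failed flush/close. */
    return fclose(f) ? -1 : 0;
}

int main(void)
{
    write_sched_boost(1);   /* NO_BOOST -> FULL_THROTTLE_BOOST: accepted */
    write_sched_boost(2);   /* FULL_THROTTLE -> CONSERVATIVE: rejected */
    write_sched_boost(0);   /* back to NO_BOOST: accepted */
    return 0;
}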
|
|
@@ -7846,7 +7846,6 @@ void __init sched_init_smp(void)
     hotcpu_notifier(cpuset_cpu_inactive, CPU_PRI_CPUSET_INACTIVE);

     update_cluster_topology();
-    init_sched_hmp_boost_policy();

     init_hrtick();

@@ -7895,7 +7894,7 @@ void __init sched_init(void)

     BUG_ON(num_possible_cpus() > BITS_PER_LONG);

-    sched_hmp_parse_dt();
+    sched_boost_parse_dt();
     init_clusters();

 #ifdef CONFIG_FAIR_GROUP_SCHED
|
@ -2596,6 +2596,7 @@ static u32 __compute_runnable_contrib(u64 n)
|
|||
#define SBC_FLAG_COLOC_CLUSTER 0x10000
|
||||
#define SBC_FLAG_WAKER_CLUSTER 0x20000
|
||||
#define SBC_FLAG_BACKUP_CLUSTER 0x40000
|
||||
#define SBC_FLAG_BOOST_CLUSTER 0x80000
|
||||
|
||||
struct cpu_select_env {
|
||||
struct task_struct *p;
|
||||
|
@ -2605,7 +2606,7 @@ struct cpu_select_env {
|
|||
u8 need_waker_cluster:1;
|
||||
u8 sync:1;
|
||||
u8 ignore_prev_cpu:1;
|
||||
enum sched_boost_type boost_type;
|
||||
enum sched_boost_policy boost_policy;
|
||||
int prev_cpu;
|
||||
DECLARE_BITMAP(candidate_list, NR_CPUS);
|
||||
DECLARE_BITMAP(backup_list, NR_CPUS);
|
||||
|
@@ -2705,10 +2706,38 @@ select_least_power_cluster(struct cpu_select_env *env)
     struct sched_cluster *cluster;

     if (env->rtg) {
-        env->task_load = scale_load_to_cpu(task_load(env->p),
-            cluster_first_cpu(env->rtg->preferred_cluster));
-        env->sbc_best_cluster_flag |= SBC_FLAG_COLOC_CLUSTER;
-        return env->rtg->preferred_cluster;
+        int cpu = cluster_first_cpu(env->rtg->preferred_cluster);
+
+        env->task_load = scale_load_to_cpu(task_load(env->p), cpu);
+
+        if (task_load_will_fit(env->p, env->task_load,
+                    cpu, env->boost_policy)) {
+            env->sbc_best_cluster_flag |= SBC_FLAG_COLOC_CLUSTER;
+
+            if (env->boost_policy == SCHED_BOOST_NONE)
+                return env->rtg->preferred_cluster;
+
+            for_each_sched_cluster(cluster) {
+                if (cluster != env->rtg->preferred_cluster) {
+                    __set_bit(cluster->id,
+                        env->backup_list);
+                    __clear_bit(cluster->id,
+                        env->candidate_list);
+                }
+            }
+
+            return env->rtg->preferred_cluster;
+        }
+
+        /*
+         * Since the task load does not fit on the preferred
+         * cluster anymore, pretend that the task does not
+         * have any preferred cluster. This allows the waking
+         * task to get the appropriate CPU it needs as per the
+         * non co-location placement policy without having to
+         * wait until the preferred cluster is updated.
+         */
+        env->rtg = NULL;
     }

     for_each_sched_cluster(cluster) {
@@ -2718,7 +2747,7 @@ select_least_power_cluster(struct cpu_select_env *env)
         env->task_load = scale_load_to_cpu(task_load(env->p),
                            cpu);
         if (task_load_will_fit(env->p, env->task_load, cpu,
-                       env->boost_type))
+                       env->boost_policy))
             return cluster;

         __set_bit(cluster->id, env->backup_list);
@@ -2961,7 +2990,14 @@ static void find_best_cpu_in_cluster(struct sched_cluster *c,
         update_spare_capacity(stats, env, i, c->capacity,
                       env->cpu_load);

-        if (env->boost_type == SCHED_BOOST_ON_ALL ||
+        /*
+         * need_idle takes precedence over sched boost but when both
+         * are set, idlest CPU with in all the clusters is selected
+         * when boost_policy = BOOST_ON_ALL whereas idlest CPU in the
+         * big cluster is selected within boost_policy = BOOST_ON_BIG.
+         */
+        if ((!env->need_idle &&
+             env->boost_policy != SCHED_BOOST_NONE) ||
             env->need_waker_cluster ||
             sched_cpu_high_irqload(i) ||
             spill_threshold_crossed(env, cpu_rq(i)))
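The new condition above folds the need_idle/boost interaction into a single expression. Restated as a standalone predicate (hypothetical free-standing types, not the kernel's struct cpu_select_env; what the caller does when it holds is outside this hunk), it reads:

#include <stdbool.h>

enum sched_boost_policy { SCHED_BOOST_NONE, SCHED_BOOST_ON_BIG, SCHED_BOOST_ON_ALL };

/*
 * Mirrors the new test in find_best_cpu_in_cluster(): boost alone only
 * matters when need_idle is not set; the other three terms are unchanged
 * from the old code.
 */
static bool placement_skip_condition(bool need_idle,
                                     enum sched_boost_policy policy,
                                     bool need_waker_cluster,
                                     bool high_irqload,
                                     bool spill_threshold_crossed)
{
    return (!need_idle && policy != SCHED_BOOST_NONE) ||
           need_waker_cluster || high_irqload || spill_threshold_crossed;
}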
@ -3005,7 +3041,7 @@ bias_to_prev_cpu(struct cpu_select_env *env, struct cluster_cpu_stats *stats)
|
|||
struct task_struct *task = env->p;
|
||||
struct sched_cluster *cluster;
|
||||
|
||||
if (env->boost_type != SCHED_BOOST_NONE || env->reason ||
|
||||
if (env->boost_policy != SCHED_BOOST_NONE || env->reason ||
|
||||
!task->ravg.mark_start ||
|
||||
env->need_idle || !sched_short_sleep_task_threshold)
|
||||
return false;
|
||||
|
@ -3034,7 +3070,7 @@ bias_to_prev_cpu(struct cpu_select_env *env, struct cluster_cpu_stats *stats)
|
|||
cluster = cpu_rq(prev_cpu)->cluster;
|
||||
|
||||
if (!task_load_will_fit(task, env->task_load, prev_cpu,
|
||||
sched_boost_type())) {
|
||||
sched_boost_policy())) {
|
||||
|
||||
__set_bit(cluster->id, env->backup_list);
|
||||
__clear_bit(cluster->id, env->candidate_list);
|
||||
|
@ -3056,7 +3092,7 @@ bias_to_prev_cpu(struct cpu_select_env *env, struct cluster_cpu_stats *stats)
|
|||
static inline bool
|
||||
wake_to_waker_cluster(struct cpu_select_env *env)
|
||||
{
|
||||
return env->boost_type == SCHED_BOOST_NONE &&
|
||||
return env->boost_policy == SCHED_BOOST_NONE &&
|
||||
!env->need_idle && !env->reason && env->sync &&
|
||||
task_load(current) > sched_big_waker_task_load &&
|
||||
task_load(env->p) < sched_small_wakee_task_load;
|
||||
|
@ -3098,7 +3134,6 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
|
|||
.reason = reason,
|
||||
.need_idle = wake_to_idle(p),
|
||||
.need_waker_cluster = 0,
|
||||
.boost_type = sched_boost_type(),
|
||||
.sync = sync,
|
||||
.prev_cpu = target,
|
||||
.ignore_prev_cpu = 0,
|
||||
|
@ -3107,6 +3142,9 @@ static int select_best_cpu(struct task_struct *p, int target, int reason,
|
|||
.sbc_best_cluster_flag = 0,
|
||||
};
|
||||
|
||||
env.boost_policy = task_sched_boost(p) ?
|
||||
sched_boost_policy() : SCHED_BOOST_NONE;
|
||||
|
||||
bitmap_copy(env.candidate_list, all_cluster_ids, NR_CPUS);
|
||||
bitmap_zero(env.backup_list, NR_CPUS);
|
||||
|
||||
|
@ -3178,12 +3216,23 @@ retry:
|
|||
sbc_flag |= env.sbc_best_flag;
|
||||
target = stats.best_cpu;
|
||||
} else {
|
||||
if (env.rtg) {
|
||||
if (env.rtg && env.boost_policy == SCHED_BOOST_NONE) {
|
||||
env.rtg = NULL;
|
||||
goto retry;
|
||||
}
|
||||
|
||||
find_backup_cluster(&env, &stats);
|
||||
/*
|
||||
* With boost_policy == SCHED_BOOST_ON_BIG, we reach here with
|
||||
* backup_list = little cluster, candidate_list = none and
|
||||
* stats->best_capacity_cpu points the best spare capacity
|
||||
* CPU among the CPUs in the big cluster.
|
||||
*/
|
||||
if (env.boost_policy == SCHED_BOOST_ON_BIG &&
|
||||
stats.best_capacity_cpu >= 0)
|
||||
sbc_flag |= SBC_FLAG_BOOST_CLUSTER;
|
||||
else
|
||||
find_backup_cluster(&env, &stats);
|
||||
|
||||
if (stats.best_capacity_cpu >= 0) {
|
||||
target = stats.best_capacity_cpu;
|
||||
sbc_flag |= SBC_FLAG_BEST_CAP_CPU;
|
||||
|
@ -3193,8 +3242,8 @@ retry:
|
|||
out:
|
||||
sbc_flag |= env.sbc_best_cluster_flag;
|
||||
rcu_read_unlock();
|
||||
trace_sched_task_load(p, sched_boost(), env.reason, env.sync,
|
||||
env.need_idle, sbc_flag, target);
|
||||
trace_sched_task_load(p, sched_boost_policy() && task_sched_boost(p),
|
||||
env.reason, env.sync, env.need_idle, sbc_flag, target);
|
||||
return target;
|
||||
}
|
||||
|
||||
|
@ -3402,11 +3451,9 @@ static inline int migration_needed(struct task_struct *p, int cpu)
|
|||
if (task_will_be_throttled(p))
|
||||
return 0;
|
||||
|
||||
if (sched_boost_type() == SCHED_BOOST_ON_BIG) {
|
||||
if (cpu_capacity(cpu) != max_capacity)
|
||||
return UP_MIGRATION;
|
||||
return 0;
|
||||
}
|
||||
if (sched_boost_policy() == SCHED_BOOST_ON_BIG &&
|
||||
cpu_capacity(cpu) != max_capacity && task_sched_boost(p))
|
||||
return UP_MIGRATION;
|
||||
|
||||
if (sched_cpu_high_irqload(cpu))
|
||||
return IRQLOAD_MIGRATION;
|
||||
|
@ -3420,7 +3467,7 @@ static inline int migration_needed(struct task_struct *p, int cpu)
|
|||
return DOWN_MIGRATION;
|
||||
}
|
||||
|
||||
if (!grp && !task_will_fit(p, cpu)) {
|
||||
if (!task_will_fit(p, cpu)) {
|
||||
rcu_read_unlock();
|
||||
return UP_MIGRATION;
|
||||
}
|
||||
|
@ -6648,10 +6695,7 @@ enum fbq_type { regular, remote, all };
|
|||
#define LBF_NEED_BREAK 0x02
|
||||
#define LBF_DST_PINNED 0x04
|
||||
#define LBF_SOME_PINNED 0x08
|
||||
#define LBF_SCHED_BOOST_ACTIVE_BALANCE 0x40
|
||||
#define LBF_BIG_TASK_ACTIVE_BALANCE 0x80
|
||||
#define LBF_HMP_ACTIVE_BALANCE (LBF_SCHED_BOOST_ACTIVE_BALANCE | \
|
||||
LBF_BIG_TASK_ACTIVE_BALANCE)
|
||||
#define LBF_IGNORE_BIG_TASKS 0x100
|
||||
#define LBF_IGNORE_PREFERRED_CLUSTER_TASKS 0x200
|
||||
#define LBF_MOVED_RELATED_THREAD_GROUP_TASK 0x400
|
||||
|
@ -6682,6 +6726,7 @@ struct lb_env {
|
|||
|
||||
enum fbq_type fbq_type;
|
||||
struct list_head tasks;
|
||||
enum sched_boost_policy boost_policy;
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -6826,9 +6871,14 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
|
|||
/* Record that we found atleast one task that could run on dst_cpu */
|
||||
env->flags &= ~LBF_ALL_PINNED;
|
||||
|
||||
if (cpu_capacity(env->dst_cpu) > cpu_capacity(env->src_cpu) &&
|
||||
nr_big_tasks(env->src_rq) && !is_big_task(p))
|
||||
return 0;
|
||||
if (cpu_capacity(env->dst_cpu) > cpu_capacity(env->src_cpu)) {
|
||||
if (nr_big_tasks(env->src_rq) && !is_big_task(p))
|
||||
return 0;
|
||||
|
||||
if (env->boost_policy == SCHED_BOOST_ON_BIG &&
|
||||
!task_sched_boost(p))
|
||||
return 0;
|
||||
}
|
||||
|
||||
twf = task_will_fit(p, env->dst_cpu);
|
||||
|
||||
|
@ -6951,8 +7001,7 @@ static int detach_tasks(struct lb_env *env)
|
|||
if (env->imbalance <= 0)
|
||||
return 0;
|
||||
|
||||
if (cpu_capacity(env->dst_cpu) < cpu_capacity(env->src_cpu) &&
|
||||
!sched_boost())
|
||||
if (cpu_capacity(env->dst_cpu) < cpu_capacity(env->src_cpu))
|
||||
env->flags |= LBF_IGNORE_BIG_TASKS;
|
||||
else if (!same_cluster(env->dst_cpu, env->src_cpu))
|
||||
env->flags |= LBF_IGNORE_PREFERRED_CLUSTER_TASKS;
|
||||
|
@ -7255,8 +7304,10 @@ bail_inter_cluster_balance(struct lb_env *env, struct sd_lb_stats *sds)
|
|||
int local_capacity, busiest_capacity;
|
||||
int local_pwr_cost, busiest_pwr_cost;
|
||||
int nr_cpus;
|
||||
int boost = sched_boost();
|
||||
|
||||
if (!sysctl_sched_restrict_cluster_spill || sched_boost())
|
||||
if (!sysctl_sched_restrict_cluster_spill ||
|
||||
boost == FULL_THROTTLE_BOOST || boost == CONSERVATIVE_BOOST)
|
||||
return 0;
|
||||
|
||||
local_cpu = group_first_cpu(sds->local);
|
||||
|
@ -7628,11 +7679,6 @@ static bool update_sd_pick_busiest_active_balance(struct lb_env *env,
|
|||
{
|
||||
if (env->idle != CPU_NOT_IDLE &&
|
||||
cpu_capacity(env->dst_cpu) > group_rq_capacity(sg)) {
|
||||
if (sched_boost() && !sds->busiest && sgs->sum_nr_running) {
|
||||
env->flags |= LBF_SCHED_BOOST_ACTIVE_BALANCE;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (sgs->sum_nr_big_tasks >
|
||||
sds->busiest_stat.sum_nr_big_tasks) {
|
||||
env->flags |= LBF_BIG_TASK_ACTIVE_BALANCE;
|
||||
|
@ -8045,7 +8091,7 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
|
|||
if (!sds.busiest || busiest->sum_nr_running == 0)
|
||||
goto out_balanced;
|
||||
|
||||
if (env->flags & LBF_HMP_ACTIVE_BALANCE)
|
||||
if (env->flags & LBF_BIG_TASK_ACTIVE_BALANCE)
|
||||
goto force_balance;
|
||||
|
||||
if (bail_inter_cluster_balance(env, &sds))
|
||||
|
@ -8257,7 +8303,7 @@ static int need_active_balance(struct lb_env *env)
|
|||
{
|
||||
struct sched_domain *sd = env->sd;
|
||||
|
||||
if (env->flags & LBF_HMP_ACTIVE_BALANCE)
|
||||
if (env->flags & LBF_BIG_TASK_ACTIVE_BALANCE)
|
||||
return 1;
|
||||
|
||||
if (env->idle == CPU_NEWLY_IDLE) {
|
||||
|
@ -8348,20 +8394,21 @@ static int load_balance(int this_cpu, struct rq *this_rq,
|
|||
struct cpumask *cpus = this_cpu_cpumask_var_ptr(load_balance_mask);
|
||||
|
||||
struct lb_env env = {
|
||||
.sd = sd,
|
||||
.dst_cpu = this_cpu,
|
||||
.dst_rq = this_rq,
|
||||
.dst_grpmask = sched_group_cpus(sd->groups),
|
||||
.idle = idle,
|
||||
.loop_break = sched_nr_migrate_break,
|
||||
.cpus = cpus,
|
||||
.fbq_type = all,
|
||||
.tasks = LIST_HEAD_INIT(env.tasks),
|
||||
.imbalance = 0,
|
||||
.flags = 0,
|
||||
.loop = 0,
|
||||
.sd = sd,
|
||||
.dst_cpu = this_cpu,
|
||||
.dst_rq = this_rq,
|
||||
.dst_grpmask = sched_group_cpus(sd->groups),
|
||||
.idle = idle,
|
||||
.loop_break = sched_nr_migrate_break,
|
||||
.cpus = cpus,
|
||||
.fbq_type = all,
|
||||
.tasks = LIST_HEAD_INIT(env.tasks),
|
||||
.imbalance = 0,
|
||||
.flags = 0,
|
||||
.loop = 0,
|
||||
.busiest_nr_running = 0,
|
||||
.busiest_grp_capacity = 0,
|
||||
.boost_policy = sched_boost_policy(),
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -8510,7 +8557,7 @@ more_balance:
|
|||
|
||||
no_move:
|
||||
if (!ld_moved) {
|
||||
if (!(env.flags & LBF_HMP_ACTIVE_BALANCE))
|
||||
if (!(env.flags & LBF_BIG_TASK_ACTIVE_BALANCE))
|
||||
schedstat_inc(sd, lb_failed[idle]);
|
||||
|
||||
/*
|
||||
|
@ -8520,7 +8567,7 @@ no_move:
|
|||
* excessive cache_hot migrations and active balances.
|
||||
*/
|
||||
if (idle != CPU_NEWLY_IDLE &&
|
||||
!(env.flags & LBF_HMP_ACTIVE_BALANCE))
|
||||
!(env.flags & LBF_BIG_TASK_ACTIVE_BALANCE))
|
||||
sd->nr_balance_failed++;
|
||||
|
||||
if (need_active_balance(&env)) {
|
||||
|
@ -8797,6 +8844,7 @@ static int active_load_balance_cpu_stop(void *data)
|
|||
.busiest_grp_capacity = 0,
|
||||
.flags = 0,
|
||||
.loop = 0,
|
||||
.boost_policy = sched_boost_policy(),
|
||||
};
|
||||
bool moved = false;
|
||||
|
||||
|
@ -9272,7 +9320,8 @@ static inline int _nohz_kick_needed_hmp(struct rq *rq, int cpu, int *type)
|
|||
if (rq->nr_running < 2)
|
||||
return 0;
|
||||
|
||||
if (!sysctl_sched_restrict_cluster_spill || sched_boost())
|
||||
if (!sysctl_sched_restrict_cluster_spill ||
|
||||
sched_boost_policy() == SCHED_BOOST_ON_ALL)
|
||||
return 1;
|
||||
|
||||
if (cpu_max_power_cost(cpu) == max_power_cost)
|
||||
|
|
|
@ -17,8 +17,6 @@
|
|||
#include <linux/cpufreq.h>
|
||||
#include <linux/list_sort.h>
|
||||
#include <linux/syscore_ops.h>
|
||||
#include <linux/of.h>
|
||||
#include <linux/sched/core_ctl.h>
|
||||
|
||||
#include "sched.h"
|
||||
|
||||
|
@ -231,52 +229,6 @@ fail:
|
|||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* It is possible that CPUs of the same micro architecture can have slight
|
||||
* difference in the efficiency due to other factors like cache size. The
|
||||
* BOOST_ON_BIG policy may not be optimial for such systems. The required
|
||||
* boost policy can be specified via device tree to handle this.
|
||||
*/
|
||||
static int __read_mostly sched_boost_policy = SCHED_BOOST_NONE;
|
||||
|
||||
/*
|
||||
* This should be called after clusters are populated and
|
||||
* the respective efficiency values are initialized.
|
||||
*/
|
||||
void init_sched_hmp_boost_policy(void)
|
||||
{
|
||||
/*
|
||||
* Initialize the boost type here if it is not passed from
|
||||
* device tree.
|
||||
*/
|
||||
if (sched_boost_policy == SCHED_BOOST_NONE) {
|
||||
if (max_possible_efficiency != min_possible_efficiency)
|
||||
sched_boost_policy = SCHED_BOOST_ON_BIG;
|
||||
else
|
||||
sched_boost_policy = SCHED_BOOST_ON_ALL;
|
||||
}
|
||||
}
|
||||
|
||||
void sched_hmp_parse_dt(void)
|
||||
{
|
||||
struct device_node *sn;
|
||||
const char *boost_policy;
|
||||
|
||||
if (!sched_enable_hmp)
|
||||
return;
|
||||
|
||||
sn = of_find_node_by_path("/sched-hmp");
|
||||
if (!sn)
|
||||
return;
|
||||
|
||||
if (!of_property_read_string(sn, "boost-policy", &boost_policy)) {
|
||||
if (!strcmp(boost_policy, "boost-on-big"))
|
||||
sched_boost_policy = SCHED_BOOST_ON_BIG;
|
||||
else if (!strcmp(boost_policy, "boost-on-all"))
|
||||
sched_boost_policy = SCHED_BOOST_ON_ALL;
|
||||
}
|
||||
}
|
||||
|
||||
unsigned int max_possible_efficiency = 1;
|
||||
unsigned int min_possible_efficiency = UINT_MAX;
|
||||
|
||||
|
@ -680,29 +632,6 @@ int __init set_sched_enable_hmp(char *str)
|
|||
|
||||
early_param("sched_enable_hmp", set_sched_enable_hmp);
|
||||
|
||||
int got_boost_kick(void)
|
||||
{
|
||||
int cpu = smp_processor_id();
|
||||
struct rq *rq = cpu_rq(cpu);
|
||||
|
||||
return test_bit(BOOST_KICK, &rq->hmp_flags);
|
||||
}
|
||||
|
||||
inline void clear_boost_kick(int cpu)
|
||||
{
|
||||
struct rq *rq = cpu_rq(cpu);
|
||||
|
||||
clear_bit(BOOST_KICK, &rq->hmp_flags);
|
||||
}
|
||||
|
||||
inline void boost_kick(int cpu)
|
||||
{
|
||||
struct rq *rq = cpu_rq(cpu);
|
||||
|
||||
if (!test_and_set_bit(BOOST_KICK, &rq->hmp_flags))
|
||||
smp_send_reschedule(cpu);
|
||||
}
|
||||
|
||||
/* Clear any HMP scheduler related requests pending from or on cpu */
|
||||
void clear_hmp_request(int cpu)
|
||||
{
|
||||
|
@ -840,6 +769,9 @@ min_max_possible_capacity = 1024; /* min(rq->max_possible_capacity) */
|
|||
/* Window size (in ns) */
|
||||
__read_mostly unsigned int sched_ravg_window = MIN_SCHED_RAVG_WINDOW;
|
||||
|
||||
/* Maximum allowed threshold before freq aggregation must be enabled */
|
||||
#define MAX_FREQ_AGGR_THRESH 1000
|
||||
|
||||
/* Temporarily disable window-stats activity on all cpus */
|
||||
unsigned int __read_mostly sched_disable_window_stats;
|
||||
|
||||
|
@ -919,8 +851,8 @@ static const unsigned int top_tasks_bitmap_size =
|
|||
* C1 busy time = 5 + 5 + 6 = 16ms
|
||||
*
|
||||
*/
|
||||
static __read_mostly unsigned int sched_freq_aggregate;
|
||||
__read_mostly unsigned int sysctl_sched_freq_aggregate;
|
||||
static __read_mostly unsigned int sched_freq_aggregate = 1;
|
||||
__read_mostly unsigned int sysctl_sched_freq_aggregate = 1;
|
||||
|
||||
unsigned int __read_mostly sysctl_sched_freq_aggregate_threshold_pct;
|
||||
static unsigned int __read_mostly sched_freq_aggregate_threshold;
|
||||
|
@ -937,14 +869,6 @@ unsigned int max_task_load(void)
|
|||
/* Use this knob to turn on or off HMP-aware task placement logic */
|
||||
unsigned int __read_mostly sched_enable_hmp;
|
||||
|
||||
/*
|
||||
* Scheduler boost is a mechanism to temporarily place tasks on CPUs
|
||||
* with higher capacity than those where a task would have normally
|
||||
* ended up with their load characteristics. Any entity enabling
|
||||
* boost is responsible for disabling it as well.
|
||||
*/
|
||||
unsigned int sysctl_sched_boost;
|
||||
|
||||
/* A cpu can no longer accommodate more tasks if:
|
||||
*
|
||||
* rq->nr_running > sysctl_sched_spill_nr_run ||
|
||||
|
@@ -995,6 +919,21 @@ unsigned int __read_mostly sysctl_sched_upmigrate_pct = 80;
 unsigned int __read_mostly sched_downmigrate;
 unsigned int __read_mostly sysctl_sched_downmigrate_pct = 60;

+/*
+ * Task groups whose aggregate demand on a cpu is more than
+ * sched_group_upmigrate need to be up-migrated if possible.
+ */
+unsigned int __read_mostly sched_group_upmigrate;
+unsigned int __read_mostly sysctl_sched_group_upmigrate_pct = 100;
+
+/*
+ * Task groups, once up-migrated, will need to drop their aggregate
+ * demand to less than sched_group_downmigrate before they are "down"
+ * migrated.
+ */
+unsigned int __read_mostly sched_group_downmigrate;
+unsigned int __read_mostly sysctl_sched_group_downmigrate_pct = 95;
+
 /*
  * The load scale factor of a CPU gets boosted when its max frequency
  * is restricted due to which the tasks are migrating to higher capacity
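These two tunables give colocation groups the same kind of hysteresis the per-task upmigrate/downmigrate pair provides: a group is only promoted once its aggregate demand exceeds the upmigrate threshold, and only demoted after it drops below the lower downmigrate threshold. A simplified sketch of that threshold selection, mirroring the group_will_fit() change further down in this diff (plain integers stand in for the cluster structures, so this is illustrative rather than the kernel code):

/*
 * Simplified sketch, not kernel code: pick the group threshold with
 * hysteresis.  The real code compares sched_cluster capacities and
 * scales the group demand to the candidate CPU first.
 */
static int group_fits_on_cluster(unsigned long long scaled_group_demand,
                                 unsigned int cluster_capacity,
                                 unsigned int preferred_capacity,
                                 unsigned int group_upmigrate,
                                 unsigned int group_downmigrate)
{
    unsigned int threshold = group_upmigrate;

    /* Dropping to a smaller cluster than the current preferred one is
     * judged against the lower, downmigrate threshold. */
    if (cluster_capacity < preferred_capacity)
        threshold = group_downmigrate;

    return scaled_group_demand < threshold;
}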
@ -1017,33 +956,46 @@ sched_long_cpu_selection_threshold = 100 * NSEC_PER_MSEC;
|
|||
|
||||
unsigned int __read_mostly sysctl_sched_restrict_cluster_spill;
|
||||
|
||||
void update_up_down_migrate(void)
|
||||
static void
|
||||
_update_up_down_migrate(unsigned int *up_migrate, unsigned int *down_migrate)
|
||||
{
|
||||
unsigned int up_migrate = pct_to_real(sysctl_sched_upmigrate_pct);
|
||||
unsigned int down_migrate = pct_to_real(sysctl_sched_downmigrate_pct);
|
||||
unsigned int delta;
|
||||
|
||||
if (up_down_migrate_scale_factor == 1024)
|
||||
goto done;
|
||||
return;
|
||||
|
||||
delta = up_migrate - down_migrate;
|
||||
delta = *up_migrate - *down_migrate;
|
||||
|
||||
up_migrate /= NSEC_PER_USEC;
|
||||
up_migrate *= up_down_migrate_scale_factor;
|
||||
up_migrate >>= 10;
|
||||
up_migrate *= NSEC_PER_USEC;
|
||||
*up_migrate /= NSEC_PER_USEC;
|
||||
*up_migrate *= up_down_migrate_scale_factor;
|
||||
*up_migrate >>= 10;
|
||||
*up_migrate *= NSEC_PER_USEC;
|
||||
|
||||
up_migrate = min(up_migrate, sched_ravg_window);
|
||||
*up_migrate = min(*up_migrate, sched_ravg_window);
|
||||
|
||||
down_migrate /= NSEC_PER_USEC;
|
||||
down_migrate *= up_down_migrate_scale_factor;
|
||||
down_migrate >>= 10;
|
||||
down_migrate *= NSEC_PER_USEC;
|
||||
*down_migrate /= NSEC_PER_USEC;
|
||||
*down_migrate *= up_down_migrate_scale_factor;
|
||||
*down_migrate >>= 10;
|
||||
*down_migrate *= NSEC_PER_USEC;
|
||||
|
||||
down_migrate = min(down_migrate, up_migrate - delta);
|
||||
done:
|
||||
*down_migrate = min(*down_migrate, *up_migrate - delta);
|
||||
}
|
||||
|
||||
static void update_up_down_migrate(void)
|
||||
{
|
||||
unsigned int up_migrate = pct_to_real(sysctl_sched_upmigrate_pct);
|
||||
unsigned int down_migrate = pct_to_real(sysctl_sched_downmigrate_pct);
|
||||
|
||||
_update_up_down_migrate(&up_migrate, &down_migrate);
|
||||
sched_upmigrate = up_migrate;
|
||||
sched_downmigrate = down_migrate;
|
||||
|
||||
up_migrate = pct_to_real(sysctl_sched_group_upmigrate_pct);
|
||||
down_migrate = pct_to_real(sysctl_sched_group_downmigrate_pct);
|
||||
|
||||
_update_up_down_migrate(&up_migrate, &down_migrate);
|
||||
sched_group_upmigrate = up_migrate;
|
||||
sched_group_downmigrate = down_migrate;
|
||||
}
|
||||
|
||||
void set_hmp_defaults(void)
|
||||
|
@ -1134,82 +1086,6 @@ u64 cpu_load_sync(int cpu, int sync)
|
|||
return scale_load_to_cpu(cpu_cravg_sync(cpu, sync), cpu);
|
||||
}
|
||||
|
||||
static int boost_refcount;
|
||||
static DEFINE_SPINLOCK(boost_lock);
|
||||
static DEFINE_MUTEX(boost_mutex);
|
||||
|
||||
static void boost_kick_cpus(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for_each_online_cpu(i) {
|
||||
if (cpu_capacity(i) != max_capacity)
|
||||
boost_kick(i);
|
||||
}
|
||||
}
|
||||
|
||||
int sched_boost(void)
|
||||
{
|
||||
return boost_refcount > 0;
|
||||
}
|
||||
|
||||
int sched_set_boost(int enable)
|
||||
{
|
||||
unsigned long flags;
|
||||
int ret = 0;
|
||||
int old_refcount;
|
||||
|
||||
if (!sched_enable_hmp)
|
||||
return -EINVAL;
|
||||
|
||||
spin_lock_irqsave(&boost_lock, flags);
|
||||
|
||||
old_refcount = boost_refcount;
|
||||
|
||||
if (enable == 1) {
|
||||
boost_refcount++;
|
||||
} else if (!enable) {
|
||||
if (boost_refcount >= 1)
|
||||
boost_refcount--;
|
||||
else
|
||||
ret = -EINVAL;
|
||||
} else {
|
||||
ret = -EINVAL;
|
||||
}
|
||||
|
||||
if (!old_refcount && boost_refcount)
|
||||
boost_kick_cpus();
|
||||
|
||||
if (boost_refcount <= 1)
|
||||
core_ctl_set_boost(boost_refcount == 1);
|
||||
trace_sched_set_boost(boost_refcount);
|
||||
spin_unlock_irqrestore(&boost_lock, flags);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int sched_boost_handler(struct ctl_table *table, int write,
|
||||
void __user *buffer, size_t *lenp,
|
||||
loff_t *ppos)
|
||||
{
|
||||
int ret;
|
||||
|
||||
mutex_lock(&boost_mutex);
|
||||
if (!write)
|
||||
sysctl_sched_boost = sched_boost();
|
||||
|
||||
ret = proc_dointvec(table, write, buffer, lenp, ppos);
|
||||
if (ret || !write)
|
||||
goto done;
|
||||
|
||||
ret = (sysctl_sched_boost <= 1) ?
|
||||
sched_set_boost(sysctl_sched_boost) : -EINVAL;
|
||||
|
||||
done:
|
||||
mutex_unlock(&boost_mutex);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Task will fit on a cpu if it's bandwidth consumption on that cpu
|
||||
* will be less than sched_upmigrate. A big task that was previously
|
||||
|
@ -1219,60 +1095,63 @@ done:
|
|||
* tasks with load close to the upmigrate threshold
|
||||
*/
|
||||
int task_load_will_fit(struct task_struct *p, u64 task_load, int cpu,
|
||||
enum sched_boost_type boost_type)
|
||||
enum sched_boost_policy boost_policy)
|
||||
{
|
||||
int upmigrate;
|
||||
int upmigrate = sched_upmigrate;
|
||||
|
||||
if (cpu_capacity(cpu) == max_capacity)
|
||||
return 1;
|
||||
|
||||
if (boost_type != SCHED_BOOST_ON_BIG) {
|
||||
if (cpu_capacity(task_cpu(p)) > cpu_capacity(cpu))
|
||||
upmigrate = sched_downmigrate;
|
||||
|
||||
if (boost_policy != SCHED_BOOST_ON_BIG) {
|
||||
if (task_nice(p) > SCHED_UPMIGRATE_MIN_NICE ||
|
||||
upmigrate_discouraged(p))
|
||||
return 1;
|
||||
|
||||
upmigrate = sched_upmigrate;
|
||||
if (cpu_capacity(task_cpu(p)) > cpu_capacity(cpu))
|
||||
upmigrate = sched_downmigrate;
|
||||
|
||||
if (task_load < upmigrate)
|
||||
return 1;
|
||||
} else {
|
||||
if (task_sched_boost(p) || task_load >= upmigrate)
|
||||
return 0;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
enum sched_boost_type sched_boost_type(void)
|
||||
{
|
||||
if (sched_boost())
|
||||
return sched_boost_policy;
|
||||
|
||||
return SCHED_BOOST_NONE;
|
||||
}
|
||||
|
||||
int task_will_fit(struct task_struct *p, int cpu)
|
||||
{
|
||||
u64 tload = scale_load_to_cpu(task_load(p), cpu);
|
||||
|
||||
return task_load_will_fit(p, tload, cpu, sched_boost_type());
|
||||
return task_load_will_fit(p, tload, cpu, sched_boost_policy());
|
||||
}
|
||||
|
||||
int group_will_fit(struct sched_cluster *cluster,
|
||||
struct related_thread_group *grp, u64 demand)
|
||||
static int
|
||||
group_will_fit(struct sched_cluster *cluster, struct related_thread_group *grp,
|
||||
u64 demand, bool group_boost)
|
||||
{
|
||||
int cpu = cluster_first_cpu(cluster);
|
||||
int prev_capacity = 0;
|
||||
unsigned int threshold = sched_upmigrate;
|
||||
unsigned int threshold = sched_group_upmigrate;
|
||||
u64 load;
|
||||
|
||||
if (cluster->capacity == max_capacity)
|
||||
return 1;
|
||||
|
||||
if (group_boost)
|
||||
return 0;
|
||||
|
||||
if (!demand)
|
||||
return 1;
|
||||
|
||||
if (grp->preferred_cluster)
|
||||
prev_capacity = grp->preferred_cluster->capacity;
|
||||
|
||||
if (cluster->capacity < prev_capacity)
|
||||
threshold = sched_downmigrate;
|
||||
threshold = sched_group_downmigrate;
|
||||
|
||||
load = scale_load_to_cpu(demand, cpu);
|
||||
if (load < threshold)
|
||||
|
@ -1495,6 +1374,23 @@ void post_big_task_count_change(const struct cpumask *cpus)
|
|||
|
||||
DEFINE_MUTEX(policy_mutex);
|
||||
|
||||
unsigned int update_freq_aggregate_threshold(unsigned int threshold)
|
||||
{
|
||||
unsigned int old_threshold;
|
||||
|
||||
mutex_lock(&policy_mutex);
|
||||
|
||||
old_threshold = sysctl_sched_freq_aggregate_threshold_pct;
|
||||
|
||||
sysctl_sched_freq_aggregate_threshold_pct = threshold;
|
||||
sched_freq_aggregate_threshold =
|
||||
pct_to_real(sysctl_sched_freq_aggregate_threshold_pct);
|
||||
|
||||
mutex_unlock(&policy_mutex);
|
||||
|
||||
return old_threshold;
|
||||
}
|
||||
|
||||
static inline int invalid_value_freq_input(unsigned int *data)
|
||||
{
|
||||
if (data == &sysctl_sched_freq_aggregate)
|
||||
|
@ -1578,7 +1474,9 @@ int sched_hmp_proc_update_handler(struct ctl_table *table, int write,
|
|||
if (write && (old_val == *data))
|
||||
goto done;
|
||||
|
||||
if (sysctl_sched_downmigrate_pct > sysctl_sched_upmigrate_pct) {
|
||||
if (sysctl_sched_downmigrate_pct > sysctl_sched_upmigrate_pct ||
|
||||
sysctl_sched_group_downmigrate_pct >
|
||||
sysctl_sched_group_upmigrate_pct) {
|
||||
*data = old_val;
|
||||
ret = -EINVAL;
|
||||
goto done;
|
||||
|
@ -3110,37 +3008,9 @@ static void reset_all_task_stats(void)
|
|||
{
|
||||
struct task_struct *g, *p;
|
||||
|
||||
read_lock(&tasklist_lock);
|
||||
do_each_thread(g, p) {
|
||||
raw_spin_lock_irq(&p->pi_lock);
|
||||
reset_task_stats(p);
|
||||
raw_spin_unlock_irq(&p->pi_lock);
|
||||
} while_each_thread(g, p);
|
||||
read_unlock(&tasklist_lock);
|
||||
}
|
||||
|
||||
static void disable_window_stats(void)
|
||||
{
|
||||
unsigned long flags;
|
||||
int i;
|
||||
|
||||
local_irq_save(flags);
|
||||
for_each_possible_cpu(i)
|
||||
raw_spin_lock(&cpu_rq(i)->lock);
|
||||
|
||||
sched_disable_window_stats = 1;
|
||||
|
||||
for_each_possible_cpu(i)
|
||||
raw_spin_unlock(&cpu_rq(i)->lock);
|
||||
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
/* Called with all cpu's rq->lock held */
|
||||
static void enable_window_stats(void)
|
||||
{
|
||||
sched_disable_window_stats = 0;
|
||||
|
||||
}
|
||||
|
||||
enum reset_reason_code {
|
||||
|
@ -3166,16 +3036,21 @@ void reset_all_window_stats(u64 window_start, unsigned int window_size)
|
|||
unsigned int old = 0, new = 0;
|
||||
struct related_thread_group *grp;
|
||||
|
||||
local_irq_save(flags);
|
||||
|
||||
read_lock(&tasklist_lock);
|
||||
|
||||
read_lock(&related_thread_group_lock);
|
||||
|
||||
disable_window_stats();
|
||||
/* Taking all runqueue locks prevents race with sched_exit(). */
|
||||
for_each_possible_cpu(cpu)
|
||||
raw_spin_lock(&cpu_rq(cpu)->lock);
|
||||
|
||||
sched_disable_window_stats = 1;
|
||||
|
||||
reset_all_task_stats();
|
||||
|
||||
local_irq_save(flags);
|
||||
|
||||
for_each_possible_cpu(cpu)
|
||||
raw_spin_lock(&cpu_rq(cpu)->lock);
|
||||
read_unlock(&tasklist_lock);
|
||||
|
||||
list_for_each_entry(grp, &related_thread_groups, list) {
|
||||
int j;
|
||||
|
@ -3196,7 +3071,7 @@ void reset_all_window_stats(u64 window_start, unsigned int window_size)
|
|||
sched_load_granule = sched_ravg_window / NUM_LOAD_INDICES;
|
||||
}
|
||||
|
||||
enable_window_stats();
|
||||
sched_disable_window_stats = 0;
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
struct rq *rq = cpu_rq(cpu);
|
||||
|
@ -3239,10 +3114,10 @@ void reset_all_window_stats(u64 window_start, unsigned int window_size)
|
|||
for_each_possible_cpu(cpu)
|
||||
raw_spin_unlock(&cpu_rq(cpu)->lock);
|
||||
|
||||
local_irq_restore(flags);
|
||||
|
||||
read_unlock(&related_thread_group_lock);
|
||||
|
||||
local_irq_restore(flags);
|
||||
|
||||
trace_sched_reset_all_window_stats(window_start, window_size,
|
||||
sched_ktime_clock() - start_ts, reason, old, new);
|
||||
}
|
||||
|
@ -3824,13 +3699,13 @@ static void check_for_up_down_migrate_update(const struct cpumask *cpus)
|
|||
}
|
||||
|
||||
/* Return cluster which can offer required capacity for group */
|
||||
static struct sched_cluster *
|
||||
best_cluster(struct related_thread_group *grp, u64 total_demand)
|
||||
static struct sched_cluster *best_cluster(struct related_thread_group *grp,
|
||||
u64 total_demand, bool group_boost)
|
||||
{
|
||||
struct sched_cluster *cluster = NULL;
|
||||
|
||||
for_each_sched_cluster(cluster) {
|
||||
if (group_will_fit(cluster, grp, total_demand))
|
||||
if (group_will_fit(cluster, grp, total_demand, group_boost))
|
||||
return cluster;
|
||||
}
|
||||
|
||||
|
@ -3841,6 +3716,9 @@ static void _set_preferred_cluster(struct related_thread_group *grp)
|
|||
{
|
||||
struct task_struct *p;
|
||||
u64 combined_demand = 0;
|
||||
bool boost_on_big = sched_boost_policy() == SCHED_BOOST_ON_BIG;
|
||||
bool group_boost = false;
|
||||
u64 wallclock;
|
||||
|
||||
if (!sysctl_sched_enable_colocation) {
|
||||
grp->last_update = sched_ktime_clock();
|
||||
|
@ -3848,31 +3726,43 @@ static void _set_preferred_cluster(struct related_thread_group *grp)
|
|||
return;
|
||||
}
|
||||
|
||||
if (list_empty(&grp->tasks))
|
||||
return;
|
||||
|
||||
wallclock = sched_ktime_clock();
|
||||
|
||||
/*
|
||||
* wakeup of two or more related tasks could race with each other and
|
||||
* could result in multiple calls to _set_preferred_cluster being issued
|
||||
* at same time. Avoid overhead in such cases of rechecking preferred
|
||||
* cluster
|
||||
*/
|
||||
if (sched_ktime_clock() - grp->last_update < sched_ravg_window / 10)
|
||||
if (wallclock - grp->last_update < sched_ravg_window / 10)
|
||||
return;
|
||||
|
||||
list_for_each_entry(p, &grp->tasks, grp_list)
|
||||
list_for_each_entry(p, &grp->tasks, grp_list) {
|
||||
if (boost_on_big && task_sched_boost(p)) {
|
||||
group_boost = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if (p->ravg.mark_start < wallclock -
|
||||
(sched_ravg_window * sched_ravg_hist_size))
|
||||
continue;
|
||||
|
||||
combined_demand += p->ravg.demand;
|
||||
|
||||
grp->preferred_cluster = best_cluster(grp, combined_demand);
|
||||
}
|
||||
|
||||
grp->preferred_cluster = best_cluster(grp,
|
||||
combined_demand, group_boost);
|
||||
grp->last_update = sched_ktime_clock();
|
||||
trace_sched_set_preferred_cluster(grp, combined_demand);
|
||||
}
|
||||
|
||||
void set_preferred_cluster(struct related_thread_group *grp)
|
||||
{
|
||||
/*
|
||||
* Prevent possible deadlock with update_children(). Not updating
|
||||
* the preferred cluster once is not a big deal.
|
||||
*/
|
||||
if (!raw_spin_trylock(&grp->lock))
|
||||
return;
|
||||
raw_spin_lock(&grp->lock);
|
||||
_set_preferred_cluster(grp);
|
||||
raw_spin_unlock(&grp->lock);
|
||||
}
|
||||
|
@ -3880,6 +3770,8 @@ void set_preferred_cluster(struct related_thread_group *grp)
|
|||
#define ADD_TASK 0
|
||||
#define REM_TASK 1
|
||||
|
||||
#define DEFAULT_CGROUP_COLOC_ID 1
|
||||
|
||||
static inline void free_group_cputime(struct related_thread_group *grp)
|
||||
{
|
||||
free_percpu(grp->cpu_time);
|
||||
|
@ -4116,64 +4008,19 @@ static void free_related_thread_group(struct rcu_head *rcu)
|
|||
kfree(grp);
|
||||
}
|
||||
|
||||
/*
|
||||
* The thread group for a task can change while we are here. However,
|
||||
* add_new_task_to_grp() will take care of any tasks that we miss here.
|
||||
* When a parent exits, and a child thread is simultaneously exiting,
|
||||
* sched_set_group_id() will synchronize those operations.
|
||||
*/
|
||||
static void update_children(struct task_struct *leader,
|
||||
struct related_thread_group *grp, int event)
|
||||
{
|
||||
struct task_struct *child;
|
||||
struct rq *rq;
|
||||
unsigned long flags;
|
||||
|
||||
if (!thread_group_leader(leader))
|
||||
return;
|
||||
|
||||
if (event == ADD_TASK && !sysctl_sched_enable_thread_grouping)
|
||||
return;
|
||||
|
||||
if (thread_group_empty(leader))
|
||||
return;
|
||||
|
||||
child = next_thread(leader);
|
||||
|
||||
do {
|
||||
rq = task_rq_lock(child, &flags);
|
||||
|
||||
if (event == REM_TASK && child->grp && grp == child->grp) {
|
||||
transfer_busy_time(rq, grp, child, event);
|
||||
list_del_init(&child->grp_list);
|
||||
rcu_assign_pointer(child->grp, NULL);
|
||||
} else if (event == ADD_TASK && !child->grp) {
|
||||
transfer_busy_time(rq, grp, child, event);
|
||||
list_add(&child->grp_list, &grp->tasks);
|
||||
rcu_assign_pointer(child->grp, grp);
|
||||
}
|
||||
|
||||
task_rq_unlock(rq, child, &flags);
|
||||
} while_each_thread(leader, child);
|
||||
|
||||
}
|
||||
|
||||
static void remove_task_from_group(struct task_struct *p)
|
||||
{
|
||||
struct related_thread_group *grp = p->grp;
|
||||
struct rq *rq;
|
||||
int empty_group = 1;
|
||||
unsigned long flags;
|
||||
|
||||
raw_spin_lock(&grp->lock);
|
||||
|
||||
rq = task_rq_lock(p, &flags);
|
||||
rq = __task_rq_lock(p);
|
||||
transfer_busy_time(rq, p->grp, p, REM_TASK);
|
||||
list_del_init(&p->grp_list);
|
||||
rcu_assign_pointer(p->grp, NULL);
|
||||
task_rq_unlock(rq, p, &flags);
|
||||
|
||||
update_children(p, grp, REM_TASK);
|
||||
__task_rq_unlock(rq);
|
||||
|
||||
if (!list_empty(&grp->tasks)) {
|
||||
empty_group = 0;
|
||||
|
@ -4182,7 +4029,8 @@ static void remove_task_from_group(struct task_struct *p)
|
|||
|
||||
raw_spin_unlock(&grp->lock);
|
||||
|
||||
if (empty_group) {
|
||||
/* Reserved groups cannot be destroyed */
|
||||
if (empty_group && grp->id != DEFAULT_CGROUP_COLOC_ID) {
|
||||
list_del(&grp->list);
|
||||
call_rcu(&grp->rcu, free_related_thread_group);
|
||||
}
|
||||
|
@ -4192,7 +4040,6 @@ static int
|
|||
add_task_to_group(struct task_struct *p, struct related_thread_group *grp)
|
||||
{
|
||||
struct rq *rq;
|
||||
unsigned long flags;
|
||||
|
||||
raw_spin_lock(&grp->lock);
|
||||
|
||||
|
@ -4200,13 +4047,11 @@ add_task_to_group(struct task_struct *p, struct related_thread_group *grp)
|
|||
* Change p->grp under rq->lock. Will prevent races with read-side
|
||||
* reference of p->grp in various hot-paths
|
||||
*/
|
||||
rq = task_rq_lock(p, &flags);
|
||||
rq = __task_rq_lock(p);
|
||||
transfer_busy_time(rq, grp, p, ADD_TASK);
|
||||
list_add(&p->grp_list, &grp->tasks);
|
||||
rcu_assign_pointer(p->grp, grp);
|
||||
task_rq_unlock(rq, p, &flags);
|
||||
|
||||
update_children(p, grp, ADD_TASK);
|
||||
__task_rq_unlock(rq);
|
||||
|
||||
_set_preferred_cluster(grp);
|
||||
|
||||
|
@ -4219,23 +4064,33 @@ void add_new_task_to_grp(struct task_struct *new)
|
|||
{
|
||||
unsigned long flags;
|
||||
struct related_thread_group *grp;
|
||||
struct task_struct *parent;
|
||||
struct task_struct *leader = new->group_leader;
|
||||
unsigned int leader_grp_id = sched_get_group_id(leader);
|
||||
|
||||
if (!sysctl_sched_enable_thread_grouping)
|
||||
if (!sysctl_sched_enable_thread_grouping &&
|
||||
leader_grp_id != DEFAULT_CGROUP_COLOC_ID)
|
||||
return;
|
||||
|
||||
if (thread_group_leader(new))
|
||||
return;
|
||||
|
||||
parent = new->group_leader;
|
||||
if (leader_grp_id == DEFAULT_CGROUP_COLOC_ID) {
|
||||
if (!same_schedtune(new, leader))
|
||||
return;
|
||||
}
|
||||
|
||||
write_lock_irqsave(&related_thread_group_lock, flags);
|
||||
|
||||
rcu_read_lock();
|
||||
grp = task_related_thread_group(parent);
|
||||
grp = task_related_thread_group(leader);
|
||||
rcu_read_unlock();
|
||||
|
||||
/* Its possible that update_children() already added us to the group */
|
||||
/*
|
||||
* It's possible that someone already added the new task to the
|
||||
* group. A leader's thread group is updated prior to calling
|
||||
* this function. It's also possible that the leader has exited
|
||||
* the group. In either case, there is nothing else to do.
|
||||
*/
|
||||
if (!grp || new->grp) {
|
||||
write_unlock_irqrestore(&related_thread_group_lock, flags);
|
||||
return;
|
||||
|
@ -4250,14 +4105,55 @@ void add_new_task_to_grp(struct task_struct *new)
|
|||
write_unlock_irqrestore(&related_thread_group_lock, flags);
|
||||
}
|
||||
|
||||
#if defined(CONFIG_SCHED_TUNE) && defined(CONFIG_CGROUP_SCHEDTUNE)
|
||||
/*
|
||||
* We create a default colocation group at boot. There is no need to
|
||||
* synchronize tasks between cgroups at creation time because the
|
||||
* correct cgroup hierarchy is not available at boot. Therefore cgroup
|
||||
* colocation is turned off by default even though the colocation group
|
||||
* itself has been allocated. Furthermore this colocation group cannot
|
||||
* be destroyted once it has been created. All of this has been as part
|
||||
* of runtime optimizations.
|
||||
*
|
||||
* The job of synchronizing tasks to the colocation group is done when
|
||||
* the colocation flag in the cgroup is turned on.
|
||||
*/
|
||||
static int __init create_default_coloc_group(void)
|
||||
{
|
||||
struct related_thread_group *grp = NULL;
|
||||
unsigned long flags;
|
||||
|
||||
grp = alloc_related_thread_group(DEFAULT_CGROUP_COLOC_ID);
|
||||
if (IS_ERR(grp)) {
|
||||
WARN_ON(1);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
write_lock_irqsave(&related_thread_group_lock, flags);
|
||||
list_add(&grp->list, &related_thread_groups);
|
||||
write_unlock_irqrestore(&related_thread_group_lock, flags);
|
||||
|
||||
update_freq_aggregate_threshold(MAX_FREQ_AGGR_THRESH);
|
||||
return 0;
|
||||
}
|
||||
late_initcall(create_default_coloc_group);
|
||||
|
||||
int sync_cgroup_colocation(struct task_struct *p, bool insert)
|
||||
{
|
||||
unsigned int grp_id = insert ? DEFAULT_CGROUP_COLOC_ID : 0;
|
||||
|
||||
return sched_set_group_id(p, grp_id);
|
||||
}
|
||||
#endif
|
||||
|
||||
int sched_set_group_id(struct task_struct *p, unsigned int group_id)
|
||||
{
|
||||
int rc = 0;
|
||||
unsigned long flags;
|
||||
struct related_thread_group *grp = NULL;
|
||||
|
||||
/* Prevents tasks from exiting while we are managing groups. */
|
||||
write_lock_irqsave(&related_thread_group_lock, flags);
|
||||
raw_spin_lock_irqsave(&p->pi_lock, flags);
|
||||
write_lock(&related_thread_group_lock);
|
||||
|
||||
/* Switching from one group to another directly is not permitted */
|
||||
if ((current != p && p->flags & PF_EXITING) ||
|
||||
|
@ -4272,6 +4168,12 @@ int sched_set_group_id(struct task_struct *p, unsigned int group_id)
|
|||
|
||||
grp = lookup_related_thread_group(group_id);
|
||||
if (!grp) {
|
||||
/* This is a reserved id */
|
||||
if (group_id == DEFAULT_CGROUP_COLOC_ID) {
|
||||
rc = -EINVAL;
|
||||
goto done;
|
||||
}
|
||||
|
||||
grp = alloc_related_thread_group(group_id);
|
||||
if (IS_ERR(grp)) {
|
||||
rc = -ENOMEM;
|
||||
|
@ -4281,10 +4183,10 @@ int sched_set_group_id(struct task_struct *p, unsigned int group_id)
|
|||
list_add(&grp->list, &related_thread_groups);
|
||||
}
|
||||
|
||||
BUG_ON(!grp);
|
||||
rc = add_task_to_group(p, grp);
|
||||
done:
|
||||
write_unlock_irqrestore(&related_thread_group_lock, flags);
|
||||
write_unlock(&related_thread_group_lock);
|
||||
raw_spin_unlock_irqrestore(&p->pi_lock, flags);
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
@ -4529,7 +4431,7 @@ bool early_detection_notify(struct rq *rq, u64 wallclock)
|
|||
struct task_struct *p;
|
||||
int loop_max = 10;
|
||||
|
||||
if (!sched_boost() || !rq->cfs.h_nr_running)
|
||||
if (sched_boost_policy() == SCHED_BOOST_NONE || !rq->cfs.h_nr_running)
|
||||
return 0;
|
||||
|
||||
rq->ed_task = NULL;
|
||||
|
|
|
@@ -1677,8 +1677,13 @@ static int find_lowest_rq_hmp(struct task_struct *task)
     int prev_cpu = task_cpu(task);
     u64 cpu_load, min_load = ULLONG_MAX;
     int i;
-    int restrict_cluster = sched_boost() ? 0 :
-            sysctl_sched_restrict_cluster_spill;
+    int restrict_cluster;
+    int boost_on_big;
+
+    boost_on_big = sched_boost() == FULL_THROTTLE_BOOST &&
+               sched_boost_policy() == SCHED_BOOST_ON_BIG;
+
+    restrict_cluster = sysctl_sched_restrict_cluster_spill;

     /* Make sure the mask is initialized first */
     if (unlikely(!lowest_mask))
@@ -1697,6 +1702,9 @@ static int find_lowest_rq_hmp(struct task_struct *task)
      */

     for_each_sched_cluster(cluster) {
+        if (boost_on_big && cluster->capacity != max_possible_capacity)
+            continue;
+
         cpumask_and(&candidate_mask, &cluster->cpus, lowest_mask);
         cpumask_andnot(&candidate_mask, &candidate_mask,
                    cpu_isolated_mask);
|
@@ -1061,8 +1061,6 @@ extern unsigned int max_load_scale_factor;
 extern unsigned int max_possible_capacity;
 extern unsigned int min_max_possible_capacity;
 extern unsigned int max_power_cost;
-extern unsigned int sched_upmigrate;
-extern unsigned int sched_downmigrate;
 extern unsigned int sched_init_task_load_windows;
 extern unsigned int up_down_migrate_scale_factor;
 extern unsigned int sysctl_sched_restrict_cluster_spill;
@@ -1106,18 +1104,23 @@ extern void sched_account_irqstart(int cpu, struct task_struct *curr,
                    u64 wallclock);
 extern unsigned int cpu_temp(int cpu);
 extern unsigned int nr_eligible_big_tasks(int cpu);
-extern void update_up_down_migrate(void);
 extern int update_preferred_cluster(struct related_thread_group *grp,
                     struct task_struct *p, u32 old_load);
 extern void set_preferred_cluster(struct related_thread_group *grp);
 extern void add_new_task_to_grp(struct task_struct *new);
+extern unsigned int update_freq_aggregate_threshold(unsigned int threshold);

-enum sched_boost_type {
+enum sched_boost_policy {
     SCHED_BOOST_NONE,
     SCHED_BOOST_ON_BIG,
     SCHED_BOOST_ON_ALL,
 };

+#define NO_BOOST 0
+#define FULL_THROTTLE_BOOST 1
+#define CONSERVATIVE_BOOST 2
+#define RESTRAINED_BOOST 3
+
 static inline struct sched_cluster *cpu_cluster(int cpu)
 {
     return cpu_rq(cpu)->cluster;
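These four values are the sysctl-visible boost types; kernel/sched/boost.c earlier in this diff maps them onto the three-valued sched_boost_policy enum used for placement. A condensed restatement of that mapping (hypothetical helper, not part of the patch):

/* Condensed restatement of set_boost_policy() from kernel/sched/boost.c. */
static enum sched_boost_policy policy_for_type(int type,
                                               enum sched_boost_policy dt_policy,
                                               bool asymmetric_capacities)
{
    if (type == NO_BOOST || type == RESTRAINED_BOOST)
        return SCHED_BOOST_NONE;        /* no placement bias for these types */
    if (dt_policy != SCHED_BOOST_NONE)
        return dt_policy;               /* "boost-policy" from the device tree */
    return asymmetric_capacities ? SCHED_BOOST_ON_BIG : SCHED_BOOST_ON_ALL;
}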
@@ -1387,14 +1390,11 @@ extern void set_hmp_defaults(void);
 extern int power_delta_exceeded(unsigned int cpu_cost, unsigned int base_cost);
 extern unsigned int power_cost(int cpu, u64 demand);
 extern void reset_all_window_stats(u64 window_start, unsigned int window_size);
-extern void boost_kick(int cpu);
 extern int sched_boost(void);
 extern int task_load_will_fit(struct task_struct *p, u64 task_load, int cpu,
-                  enum sched_boost_type boost_type);
-extern enum sched_boost_type sched_boost_type(void);
+                  enum sched_boost_policy boost_policy);
+extern enum sched_boost_policy sched_boost_policy(void);
 extern int task_will_fit(struct task_struct *p, int cpu);
-extern int group_will_fit(struct sched_cluster *cluster,
-              struct related_thread_group *grp, u64 demand);
 extern u64 cpu_load(int cpu);
 extern u64 cpu_load_sync(int cpu, int sync);
 extern int preferred_cluster(struct sched_cluster *cluster,
@@ -1422,10 +1422,32 @@ extern u64 cpu_upmigrate_discourage_read_u64(struct cgroup_subsys_state *css,
                          struct cftype *cft);
 extern int cpu_upmigrate_discourage_write_u64(struct cgroup_subsys_state *css,
                           struct cftype *cft, u64 upmigrate_discourage);
-extern void sched_hmp_parse_dt(void);
-extern void init_sched_hmp_boost_policy(void);
+extern void sched_boost_parse_dt(void);
 extern void clear_top_tasks_bitmap(unsigned long *bitmap);

+#if defined(CONFIG_SCHED_TUNE) && defined(CONFIG_CGROUP_SCHEDTUNE)
+extern bool task_sched_boost(struct task_struct *p);
+extern int sync_cgroup_colocation(struct task_struct *p, bool insert);
+extern bool same_schedtune(struct task_struct *tsk1, struct task_struct *tsk2);
+extern void update_cgroup_boost_settings(void);
+extern void restore_cgroup_boost_settings(void);
+
+#else
+static inline bool
+same_schedtune(struct task_struct *tsk1, struct task_struct *tsk2)
+{
+    return true;
+}
+
+static inline bool task_sched_boost(struct task_struct *p)
+{
+    return true;
+}
+
+static inline void update_cgroup_boost_settings(void) { }
+static inline void restore_cgroup_boost_settings(void) { }
+#endif
+
 #else /* CONFIG_SCHED_HMP */

 struct hmp_sched_stats;
@@ -1615,8 +1637,7 @@ static inline void post_big_task_count_change(void) { }
 static inline void set_hmp_defaults(void) { }

 static inline void clear_reserved(int cpu) { }
-static inline void sched_hmp_parse_dt(void) {}
-static inline void init_sched_hmp_boost_policy(void) {}
+static inline void sched_boost_parse_dt(void) {}

 #define trace_sched_cpu_load(...)
 #define trace_sched_cpu_load_lb(...)
|
@@ -25,6 +25,33 @@ struct schedtune {
	/* Boost value for tasks on that SchedTune CGroup */
	int boost;

#ifdef CONFIG_SCHED_HMP
	/* Toggle ability to override sched boost enabled */
	bool sched_boost_no_override;

	/*
	 * Controls whether a cgroup is eligible for sched boost or not. This
	 * can temporarily be disabled by the kernel based on the no_override
	 * flag above.
	 */
	bool sched_boost_enabled;

	/*
	 * This tracks the default value of sched_boost_enabled and is used to
	 * restore the value following any temporary changes to that flag.
	 */
	bool sched_boost_enabled_backup;

	/*
	 * Controls whether tasks of this cgroup should be colocated with each
	 * other and tasks of other cgroups that have the same flag turned on.
	 */
	bool colocate;

	/* Controls whether further updates are allowed to the colocate flag */
	bool colocate_update_disabled;
#endif

};

static inline struct schedtune *css_st(struct cgroup_subsys_state *css)

@@ -54,6 +81,13 @@ static inline struct schedtune *parent_st(struct schedtune *st)
static struct schedtune
root_schedtune = {
	.boost = 0,
#ifdef CONFIG_SCHED_HMP
	.sched_boost_no_override = false,
	.sched_boost_enabled = true,
	.sched_boost_enabled_backup = true,
	.colocate = false,
	.colocate_update_disabled = false,
#endif
};

/*
@@ -97,6 +131,121 @@ struct boost_groups {
/* Boost groups affecting each CPU in the system */
DEFINE_PER_CPU(struct boost_groups, cpu_boost_groups);

#ifdef CONFIG_SCHED_HMP
static inline void init_sched_boost(struct schedtune *st)
{
	st->sched_boost_no_override = false;
	st->sched_boost_enabled = true;
	st->sched_boost_enabled_backup = st->sched_boost_enabled;
	st->colocate = false;
	st->colocate_update_disabled = false;
}

bool same_schedtune(struct task_struct *tsk1, struct task_struct *tsk2)
{
	return task_schedtune(tsk1) == task_schedtune(tsk2);
}

void update_cgroup_boost_settings(void)
{
	int i;

	for (i = 0; i < BOOSTGROUPS_COUNT; i++) {
		if (!allocated_group[i])
			break;

		if (allocated_group[i]->sched_boost_no_override)
			continue;

		allocated_group[i]->sched_boost_enabled = false;
	}
}

void restore_cgroup_boost_settings(void)
{
	int i;

	for (i = 0; i < BOOSTGROUPS_COUNT; i++) {
		if (!allocated_group[i])
			break;

		allocated_group[i]->sched_boost_enabled =
			allocated_group[i]->sched_boost_enabled_backup;
	}
}

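update_cgroup_boost_settings() force-clears sched_boost_enabled for every allocated group that has not set sched_boost_no_override, and restore_cgroup_boost_settings() later puts back the values saved in sched_boost_enabled_backup. A sketch of how a global boost transition might bracket the two calls; the function name and the exact call sites are assumptions for illustration, not code added by this commit:

/* Illustrative sketch only: temporarily suspend per-cgroup boost opt-outs
 * while a global boost is in effect, then restore the user-set defaults.
 */
static void hypothetical_boost_transition(bool enter_boost)
{
	if (enter_boost)
		update_cgroup_boost_settings();		/* no_override groups keep their setting */
	else
		restore_cgroup_boost_settings();	/* revert to the _backup values */
}
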
bool task_sched_boost(struct task_struct *p)
{
	struct schedtune *st = task_schedtune(p);

	return st->sched_boost_enabled;
}

static u64
sched_boost_override_read(struct cgroup_subsys_state *css,
			struct cftype *cft)
{
	struct schedtune *st = css_st(css);

	return st->sched_boost_no_override;
}

static int sched_boost_override_write(struct cgroup_subsys_state *css,
			struct cftype *cft, u64 override)
{
	struct schedtune *st = css_st(css);

	st->sched_boost_no_override = !!override;

	return 0;
}

static u64 sched_boost_enabled_read(struct cgroup_subsys_state *css,
			struct cftype *cft)
{
	struct schedtune *st = css_st(css);

	return st->sched_boost_enabled;
}

static int sched_boost_enabled_write(struct cgroup_subsys_state *css,
			struct cftype *cft, u64 enable)
{
	struct schedtune *st = css_st(css);

	st->sched_boost_enabled = !!enable;
	st->sched_boost_enabled_backup = st->sched_boost_enabled;

	return 0;
}

static u64 sched_colocate_read(struct cgroup_subsys_state *css,
			struct cftype *cft)
{
	struct schedtune *st = css_st(css);

	return st->colocate;
}

static int sched_colocate_write(struct cgroup_subsys_state *css,
			struct cftype *cft, u64 colocate)
{
	struct schedtune *st = css_st(css);

	if (st->colocate_update_disabled)
		return -EPERM;

	st->colocate = !!colocate;
	st->colocate_update_disabled = true;
	return 0;
}

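sched_colocate_write() makes the colocation flag effectively write-once: the first successful write sets colocate_update_disabled, and every later write fails with -EPERM. A user-space sketch, assuming the schedtune controller is mounted at /dev/stune and the attribute appears as schedtune.colocate (both paths are illustrative):

/* Illustrative only: the second write to schedtune.colocate is expected to
 * fail with EPERM because the flag may be set just once per group.
 */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *path = "/dev/stune/top-app/schedtune.colocate"; /* assumed mount point */
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, "1", 1) < 0)
		perror("first write");	/* should succeed */
	if (write(fd, "0", 1) < 0)
		printf("second write failed: %s\n", strerror(errno)); /* expect EPERM */
	close(fd);
	return 0;
}
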
#else /* CONFIG_SCHED_HMP */

static inline void init_sched_boost(struct schedtune *st) { }

#endif /* CONFIG_SCHED_HMP */

static u64
boost_read(struct cgroup_subsys_state *css, struct cftype *cft)
{

@@ -121,12 +270,45 @@ boost_write(struct cgroup_subsys_state *css, struct cftype *cft,
	return 0;
}

static void schedtune_attach(struct cgroup_taskset *tset)
{
	struct task_struct *task;
	struct cgroup_subsys_state *css;
	struct schedtune *st;
	bool colocate;

	cgroup_taskset_first(tset, &css);
	st = css_st(css);

	colocate = st->colocate;

	cgroup_taskset_for_each(task, css, tset)
		sync_cgroup_colocation(task, colocate);
}

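schedtune_attach() runs whenever tasks are moved into a schedtune group and calls sync_cgroup_colocation() for each of them, so colocation state follows cgroup migration rather than needing a separate interface. A user-space sketch of triggering it by migrating the calling process; the /dev/stune cgroup v1 path is an assumption:

/* Illustrative only: moving into a colocate-enabled schedtune group invokes
 * the .attach callback (schedtune_attach) for the moved task.
 */
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	FILE *f = fopen("/dev/stune/top-app/tasks", "w"); /* assumed mount point */

	if (!f) {
		perror("tasks");
		return 1;
	}
	fprintf(f, "%d\n", getpid());
	fclose(f);
	return 0;
}
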
static struct cftype files[] = {
	{
		.name = "boost",
		.read_u64 = boost_read,
		.write_u64 = boost_write,
	},
#ifdef CONFIG_SCHED_HMP
	{
		.name = "sched_boost_no_override",
		.read_u64 = sched_boost_override_read,
		.write_u64 = sched_boost_override_write,
	},
	{
		.name = "sched_boost_enabled",
		.read_u64 = sched_boost_enabled_read,
		.write_u64 = sched_boost_enabled_write,
	},
	{
		.name = "colocate",
		.read_u64 = sched_colocate_read,
		.write_u64 = sched_colocate_write,
	},
#endif
	{ }	/* terminate */
};

@@ -189,6 +371,7 @@ schedtune_css_alloc(struct cgroup_subsys_state *parent_css)

	/* Initialize per CPUs boost group support */
	st->idx = idx;
	init_sched_boost(st);
	if (schedtune_boostgroup_init(st))
		goto release;

@@ -222,6 +405,7 @@ struct cgroup_subsys schedtune_cgrp_subsys = {
	.legacy_cftypes = files,
	.early_init = 1,
	.allow_attach = subsys_cgroup_allow_attach,
	.attach = schedtune_attach,
};

#endif /* CONFIG_CGROUP_SCHEDTUNE */

@@ -124,6 +124,7 @@ static int __maybe_unused neg_one = -1;
static int zero;
static int __maybe_unused one = 1;
static int __maybe_unused two = 2;
static int __maybe_unused three = 3;
static int __maybe_unused four = 4;
static unsigned long one_ul = 1;
static int one_hundred = 100;

@@ -376,6 +377,22 @@ static struct ctl_table kern_table[] = {
		.extra1		= &zero,
		.extra2		= &one_hundred,
	},
	{
		.procname	= "sched_group_upmigrate",
		.data		= &sysctl_sched_group_upmigrate_pct,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= sched_hmp_proc_update_handler,
		.extra1		= &zero,
	},
	{
		.procname	= "sched_group_downmigrate",
		.data		= &sysctl_sched_group_downmigrate_pct,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= sched_hmp_proc_update_handler,
		.extra1		= &zero,
	},
	{
		.procname	= "sched_init_task_load",
		.data		= &sysctl_sched_init_task_load_pct,

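The two new kern_table entries above expose the percentage thresholds (sysctl_sched_group_upmigrate_pct and sysctl_sched_group_downmigrate_pct) that decide when a co-located group's combined load moves it to or from a higher-capacity cluster. A user-space sketch, assuming they appear under /proc/sys/kernel/ as sched_group_upmigrate and sched_group_downmigrate; keeping the downmigrate value at or below the upmigrate value is presumably expected so groups do not ping-pong between clusters:

/* Illustrative only: raise the group migration thresholds and read one back. */
#include <stdio.h>

static int write_pct(const char *path, int pct)
{
	FILE *f = fopen(path, "w");

	if (!f)
		return -1;
	fprintf(f, "%d\n", pct);
	return fclose(f);
}

int main(void)
{
	char buf[32];
	FILE *f;

	write_pct("/proc/sys/kernel/sched_group_upmigrate", 95);	/* values are examples */
	write_pct("/proc/sys/kernel/sched_group_downmigrate", 85);

	f = fopen("/proc/sys/kernel/sched_group_upmigrate", "r");
	if (f && fgets(buf, sizeof(buf), f))
		printf("sched_group_upmigrate=%s", buf);
	if (f)
		fclose(f);
	return 0;
}
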
@@ -487,6 +504,8 @@ static struct ctl_table kern_table[] = {
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= sched_boost_handler,
		.extra1		= &zero,
		.extra2		= &three,
	},
#endif /* CONFIG_SCHED_HMP */
#ifdef CONFIG_SCHED_DEBUG