From f27b626521a14ebe9b9cfa692d59dcd72c1b4def Mon Sep 17 00:00:00 2001
From: Srivatsa Vaddagiri
Date: Mon, 21 Jul 2014 08:24:04 -0700
Subject: [PATCH] sched: remove sysctl control for HMP and power-aware task
 placement

There is no real need to control HMP and power-aware task placement at
runtime after the kernel has booted. Boot-time control should be
sufficient. Not allowing runtime (sysctl) control also simplifies the
code quite a bit.

Also rename sysctl_sched_enable_hmp_task_placement to the shorter
sched_enable_hmp.

Change-Id: I60cae51a173c6f73b79cbf90c50ddd41a27604aa
Signed-off-by: Srivatsa Vaddagiri
Signed-off-by: Syed Rameez Mustafa
[joonwoop@codeaurora.org: fixed minor conflict. p->nr_cpus_allowed == 1
 has moved to core.c]
Signed-off-by: Joonwoo Park
---
 include/linux/sched/sysctl.h |  2 --
 kernel/sched/core.c          | 43 ++++++++++++++++++++++++++++++------
 kernel/sched/fair.c          | 38 +++++++++++++++----------------
 kernel/sched/rt.c            |  4 ++--
 kernel/sched/sched.h         |  5 +++--
 kernel/sysctl.c              | 14 ------------
 6 files changed, 60 insertions(+), 46 deletions(-)

diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index fde724969cd8..da06dc1a374b 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -53,7 +53,6 @@ extern int sysctl_sched_freq_dec_notify_slack_pct;
 #endif
 
 #ifdef CONFIG_SCHED_HMP
-extern unsigned int sysctl_sched_enable_hmp_task_placement;
 extern unsigned int sysctl_sched_spill_nr_run;
 extern unsigned int sysctl_sched_mostly_idle_nr_run;
 extern unsigned int sysctl_sched_spill_load_pct;
@@ -62,7 +61,6 @@ extern unsigned int sysctl_sched_small_task_pct;
 extern unsigned int sysctl_sched_upmigrate_pct;
 extern unsigned int sysctl_sched_downmigrate_pct;
 extern int sysctl_sched_upmigrate_min_nice;
-extern unsigned int sysctl_sched_enable_power_aware;
 extern unsigned int sysctl_sched_powerband_limit_pct;
 
 extern unsigned int sysctl_sched_boost;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 843ed0213eba..98bd1a6dc02b 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1075,6 +1075,36 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
 			rq_clock_skip_update(rq, true);
 }
 
+#ifdef CONFIG_SCHED_HMP
+
+static int __init set_sched_enable_hmp(char *str)
+{
+	int enable_hmp = 0;
+
+	get_option(&str, &enable_hmp);
+
+	sched_enable_hmp = !!enable_hmp;
+
+	return 0;
+}
+
+early_param("sched_enable_hmp", set_sched_enable_hmp);
+
+static int __init set_sched_enable_power_aware(char *str)
+{
+	int enable_power_aware = 0;
+
+	get_option(&str, &enable_power_aware);
+
+	sched_enable_power_aware = !!enable_power_aware;
+
+	return 0;
+}
+
+early_param("sched_enable_power_aware", set_sched_enable_power_aware);
+
+#endif /* CONFIG_SCHED_HMP */
+
 #if defined(CONFIG_SCHED_FREQ_INPUT) || defined(CONFIG_SCHED_HMP)
 
 /* Window size (in ns) */
@@ -1135,7 +1165,7 @@ int rq_freq_margin(struct rq *rq)
 	int margin;
 	u64 demand;
 
-	if (!sysctl_sched_enable_hmp_task_placement)
+	if (!sched_enable_hmp)
 		return INT_MAX;
 
 	demand = scale_load_to_cpu(rq->prev_runnable_sum, rq->cpu);
@@ -1393,7 +1423,7 @@ static void init_cpu_efficiency(void)
 	int i, efficiency;
 	unsigned int max = 0, min = UINT_MAX;
 
-	if (!sysctl_sched_enable_hmp_task_placement)
+	if (!sched_enable_hmp)
 		return;
 
 	for_each_possible_cpu(i) {
@@ -1436,7 +1466,7 @@ static inline void set_window_start(struct rq *rq)
 	int cpu = cpu_of(rq);
 	struct rq *sync_rq = cpu_rq(sync_cpu);
 
-	if (rq->window_start || !sysctl_sched_enable_hmp_task_placement)
+	if (rq->window_start || !sched_enable_hmp)
 		return;
 
 	if (cpu == sync_cpu) {
@@ -1720,7 +1750,7 @@ static int register_sched_callback(void)
 {
 	int ret;
 
-	if (!sysctl_sched_enable_hmp_task_placement)
+	if (!sched_enable_hmp)
 		return 0;
 
 	ret = cpufreq_register_notifier(&notifier_policy_block,
@@ -2112,8 +2142,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 		p->se.nr_migrations++;
 		perf_event_task_migrate(p);
 
-		if (sysctl_sched_enable_hmp_task_placement &&
-		    (p->on_rq || p->state == TASK_WAKING))
+		if (sched_enable_hmp && (p->on_rq || p->state == TASK_WAKING))
 			fixup_busy_time(p, new_cpu);
 	}
 
@@ -3672,7 +3701,7 @@ void sched_exec(void)
 	unsigned long flags;
 	int dest_cpu;
 
-	if (sysctl_sched_enable_hmp_task_placement)
+	if (sched_enable_hmp)
 		return;
 
 	raw_spin_lock_irqsave(&p->pi_lock, flags);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 76204fa529f2..69efaf68e7e2 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2564,7 +2564,7 @@ unsigned int max_task_load(void)
 #ifdef CONFIG_SCHED_HMP
 
 /* Use this knob to turn on or off HMP-aware task placement logic */
-unsigned int __read_mostly sysctl_sched_enable_hmp_task_placement = 1;
+unsigned int __read_mostly sched_enable_hmp = 1;
 
 /* A cpu can no longer accomodate more tasks if:
  *
@@ -2585,7 +2585,7 @@ unsigned int __read_mostly sysctl_sched_mostly_idle_nr_run = 3;
  * Control whether or not individual CPU power consumption is used to
 * guide task placement.
 */
-unsigned int __read_mostly sysctl_sched_enable_power_aware = 1;
+unsigned int __read_mostly sched_enable_power_aware = 1;
 
 /*
  * This specifies the maximum percent power difference between 2
@@ -2767,7 +2767,7 @@ int sched_set_boost(int enable)
 	unsigned long flags;
 	int ret = 0;
 
-	if (!sysctl_sched_enable_hmp_task_placement)
+	if (!sched_enable_hmp)
 		return -EINVAL;
 
 	spin_lock_irqsave(&boost_lock, flags);
@@ -2887,7 +2887,7 @@ unsigned int power_cost_at_freq(int cpu, unsigned int freq)
 	struct cpu_pstate_pwr *costs;
 
 	if (!per_cpu_info || !per_cpu_info[cpu].ptable ||
-	    !sysctl_sched_enable_power_aware)
+	    !sched_enable_power_aware)
 		/* When power aware scheduling is not in use, or CPU
 		 * power data is not available, just use the CPU
 		 * capacity as a rough stand-in for real CPU power
@@ -2918,7 +2918,7 @@ static unsigned int power_cost(struct task_struct *p, int cpu)
 	unsigned int task_freq;
 	unsigned int cur_freq = cpu_rq(cpu)->cur_freq;
 
-	if (!sysctl_sched_enable_power_aware)
+	if (!sched_enable_power_aware)
 		return cpu_rq(cpu)->max_possible_capacity;
 
 	/* calculate % of max freq needed */
@@ -3075,7 +3075,7 @@ done:
 
 void inc_nr_big_small_task(struct rq *rq, struct task_struct *p)
 {
-	if (!sysctl_sched_enable_hmp_task_placement)
+	if (!sched_enable_hmp)
 		return;
 
 	if (is_big_task(p))
@@ -3086,7 +3086,7 @@ void inc_nr_big_small_task(struct rq *rq, struct task_struct *p)
 
 void dec_nr_big_small_task(struct rq *rq, struct task_struct *p)
 {
-	if (!sysctl_sched_enable_hmp_task_placement)
+	if (!sched_enable_hmp)
 		return;
 
 	if (is_big_task(p))
@@ -3154,7 +3154,7 @@ int sched_hmp_proc_update_handler(struct ctl_table *table, int write,
 	unsigned int old_val = *data;
 
 	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
-	if (ret || !write || !sysctl_sched_enable_hmp_task_placement)
+	if (ret || !write || !sched_enable_hmp)
 		return ret;
 
 	if ((sysctl_sched_downmigrate_pct > sysctl_sched_upmigrate_pct) ||
@@ -3278,7 +3278,7 @@ static inline int migration_needed(struct rq *rq, struct task_struct *p)
 	int nice = task_nice(p);
 
 	if (is_small_task(p) || p->state != TASK_RUNNING ||
-	    !sysctl_sched_enable_hmp_task_placement)
+	    !sched_enable_hmp)
 		return 0;
 
 	/* Todo: cgroup-based control? */
@@ -3289,7 +3289,7 @@ static inline int migration_needed(struct rq *rq, struct task_struct *p)
 	if (!task_will_fit(p, cpu_of(rq)))
 		return 1;
 
-	if (sysctl_sched_enable_power_aware &&
+	if (sched_enable_power_aware &&
 	    lower_power_cpu_available(p, cpu_of(rq)))
 		return 1;
 
@@ -3342,7 +3342,7 @@ static inline int nr_big_tasks(struct rq *rq)
 
 #else /* CONFIG_SCHED_HMP */
 
-#define sysctl_sched_enable_power_aware 0
+#define sched_enable_power_aware 0
 
 static inline int select_best_cpu(struct task_struct *p, int target)
 {
@@ -3819,7 +3819,7 @@ add_to_scaled_stat(int cpu, struct sched_avg *sa, u64 delta)
 	u64 scaled_delta;
 	int sf;
 
-	if (!sysctl_sched_enable_hmp_task_placement)
+	if (!sched_enable_hmp)
 		return;
 
 	if (unlikely(cur_freq > max_possible_freq ||
@@ -3836,7 +3836,7 @@ add_to_scaled_stat(int cpu, struct sched_avg *sa, u64 delta)
 
 static inline void decay_scaled_stat(struct sched_avg *sa, u64 periods)
 {
-	if (!sysctl_sched_enable_hmp_task_placement)
+	if (!sched_enable_hmp)
 		return;
 
 	sa->runnable_avg_sum_scaled =
@@ -5908,7 +5908,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
 	int want_affine = 0;
 	int sync = wake_flags & WF_SYNC;
 
-	if (sysctl_sched_enable_hmp_task_placement)
+	if (sched_enable_hmp)
 		return select_best_cpu(p, prev_cpu);
 
 	if (sd_flag & SD_BALANCE_WAKE)
@@ -7919,7 +7919,7 @@ static struct rq *find_busiest_queue(struct lb_env *env,
 	unsigned long busiest_load = 0, busiest_capacity = 1;
 	int i;
 
-	if (sysctl_sched_enable_hmp_task_placement)
+	if (sched_enable_hmp)
 		return find_busiest_queue_hmp(env, group);
 
 	for_each_cpu_and(i, sched_group_cpus(group), env->cpus) {
@@ -8422,7 +8422,7 @@ static int idle_balance(struct rq *this_rq)
 	 */
 	rcu_read_lock();
 	sd = rcu_dereference_check_sched_domain(this_rq->sd);
-	if (sd && sysctl_sched_enable_power_aware) {
+	if (sd && sched_enable_power_aware) {
 		for_each_cpu(i, sched_domain_span(sd)) {
 			if (i == this_cpu || idle_cpu(i)) {
 				cost = power_cost_at_freq(i, 0);
@@ -8648,7 +8648,7 @@ static inline int find_new_ilb(int type)
 {
 	int ilb;
 
-	if (sysctl_sched_enable_hmp_task_placement)
+	if (sched_enable_hmp)
 		return find_new_hmp_ilb(type);
 
 	ilb = cpumask_first(nohz.idle_cpus_mask);
@@ -8902,7 +8902,7 @@ static int select_lowest_power_cpu(struct cpumask *cpus)
 	int lowest_power_cpu = -1;
 	int lowest_power = INT_MAX;
 
-	if (sysctl_sched_enable_power_aware) {
+	if (sched_enable_power_aware) {
 		for_each_cpu(i, cpus) {
 			cost = power_cost_at_freq(i, 0);
 			if (cost < lowest_power) {
@@ -9029,7 +9029,7 @@ static inline int _nohz_kick_needed(struct rq *rq, int cpu, int *type)
 {
 	unsigned long now = jiffies;
 
-	if (sysctl_sched_enable_hmp_task_placement)
+	if (sched_enable_hmp)
 		return _nohz_kick_needed_hmp(rq, cpu, type);
 
 	/*
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index cbe16bbd4fae..280b9a8da5f8 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1385,7 +1385,7 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
 	struct task_struct *curr;
 	struct rq *rq;
 
-	if (sysctl_sched_enable_hmp_task_placement)
+	if (sched_enable_hmp)
 		return select_task_rq_rt_hmp(p, cpu, sd_flag, flags);
 
 	/* For anything but wake ups, just return the task_cpu */
@@ -1680,7 +1680,7 @@ static int find_lowest_rq(struct task_struct *task)
 	int this_cpu = smp_processor_id();
 	int cpu = task_cpu(task);
 
-	if (sysctl_sched_enable_hmp_task_placement)
+	if (sched_enable_hmp)
 		return find_lowest_rq_hmp(task);
 
 	/* Make sure the mask is initialized first */
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 351f69457a27..e2a22f661559 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1016,7 +1016,8 @@ static inline unsigned long capacity_scale_cpu_freq(int cpu)
 
 #ifdef CONFIG_SCHED_HMP
 
-extern unsigned int sysctl_sched_enable_hmp_task_placement;
+extern unsigned int sched_enable_hmp;
+extern unsigned int sched_enable_power_aware;
 
 int mostly_idle_cpu(int cpu);
 extern void check_for_migration(struct rq *rq, struct task_struct *p);
@@ -1029,7 +1030,7 @@ extern unsigned int power_cost_at_freq(int cpu, unsigned int freq);
 
 #else /* CONFIG_SCHED_HMP */
 
-#define sysctl_sched_enable_hmp_task_placement 0
+#define sched_enable_hmp 0
 
 static inline void check_for_migration(struct rq *rq, struct task_struct *p) { }
 static inline void pre_big_small_task_count_change(void) { }
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 9fd5318d17f0..abe1ea74f977 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -325,13 +325,6 @@ static struct ctl_table kern_table[] = {
 	},
 #endif
 #ifdef CONFIG_SCHED_HMP
-	{
-		.procname	= "sched_enable_hmp_task_placement",
-		.data		= &sysctl_sched_enable_hmp_task_placement,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
 	{
 		.procname	= "sched_small_task",
 		.data		= &sysctl_sched_small_task_pct,
@@ -395,13 +388,6 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= sched_hmp_proc_update_handler,
 	},
-	{
-		.procname	= "sched_enable_power_aware",
-		.data		= &sysctl_sched_enable_power_aware,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
 	{
 		.procname	= "sched_power_band_limit",
 		.data		= &sysctl_sched_powerband_limit_pct,