From eedf0821f656d424859d37d018e699b9e58344c6 Mon Sep 17 00:00:00 2001
From: Joonwoo Park
Date: Wed, 25 May 2016 12:02:27 -0700
Subject: [PATCH] sched: Remove the sched heavy task frequency guidance feature

This has always been an unused feature given its limitation of adding
phantom load to the system. Since there are no immediate plans to use it
and it adds unnecessary complications to the new load fixup mechanism,
remove this feature for now. It can be revisited later in light of the
new mechanism.

Change-Id: Ie9501a898d0f423338293a8dde6bc56f493f1e75
Signed-off-by: Syed Rameez Mustafa
Signed-off-by: Joonwoo Park
---
 Documentation/scheduler/sched-zone.txt | 30 ++++++------------
 include/linux/sched/sysctl.h           |  1 -
 kernel/sched/core.c                    | 44 --------------------------
 kernel/sched/fair.c                    | 14 --------
 kernel/sched/sched.h                   |  1 -
 kernel/sysctl.c                        |  7 ----
 6 files changed, 9 insertions(+), 88 deletions(-)

diff --git a/Documentation/scheduler/sched-zone.txt b/Documentation/scheduler/sched-zone.txt
index 4d03843df937..e2d2e4e2561b 100644
--- a/Documentation/scheduler/sched-zone.txt
+++ b/Documentation/scheduler/sched-zone.txt
@@ -1111,19 +1111,7 @@ exceeds sched_freq_dec_notify, where freq_required is the frequency calculated
 by scheduler to meet current task demand. Note that sched_freq_dec_notify is
 specified in kHz units.
 
-*** 7.11 sched_heavy_task
-
-Appears at: /proc/sys/kernel/sched_heavy_task
-
-Default value: 0
-
-This tunable can be used to specify a demand value for tasks above which task
-are classified as "heavy" tasks. Task's ravg.demand attribute is used for this
-comparison. Scheduler will request a raise in cpu frequency when heavy tasks
-wakeup after at least one window of sleep, where window size is defined by
-sched_ravg_window. Value 0 will disable this feature.
-
-*** 7.12 sched_cpu_high_irqload
+*** 7.11 sched_cpu_high_irqload
 
 Appears at: /proc/sys/kernel/sched_cpu_high_irqload
 
@@ -1141,7 +1129,7 @@ longer eligible for placement. This will affect the task placement logic
 described above, causing the scheduler to try and steer tasks away from
 the CPU.
 
-*** 7.13 cpu.upmigrate_discourage
+*** 7.12 cpu.upmigrate_discourage
 
 Default value : 0
 
@@ -1156,7 +1144,7 @@ not upmigrated. Any task of the cgroup is allowed to upmigrate only under
 overcommitted scenario. See notes on sched_spill_nr_run and sched_spill_load
 for how overcommitment threshold is defined.
 
-*** 7.14 sched_static_cpu_pwr_cost
+*** 7.13 sched_static_cpu_pwr_cost
 
 Default value: 0
 
@@ -1171,7 +1159,7 @@ within a cluster and possibly have differing value between clusters as
 needed.
 
 
-*** 7.15 sched_static_cluster_pwr_cost
+*** 7.14 sched_static_cluster_pwr_cost
 
 Default value: 0
 
@@ -1182,7 +1170,7 @@ power mode. It ignores the actual D-state that a cluster may be in and assumes
 the worst case power cost of the highest D-state. It is means of biasing task
 placement away from idle clusters when necessary.
 
-*** 7.16 sched_early_detection_duration
+*** 7.15 sched_early_detection_duration
 
 Default value: 9500000
 
@@ -1193,7 +1181,7 @@ tick for it to be eligible for the scheduler's early detection feature under
 scheduler boost. For more information on the feature itself please refer to
 section 5.2.1.
 
-*** 7.17 sched_restrict_cluster_spill
+*** 7.16 sched_restrict_cluster_spill
 
 Default value: 0
 
@@ -1212,7 +1200,7 @@ CPU across all clusters. When this tunable is enabled, the RT tasks are
 restricted to the lowest possible power cluster.
 
 
-*** 7.18 sched_downmigrate
+*** 7.17 sched_downmigrate
 
 Appears at: /proc/sys/kernel/sched_downmigrate
 
@@ -1225,7 +1213,7 @@ its demand *in reference to the power-efficient cpu* drops less than 60%
 (sched_downmigrate).
 
 
-*** 7.19 sched_small_wakee_task_load
+*** 7.18 sched_small_wakee_task_load
 
 Appears at: /proc/sys/kernel/sched_small_wakee_task_load
 
@@ -1237,7 +1225,7 @@ categorized as small wakee tasks. Scheduler places small wakee tasks on the
 waker's cluster.
 
 
-*** 7.20 sched_big_waker_task_load
+*** 7.19 sched_big_waker_task_load
 
 Appears at: /proc/sys/kernel/sched_big_waker_task_load
 
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 76de1236054c..fc7dfe60bcdd 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -44,7 +44,6 @@ extern unsigned int sysctl_sched_wakeup_load_threshold;
 extern unsigned int sysctl_sched_window_stats_policy;
 extern unsigned int sysctl_sched_ravg_hist_size;
 extern unsigned int sysctl_sched_cpu_high_irqload;
-extern unsigned int sysctl_sched_heavy_task_pct;
 
 #if defined(CONFIG_SCHED_FREQ_INPUT) || defined(CONFIG_SCHED_HMP)
 extern unsigned int sysctl_sched_init_task_load_pct;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 651fa57a5ca9..b6d48fbb0c60 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2155,22 +2155,6 @@ static int account_busy_for_cpu_time(struct rq *rq, struct task_struct *p,
 	return SCHED_FREQ_ACCOUNT_WAIT_TIME;
 }
 
-static inline int
-heavy_task_wakeup(struct task_struct *p, struct rq *rq, int event)
-{
-	u32 task_demand = p->ravg.demand;
-
-	if (!sched_heavy_task || event != TASK_WAKE ||
-	    task_demand < sched_heavy_task || exiting_task(p))
-		return 0;
-
-	if (p->ravg.mark_start > rq->window_start)
-		return 0;
-
-	/* has a full window elapsed since task slept? */
-	return (rq->window_start - p->ravg.mark_start > sched_ravg_window);
-}
-
 static inline bool is_new_task(struct task_struct *p)
 {
 	return p->ravg.active_windows < sysctl_sched_new_task_windows;
@@ -2524,18 +2508,6 @@ static void update_cpu_busy_time(struct task_struct *p, struct rq *rq,
 	if (p_is_curr_task) {
 		/* p is idle task */
 		BUG_ON(p != rq->idle);
-	} else if (heavy_task_wakeup(p, rq, event)) {
-		/* A new window has started. If p is a waking
-		 * heavy task its prev_window contribution is faked
-		 * to be its window-based demand. Note that this can
-		 * introduce phantom load into the system depending
-		 * on the window policy and task behavior. This feature
-		 * can be controlled via the sched_heavy_task
-		 * tunable. */
-		p->ravg.prev_window = p->ravg.demand;
-		*prev_runnable_sum += p->ravg.demand;
-		if (new_task)
-			*nt_prev_runnable_sum += p->ravg.demand;
 	}
 
 	return;
@@ -3593,12 +3565,6 @@ done:
 
 static inline void fixup_busy_time(struct task_struct *p, int new_cpu) { }
 
-static inline int
-heavy_task_wakeup(struct task_struct *p, struct rq *rq, int event)
-{
-	return 0;
-}
-
 #endif	/* CONFIG_SCHED_FREQ_INPUT */
 
 #define sched_up_down_migrate_auto_update 1
@@ -4253,12 +4219,6 @@ static inline int update_preferred_cluster(struct related_thread_group *grp,
 
 static inline void fixup_busy_time(struct task_struct *p, int new_cpu) { }
 
-static inline int
-heavy_task_wakeup(struct task_struct *p, struct rq *rq, int event)
-{
-	return 0;
-}
-
 static struct cpu_cycle
 update_task_ravg(struct task_struct *p, struct rq *rq, int event,
 		 u64 wallclock, u64 irqtime)
@@ -5227,7 +5187,6 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 	int cpu, src_cpu, success = 0;
 	int notify = 0;
 	struct migration_notify_data mnd;
-	int heavy_task = 0;
 #ifdef CONFIG_SMP
 	unsigned int old_load;
 	struct rq *rq;
@@ -5302,7 +5261,6 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 	old_load = task_load(p);
 	wallclock = sched_ktime_clock();
 	update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0);
-	heavy_task = heavy_task_wakeup(p, rq, TASK_WAKE);
 	update_task_ravg(p, rq, TASK_WAKE, wallclock, 0);
 	raw_spin_unlock(&rq->lock);
 
@@ -5364,8 +5322,6 @@ out:
 					false, check_group);
 		check_for_freq_change(cpu_rq(src_cpu),
 					false, check_group);
-	} else if (heavy_task) {
-		check_for_freq_change(cpu_rq(cpu), false, false);
 	} else if (success) {
 		check_for_freq_change(cpu_rq(cpu), true, false);
 	}
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 10eee61d906c..6c5d393da122 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2724,18 +2724,6 @@ unsigned int __read_mostly sysctl_sched_big_waker_task_load_pct = 25;
 unsigned int __read_mostly sched_spill_load;
 unsigned int __read_mostly sysctl_sched_spill_load_pct = 100;
 
-/*
- * Tasks with demand >= sched_heavy_task will have their
- * window-based demand added to the previous window's CPU
- * time when they wake up, if they have slept for at least
- * one full window. This feature is disabled when the tunable
- * is set to 0 (the default).
- */
-#ifdef CONFIG_SCHED_FREQ_INPUT
-unsigned int __read_mostly sysctl_sched_heavy_task_pct;
-unsigned int __read_mostly sched_heavy_task;
-#endif
-
 /*
  * Tasks whose bandwidth consumption on a cpu is more than
  * sched_upmigrate are considered "big" tasks. Big tasks will be
@@ -2818,8 +2806,6 @@ void set_hmp_defaults(void)
 	update_up_down_migrate();
 
 #ifdef CONFIG_SCHED_FREQ_INPUT
-	sched_heavy_task =
-		pct_to_real(sysctl_sched_heavy_task_pct);
 	sched_major_task_runtime =
 		mult_frac(sched_ravg_window, MAJOR_TASK_PCT, 100);
 #endif
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index df9b972195e5..3d5a89cc6eef 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1058,7 +1058,6 @@ extern unsigned int sched_upmigrate;
 extern unsigned int sched_downmigrate;
 extern unsigned int sched_init_task_load_pelt;
 extern unsigned int sched_init_task_load_windows;
-extern unsigned int sched_heavy_task;
 extern unsigned int up_down_migrate_scale_factor;
 extern unsigned int sysctl_sched_restrict_cluster_spill;
 extern unsigned int sched_pred_alert_load;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index deffbdb0abf5..5f0767a2605c 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -316,13 +316,6 @@ static struct ctl_table kern_table[] = {
 		.proc_handler	= proc_dointvec_minmax,
 		.extra1		= &zero,
 	},
-	{
-		.procname	= "sched_heavy_task",
-		.data		= &sysctl_sched_heavy_task_pct,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= sched_hmp_proc_update_handler,
-	},
 #endif
 #ifdef CONFIG_SCHED_HMP
 	{