diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 226d1990eea9..a61c47a030a0 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6189,46 +6189,54 @@ static int start_cpu(bool boosted)
 
 static inline int find_best_target(struct task_struct *p, bool boosted, bool prefer_idle)
 {
-        int target_cpu = -1;
-        unsigned long target_util = prefer_idle ? ULONG_MAX : 0;
-        unsigned long backup_capacity = ULONG_MAX;
-        int best_idle_cpu = -1;
-        int best_idle_cstate = INT_MAX;
-        int backup_cpu = -1;
+        unsigned long best_idle_min_cap_orig = ULONG_MAX;
         unsigned long min_util = boosted_task_util(p);
+        unsigned long target_capacity = ULONG_MAX;
+        unsigned long min_wake_util = ULONG_MAX;
+        unsigned long target_max_spare_cap = 0;
+        unsigned long target_util = ULONG_MAX;
+        unsigned long best_active_util = ULONG_MAX;
+        int best_idle_cstate = INT_MAX;
         struct sched_domain *sd;
         struct sched_group *sg;
-        int cpu = start_cpu(boosted);
+        int best_active_cpu = -1;
+        int best_idle_cpu = -1;
+        int target_cpu = -1;
+        int cpu, i;
 
         schedstat_inc(p, se.statistics.nr_wakeups_fbt_attempts);
         schedstat_inc(this_rq(), eas_stats.fbt_attempts);
 
+        /* Find start CPU based on boost value */
+        cpu = start_cpu(boosted);
         if (cpu < 0) {
                 schedstat_inc(p, se.statistics.nr_wakeups_fbt_no_cpu);
                 schedstat_inc(this_rq(), eas_stats.fbt_no_cpu);
-                return target_cpu;
+                return -1;
         }
 
+        /* Find SD for the start CPU */
         sd = rcu_dereference(per_cpu(sd_ea, cpu));
-
         if (!sd) {
                 schedstat_inc(p, se.statistics.nr_wakeups_fbt_no_sd);
                 schedstat_inc(this_rq(), eas_stats.fbt_no_sd);
-                return target_cpu;
+                return -1;
         }
 
+        /* Scan CPUs in all SDs */
         sg = sd->groups;
-
         do {
-                int i;
-
                 for_each_cpu_and(i, tsk_cpus_allowed(p), sched_group_cpus(sg)) {
-                        unsigned long cur_capacity, new_util, wake_util;
-                        unsigned long min_wake_util = ULONG_MAX;
+                        unsigned long capacity_curr = capacity_curr_of(i);
+                        unsigned long capacity_orig = capacity_orig_of(i);
+                        unsigned long wake_util, new_util;
 
                         if (!cpu_online(i))
                                 continue;
 
+                        if (walt_cpu_high_irqload(i))
+                                continue;
+
                         /*
                          * p's blocked utilization is still accounted for on prev_cpu
                          * so prev_cpu will receive a negative bias due to the double
@@ -6243,70 +6251,190 @@ static inline int find_best_target(struct task_struct *p, bool boosted, bool pre
                          * than the one required to boost the task.
                          */
                         new_util = max(min_util, new_util);
-
-                        if (new_util > capacity_orig_of(i))
+                        if (new_util > capacity_orig)
                                 continue;
 
-#ifdef CONFIG_SCHED_WALT
-                        if (walt_cpu_high_irqload(i))
-                                continue;
-#endif
-
                         /*
-                         * Unconditionally favoring tasks that prefer idle cpus to
+                         * Case A) Latency sensitive tasks
+                         *
+                         * Unconditionally favoring tasks that prefer idle CPUs to
                          * improve latency.
+                         *
+                         * Looking for:
+                         * - an idle CPU, whatever its idle_state is, since
+                         *   the first CPUs we explore are more likely to be
+                         *   reserved for latency sensitive tasks.
+                         * - a non idle CPU where the task fits in its current
+                         *   capacity and has the maximum spare capacity.
+                         * - a non idle CPU with lower contention from other
+                         *   tasks and running at the lowest possible OPP.
+                         *
+                         * The last two goals try to favor a non idle CPU
+                         * where the task can run as if it is "almost alone".
+                         * A maximum spare capacity CPU is favoured since
+                         * the task already fits into that CPU's capacity
+                         * without waiting for an OPP change.
+                         *
+                         * The following code path is the only one in the CPUs
+                         * exploration loop which is always used by
+                         * prefer_idle tasks. It exits the loop with either a
+                         * best_active_cpu or a target_cpu which should
+                         * represent an optimal choice for latency sensitive
+                         * tasks.
                          */
-                        if (idle_cpu(i) && prefer_idle) {
-                                schedstat_inc(p, se.statistics.nr_wakeups_fbt_pref_idle);
-                                schedstat_inc(this_rq(), eas_stats.fbt_pref_idle);
-                                return i;
-                        }
+                        if (prefer_idle) {
 
-                        cur_capacity = capacity_curr_of(i);
-
-                        if (new_util < cur_capacity) {
-                                if (cpu_rq(i)->nr_running) {
-                                        /*
-                                         * Find a target cpu with the lowest/highest
-                                         * utilization if prefer_idle/!prefer_idle.
-                                         */
-                                        if (prefer_idle) {
-                                                /* Favor the CPU that last ran the task */
-                                                if (new_util > target_util ||
-                                                    wake_util > min_wake_util)
-                                                        continue;
-                                                min_wake_util = wake_util;
-                                                target_util = new_util;
-                                                target_cpu = i;
-                                        } else if (target_util < new_util) {
-                                                target_util = new_util;
-                                                target_cpu = i;
-                                        }
-                                } else if (!prefer_idle) {
-                                        int idle_idx = idle_get_state_idx(cpu_rq(i));
-
-                                        if (best_idle_cpu < 0 ||
-                                            (sysctl_sched_cstate_aware &&
-                                             best_idle_cstate > idle_idx)) {
-                                                best_idle_cstate = idle_idx;
-                                                best_idle_cpu = i;
-                                        }
+                                /*
+                                 * Case A.1: IDLE CPU
+                                 * Return the first IDLE CPU we find.
+                                 */
+                                if (idle_cpu(i)) {
+                                        schedstat_inc(p, se.statistics.nr_wakeups_fbt_pref_idle);
+                                        schedstat_inc(this_rq(), eas_stats.fbt_pref_idle);
+                                        return i;
                                 }
-                        } else if (backup_capacity > cur_capacity) {
-                                /* Find a backup cpu with least capacity. */
-                                backup_capacity = cur_capacity;
-                                backup_cpu = i;
+
+                                /*
+                                 * Case A.2: Target ACTIVE CPU
+                                 * Favor CPUs with max spare capacity.
+                                 */
+                                if ((capacity_curr > new_util) &&
+                                    (capacity_orig - new_util > target_max_spare_cap)) {
+                                        target_max_spare_cap = capacity_orig - new_util;
+                                        target_cpu = i;
+                                        continue;
+                                }
+                                if (target_cpu != -1)
+                                        continue;
+
+
+                                /*
+                                 * Case A.3: Backup ACTIVE CPU
+                                 * Favor CPUs with:
+                                 * - lower utilization due to other tasks
+                                 * - lower utilization with the task in
+                                 */
+                                if (wake_util > min_wake_util)
+                                        continue;
+                                if (new_util > best_active_util)
+                                        continue;
+                                min_wake_util = wake_util;
+                                best_active_util = new_util;
+                                best_active_cpu = i;
+                                continue;
                         }
+
+                        /*
+                         * Case B) Non latency sensitive tasks on IDLE CPUs.
+                         *
+                         * Find an optimal backup IDLE CPU for non latency
+                         * sensitive tasks.
+                         *
+                         * Looking for:
+                         * - minimizing the capacity_orig,
+                         *   i.e. preferring LITTLE CPUs
+                         * - favoring shallowest idle states
+                         *   i.e. avoid waking up deep-idle CPUs
+                         *
+                         * The following code path is used by non latency
+                         * sensitive tasks if IDLE CPUs are available. If at
+                         * least one such CPU is available, it sets the
+                         * best_idle_cpu to the most suitable idle CPU to be
+                         * selected.
+                         *
+                         * If idle CPUs are available, favour these CPUs to
+                         * improve performance by spreading tasks.
+                         * Indeed, the energy_diff() computed by the caller
+                         * will take care to ensure the minimization of energy
+                         * consumption without affecting performance.
+                         */
+                        if (idle_cpu(i)) {
+                                int idle_idx = idle_get_state_idx(cpu_rq(i));
+
+                                /* Select idle CPU with lower cap_orig */
+                                if (capacity_orig > best_idle_min_cap_orig)
+                                        continue;
+
+                                /*
+                                 * Skip CPUs in deeper idle state, but only
+                                 * if they are also less energy efficient.
+                                 * IOW, prefer a deep IDLE LITTLE CPU vs a
+                                 * shallow idle big CPU.
+                                 */
+                                if (sysctl_sched_cstate_aware &&
+                                    best_idle_cstate <= idle_idx)
+                                        continue;
+
+                                /* Keep track of best idle CPU */
+                                best_idle_min_cap_orig = capacity_orig;
+                                best_idle_cstate = idle_idx;
+                                best_idle_cpu = i;
+                                continue;
+                        }
+
+                        /*
+                         * Case C) Non latency sensitive tasks on ACTIVE CPUs.
+                         *
+                         * Pack tasks in the most energy efficient capacities.
+                         *
+                         * This task packing strategy prefers more energy
+                         * efficient CPUs (i.e. pack on smaller maximum
+                         * capacity CPUs) while also trying to spread tasks to
+                         * run them all at a lower OPP.
+                         *
+                         * This assumes for example that it's more energy
+                         * efficient to run two tasks on two CPUs at a lower
+                         * OPP than packing both on a single CPU but running
+                         * that CPU at a higher OPP.
+                         *
+                         * Thus, this case keeps track of the CPU with the
+                         * smallest maximum capacity and highest spare maximum
+                         * capacity.
+                         */
+
+                        /* Favor CPUs with smaller capacity */
+                        if (capacity_orig > target_capacity)
+                                continue;
+
+                        /* Favor CPUs with maximum spare capacity */
+                        if ((capacity_orig - new_util) < target_max_spare_cap)
+                                continue;
+
+                        target_max_spare_cap = capacity_orig - new_util;
+                        target_capacity = capacity_orig;
+                        target_util = new_util;
+                        target_cpu = i;
                 }
+
         } while (sg = sg->next, sg != sd->groups);
 
-        if (target_cpu < 0)
-                target_cpu = best_idle_cpu >= 0 ? best_idle_cpu : backup_cpu;
+        /*
+         * For non latency sensitive tasks, cases B and C in the previous loop,
+         * we pick the best IDLE CPU only if we were not able to find a target
+         * ACTIVE CPU.
+         *
+         * Policy priorities:
+         *
+         * - prefer_idle tasks:
+         *
+         *   a) IDLE CPU available: we return immediately
+         *   b) ACTIVE CPU where the task fits and which has the biggest
+         *      spare capacity (i.e. target_cpu)
+         *   c) ACTIVE CPU with less contention due to other tasks
+         *      (i.e. best_active_cpu)
+         *
+         * - NON prefer_idle tasks:
+         *
+         *   a) ACTIVE CPU: target_cpu
+         *   b) IDLE CPU: best_idle_cpu
+         */
+        if (target_cpu == -1)
+                target_cpu = prefer_idle
+                        ? best_active_cpu
+                        : best_idle_cpu;
 
-        if (target_cpu >= 0) {
-                schedstat_inc(p, se.statistics.nr_wakeups_fbt_count);
-                schedstat_inc(this_rq(), eas_stats.fbt_count);
-        }
+        schedstat_inc(p, se.statistics.nr_wakeups_fbt_count);
+        schedstat_inc(this_rq(), eas_stats.fbt_count);
 
         return target_cpu;
 }
diff --git a/kernel/sched/walt.h b/kernel/sched/walt.h
index e181c87a928d..f56c4da16d0b 100644
--- a/kernel/sched/walt.h
+++ b/kernel/sched/walt.h
@@ -55,6 +55,8 @@ static inline void walt_migrate_sync_cpu(int cpu) { }
 static inline void walt_init_cpu_efficiency(void) { }
 static inline u64 walt_ktime_clock(void) { return 0; }
 
+#define walt_cpu_high_irqload(cpu) false
+
 #endif /* CONFIG_SCHED_WALT */
 
 extern unsigned int walt_disabled;
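For reference, the selection policy the patch implements for non latency sensitive tasks (cases B and C plus the final fallback) can be illustrated with a minimal, self-contained userspace sketch. The cpu_stat structure, the pick_cpu() helper and the sample capacity/utilization values below are hypothetical stand-ins for the kernel's per-CPU data (capacity_orig_of(), cpu_util_wake(), idle_get_state_idx()); the prefer_idle path and the sysctl_sched_cstate_aware gate are intentionally left out.

/*
 * Minimal userspace sketch of the non latency sensitive selection policy
 * described in the patch comments (Cases B and C plus the final fallback).
 * The struct and sample values are hypothetical; the real code operates on
 * struct rq and the capacity/utilization helpers inside the kernel.
 */
#include <stdio.h>
#include <limits.h>

struct cpu_stat {
        unsigned long capacity_orig;    /* maximum capacity of the CPU */
        unsigned long new_util;         /* estimated util with the task placed */
        int idle;                       /* non-zero if the CPU is idle */
        int idle_state;                 /* smaller means shallower idle state */
};

static int pick_cpu(const struct cpu_stat *cpus, int nr)
{
        unsigned long best_idle_min_cap_orig = ULONG_MAX;
        unsigned long target_max_spare_cap = 0;
        unsigned long target_capacity = ULONG_MAX;
        int best_idle_cstate = INT_MAX;
        int best_idle_cpu = -1, target_cpu = -1;

        for (int i = 0; i < nr; i++) {
                const struct cpu_stat *c = &cpus[i];

                if (c->new_util > c->capacity_orig)
                        continue;       /* task does not fit at all */

                if (c->idle) {
                        /* Case B: prefer small CPUs in shallow idle states */
                        if (c->capacity_orig > best_idle_min_cap_orig)
                                continue;
                        if (best_idle_cstate <= c->idle_state)
                                continue;
                        best_idle_min_cap_orig = c->capacity_orig;
                        best_idle_cstate = c->idle_state;
                        best_idle_cpu = i;
                        continue;
                }

                /* Case C: pack on the smallest CPU with the most spare capacity */
                if (c->capacity_orig > target_capacity)
                        continue;
                if (c->capacity_orig - c->new_util < target_max_spare_cap)
                        continue;
                target_max_spare_cap = c->capacity_orig - c->new_util;
                target_capacity = c->capacity_orig;
                target_cpu = i;
        }

        /* Final fallback mirrors the patch: ACTIVE target first, then IDLE */
        return target_cpu != -1 ? target_cpu : best_idle_cpu;
}

int main(void)
{
        struct cpu_stat cpus[] = {
                { 430, 200, 0, 0 },     /* LITTLE, busy, some spare capacity */
                { 430, 100, 1, 2 },     /* LITTLE, deep idle */
                { 1024, 300, 0, 0 },    /* big, busy */
        };

        printf("selected CPU: %d\n", pick_cpu(cpus, 3));
        return 0;
}

With the sample values above pick_cpu() returns 0: the busy LITTLE CPU with enough spare capacity is packed onto before the deep-idle LITTLE CPU is woken, matching the target_cpu-before-best_idle_cpu fallback at the end of find_best_target().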