From f1018e8b3647f02bcef62b1be5a57799cf5932df Mon Sep 17 00:00:00 2001 From: Srivatsa Vaddagiri Date: Sat, 29 Mar 2014 19:07:28 -0700 Subject: [PATCH] sched: Introduce efficiency, load_scale_factor and capacity Efficiency reflects the instructions-per-cycle capability of a cpu. load_scale_factor reflects the magnification factor that is applied to task load when estimating the bandwidth it will consume on a cpu. It accounts for the fact that task load is scaled in reference to the "best" cpu, i.e. one that has the best efficiency factor and also the best possible max_freq. Note that there may be no single CPU in the system that has both the best efficiency and the best possible max_freq, but that is still the combination that all task load in the system is scaled against. capacity reflects the max_freq and efficiency metric of a cpu. It is defined such that the "least" performing cpu (one with the lowest efficiency factor and max_freq) gets a capacity of 1024. Again, there may not be a CPU in the system that has both the lowest efficiency and the lowest max_freq. This is still the combination that is assigned a capacity of 1024, however; other CPU capacities are relative to this. Change-Id: I4a853f1f0f90020721d2a4ee8b10db3d226b287c Signed-off-by: Srivatsa Vaddagiri [rameezmustafa@codeaurora.org: Port to msm-3.18] Signed-off-by: Syed Rameez Mustafa --- kernel/sched/core.c | 124 +++++++++++++++++++++++++++++++++++++++++++ kernel/sched/sched.h | 19 +++++++ 2 files changed, 143 insertions(+) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 611dec66c978..e92fe05abc68 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1764,6 +1764,9 @@ __read_mostly unsigned int max_sched_ravg_window = 1000000000; __read_mostly unsigned int sysctl_sched_window_stats_policy = WINDOW_STATS_USE_AVG; +unsigned int max_possible_efficiency = 1024; +unsigned int min_possible_efficiency = 1024; + /* * Called when new window is starting for a task, to record cpu usage over * recently concluded window(s). 
Normally 'samples' should be 1. It can be > 1 @@ -1893,6 +1896,36 @@ void update_task_ravg(struct task_struct *p, struct rq *rq, int update_sum) p->ravg.mark_start = wallclock; } +unsigned long __weak arch_get_cpu_efficiency(int cpu) +{ + return SCHED_LOAD_SCALE; +} + +static void init_cpu_efficiency(void) +{ + int i, efficiency; + unsigned int max = 0, min = UINT_MAX; + + for_each_possible_cpu(i) { + efficiency = arch_get_cpu_efficiency(i); + cpu_rq(i)->efficiency = efficiency; + + if (efficiency > max) + max = efficiency; + if (efficiency < min) + min = efficiency; + } + + BUG_ON(!max || !min); /* both are later used as divisors or scale references */ + + max_possible_efficiency = max; + min_possible_efficiency = min; +} + +#else /* CONFIG_SCHED_FREQ_INPUT || CONFIG_SCHED_HMP */ + +static inline void init_cpu_efficiency(void) {} + #endif /* CONFIG_SCHED_FREQ_INPUT || CONFIG_SCHED_HMP */ /* @@ -7533,6 +7566,7 @@ void __init sched_init_smp(void) { cpumask_var_t non_isolated_cpus; + init_cpu_efficiency(); alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL); alloc_cpumask_var(&fallback_doms, GFP_KERNEL); @@ -7589,6 +7623,62 @@ unsigned int max_possible_freq = 1; */ unsigned int min_max_freq = 1; +unsigned int max_capacity = 1024; /* max(rq->capacity) */ +unsigned int min_capacity = 1024; /* min(rq->capacity) */ + +/* Recompute max_capacity/min_capacity from the current rq->capacity of every possible CPU */ +static void update_min_max_capacity(void) +{ + int i; + int max = 0, min = INT_MAX; + + for_each_possible_cpu(i) { + if (cpu_rq(i)->capacity > max) + max = cpu_rq(i)->capacity; + if (cpu_rq(i)->capacity < min) + min = cpu_rq(i)->capacity; + } + + max_capacity = max; + min_capacity = min; +} + +/* + * Return 'capacity' of a cpu in reference to the "least" efficient cpu + * (min_possible_efficiency), such that the least efficient cpu gets 1024. + */ +unsigned long capacity_scale_cpu_efficiency(int cpu) +{ + return (1024 * cpu_rq(cpu)->efficiency) / min_possible_efficiency; +} + +/* + * Return 'capacity' of a cpu in reference to the cpu with the lowest max_freq + * 
(min_max_freq), such that one with lowest max_freq gets capacity of 1024. + */ +unsigned long capacity_scale_cpu_freq(int cpu) +{ + return (1024 * cpu_rq(cpu)->max_freq) / min_max_freq; +} + +/* + * Return load_scale_factor of a cpu in reference to the "most" efficient cpu, so + * that the "most" efficient cpu gets a load_scale_factor of 1024 (i.e. 1.0) + */ +static inline unsigned long load_scale_cpu_efficiency(int cpu) +{ + return (1024 * max_possible_efficiency) / cpu_rq(cpu)->efficiency; +} + +/* + * Return load_scale_factor of a cpu in reference to the cpu with the best max_freq + * (max_possible_freq), so that one with best max_freq gets a load_scale_factor + * of 1024 (i.e. 1.0; callers shift the product right by 10). + */ +static inline unsigned long load_scale_cpu_freq(int cpu) +{ + return (1024 * max_possible_freq) / cpu_rq(cpu)->max_freq; +} static int cpufreq_notifier_policy(struct notifier_block *nb, unsigned long val, void *data) @@ -7596,6 +7686,9 @@ static int cpufreq_notifier_policy(struct notifier_block *nb, struct cpufreq_policy *policy = (struct cpufreq_policy *)data; int i; unsigned int min_max = min_max_freq; + int cpu = policy->cpu; + int load_scale = 1024; + int capacity = 1024; if (val != CPUFREQ_NOTIFY) return 0; @@ -7613,6 +7706,34 @@ static int cpufreq_notifier_policy(struct notifier_block *nb, BUG_ON(!min_max_freq); BUG_ON(!policy->max); + /* Assumes all cpus in the cluster have the same efficiency!! */ + capacity *= capacity_scale_cpu_efficiency(cpu); + capacity >>= 10; + + capacity *= capacity_scale_cpu_freq(cpu); + capacity >>= 10; + + /* + * load_scale_factor accounts for the fact that task load + * (p->se.avg.runnable_avg_sum_scaled) is in reference to the "best" + * performing cpu. A task's load will need to be scaled (up) by a factor + * to determine suitability to be placed on a particular cpu. 
+ */ + load_scale *= load_scale_cpu_efficiency(cpu); + load_scale >>= 10; + + load_scale *= load_scale_cpu_freq(cpu); + load_scale >>= 10; + + for_each_cpu(i, policy->related_cpus) { + struct rq *rq = cpu_rq(i); + + rq->capacity = capacity; + rq->load_scale_factor = load_scale; + } + + update_min_max_capacity(); + return 0; } @@ -7806,6 +7927,9 @@ void __init sched_init(void) rq->min_freq = 1; rq->max_possible_freq = 1; rq->cumulative_runnable_avg = 0; + rq->efficiency = 1024; + rq->capacity = 1024; + rq->load_scale_factor = 1024; #endif rq->max_idle_balance_cost = sysctl_sched_migration_cost; rq->cstate = 0; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 0267497dd821..7308a48075bb 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -650,6 +650,9 @@ struct rq { */ unsigned int cur_freq, max_freq, min_freq, max_possible_freq; u64 cumulative_runnable_avg; + int efficiency; /* Differentiate cpus with different IPC capability */ + int load_scale_factor; /* Factor (1024 == 1.0) for scaling task load on this cpu */ + int capacity; /* Scaled by efficiency and max_freq; 1024 is the baseline */ #endif #ifdef CONFIG_IRQ_TIME_ACCOUNTING @@ -924,6 +927,12 @@ extern unsigned int max_possible_freq; extern unsigned int min_max_freq; extern unsigned int pct_task_load(struct task_struct *p); extern void init_new_task_load(struct task_struct *p); +extern unsigned int max_possible_efficiency; +extern unsigned int min_possible_efficiency; +extern unsigned int max_capacity; +extern unsigned int min_capacity; +extern unsigned long capacity_scale_cpu_efficiency(int cpu); +extern unsigned long capacity_scale_cpu_freq(int cpu); static inline void inc_cumulative_runnable_avg(struct rq *rq, struct task_struct *p) @@ -954,6 +963,16 @@ dec_cumulative_runnable_avg(struct rq *rq, struct task_struct *p) static inline void init_new_task_load(struct task_struct *p) { } +static inline unsigned long capacity_scale_cpu_efficiency(int cpu) +{ + return SCHED_LOAD_SCALE; +} + +static inline unsigned long capacity_scale_cpu_freq(int cpu) +{ + return SCHED_LOAD_SCALE; +} + #endif /* 
CONFIG_SCHED_FREQ_INPUT || CONFIG_SCHED_HMP */ #ifdef CONFIG_CGROUP_SCHED