2015-06-23 09:17:54 +01:00
|
|
|
#include <linux/cgroup.h>
|
|
|
|
#include <linux/err.h>
|
2016-01-12 18:12:13 +00:00
|
|
|
#include <linux/kernel.h>
|
2015-06-23 09:17:54 +01:00
|
|
|
#include <linux/percpu.h>
|
|
|
|
#include <linux/printk.h>
|
2015-07-07 15:33:20 +01:00
|
|
|
#include <linux/rcupdate.h>
|
2015-06-23 09:17:54 +01:00
|
|
|
#include <linux/slab.h>
|
|
|
|
|
2015-06-22 13:49:07 +01:00
|
|
|
#include <trace/events/sched.h>
|
|
|
|
|
2015-06-22 18:11:44 +01:00
|
|
|
#include "sched.h"
|
2016-07-29 15:45:57 +01:00
|
|
|
#include "tune.h"
|
2015-06-22 18:11:44 +01:00
|
|
|
|
FIXUP: sched/tune: fix accounting for runnable tasks
Contains:
sched/tune: fix accounting for runnable tasks (1/5)
The accounting for tasks into boost groups of different CPUs is currently
broken mainly because:
a) we do not properly track the change of boost group of a RUNNABLE task
b) there are race conditions between migration code and accounting code
This patch provides a fixes to ensure enqueue/dequeue
accounting also for throttled tasks.
Without this patch it can happen that a task is enqueued into a throttled
RQ thus not being accounted for the boosting of the corresponding RQ.
We could argue that a throttled task should not boost a CPU, however:
a) properly implementing CPU boosting considering throttled tasks will
increase a lot the complexity of the solution
b) it's not easy to quantify the benefits introduced by such a more
complex solution
Since task throttling requires the usage of the CFS bandwidth controller,
which is not widely used on mobile systems (at least not by Android kernels
so far), for the time being we go for the simple solution and boost also
for throttled RQs.
sched/tune: fix accounting for runnable tasks (2/5)
This patch provides the code required to enforce proper locking.
A per boost group spinlock has been added to grant atomic
accounting of tasks as well as to serialise enqueue/dequeue operations,
triggered by tasks migrations, with cgroups's attach/detach operations.
sched/tune: fix accounting for runnable tasks (3/5)
This patch adds cgroups {allow,can,cancel}_attach callbacks.
Since a task can be migrated between boost groups while it's running,
the CGroups's attach callbacks have been added to properly migrate
boost contributions of RUNNABLE tasks.
The RQ's lock is used to serialise enqueue/dequeue operations, triggered
by tasks migrations, with cgroups's attach/detach operations. While the
SchedTune's CPU lock is used to guarantee atomicity of the accounting within
the CPU.
NOTE: the current implementation does not allows a concurrent CPU migration
and CGroups change.
sched/tune: fix accounting for runnable tasks (4/5)
This fixes accounting for exiting tasks by adding a dedicated call early
in the do_exit() syscall, which disables SchedTune accounting as soon as a
task is flagged PF_EXITING.
This flag is set before the multiple dequeue/enqueue dance triggered
by cgroup_exit() which is useful only to inject useless tasks movements
thus increasing possibilities for race conditions with the migration code.
The schedtune_exit_task() call does the last dequeue of a task from its
current boost group. This is a solution more aligned with what happens in
mainline kernels (>v4.4) where the exit_cgroup does not move anymore a dying
task to the root control group.
sched/tune: fix accounting for runnable tasks (5/5)
To avoid accounting issues at startup, this patch disable the SchedTune
accounting until the required data structures have been properly
initialized.
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
[jstultz: fwdported to 4.4]
Signed-off-by: John Stultz <john.stultz@linaro.org>
2016-07-28 18:44:40 +01:00
|
|
|
#ifdef CONFIG_CGROUP_SCHEDTUNE

/*
 * SchedTune accounting is kept disabled until the required per boost
 * group data structures have been properly initialized.
 */
bool schedtune_initialized = false;

#endif
|
2015-06-22 18:11:44 +01:00
|
|
|
|
|
|
|
/* System-wide boost value, configurable via sysctl */
unsigned int sysctl_sched_cfs_boost __read_mostly;

/* Reciprocal value used to speed up boost divisions (defined elsewhere) */
extern struct reciprocal_value schedtune_spc_rdiv;

/* Energy normalization data (defined elsewhere) */
extern struct target_nrg schedtune_target_nrg;
|
/* Performance Boost region (B) threshold params */
|
|
|
|
static int perf_boost_idx;
|
|
|
|
|
|
|
|
/* Performance Constraint region (C) threshold params */
|
|
|
|
static int perf_constrain_idx;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Performance-Energy (P-E) Space thresholds constants
|
|
|
|
*/
|
|
|
|
struct threshold_params {
|
|
|
|
int nrg_gain;
|
|
|
|
int cap_gain;
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
* System specific P-E space thresholds constants
|
|
|
|
*/
|
|
|
|
static struct threshold_params
|
|
|
|
threshold_gains[] = {
|
sched/tune: fix PB and PC cuts indexes definition
The current definition of the Performance Boost (PB) and Performance Constraint
(PC) regions is has two main issues:
1) in the computation of the boost index we overflow the thresholds_gains
table for boost=100
2) the two cuts had _NOT_ the same ratio
The last point means that when boost=0 we do _not_ have a "standard" EAS
behaviour, i.e. accepting all candidate which decrease energy regardless
of their impact on performances. Instead, we accept only schedule candidate
which are in the Optimal region, i.e. decrease energy while increasing
performances.
This behaviour can have a negative impact also on CPU selection policies
which tries to spread tasks to reduce latencies. Indeed, for example
we could end up rejecting a schedule candidate which want to move a task
from a congested CPU to an idle one while, specifically in the case where
the target CPU will be running on a lower OPP.
This patch fixes these two issues by properly clamping the boost value
in the appropriate range to compute the threshold indexes as well as
by using the same threshold index for both cuts.
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
Signed-off-by: Srinath Sridharan <srinathsr@google.com>
sched/tune: fix update of threshold index for boost groups
When SchedTune is configured to work with CGroup mode, each time we update
the boost value of a group we do not update the threshed indexes for the
definition of the Performance Boost (PC) and Performance Constraint (PC)
region. This means that while the OPP boosting and CPU biasing selection
is working as expected, the __schedtune_accept_deltas function is always
using the initial values for these cuts.
This patch ensure that each time a new boost value is configured for a
boost group, the cuts for the PB and PC region are properly updated too.
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
Signed-off-by: Srinath Sridharan <srinathsr@google.com>
sched/tune: update PC and PB cuts definition
The current definition of Performance Boost (PB) and Performance
Constraint (PC) cuts defines two "dead regions":
- up to 20% boost: we are in energy-reduction only mode, i.e.
accept all candidate which reduce energy
- over 70% boost: we are in performance-increase only mode, i.e.
accept only sched candidate which do not reduce performances
This patch uses a more fine grained configuration where these two "dead
regions" are reduced to: up to 10% and over 90%.
This should allow to have some boosting benefits starting from 10% boost
values as well as not being to much permissive starting from boost values
of 80%.
Suggested-by: Leo Yan <leo.yan@linaro.org>
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
Signed-off-by: Srinath Sridharan <srinathsr@google.com>
bug: 28312446
Change-Id: Ia326c66521e38c98e7a7eddbbb7c437875efa1ba
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
2016-07-29 15:32:26 +01:00
|
|
|
{ 0, 5 }, /* < 10% */
|
|
|
|
{ 1, 5 }, /* < 20% */
|
|
|
|
{ 2, 5 }, /* < 30% */
|
|
|
|
{ 3, 5 }, /* < 40% */
|
|
|
|
{ 4, 5 }, /* < 50% */
|
|
|
|
{ 5, 4 }, /* < 60% */
|
|
|
|
{ 5, 3 }, /* < 70% */
|
|
|
|
{ 5, 2 }, /* < 80% */
|
|
|
|
{ 5, 1 }, /* < 90% */
|
|
|
|
{ 5, 0 } /* <= 100% */
|
2016-01-12 18:12:13 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
static int
|
|
|
|
__schedtune_accept_deltas(int nrg_delta, int cap_delta,
|
|
|
|
int perf_boost_idx, int perf_constrain_idx)
|
|
|
|
{
|
|
|
|
int payoff = -INT_MAX;
|
2016-07-28 17:38:25 +01:00
|
|
|
int gain_idx = -1;
|
2016-01-12 18:12:13 +00:00
|
|
|
|
|
|
|
/* Performance Boost (B) region */
|
2016-07-28 17:38:25 +01:00
|
|
|
if (nrg_delta >= 0 && cap_delta > 0)
|
|
|
|
gain_idx = perf_boost_idx;
|
2016-01-12 18:12:13 +00:00
|
|
|
/* Performance Constraint (C) region */
|
2016-07-28 17:38:25 +01:00
|
|
|
else if (nrg_delta < 0 && cap_delta <= 0)
|
|
|
|
gain_idx = perf_constrain_idx;
|
2016-01-20 14:06:05 +00:00
|
|
|
|
2016-07-28 17:38:25 +01:00
|
|
|
/* Default: reject schedule candidate */
|
|
|
|
if (gain_idx == -1)
|
2016-01-12 18:12:13 +00:00
|
|
|
return payoff;
|
|
|
|
|
2016-07-28 17:38:25 +01:00
|
|
|
/*
|
|
|
|
* Evaluate "Performance Boost" vs "Energy Increase"
|
|
|
|
*
|
|
|
|
* - Performance Boost (B) region
|
|
|
|
*
|
|
|
|
* Condition: nrg_delta > 0 && cap_delta > 0
|
|
|
|
* Payoff criteria:
|
|
|
|
* cap_gain / nrg_gain < cap_delta / nrg_delta =
|
|
|
|
* cap_gain * nrg_delta < cap_delta * nrg_gain
|
|
|
|
* Note that since both nrg_gain and nrg_delta are positive, the
|
|
|
|
* inequality does not change. Thus:
|
|
|
|
*
|
|
|
|
* payoff = (cap_delta * nrg_gain) - (cap_gain * nrg_delta)
|
|
|
|
*
|
|
|
|
* - Performance Constraint (C) region
|
|
|
|
*
|
|
|
|
* Condition: nrg_delta < 0 && cap_delta < 0
|
|
|
|
* payoff criteria:
|
|
|
|
* cap_gain / nrg_gain > cap_delta / nrg_delta =
|
|
|
|
* cap_gain * nrg_delta < cap_delta * nrg_gain
|
|
|
|
* Note that since nrg_gain > 0 while nrg_delta < 0, the
|
|
|
|
* inequality change. Thus:
|
|
|
|
*
|
|
|
|
* payoff = (cap_delta * nrg_gain) - (cap_gain * nrg_delta)
|
|
|
|
*
|
|
|
|
* This means that, in case of same positive defined {cap,nrg}_gain
|
|
|
|
* for both the B and C regions, we can use the same payoff formula
|
|
|
|
* where a positive value represents the accept condition.
|
|
|
|
*/
|
|
|
|
payoff = cap_delta * threshold_gains[gain_idx].nrg_gain;
|
|
|
|
payoff -= nrg_delta * threshold_gains[gain_idx].cap_gain;
|
|
|
|
|
2016-01-12 18:12:13 +00:00
|
|
|
return payoff;
|
|
|
|
}
|
|
|
|
|
2015-06-23 09:17:54 +01:00
|
|
|
#ifdef CONFIG_CGROUP_SCHEDTUNE

/*
 * EAS scheduler tunables for task groups.
 */

/* SchedTune tunables for a group of tasks */
struct schedtune {
	/* SchedTune CGroup subsystem */
	struct cgroup_subsys_state css;

	/* Boost group allocated ID */
	int idx;

	/* Boost value for tasks on that SchedTune CGroup */
	int boost;

#ifdef CONFIG_SCHED_HMP
	/* Toggle ability to override sched boost enabled */
	bool sched_boost_no_override;

	/*
	 * Controls whether a cgroup is eligible for sched boost or not. This
	 * can temporarily be disabled by the kernel based on the no_override
	 * flag above.
	 */
	bool sched_boost_enabled;

	/*
	 * This tracks the default value of sched_boost_enabled and is used
	 * to restore the value following any temporary changes to that flag.
	 */
	bool sched_boost_enabled_backup;

	/*
	 * Controls whether tasks of this cgroup should be colocated with each
	 * other and tasks of other cgroups that have the same flag turned on.
	 */
	bool colocate;

	/* Controls whether further updates are allowed to the colocate flag */
	bool colocate_update_disabled;
#endif

	/* Performance Boost (B) region threshold params */
	int perf_boost_idx;

	/* Performance Constraint (C) region threshold params */
	int perf_constrain_idx;

	/*
	 * Hint to bias scheduling of tasks on that SchedTune CGroup
	 * towards idle CPUs
	 */
	int prefer_idle;
};
|
|
|
|
|
|
|
|
static inline struct schedtune *css_st(struct cgroup_subsys_state *css)
|
|
|
|
{
|
2016-10-11 18:24:43 -07:00
|
|
|
return container_of(css, struct schedtune, css);
|
2015-06-23 09:17:54 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
static inline struct schedtune *task_schedtune(struct task_struct *tsk)
|
|
|
|
{
|
|
|
|
return css_st(task_css(tsk, schedtune_cgrp_id));
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline struct schedtune *parent_st(struct schedtune *st)
|
|
|
|
{
|
|
|
|
return css_st(st->css.parent);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* SchedTune root control group
|
|
|
|
* The root control group is used to defined a system-wide boosting tuning,
|
|
|
|
* which is applied to all tasks in the system.
|
|
|
|
* Task specific boost tuning could be specified by creating and
|
|
|
|
* configuring a child control group under the root one.
|
|
|
|
* By default, system-wide boosting is disabled, i.e. no boosting is applied
|
|
|
|
* to tasks which are not into a child control group.
|
|
|
|
*/
|
|
|
|
static struct schedtune
|
|
|
|
root_schedtune = {
|
|
|
|
.boost = 0,
|
2016-08-31 16:54:12 -07:00
|
|
|
#ifdef CONFIG_SCHED_HMP
|
|
|
|
.sched_boost_no_override = false,
|
|
|
|
.sched_boost_enabled = true,
|
|
|
|
.sched_boost_enabled_backup = true,
|
|
|
|
.colocate = false,
|
|
|
|
.colocate_update_disabled = false,
|
|
|
|
#endif
|
2016-01-12 18:12:13 +00:00
|
|
|
.perf_boost_idx = 0,
|
|
|
|
.perf_constrain_idx = 0,
|
2016-07-14 13:09:03 -07:00
|
|
|
.prefer_idle = 0,
|
2015-06-23 09:17:54 +01:00
|
|
|
};
|
|
|
|
|
2016-01-12 18:12:13 +00:00
|
|
|
int
|
|
|
|
schedtune_accept_deltas(int nrg_delta, int cap_delta,
|
|
|
|
struct task_struct *task)
|
|
|
|
{
|
|
|
|
struct schedtune *ct;
|
|
|
|
int perf_boost_idx;
|
|
|
|
int perf_constrain_idx;
|
|
|
|
|
|
|
|
/* Optimal (O) region */
|
2016-01-20 14:06:05 +00:00
|
|
|
if (nrg_delta < 0 && cap_delta > 0) {
|
|
|
|
trace_sched_tune_filter(nrg_delta, cap_delta, 0, 0, 1, 0);
|
2016-01-12 18:12:13 +00:00
|
|
|
return INT_MAX;
|
2016-01-20 14:06:05 +00:00
|
|
|
}
|
2016-01-12 18:12:13 +00:00
|
|
|
|
|
|
|
/* Suboptimal (S) region */
|
2016-01-20 14:06:05 +00:00
|
|
|
if (nrg_delta > 0 && cap_delta < 0) {
|
|
|
|
trace_sched_tune_filter(nrg_delta, cap_delta, 0, 0, -1, 5);
|
2016-01-12 18:12:13 +00:00
|
|
|
return -INT_MAX;
|
2016-01-20 14:06:05 +00:00
|
|
|
}
|
2016-01-12 18:12:13 +00:00
|
|
|
|
|
|
|
/* Get task specific perf Boost/Constraints indexes */
|
|
|
|
rcu_read_lock();
|
|
|
|
ct = task_schedtune(task);
|
|
|
|
perf_boost_idx = ct->perf_boost_idx;
|
|
|
|
perf_constrain_idx = ct->perf_constrain_idx;
|
|
|
|
rcu_read_unlock();
|
|
|
|
|
|
|
|
return __schedtune_accept_deltas(nrg_delta, cap_delta,
|
|
|
|
perf_boost_idx, perf_constrain_idx);
|
|
|
|
}
|
|
|
|
|
2015-06-23 09:17:54 +01:00
|
|
|
/*
|
|
|
|
* Maximum number of boost groups to support
|
|
|
|
* When per-task boosting is used we still allow only limited number of
|
|
|
|
* boost groups for two main reasons:
|
|
|
|
* 1. on a real system we usually have only few classes of workloads which
|
|
|
|
* make sense to boost with different values (e.g. background vs foreground
|
|
|
|
* tasks, interactive vs low-priority tasks)
|
|
|
|
* 2. a limited number allows for a simpler and more memory/time efficient
|
|
|
|
* implementation especially for the computation of the per-CPU boost
|
|
|
|
* value
|
|
|
|
*/
|
2016-09-02 17:51:39 -07:00
|
|
|
#define BOOSTGROUPS_COUNT 5
|
2015-06-23 09:17:54 +01:00
|
|
|
|
|
|
|
/* Array of configured boostgroups */
|
|
|
|
static struct schedtune *allocated_group[BOOSTGROUPS_COUNT] = {
|
|
|
|
&root_schedtune,
|
|
|
|
NULL,
|
|
|
|
};
|
|
|
|
|
|
|
|
/* SchedTune boost groups
|
|
|
|
* Keep track of all the boost groups which impact on CPU, for example when a
|
|
|
|
* CPU has two RUNNABLE tasks belonging to two different boost groups and thus
|
|
|
|
* likely with different boost values.
|
|
|
|
* Since on each system we expect only a limited number of boost groups, here
|
|
|
|
* we use a simple array to keep track of the metrics required to compute the
|
|
|
|
* maximum per-CPU boosting value.
|
|
|
|
*/
|
|
|
|
struct boost_groups {
|
|
|
|
/* Maximum boost value for all RUNNABLE tasks on a CPU */
|
2016-07-28 17:28:55 +01:00
|
|
|
bool idle;
|
|
|
|
int boost_max;
|
2015-06-23 09:17:54 +01:00
|
|
|
struct {
|
|
|
|
/* The boost for tasks on that boost group */
|
2016-07-28 17:28:55 +01:00
|
|
|
int boost;
|
2015-06-23 09:17:54 +01:00
|
|
|
/* Count of RUNNABLE tasks on that boost group */
|
|
|
|
unsigned tasks;
|
|
|
|
} group[BOOSTGROUPS_COUNT];
|
FIXUP: sched/tune: fix accounting for runnable tasks
Contains:
sched/tune: fix accounting for runnable tasks (1/5)
The accounting for tasks into boost groups of different CPUs is currently
broken mainly because:
a) we do not properly track the change of boost group of a RUNNABLE task
b) there are race conditions between migration code and accounting code
This patch provides a fixes to ensure enqueue/dequeue
accounting also for throttled tasks.
Without this patch is can happen that a task is enqueued into a throttled
RQ thus not being accounted for the boosting of the corresponding RQ.
We could argue that a throttled task should not boost a CPU, however:
a) properly implementing CPU boosting considering throttled tasks will
increase a lot the complexity of the solution
b) it's not easy to quantify the benefits introduced by such a more
complex solution
Since task throttling requires the usage of the CFS bandwidth controller,
which is not widely used on mobile systems (at least not by Android kernels
so far), for the time being we go for the simple solution and boost also
for throttled RQs.
sched/tune: fix accounting for runnable tasks (2/5)
This patch provides the code required to enforce proper locking.
A per boost group spinlock has been added to grant atomic
accounting of tasks as well as to serialise enqueue/dequeue operations,
triggered by tasks migrations, with cgroups's attach/detach operations.
sched/tune: fix accounting for runnable tasks (3/5)
This patch adds cgroups {allow,can,cancel}_attach callbacks.
Since a task can be migrated between boost groups while it's running,
the CGroups's attach callbacks have been added to properly migrate
boost contributions of RUNNABLE tasks.
The RQ's lock is used to serialise enqueue/dequeue operations, triggered
by tasks migrations, with cgroups's attach/detach operations. While the
SchedTune's CPU lock is used to grant atrocity of the accounting within
the CPU.
NOTE: the current implementation does not allows a concurrent CPU migration
and CGroups change.
sched/tune: fix accounting for runnable tasks (4/5)
This fixes accounting for exiting tasks by adding a dedicated call early
in the do_exit() syscall, which disables SchedTune accounting as soon as a
task is flagged PF_EXITING.
This flag is set before the multiple dequeue/enqueue dance triggered
by cgroup_exit() which is useful only to inject useless tasks movements
thus increasing possibilities for race conditions with the migration code.
The schedtune_exit_task() call does the last dequeue of a task from its
current boost group. This is a solution more aligned with what happens in
mainline kernels (>v4.4) where the exit_cgroup does not move anymore a dying
task to the root control group.
sched/tune: fix accounting for runnable tasks (5/5)
To avoid accounting issues at startup, this patch disable the SchedTune
accounting until the required data structures have been properly
initialized.
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
[jstultz: fwdported to 4.4]
Signed-off-by: John Stultz <john.stultz@linaro.org>
2016-07-28 18:44:40 +01:00
|
|
|
/* CPU's boost group locking */
|
|
|
|
raw_spinlock_t lock;
|
2015-06-23 09:17:54 +01:00
|
|
|
};
|
|
|
|
|
|
|
|
/* Boost groups affecting each CPU in the system */
|
|
|
|
DEFINE_PER_CPU(struct boost_groups, cpu_boost_groups);
|
|
|
|
|
2016-08-31 16:54:12 -07:00
|
|
|
#ifdef CONFIG_SCHED_HMP
/* Reset the HMP specific tunables of @st to their default values. */
static inline void init_sched_boost(struct schedtune *st)
{
	st->sched_boost_no_override = false;
	st->sched_boost_enabled = true;
	st->sched_boost_enabled_backup = st->sched_boost_enabled;
	st->colocate = false;
	st->colocate_update_disabled = false;
}
|
|
|
|
|
|
|
|
/* Return true if @tsk1 and @tsk2 belong to the same boost group. */
bool same_schedtune(struct task_struct *tsk1, struct task_struct *tsk2)
{
	return task_schedtune(tsk1) == task_schedtune(tsk2);
}
|
|
|
|
|
|
|
|
void update_cgroup_boost_settings(void)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < BOOSTGROUPS_COUNT; i++) {
|
|
|
|
if (!allocated_group[i])
|
|
|
|
break;
|
|
|
|
|
|
|
|
if (allocated_group[i]->sched_boost_no_override)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
allocated_group[i]->sched_boost_enabled = false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void restore_cgroup_boost_settings(void)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < BOOSTGROUPS_COUNT; i++) {
|
|
|
|
if (!allocated_group[i])
|
|
|
|
break;
|
|
|
|
|
|
|
|
allocated_group[i]->sched_boost_enabled =
|
|
|
|
allocated_group[i]->sched_boost_enabled_backup;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
bool task_sched_boost(struct task_struct *p)
|
|
|
|
{
|
|
|
|
struct schedtune *st = task_schedtune(p);
|
|
|
|
|
|
|
|
return st->sched_boost_enabled;
|
|
|
|
}
|
|
|
|
|
|
|
|
static u64
|
|
|
|
sched_boost_override_read(struct cgroup_subsys_state *css,
|
|
|
|
struct cftype *cft)
|
|
|
|
{
|
|
|
|
struct schedtune *st = css_st(css);
|
|
|
|
|
|
|
|
return st->sched_boost_no_override;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int sched_boost_override_write(struct cgroup_subsys_state *css,
				      struct cftype *cft, u64 override)
{
	/* Normalize any non-zero user value to true. */
	css_st(css)->sched_boost_no_override = !!override;

	return 0;
}
|
|
|
|
|
|
|
|
static u64 sched_boost_enabled_read(struct cgroup_subsys_state *css,
|
|
|
|
struct cftype *cft)
|
|
|
|
{
|
|
|
|
struct schedtune *st = css_st(css);
|
|
|
|
|
|
|
|
return st->sched_boost_enabled;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int sched_boost_enabled_write(struct cgroup_subsys_state *css,
				     struct cftype *cft, u64 enable)
{
	struct schedtune *st = css_st(css);

	/* Update the flag and keep the backup in sync for later restore. */
	st->sched_boost_enabled = !!enable;
	st->sched_boost_enabled_backup = st->sched_boost_enabled;

	return 0;
}
|
|
|
|
|
|
|
|
static u64 sched_colocate_read(struct cgroup_subsys_state *css,
|
|
|
|
struct cftype *cft)
|
|
|
|
{
|
|
|
|
struct schedtune *st = css_st(css);
|
|
|
|
|
|
|
|
return st->colocate;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int sched_colocate_write(struct cgroup_subsys_state *css,
				struct cftype *cft, u64 colocate)
{
	struct schedtune *st = css_st(css);

	/* The colocate flag is write-once: reject any further update. */
	if (st->colocate_update_disabled)
		return -EPERM;

	st->colocate = !!colocate;
	st->colocate_update_disabled = true;

	return 0;
}
|
|
|
|
|
|
|
|
#else /* CONFIG_SCHED_HMP */

/* HMP disabled: no HMP specific tunables to initialize. */
static inline void init_sched_boost(struct schedtune *st) { }

#endif /* CONFIG_SCHED_HMP */
|
|
|
|
|
2016-01-14 12:31:35 +00:00
|
|
|
static void
|
|
|
|
schedtune_cpu_update(int cpu)
|
|
|
|
{
|
|
|
|
struct boost_groups *bg;
|
2016-07-28 17:28:55 +01:00
|
|
|
int boost_max;
|
2016-01-14 12:31:35 +00:00
|
|
|
int idx;
|
|
|
|
|
|
|
|
bg = &per_cpu(cpu_boost_groups, cpu);
|
|
|
|
|
|
|
|
/* The root boost group is always active */
|
|
|
|
boost_max = bg->group[0].boost;
|
|
|
|
for (idx = 1; idx < BOOSTGROUPS_COUNT; ++idx) {
|
|
|
|
/*
|
|
|
|
* A boost group affects a CPU only if it has
|
|
|
|
* RUNNABLE tasks on that CPU
|
|
|
|
*/
|
|
|
|
if (bg->group[idx].tasks == 0)
|
|
|
|
continue;
|
2016-07-28 17:28:55 +01:00
|
|
|
|
2016-01-14 12:31:35 +00:00
|
|
|
boost_max = max(boost_max, bg->group[idx].boost);
|
|
|
|
}
|
2016-07-28 17:28:55 +01:00
|
|
|
/* Ensures boost_max is non-negative when all cgroup boost values
|
|
|
|
* are neagtive. Avoids under-accounting of cpu capacity which may cause
|
|
|
|
* task stacking and frequency spikes.*/
|
|
|
|
boost_max = max(boost_max, 0);
|
2016-01-14 12:31:35 +00:00
|
|
|
bg->boost_max = boost_max;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
schedtune_boostgroup_update(int idx, int boost)
|
|
|
|
{
|
|
|
|
struct boost_groups *bg;
|
|
|
|
int cur_boost_max;
|
|
|
|
int old_boost;
|
|
|
|
int cpu;
|
|
|
|
|
|
|
|
/* Update per CPU boost groups */
|
|
|
|
for_each_possible_cpu(cpu) {
|
|
|
|
bg = &per_cpu(cpu_boost_groups, cpu);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Keep track of current boost values to compute the per CPU
|
|
|
|
* maximum only when it has been affected by the new value of
|
|
|
|
* the updated boost group
|
|
|
|
*/
|
|
|
|
cur_boost_max = bg->boost_max;
|
|
|
|
old_boost = bg->group[idx].boost;
|
|
|
|
|
|
|
|
/* Update the boost value of this boost group */
|
|
|
|
bg->group[idx].boost = boost;
|
|
|
|
|
|
|
|
/* Check if this update increase current max */
|
|
|
|
if (boost > cur_boost_max && bg->group[idx].tasks) {
|
|
|
|
bg->boost_max = boost;
|
2015-06-24 15:36:08 +01:00
|
|
|
trace_sched_tune_boostgroup_update(cpu, 1, bg->boost_max);
|
2016-01-14 12:31:35 +00:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Check if this update has decreased current max */
|
2015-06-24 15:36:08 +01:00
|
|
|
if (cur_boost_max == old_boost && old_boost > boost) {
|
2016-01-14 12:31:35 +00:00
|
|
|
schedtune_cpu_update(cpu);
|
2015-06-24 15:36:08 +01:00
|
|
|
trace_sched_tune_boostgroup_update(cpu, -1, bg->boost_max);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
trace_sched_tune_boostgroup_update(cpu, 0, bg->boost_max);
|
2016-01-14 12:31:35 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
FIXUP: sched/tune: fix accounting for runnable tasks
Contains:
sched/tune: fix accounting for runnable tasks (1/5)
The accounting for tasks into boost groups of different CPUs is currently
broken mainly because:
a) we do not properly track the change of boost group of a RUNNABLE task
b) there are race conditions between migration code and accounting code
This patch provides a fixes to ensure enqueue/dequeue
accounting also for throttled tasks.
Without this patch it can happen that a task is enqueued into a throttled
RQ thus not being accounted for the boosting of the corresponding RQ.
We could argue that a throttled task should not boost a CPU, however:
a) properly implementing CPU boosting considering throttled tasks will
increase a lot the complexity of the solution
b) it's not easy to quantify the benefits introduced by such a more
complex solution
Since task throttling requires the usage of the CFS bandwidth controller,
which is not widely used on mobile systems (at least not by Android kernels
so far), for the time being we go for the simple solution and boost also
for throttled RQs.
sched/tune: fix accounting for runnable tasks (2/5)
This patch provides the code required to enforce proper locking.
A per boost group spinlock has been added to grant atomic
accounting of tasks as well as to serialise enqueue/dequeue operations,
triggered by tasks migrations, with cgroups's attach/detach operations.
sched/tune: fix accounting for runnable tasks (3/5)
This patch adds cgroups {allow,can,cancel}_attach callbacks.
Since a task can be migrated between boost groups while it's running,
the CGroups's attach callbacks have been added to properly migrate
boost contributions of RUNNABLE tasks.
The RQ's lock is used to serialise enqueue/dequeue operations, triggered
by tasks migrations, with cgroups's attach/detach operations. While the
SchedTune's CPU lock is used to guarantee atomicity of the accounting within
the CPU.
NOTE: the current implementation does not allows a concurrent CPU migration
and CGroups change.
sched/tune: fix accounting for runnable tasks (4/5)
This fixes accounting for exiting tasks by adding a dedicated call early
in the do_exit() syscall, which disables SchedTune accounting as soon as a
task is flagged PF_EXITING.
This flag is set before the multiple dequeue/enqueue dance triggered
by cgroup_exit() which is useful only to inject useless tasks movements
thus increasing possibilities for race conditions with the migration code.
The schedtune_exit_task() call does the last dequeue of a task from its
current boost group. This is a solution more aligned with what happens in
mainline kernels (>v4.4) where the exit_cgroup does not move anymore a dying
task to the root control group.
sched/tune: fix accounting for runnable tasks (5/5)
To avoid accounting issues at startup, this patch disable the SchedTune
accounting until the required data structures have been properly
initialized.
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
[jstultz: fwdported to 4.4]
Signed-off-by: John Stultz <john.stultz@linaro.org>
2016-07-28 18:44:40 +01:00
|
|
|
#define ENQUEUE_TASK 1
|
|
|
|
#define DEQUEUE_TASK -1
|
|
|
|
|
2015-07-07 15:33:20 +01:00
|
|
|
static inline void
|
|
|
|
schedtune_tasks_update(struct task_struct *p, int cpu, int idx, int task_count)
|
|
|
|
{
|
FIXUP: sched/tune: fix accounting for runnable tasks
Contains:
sched/tune: fix accounting for runnable tasks (1/5)
The accounting for tasks into boost groups of different CPUs is currently
broken mainly because:
a) we do not properly track the change of boost group of a RUNNABLE task
b) there are race conditions between migration code and accounting code
This patch provides a fix to ensure enqueue/dequeue
accounting also for throttled tasks.
Without this patch it can happen that a task is enqueued into a throttled
RQ thus not being accounted for the boosting of the corresponding RQ.
We could argue that a throttled task should not boost a CPU, however:
a) properly implementing CPU boosting considering throttled tasks will
increase a lot the complexity of the solution
b) it's not easy to quantify the benefits introduced by such a more
complex solution
Since task throttling requires the usage of the CFS bandwidth controller,
which is not widely used on mobile systems (at least not by Android kernels
so far), for the time being we go for the simple solution and boost also
for throttled RQs.
sched/tune: fix accounting for runnable tasks (2/5)
This patch provides the code required to enforce proper locking.
A per boost group spinlock has been added to grant atomic
accounting of tasks as well as to serialise enqueue/dequeue operations,
triggered by tasks migrations, with cgroups's attach/detach operations.
sched/tune: fix accounting for runnable tasks (3/5)
This patch adds cgroups {allow,can,cancel}_attach callbacks.
Since a task can be migrated between boost groups while it's running,
the CGroups's attach callbacks have been added to properly migrate
boost contributions of RUNNABLE tasks.
The RQ's lock is used to serialise enqueue/dequeue operations, triggered
by tasks migrations, with cgroups's attach/detach operations. While the
SchedTune's CPU lock is used to grant atomicity of the accounting within
the CPU.
NOTE: the current implementation does not allow a concurrent CPU migration
and CGroups change.
sched/tune: fix accounting for runnable tasks (4/5)
This fixes accounting for exiting tasks by adding a dedicated call early
in the do_exit() syscall, which disables SchedTune accounting as soon as a
task is flagged PF_EXITING.
This flag is set before the multiple dequeue/enqueue dance triggered
by cgroup_exit() which is useful only to inject useless tasks movements
thus increasing possibilities for race conditions with the migration code.
The schedtune_exit_task() call does the last dequeue of a task from its
current boost group. This is a solution more aligned with what happens in
mainline kernels (>v4.4) where the exit_cgroup does not move anymore a dying
task to the root control group.
sched/tune: fix accounting for runnable tasks (5/5)
To avoid accounting issues at startup, this patch disables the SchedTune
accounting until the required data structures have been properly
initialized.
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
[jstultz: fwdported to 4.4]
Signed-off-by: John Stultz <john.stultz@linaro.org>
2016-07-28 18:44:40 +01:00
|
|
|
struct boost_groups *bg = &per_cpu(cpu_boost_groups, cpu);
|
|
|
|
int tasks = bg->group[idx].tasks + task_count;
|
2015-07-07 15:33:20 +01:00
|
|
|
|
|
|
|
/* Update boosted tasks count while avoiding to make it negative */
|
FIXUP: sched/tune: fix accounting for runnable tasks
Contains:
sched/tune: fix accounting for runnable tasks (1/5)
The accounting for tasks into boost groups of different CPUs is currently
broken mainly because:
a) we do not properly track the change of boost group of a RUNNABLE task
b) there are race conditions between migration code and accounting code
This patch provides a fix to ensure enqueue/dequeue
accounting also for throttled tasks.
Without this patch it can happen that a task is enqueued into a throttled
RQ thus not being accounted for the boosting of the corresponding RQ.
We could argue that a throttled task should not boost a CPU, however:
a) properly implementing CPU boosting considering throttled tasks will
increase a lot the complexity of the solution
b) it's not easy to quantify the benefits introduced by such a more
complex solution
Since task throttling requires the usage of the CFS bandwidth controller,
which is not widely used on mobile systems (at least not by Android kernels
so far), for the time being we go for the simple solution and boost also
for throttled RQs.
sched/tune: fix accounting for runnable tasks (2/5)
This patch provides the code required to enforce proper locking.
A per boost group spinlock has been added to grant atomic
accounting of tasks as well as to serialise enqueue/dequeue operations,
triggered by tasks migrations, with cgroups's attach/detach operations.
sched/tune: fix accounting for runnable tasks (3/5)
This patch adds cgroups {allow,can,cancel}_attach callbacks.
Since a task can be migrated between boost groups while it's running,
the CGroups's attach callbacks have been added to properly migrate
boost contributions of RUNNABLE tasks.
The RQ's lock is used to serialise enqueue/dequeue operations, triggered
by tasks migrations, with cgroups's attach/detach operations. While the
SchedTune's CPU lock is used to grant atomicity of the accounting within
the CPU.
NOTE: the current implementation does not allow a concurrent CPU migration
and CGroups change.
sched/tune: fix accounting for runnable tasks (4/5)
This fixes accounting for exiting tasks by adding a dedicated call early
in the do_exit() syscall, which disables SchedTune accounting as soon as a
task is flagged PF_EXITING.
This flag is set before the multiple dequeue/enqueue dance triggered
by cgroup_exit() which is useful only to inject useless tasks movements
thus increasing possibilities for race conditions with the migration code.
The schedtune_exit_task() call does the last dequeue of a task from its
current boost group. This is a solution more aligned with what happens in
mainline kernels (>v4.4) where the exit_cgroup does not move anymore a dying
task to the root control group.
sched/tune: fix accounting for runnable tasks (5/5)
To avoid accounting issues at startup, this patch disables the SchedTune
accounting until the required data structures have been properly
initialized.
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
[jstultz: fwdported to 4.4]
Signed-off-by: John Stultz <john.stultz@linaro.org>
2016-07-28 18:44:40 +01:00
|
|
|
bg->group[idx].tasks = max(0, tasks);
|
2015-06-24 15:36:08 +01:00
|
|
|
|
|
|
|
trace_sched_tune_tasks_update(p, cpu, tasks, idx,
|
|
|
|
bg->group[idx].boost, bg->boost_max);
|
|
|
|
|
FIXUP: sched/tune: fix accounting for runnable tasks
Contains:
sched/tune: fix accounting for runnable tasks (1/5)
The accounting for tasks into boost groups of different CPUs is currently
broken mainly because:
a) we do not properly track the change of boost group of a RUNNABLE task
b) there are race conditions between migration code and accounting code
This patch provides a fix to ensure enqueue/dequeue
accounting also for throttled tasks.
Without this patch it can happen that a task is enqueued into a throttled
RQ thus not being accounted for the boosting of the corresponding RQ.
We could argue that a throttled task should not boost a CPU, however:
a) properly implementing CPU boosting considering throttled tasks will
increase a lot the complexity of the solution
b) it's not easy to quantify the benefits introduced by such a more
complex solution
Since task throttling requires the usage of the CFS bandwidth controller,
which is not widely used on mobile systems (at least not by Android kernels
so far), for the time being we go for the simple solution and boost also
for throttled RQs.
sched/tune: fix accounting for runnable tasks (2/5)
This patch provides the code required to enforce proper locking.
A per boost group spinlock has been added to grant atomic
accounting of tasks as well as to serialise enqueue/dequeue operations,
triggered by tasks migrations, with cgroups's attach/detach operations.
sched/tune: fix accounting for runnable tasks (3/5)
This patch adds cgroups {allow,can,cancel}_attach callbacks.
Since a task can be migrated between boost groups while it's running,
the CGroups's attach callbacks have been added to properly migrate
boost contributions of RUNNABLE tasks.
The RQ's lock is used to serialise enqueue/dequeue operations, triggered
by tasks migrations, with cgroups's attach/detach operations. While the
SchedTune's CPU lock is used to grant atomicity of the accounting within
the CPU.
NOTE: the current implementation does not allow a concurrent CPU migration
and CGroups change.
sched/tune: fix accounting for runnable tasks (4/5)
This fixes accounting for exiting tasks by adding a dedicated call early
in the do_exit() syscall, which disables SchedTune accounting as soon as a
task is flagged PF_EXITING.
This flag is set before the multiple dequeue/enqueue dance triggered
by cgroup_exit() which is useful only to inject useless tasks movements
thus increasing possibilities for race conditions with the migration code.
The schedtune_exit_task() call does the last dequeue of a task from its
current boost group. This is a solution more aligned with what happens in
mainline kernels (>v4.4) where the exit_cgroup does not move anymore a dying
task to the root control group.
sched/tune: fix accounting for runnable tasks (5/5)
To avoid accounting issues at startup, this patch disables the SchedTune
accounting until the required data structures have been properly
initialized.
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
[jstultz: fwdported to 4.4]
Signed-off-by: John Stultz <john.stultz@linaro.org>
2016-07-28 18:44:40 +01:00
|
|
|
/* Boost group activation or deactivation on that RQ */
|
|
|
|
if (tasks == 1 || tasks == 0)
|
|
|
|
schedtune_cpu_update(cpu);
|
2015-07-07 15:33:20 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* NOTE: This function must be called while holding the lock on the CPU RQ
|
|
|
|
*/
|
|
|
|
void schedtune_enqueue_task(struct task_struct *p, int cpu)
|
|
|
|
{
|
FIXUP: sched/tune: fix accounting for runnable tasks
Contains:
sched/tune: fix accounting for runnable tasks (1/5)
The accounting for tasks into boost groups of different CPUs is currently
broken mainly because:
a) we do not properly track the change of boost group of a RUNNABLE task
b) there are race conditions between migration code and accounting code
This patch provides a fix to ensure enqueue/dequeue
accounting also for throttled tasks.
Without this patch it can happen that a task is enqueued into a throttled
RQ thus not being accounted for the boosting of the corresponding RQ.
We could argue that a throttled task should not boost a CPU, however:
a) properly implementing CPU boosting considering throttled tasks will
increase a lot the complexity of the solution
b) it's not easy to quantify the benefits introduced by such a more
complex solution
Since task throttling requires the usage of the CFS bandwidth controller,
which is not widely used on mobile systems (at least not by Android kernels
so far), for the time being we go for the simple solution and boost also
for throttled RQs.
sched/tune: fix accounting for runnable tasks (2/5)
This patch provides the code required to enforce proper locking.
A per boost group spinlock has been added to grant atomic
accounting of tasks as well as to serialise enqueue/dequeue operations,
triggered by tasks migrations, with cgroups's attach/detach operations.
sched/tune: fix accounting for runnable tasks (3/5)
This patch adds cgroups {allow,can,cancel}_attach callbacks.
Since a task can be migrated between boost groups while it's running,
the CGroups's attach callbacks have been added to properly migrate
boost contributions of RUNNABLE tasks.
The RQ's lock is used to serialise enqueue/dequeue operations, triggered
by tasks migrations, with cgroups's attach/detach operations. While the
SchedTune's CPU lock is used to grant atomicity of the accounting within
the CPU.
NOTE: the current implementation does not allow a concurrent CPU migration
and CGroups change.
sched/tune: fix accounting for runnable tasks (4/5)
This fixes accounting for exiting tasks by adding a dedicated call early
in the do_exit() syscall, which disables SchedTune accounting as soon as a
task is flagged PF_EXITING.
This flag is set before the multiple dequeue/enqueue dance triggered
by cgroup_exit() which is useful only to inject useless tasks movements
thus increasing possibilities for race conditions with the migration code.
The schedtune_exit_task() call does the last dequeue of a task from its
current boost group. This is a solution more aligned with what happens in
mainline kernels (>v4.4) where the exit_cgroup does not move anymore a dying
task to the root control group.
sched/tune: fix accounting for runnable tasks (5/5)
To avoid accounting issues at startup, this patch disables the SchedTune
accounting until the required data structures have been properly
initialized.
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
[jstultz: fwdported to 4.4]
Signed-off-by: John Stultz <john.stultz@linaro.org>
2016-07-28 18:44:40 +01:00
|
|
|
struct boost_groups *bg = &per_cpu(cpu_boost_groups, cpu);
|
|
|
|
unsigned long irq_flags;
|
2015-07-07 15:33:20 +01:00
|
|
|
struct schedtune *st;
|
|
|
|
int idx;
|
|
|
|
|
FIXUP: sched/tune: fix accounting for runnable tasks
Contains:
sched/tune: fix accounting for runnable tasks (1/5)
The accounting for tasks into boost groups of different CPUs is currently
broken mainly because:
a) we do not properly track the change of boost group of a RUNNABLE task
b) there are race conditions between migration code and accounting code
This patch provides a fix to ensure enqueue/dequeue
accounting also for throttled tasks.
Without this patch it can happen that a task is enqueued into a throttled
RQ thus not being accounted for the boosting of the corresponding RQ.
We could argue that a throttled task should not boost a CPU, however:
a) properly implementing CPU boosting considering throttled tasks will
increase a lot the complexity of the solution
b) it's not easy to quantify the benefits introduced by such a more
complex solution
Since task throttling requires the usage of the CFS bandwidth controller,
which is not widely used on mobile systems (at least not by Android kernels
so far), for the time being we go for the simple solution and boost also
for throttled RQs.
sched/tune: fix accounting for runnable tasks (2/5)
This patch provides the code required to enforce proper locking.
A per boost group spinlock has been added to grant atomic
accounting of tasks as well as to serialise enqueue/dequeue operations,
triggered by tasks migrations, with cgroups's attach/detach operations.
sched/tune: fix accounting for runnable tasks (3/5)
This patch adds cgroups {allow,can,cancel}_attach callbacks.
Since a task can be migrated between boost groups while it's running,
the CGroups's attach callbacks have been added to properly migrate
boost contributions of RUNNABLE tasks.
The RQ's lock is used to serialise enqueue/dequeue operations, triggered
by tasks migrations, with cgroups's attach/detach operations. While the
SchedTune's CPU lock is used to grant atomicity of the accounting within
the CPU.
NOTE: the current implementation does not allow a concurrent CPU migration
and CGroups change.
sched/tune: fix accounting for runnable tasks (4/5)
This fixes accounting for exiting tasks by adding a dedicated call early
in the do_exit() syscall, which disables SchedTune accounting as soon as a
task is flagged PF_EXITING.
This flag is set before the multiple dequeue/enqueue dance triggered
by cgroup_exit() which is useful only to inject useless tasks movements
thus increasing possibilities for race conditions with the migration code.
The schedtune_exit_task() call does the last dequeue of a task from its
current boost group. This is a solution more aligned with what happens in
mainline kernels (>v4.4) where the exit_cgroup does not move anymore a dying
task to the root control group.
sched/tune: fix accounting for runnable tasks (5/5)
To avoid accounting issues at startup, this patch disables the SchedTune
accounting until the required data structures have been properly
initialized.
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
[jstultz: fwdported to 4.4]
Signed-off-by: John Stultz <john.stultz@linaro.org>
2016-07-28 18:44:40 +01:00
|
|
|
if (!unlikely(schedtune_initialized))
|
|
|
|
return;
|
|
|
|
|
2015-07-07 15:33:20 +01:00
|
|
|
/*
|
|
|
|
* When a task is marked PF_EXITING by do_exit() it's going to be
|
|
|
|
* dequeued and enqueued multiple times in the exit path.
|
|
|
|
* Thus we avoid any further update, since we do not want to change
|
|
|
|
* CPU boosting while the task is exiting.
|
|
|
|
*/
|
|
|
|
if (p->flags & PF_EXITING)
|
|
|
|
return;
|
|
|
|
|
FIXUP: sched/tune: fix accounting for runnable tasks
Contains:
sched/tune: fix accounting for runnable tasks (1/5)
The accounting for tasks into boost groups of different CPUs is currently
broken mainly because:
a) we do not properly track the change of boost group of a RUNNABLE task
b) there are race conditions between migration code and accounting code
This patch provides a fix to ensure enqueue/dequeue
accounting also for throttled tasks.
Without this patch it can happen that a task is enqueued into a throttled
RQ thus not being accounted for the boosting of the corresponding RQ.
We could argue that a throttled task should not boost a CPU, however:
a) properly implementing CPU boosting considering throttled tasks will
increase a lot the complexity of the solution
b) it's not easy to quantify the benefits introduced by such a more
complex solution
Since task throttling requires the usage of the CFS bandwidth controller,
which is not widely used on mobile systems (at least not by Android kernels
so far), for the time being we go for the simple solution and boost also
for throttled RQs.
sched/tune: fix accounting for runnable tasks (2/5)
This patch provides the code required to enforce proper locking.
A per boost group spinlock has been added to grant atomic
accounting of tasks as well as to serialise enqueue/dequeue operations,
triggered by tasks migrations, with cgroups's attach/detach operations.
sched/tune: fix accounting for runnable tasks (3/5)
This patch adds cgroups {allow,can,cancel}_attach callbacks.
Since a task can be migrated between boost groups while it's running,
the CGroups's attach callbacks have been added to properly migrate
boost contributions of RUNNABLE tasks.
The RQ's lock is used to serialise enqueue/dequeue operations, triggered
by tasks migrations, with cgroups's attach/detach operations. While the
SchedTune's CPU lock is used to grant atomicity of the accounting within
the CPU.
NOTE: the current implementation does not allow a concurrent CPU migration
and CGroups change.
sched/tune: fix accounting for runnable tasks (4/5)
This fixes accounting for exiting tasks by adding a dedicated call early
in the do_exit() syscall, which disables SchedTune accounting as soon as a
task is flagged PF_EXITING.
This flag is set before the multiple dequeue/enqueue dance triggered
by cgroup_exit() which is useful only to inject useless tasks movements
thus increasing possibilities for race conditions with the migration code.
The schedtune_exit_task() call does the last dequeue of a task from its
current boost group. This is a solution more aligned with what happens in
mainline kernels (>v4.4) where the exit_cgroup does not move anymore a dying
task to the root control group.
sched/tune: fix accounting for runnable tasks (5/5)
To avoid accounting issues at startup, this patch disables the SchedTune
accounting until the required data structures have been properly
initialized.
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
[jstultz: fwdported to 4.4]
Signed-off-by: John Stultz <john.stultz@linaro.org>
2016-07-28 18:44:40 +01:00
|
|
|
/*
|
|
|
|
* Boost group accounting is protected by a per-cpu lock and requires
|
|
|
|
* interrupt to be disabled to avoid race conditions for example on
|
|
|
|
* do_exit()::cgroup_exit() and task migration.
|
|
|
|
*/
|
|
|
|
raw_spin_lock_irqsave(&bg->lock, irq_flags);
|
2015-07-07 15:33:20 +01:00
|
|
|
rcu_read_lock();
|
FIXUP: sched/tune: fix accounting for runnable tasks
Contains:
sched/tune: fix accounting for runnable tasks (1/5)
The accounting for tasks into boost groups of different CPUs is currently
broken mainly because:
a) we do not properly track the change of boost group of a RUNNABLE task
b) there are race conditions between migration code and accounting code
This patch provides a fix to ensure enqueue/dequeue
accounting also for throttled tasks.
Without this patch it can happen that a task is enqueued into a throttled
RQ thus not being accounted for the boosting of the corresponding RQ.
We could argue that a throttled task should not boost a CPU, however:
a) properly implementing CPU boosting considering throttled tasks will
increase a lot the complexity of the solution
b) it's not easy to quantify the benefits introduced by such a more
complex solution
Since task throttling requires the usage of the CFS bandwidth controller,
which is not widely used on mobile systems (at least not by Android kernels
so far), for the time being we go for the simple solution and boost also
for throttled RQs.
sched/tune: fix accounting for runnable tasks (2/5)
This patch provides the code required to enforce proper locking.
A per boost group spinlock has been added to grant atomic
accounting of tasks as well as to serialise enqueue/dequeue operations,
triggered by tasks migrations, with cgroups's attach/detach operations.
sched/tune: fix accounting for runnable tasks (3/5)
This patch adds cgroups {allow,can,cancel}_attach callbacks.
Since a task can be migrated between boost groups while it's running,
the CGroups's attach callbacks have been added to properly migrate
boost contributions of RUNNABLE tasks.
The RQ's lock is used to serialise enqueue/dequeue operations, triggered
by tasks migrations, with cgroups's attach/detach operations. While the
SchedTune's CPU lock is used to grant atomicity of the accounting within
the CPU.
NOTE: the current implementation does not allow a concurrent CPU migration
and CGroups change.
sched/tune: fix accounting for runnable tasks (4/5)
This fixes accounting for exiting tasks by adding a dedicated call early
in the do_exit() syscall, which disables SchedTune accounting as soon as a
task is flagged PF_EXITING.
This flag is set before the multiple dequeue/enqueue dance triggered
by cgroup_exit() which is useful only to inject useless tasks movements
thus increasing possibilities for race conditions with the migration code.
The schedtune_exit_task() call does the last dequeue of a task from its
current boost group. This is a solution more aligned with what happens in
mainline kernels (>v4.4) where the exit_cgroup does not move anymore a dying
task to the root control group.
sched/tune: fix accounting for runnable tasks (5/5)
To avoid accounting issues at startup, this patch disables the SchedTune
accounting until the required data structures have been properly
initialized.
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
[jstultz: fwdported to 4.4]
Signed-off-by: John Stultz <john.stultz@linaro.org>
2016-07-28 18:44:40 +01:00
|
|
|
|
2015-07-07 15:33:20 +01:00
|
|
|
st = task_schedtune(p);
|
|
|
|
idx = st->idx;
|
FIXUP: sched/tune: fix accounting for runnable tasks
Contains:
sched/tune: fix accounting for runnable tasks (1/5)
The accounting for tasks into boost groups of different CPUs is currently
broken mainly because:
a) we do not properly track the change of boost group of a RUNNABLE task
b) there are race conditions between migration code and accounting code
This patch provides a fix to ensure enqueue/dequeue
accounting also for throttled tasks.
Without this patch it can happen that a task is enqueued into a throttled
RQ thus not being accounted for the boosting of the corresponding RQ.
We could argue that a throttled task should not boost a CPU, however:
a) properly implementing CPU boosting considering throttled tasks will
increase a lot the complexity of the solution
b) it's not easy to quantify the benefits introduced by such a more
complex solution
Since task throttling requires the usage of the CFS bandwidth controller,
which is not widely used on mobile systems (at least not by Android kernels
so far), for the time being we go for the simple solution and boost also
for throttled RQs.
sched/tune: fix accounting for runnable tasks (2/5)
This patch provides the code required to enforce proper locking.
A per boost group spinlock has been added to grant atomic
accounting of tasks as well as to serialise enqueue/dequeue operations,
triggered by tasks migrations, with cgroups's attach/detach operations.
sched/tune: fix accounting for runnable tasks (3/5)
This patch adds cgroups {allow,can,cancel}_attach callbacks.
Since a task can be migrated between boost groups while it's running,
the CGroups's attach callbacks have been added to properly migrate
boost contributions of RUNNABLE tasks.
The RQ's lock is used to serialise enqueue/dequeue operations, triggered
by tasks migrations, with cgroups's attach/detach operations. While the
SchedTune's CPU lock is used to grant atomicity of the accounting within
the CPU.
NOTE: the current implementation does not allow a concurrent CPU migration
and CGroups change.
sched/tune: fix accounting for runnable tasks (4/5)
This fixes accounting for exiting tasks by adding a dedicated call early
in the do_exit() syscall, which disables SchedTune accounting as soon as a
task is flagged PF_EXITING.
This flag is set before the multiple dequeue/enqueue dance triggered
by cgroup_exit() which is useful only to inject useless tasks movements
thus increasing possibilities for race conditions with the migration code.
The schedtune_exit_task() call does the last dequeue of a task from its
current boost group. This is a solution more aligned with what happens in
mainline kernels (>v4.4) where the exit_cgroup does not move anymore a dying
task to the root control group.
sched/tune: fix accounting for runnable tasks (5/5)
To avoid accounting issues at startup, this patch disables the SchedTune
accounting until the required data structures have been properly
initialized.
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
[jstultz: fwdported to 4.4]
Signed-off-by: John Stultz <john.stultz@linaro.org>
2016-07-28 18:44:40 +01:00
|
|
|
|
|
|
|
schedtune_tasks_update(p, cpu, idx, ENQUEUE_TASK);
|
|
|
|
|
2015-07-07 15:33:20 +01:00
|
|
|
rcu_read_unlock();
|
FIXUP: sched/tune: fix accounting for runnable tasks
Contains:
sched/tune: fix accounting for runnable tasks (1/5)
The accounting for tasks into boost groups of different CPUs is currently
broken mainly because:
a) we do not properly track the change of boost group of a RUNNABLE task
b) there are race conditions between migration code and accounting code
This patch provides a fix to ensure enqueue/dequeue
accounting also for throttled tasks.
Without this patch it can happen that a task is enqueued into a throttled
RQ thus not being accounted for the boosting of the corresponding RQ.
We could argue that a throttled task should not boost a CPU, however:
a) properly implementing CPU boosting considering throttled tasks will
increase a lot the complexity of the solution
b) it's not easy to quantify the benefits introduced by such a more
complex solution
Since task throttling requires the usage of the CFS bandwidth controller,
which is not widely used on mobile systems (at least not by Android kernels
so far), for the time being we go for the simple solution and boost also
for throttled RQs.
sched/tune: fix accounting for runnable tasks (2/5)
This patch provides the code required to enforce proper locking.
A per boost group spinlock has been added to grant atomic
accounting of tasks as well as to serialise enqueue/dequeue operations,
triggered by tasks migrations, with cgroups's attach/detach operations.
sched/tune: fix accounting for runnable tasks (3/5)
This patch adds cgroups {allow,can,cancel}_attach callbacks.
Since a task can be migrated between boost groups while it's running,
the CGroups's attach callbacks have been added to properly migrate
boost contributions of RUNNABLE tasks.
The RQ's lock is used to serialise enqueue/dequeue operations, triggered
by tasks migrations, with cgroups's attach/detach operations. While the
SchedTune's CPU lock is used to grant atomicity of the accounting within
the CPU.
NOTE: the current implementation does not allow a concurrent CPU migration
and CGroups change.
sched/tune: fix accounting for runnable tasks (4/5)
This fixes accounting for exiting tasks by adding a dedicated call early
in the do_exit() syscall, which disables SchedTune accounting as soon as a
task is flagged PF_EXITING.
This flag is set before the multiple dequeue/enqueue dance triggered
by cgroup_exit() which is useful only to inject useless tasks movements
thus increasing possibilities for race conditions with the migration code.
The schedtune_exit_task() call does the last dequeue of a task from its
current boost group. This is a solution more aligned with what happens in
mainline kernels (>v4.4) where the exit_cgroup does not move anymore a dying
task to the root control group.
sched/tune: fix accounting for runnable tasks (5/5)
To avoid accounting issues at startup, this patch disables the SchedTune
accounting until the required data structures have been properly
initialized.
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
[jstultz: fwdported to 4.4]
Signed-off-by: John Stultz <john.stultz@linaro.org>
2016-07-28 18:44:40 +01:00
|
|
|
raw_spin_unlock_irqrestore(&bg->lock, irq_flags);
|
|
|
|
}
|
|
|
|
|
|
|
|
int schedtune_can_attach(struct cgroup_taskset *tset)
|
|
|
|
{
|
|
|
|
struct task_struct *task;
|
|
|
|
struct cgroup_subsys_state *css;
|
|
|
|
struct boost_groups *bg;
|
|
|
|
unsigned long irq_flags;
|
|
|
|
unsigned int cpu;
|
|
|
|
struct rq *rq;
|
|
|
|
int src_bg; /* Source boost group index */
|
|
|
|
int dst_bg; /* Destination boost group index */
|
|
|
|
int tasks;
|
|
|
|
|
|
|
|
if (!unlikely(schedtune_initialized))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
|
|
|
|
cgroup_taskset_for_each(task, css, tset) {
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Lock the CPU's RQ the task is enqueued to avoid race
|
|
|
|
* conditions with migration code while the task is being
|
|
|
|
* accounted
|
|
|
|
*/
|
|
|
|
rq = lock_rq_of(task, &irq_flags);
|
|
|
|
|
|
|
|
if (!task->on_rq) {
|
|
|
|
unlock_rq_of(rq, task, &irq_flags);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Boost group accounting is protected by a per-cpu lock and requires
|
|
|
|
* interrupt to be disabled to avoid race conditions on...
|
|
|
|
*/
|
|
|
|
cpu = cpu_of(rq);
|
|
|
|
bg = &per_cpu(cpu_boost_groups, cpu);
|
|
|
|
raw_spin_lock(&bg->lock);
|
|
|
|
|
|
|
|
dst_bg = css_st(css)->idx;
|
|
|
|
src_bg = task_schedtune(task)->idx;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Current task is not changing boostgroup, which can
|
|
|
|
* happen when the new hierarchy is in use.
|
|
|
|
*/
|
|
|
|
if (unlikely(dst_bg == src_bg)) {
|
|
|
|
raw_spin_unlock(&bg->lock);
|
|
|
|
unlock_rq_of(rq, task, &irq_flags);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This is the case of a RUNNABLE task which is switching its
|
|
|
|
* current boost group.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* Move task from src to dst boost group */
|
|
|
|
tasks = bg->group[src_bg].tasks - 1;
|
|
|
|
bg->group[src_bg].tasks = max(0, tasks);
|
|
|
|
bg->group[dst_bg].tasks += 1;
|
|
|
|
|
|
|
|
raw_spin_unlock(&bg->lock);
|
|
|
|
unlock_rq_of(rq, task, &irq_flags);
|
|
|
|
|
|
|
|
/* Update CPU boost group */
|
|
|
|
if (bg->group[src_bg].tasks == 0 || bg->group[dst_bg].tasks == 1)
|
|
|
|
schedtune_cpu_update(task_cpu(task));
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
2015-07-07 15:33:20 +01:00
|
|
|
|
FIXUP: sched/tune: fix accounting for runnable tasks
Contains:
sched/tune: fix accounting for runnable tasks (1/5)
The accounting for tasks into boost groups of different CPUs is currently
broken mainly because:
a) we do not properly track the change of boost group of a RUNNABLE task
b) there are race conditions between migration code and accounting code
This patch provides a fix to ensure enqueue/dequeue
accounting also for throttled tasks.
Without this patch it can happen that a task is enqueued into a throttled
RQ thus not being accounted for the boosting of the corresponding RQ.
We could argue that a throttled task should not boost a CPU, however:
a) properly implementing CPU boosting considering throttled tasks will
increase a lot the complexity of the solution
b) it's not easy to quantify the benefits introduced by such a more
complex solution
Since task throttling requires the usage of the CFS bandwidth controller,
which is not widely used on mobile systems (at least not by Android kernels
so far), for the time being we go for the simple solution and boost also
for throttled RQs.
sched/tune: fix accounting for runnable tasks (2/5)
This patch provides the code required to enforce proper locking.
A per boost group spinlock has been added to grant atomic
accounting of tasks as well as to serialise enqueue/dequeue operations,
triggered by tasks migrations, with cgroups's attach/detach operations.
sched/tune: fix accounting for runnable tasks (3/5)
This patch adds cgroups {allow,can,cancel}_attach callbacks.
Since a task can be migrated between boost groups while it's running,
the CGroups's attach callbacks have been added to properly migrate
boost contributions of RUNNABLE tasks.
The RQ's lock is used to serialise enqueue/dequeue operations, triggered
by tasks migrations, with cgroups's attach/detach operations. While the
SchedTune's CPU lock is used to grant atomicity of the accounting within
the CPU.
NOTE: the current implementation does not allow a concurrent CPU migration
and CGroups change.
sched/tune: fix accounting for runnable tasks (4/5)
This fixes accounting for exiting tasks by adding a dedicated call early
in the do_exit() syscall, which disables SchedTune accounting as soon as a
task is flagged PF_EXITING.
This flag is set before the multiple dequeue/enqueue dance triggered
by cgroup_exit() which is useful only to inject useless tasks movements
thus increasing possibilities for race conditions with the migration code.
The schedtune_exit_task() call does the last dequeue of a task from its
current boost group. This is a solution more aligned with what happens in
mainline kernels (>v4.4) where the exit_cgroup does not move anymore a dying
task to the root control group.
sched/tune: fix accounting for runnable tasks (5/5)
To avoid accounting issues at startup, this patch disables the SchedTune
accounting until the required data structures have been properly
initialized.
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
[jstultz: fwdported to 4.4]
Signed-off-by: John Stultz <john.stultz@linaro.org>
2016-07-28 18:44:40 +01:00
|
|
|
void schedtune_cancel_attach(struct cgroup_taskset *tset)
{
	/* This can happen only if SchedTune controller is mounted with
	 * other hierarchies and one of them fails. Since usually SchedTune is
	 * mounted on its own hierarchy, for the time being we do not implement
	 * a proper rollback mechanism */
	WARN(1, "SchedTune cancel attach not implemented");
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* NOTE: This function must be called while holding the lock on the CPU RQ
|
|
|
|
*/
|
|
|
|
void schedtune_dequeue_task(struct task_struct *p, int cpu)
|
|
|
|
{
|
FIXUP: sched/tune: fix accounting for runnable tasks
Contains:
sched/tune: fix accounting for runnable tasks (1/5)
The accounting for tasks into boost groups of different CPUs is currently
broken mainly because:
a) we do not properly track the change of boost group of a RUNNABLE task
b) there are race conditions between migration code and accounting code
This patch provides a fixes to ensure enqueue/dequeue
accounting also for throttled tasks.
Without this patch is can happen that a task is enqueued into a throttled
RQ thus not being accounted for the boosting of the corresponding RQ.
We could argue that a throttled task should not boost a CPU, however:
a) properly implementing CPU boosting considering throttled tasks will
increase a lot the complexity of the solution
b) it's not easy to quantify the benefits introduced by such a more
complex solution
Since task throttling requires the usage of the CFS bandwidth controller,
which is not widely used on mobile systems (at least not by Android kernels
so far), for the time being we go for the simple solution and boost also
for throttled RQs.
sched/tune: fix accounting for runnable tasks (2/5)
This patch provides the code required to enforce proper locking.
A per boost group spinlock has been added to grant atomic
accounting of tasks as well as to serialise enqueue/dequeue operations,
triggered by tasks migrations, with cgroups's attach/detach operations.
sched/tune: fix accounting for runnable tasks (3/5)
This patch adds cgroups {allow,can,cancel}_attach callbacks.
Since a task can be migrated between boost groups while it's running,
the CGroups's attach callbacks have been added to properly migrate
boost contributions of RUNNABLE tasks.
The RQ's lock is used to serialise enqueue/dequeue operations, triggered
by tasks migrations, with cgroups's attach/detach operations. While the
SchedTune's CPU lock is used to grant atrocity of the accounting within
the CPU.
NOTE: the current implementation does not allows a concurrent CPU migration
and CGroups change.
sched/tune: fix accounting for runnable tasks (4/5)
This fixes accounting for exiting tasks by adding a dedicated call early
in the do_exit() syscall, which disables SchedTune accounting as soon as a
task is flagged PF_EXITING.
This flag is set before the multiple dequeue/enqueue dance triggered
by cgroup_exit() which is useful only to inject useless tasks movements
thus increasing possibilities for race conditions with the migration code.
The schedtune_exit_task() call does the last dequeue of a task from its
current boost group. This is a solution more aligned with what happens in
mainline kernels (>v4.4) where the exit_cgroup does not move anymore a dying
task to the root control group.
sched/tune: fix accounting for runnable tasks (5/5)
To avoid accounting issues at startup, this patch disable the SchedTune
accounting until the required data structures have been properly
initialized.
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
[jstultz: fwdported to 4.4]
Signed-off-by: John Stultz <john.stultz@linaro.org>
2016-07-28 18:44:40 +01:00
|
|
|
struct boost_groups *bg = &per_cpu(cpu_boost_groups, cpu);
|
|
|
|
unsigned long irq_flags;
|
2015-07-07 15:33:20 +01:00
|
|
|
struct schedtune *st;
|
|
|
|
int idx;
|
|
|
|
|
FIXUP: sched/tune: fix accounting for runnable tasks
Contains:
sched/tune: fix accounting for runnable tasks (1/5)
The accounting for tasks into boost groups of different CPUs is currently
broken mainly because:
a) we do not properly track the change of boost group of a RUNNABLE task
b) there are race conditions between migration code and accounting code
This patch provides a fixes to ensure enqueue/dequeue
accounting also for throttled tasks.
Without this patch is can happen that a task is enqueued into a throttled
RQ thus not being accounted for the boosting of the corresponding RQ.
We could argue that a throttled task should not boost a CPU, however:
a) properly implementing CPU boosting considering throttled tasks will
increase a lot the complexity of the solution
b) it's not easy to quantify the benefits introduced by such a more
complex solution
Since task throttling requires the usage of the CFS bandwidth controller,
which is not widely used on mobile systems (at least not by Android kernels
so far), for the time being we go for the simple solution and boost also
for throttled RQs.
sched/tune: fix accounting for runnable tasks (2/5)
This patch provides the code required to enforce proper locking.
A per boost group spinlock has been added to grant atomic
accounting of tasks as well as to serialise enqueue/dequeue operations,
triggered by tasks migrations, with cgroups's attach/detach operations.
sched/tune: fix accounting for runnable tasks (3/5)
This patch adds cgroups {allow,can,cancel}_attach callbacks.
Since a task can be migrated between boost groups while it's running,
the CGroups's attach callbacks have been added to properly migrate
boost contributions of RUNNABLE tasks.
The RQ's lock is used to serialise enqueue/dequeue operations, triggered
by tasks migrations, with cgroups's attach/detach operations. While the
SchedTune's CPU lock is used to grant atrocity of the accounting within
the CPU.
NOTE: the current implementation does not allows a concurrent CPU migration
and CGroups change.
sched/tune: fix accounting for runnable tasks (4/5)
This fixes accounting for exiting tasks by adding a dedicated call early
in the do_exit() syscall, which disables SchedTune accounting as soon as a
task is flagged PF_EXITING.
This flag is set before the multiple dequeue/enqueue dance triggered
by cgroup_exit() which is useful only to inject useless tasks movements
thus increasing possibilities for race conditions with the migration code.
The schedtune_exit_task() call does the last dequeue of a task from its
current boost group. This is a solution more aligned with what happens in
mainline kernels (>v4.4) where the exit_cgroup does not move anymore a dying
task to the root control group.
sched/tune: fix accounting for runnable tasks (5/5)
To avoid accounting issues at startup, this patch disable the SchedTune
accounting until the required data structures have been properly
initialized.
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
[jstultz: fwdported to 4.4]
Signed-off-by: John Stultz <john.stultz@linaro.org>
2016-07-28 18:44:40 +01:00
|
|
|
if (!unlikely(schedtune_initialized))
|
|
|
|
return;
|
|
|
|
|
2015-07-07 15:33:20 +01:00
|
|
|
/*
|
|
|
|
* When a task is marked PF_EXITING by do_exit() it's going to be
|
|
|
|
* dequeued and enqueued multiple times in the exit path.
|
|
|
|
* Thus we avoid any further update, since we do not want to change
|
|
|
|
* CPU boosting while the task is exiting.
|
FIXUP: sched/tune: fix accounting for runnable tasks
Contains:
sched/tune: fix accounting for runnable tasks (1/5)
The accounting for tasks into boost groups of different CPUs is currently
broken mainly because:
a) we do not properly track the change of boost group of a RUNNABLE task
b) there are race conditions between migration code and accounting code
This patch provides a fixes to ensure enqueue/dequeue
accounting also for throttled tasks.
Without this patch is can happen that a task is enqueued into a throttled
RQ thus not being accounted for the boosting of the corresponding RQ.
We could argue that a throttled task should not boost a CPU, however:
a) properly implementing CPU boosting considering throttled tasks will
increase a lot the complexity of the solution
b) it's not easy to quantify the benefits introduced by such a more
complex solution
Since task throttling requires the usage of the CFS bandwidth controller,
which is not widely used on mobile systems (at least not by Android kernels
so far), for the time being we go for the simple solution and boost also
for throttled RQs.
sched/tune: fix accounting for runnable tasks (2/5)
This patch provides the code required to enforce proper locking.
A per boost group spinlock has been added to grant atomic
accounting of tasks as well as to serialise enqueue/dequeue operations,
triggered by tasks migrations, with cgroups's attach/detach operations.
sched/tune: fix accounting for runnable tasks (3/5)
This patch adds cgroups {allow,can,cancel}_attach callbacks.
Since a task can be migrated between boost groups while it's running,
the CGroups's attach callbacks have been added to properly migrate
boost contributions of RUNNABLE tasks.
The RQ's lock is used to serialise enqueue/dequeue operations, triggered
by tasks migrations, with cgroups's attach/detach operations. While the
SchedTune's CPU lock is used to grant atrocity of the accounting within
the CPU.
NOTE: the current implementation does not allows a concurrent CPU migration
and CGroups change.
sched/tune: fix accounting for runnable tasks (4/5)
This fixes accounting for exiting tasks by adding a dedicated call early
in the do_exit() syscall, which disables SchedTune accounting as soon as a
task is flagged PF_EXITING.
This flag is set before the multiple dequeue/enqueue dance triggered
by cgroup_exit() which is useful only to inject useless tasks movements
thus increasing possibilities for race conditions with the migration code.
The schedtune_exit_task() call does the last dequeue of a task from its
current boost group. This is a solution more aligned with what happens in
mainline kernels (>v4.4) where the exit_cgroup does not move anymore a dying
task to the root control group.
sched/tune: fix accounting for runnable tasks (5/5)
To avoid accounting issues at startup, this patch disable the SchedTune
accounting until the required data structures have been properly
initialized.
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
[jstultz: fwdported to 4.4]
Signed-off-by: John Stultz <john.stultz@linaro.org>
2016-07-28 18:44:40 +01:00
|
|
|
* The last dequeue is already enforce by the do_exit() code path
|
|
|
|
* via schedtune_exit_task().
|
2015-07-07 15:33:20 +01:00
|
|
|
*/
|
|
|
|
if (p->flags & PF_EXITING)
|
|
|
|
return;
|
|
|
|
|
FIXUP: sched/tune: fix accounting for runnable tasks
Contains:
sched/tune: fix accounting for runnable tasks (1/5)
The accounting for tasks into boost groups of different CPUs is currently
broken mainly because:
a) we do not properly track the change of boost group of a RUNNABLE task
b) there are race conditions between migration code and accounting code
This patch provides a fixes to ensure enqueue/dequeue
accounting also for throttled tasks.
Without this patch is can happen that a task is enqueued into a throttled
RQ thus not being accounted for the boosting of the corresponding RQ.
We could argue that a throttled task should not boost a CPU, however:
a) properly implementing CPU boosting considering throttled tasks will
increase a lot the complexity of the solution
b) it's not easy to quantify the benefits introduced by such a more
complex solution
Since task throttling requires the usage of the CFS bandwidth controller,
which is not widely used on mobile systems (at least not by Android kernels
so far), for the time being we go for the simple solution and boost also
for throttled RQs.
sched/tune: fix accounting for runnable tasks (2/5)
This patch provides the code required to enforce proper locking.
A per boost group spinlock has been added to grant atomic
accounting of tasks as well as to serialise enqueue/dequeue operations,
triggered by tasks migrations, with cgroups's attach/detach operations.
sched/tune: fix accounting for runnable tasks (3/5)
This patch adds cgroups {allow,can,cancel}_attach callbacks.
Since a task can be migrated between boost groups while it's running,
the CGroups's attach callbacks have been added to properly migrate
boost contributions of RUNNABLE tasks.
The RQ's lock is used to serialise enqueue/dequeue operations, triggered
by tasks migrations, with cgroups's attach/detach operations. While the
SchedTune's CPU lock is used to grant atrocity of the accounting within
the CPU.
NOTE: the current implementation does not allows a concurrent CPU migration
and CGroups change.
sched/tune: fix accounting for runnable tasks (4/5)
This fixes accounting for exiting tasks by adding a dedicated call early
in the do_exit() syscall, which disables SchedTune accounting as soon as a
task is flagged PF_EXITING.
This flag is set before the multiple dequeue/enqueue dance triggered
by cgroup_exit() which is useful only to inject useless tasks movements
thus increasing possibilities for race conditions with the migration code.
The schedtune_exit_task() call does the last dequeue of a task from its
current boost group. This is a solution more aligned with what happens in
mainline kernels (>v4.4) where the exit_cgroup does not move anymore a dying
task to the root control group.
sched/tune: fix accounting for runnable tasks (5/5)
To avoid accounting issues at startup, this patch disable the SchedTune
accounting until the required data structures have been properly
initialized.
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
[jstultz: fwdported to 4.4]
Signed-off-by: John Stultz <john.stultz@linaro.org>
2016-07-28 18:44:40 +01:00
|
|
|
/*
|
|
|
|
* Boost group accouting is protected by a per-cpu lock and requires
|
|
|
|
* interrupt to be disabled to avoid race conditions on...
|
|
|
|
*/
|
|
|
|
raw_spin_lock_irqsave(&bg->lock, irq_flags);
|
2015-07-07 15:33:20 +01:00
|
|
|
rcu_read_lock();
|
FIXUP: sched/tune: fix accounting for runnable tasks
Contains:
sched/tune: fix accounting for runnable tasks (1/5)
The accounting for tasks into boost groups of different CPUs is currently
broken mainly because:
a) we do not properly track the change of boost group of a RUNNABLE task
b) there are race conditions between migration code and accounting code
This patch provides a fixes to ensure enqueue/dequeue
accounting also for throttled tasks.
Without this patch is can happen that a task is enqueued into a throttled
RQ thus not being accounted for the boosting of the corresponding RQ.
We could argue that a throttled task should not boost a CPU, however:
a) properly implementing CPU boosting considering throttled tasks will
increase a lot the complexity of the solution
b) it's not easy to quantify the benefits introduced by such a more
complex solution
Since task throttling requires the usage of the CFS bandwidth controller,
which is not widely used on mobile systems (at least not by Android kernels
so far), for the time being we go for the simple solution and boost also
for throttled RQs.
sched/tune: fix accounting for runnable tasks (2/5)
This patch provides the code required to enforce proper locking.
A per boost group spinlock has been added to grant atomic
accounting of tasks as well as to serialise enqueue/dequeue operations,
triggered by tasks migrations, with cgroups's attach/detach operations.
sched/tune: fix accounting for runnable tasks (3/5)
This patch adds cgroups {allow,can,cancel}_attach callbacks.
Since a task can be migrated between boost groups while it's running,
the CGroups's attach callbacks have been added to properly migrate
boost contributions of RUNNABLE tasks.
The RQ's lock is used to serialise enqueue/dequeue operations, triggered
by tasks migrations, with cgroups's attach/detach operations. While the
SchedTune's CPU lock is used to grant atrocity of the accounting within
the CPU.
NOTE: the current implementation does not allows a concurrent CPU migration
and CGroups change.
sched/tune: fix accounting for runnable tasks (4/5)
This fixes accounting for exiting tasks by adding a dedicated call early
in the do_exit() syscall, which disables SchedTune accounting as soon as a
task is flagged PF_EXITING.
This flag is set before the multiple dequeue/enqueue dance triggered
by cgroup_exit() which is useful only to inject useless tasks movements
thus increasing possibilities for race conditions with the migration code.
The schedtune_exit_task() call does the last dequeue of a task from its
current boost group. This is a solution more aligned with what happens in
mainline kernels (>v4.4) where the exit_cgroup does not move anymore a dying
task to the root control group.
sched/tune: fix accounting for runnable tasks (5/5)
To avoid accounting issues at startup, this patch disable the SchedTune
accounting until the required data structures have been properly
initialized.
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
[jstultz: fwdported to 4.4]
Signed-off-by: John Stultz <john.stultz@linaro.org>
2016-07-28 18:44:40 +01:00
|
|
|
|
2015-07-07 15:33:20 +01:00
|
|
|
st = task_schedtune(p);
|
|
|
|
idx = st->idx;
|
FIXUP: sched/tune: fix accounting for runnable tasks
Contains:
sched/tune: fix accounting for runnable tasks (1/5)
The accounting for tasks into boost groups of different CPUs is currently
broken mainly because:
a) we do not properly track the change of boost group of a RUNNABLE task
b) there are race conditions between migration code and accounting code
This patch provides a fixes to ensure enqueue/dequeue
accounting also for throttled tasks.
Without this patch is can happen that a task is enqueued into a throttled
RQ thus not being accounted for the boosting of the corresponding RQ.
We could argue that a throttled task should not boost a CPU, however:
a) properly implementing CPU boosting considering throttled tasks will
increase a lot the complexity of the solution
b) it's not easy to quantify the benefits introduced by such a more
complex solution
Since task throttling requires the usage of the CFS bandwidth controller,
which is not widely used on mobile systems (at least not by Android kernels
so far), for the time being we go for the simple solution and boost also
for throttled RQs.
sched/tune: fix accounting for runnable tasks (2/5)
This patch provides the code required to enforce proper locking.
A per boost group spinlock has been added to grant atomic
accounting of tasks as well as to serialise enqueue/dequeue operations,
triggered by tasks migrations, with cgroups's attach/detach operations.
sched/tune: fix accounting for runnable tasks (3/5)
This patch adds cgroups {allow,can,cancel}_attach callbacks.
Since a task can be migrated between boost groups while it's running,
the CGroups's attach callbacks have been added to properly migrate
boost contributions of RUNNABLE tasks.
The RQ's lock is used to serialise enqueue/dequeue operations, triggered
by tasks migrations, with cgroups's attach/detach operations. While the
SchedTune's CPU lock is used to grant atrocity of the accounting within
the CPU.
NOTE: the current implementation does not allows a concurrent CPU migration
and CGroups change.
sched/tune: fix accounting for runnable tasks (4/5)
This fixes accounting for exiting tasks by adding a dedicated call early
in the do_exit() syscall, which disables SchedTune accounting as soon as a
task is flagged PF_EXITING.
This flag is set before the multiple dequeue/enqueue dance triggered
by cgroup_exit() which is useful only to inject useless tasks movements
thus increasing possibilities for race conditions with the migration code.
The schedtune_exit_task() call does the last dequeue of a task from its
current boost group. This is a solution more aligned with what happens in
mainline kernels (>v4.4) where the exit_cgroup does not move anymore a dying
task to the root control group.
sched/tune: fix accounting for runnable tasks (5/5)
To avoid accounting issues at startup, this patch disable the SchedTune
accounting until the required data structures have been properly
initialized.
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
[jstultz: fwdported to 4.4]
Signed-off-by: John Stultz <john.stultz@linaro.org>
2016-07-28 18:44:40 +01:00
|
|
|
|
|
|
|
schedtune_tasks_update(p, cpu, idx, DEQUEUE_TASK);
|
|
|
|
|
2015-07-07 15:33:20 +01:00
|
|
|
rcu_read_unlock();
|
FIXUP: sched/tune: fix accounting for runnable tasks
Contains:
sched/tune: fix accounting for runnable tasks (1/5)
The accounting for tasks into boost groups of different CPUs is currently
broken mainly because:
a) we do not properly track the change of boost group of a RUNNABLE task
b) there are race conditions between migration code and accounting code
This patch provides a fixes to ensure enqueue/dequeue
accounting also for throttled tasks.
Without this patch is can happen that a task is enqueued into a throttled
RQ thus not being accounted for the boosting of the corresponding RQ.
We could argue that a throttled task should not boost a CPU, however:
a) properly implementing CPU boosting considering throttled tasks will
increase a lot the complexity of the solution
b) it's not easy to quantify the benefits introduced by such a more
complex solution
Since task throttling requires the usage of the CFS bandwidth controller,
which is not widely used on mobile systems (at least not by Android kernels
so far), for the time being we go for the simple solution and boost also
for throttled RQs.
sched/tune: fix accounting for runnable tasks (2/5)
This patch provides the code required to enforce proper locking.
A per boost group spinlock has been added to grant atomic
accounting of tasks as well as to serialise enqueue/dequeue operations,
triggered by tasks migrations, with cgroups's attach/detach operations.
sched/tune: fix accounting for runnable tasks (3/5)
This patch adds cgroups {allow,can,cancel}_attach callbacks.
Since a task can be migrated between boost groups while it's running,
the CGroups's attach callbacks have been added to properly migrate
boost contributions of RUNNABLE tasks.
The RQ's lock is used to serialise enqueue/dequeue operations, triggered
by tasks migrations, with cgroups's attach/detach operations. While the
SchedTune's CPU lock is used to grant atrocity of the accounting within
the CPU.
NOTE: the current implementation does not allows a concurrent CPU migration
and CGroups change.
sched/tune: fix accounting for runnable tasks (4/5)
This fixes accounting for exiting tasks by adding a dedicated call early
in the do_exit() syscall, which disables SchedTune accounting as soon as a
task is flagged PF_EXITING.
This flag is set before the multiple dequeue/enqueue dance triggered
by cgroup_exit() which is useful only to inject useless tasks movements
thus increasing possibilities for race conditions with the migration code.
The schedtune_exit_task() call does the last dequeue of a task from its
current boost group. This is a solution more aligned with what happens in
mainline kernels (>v4.4) where the exit_cgroup does not move anymore a dying
task to the root control group.
sched/tune: fix accounting for runnable tasks (5/5)
To avoid accounting issues at startup, this patch disable the SchedTune
accounting until the required data structures have been properly
initialized.
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
[jstultz: fwdported to 4.4]
Signed-off-by: John Stultz <john.stultz@linaro.org>
2016-07-28 18:44:40 +01:00
|
|
|
raw_spin_unlock_irqrestore(&bg->lock, irq_flags);
|
|
|
|
}
|
|
|
|
|
|
|
|
void schedtune_exit_task(struct task_struct *tsk)
|
|
|
|
{
|
|
|
|
struct schedtune *st;
|
|
|
|
unsigned long irq_flags;
|
|
|
|
unsigned int cpu;
|
|
|
|
struct rq *rq;
|
|
|
|
int idx;
|
|
|
|
|
|
|
|
if (!unlikely(schedtune_initialized))
|
|
|
|
return;
|
2015-07-07 15:33:20 +01:00
|
|
|
|
FIXUP: sched/tune: fix accounting for runnable tasks
Contains:
sched/tune: fix accounting for runnable tasks (1/5)
The accounting for tasks into boost groups of different CPUs is currently
broken mainly because:
a) we do not properly track the change of boost group of a RUNNABLE task
b) there are race conditions between migration code and accounting code
This patch provides a fixes to ensure enqueue/dequeue
accounting also for throttled tasks.
Without this patch is can happen that a task is enqueued into a throttled
RQ thus not being accounted for the boosting of the corresponding RQ.
We could argue that a throttled task should not boost a CPU, however:
a) properly implementing CPU boosting considering throttled tasks will
increase a lot the complexity of the solution
b) it's not easy to quantify the benefits introduced by such a more
complex solution
Since task throttling requires the usage of the CFS bandwidth controller,
which is not widely used on mobile systems (at least not by Android kernels
so far), for the time being we go for the simple solution and boost also
for throttled RQs.
sched/tune: fix accounting for runnable tasks (2/5)
This patch provides the code required to enforce proper locking.
A per boost group spinlock has been added to grant atomic
accounting of tasks as well as to serialise enqueue/dequeue operations,
triggered by tasks migrations, with cgroups's attach/detach operations.
sched/tune: fix accounting for runnable tasks (3/5)
This patch adds cgroups {allow,can,cancel}_attach callbacks.
Since a task can be migrated between boost groups while it's running,
the CGroups's attach callbacks have been added to properly migrate
boost contributions of RUNNABLE tasks.
The RQ's lock is used to serialise enqueue/dequeue operations, triggered
by tasks migrations, with cgroups's attach/detach operations. While the
SchedTune's CPU lock is used to grant atrocity of the accounting within
the CPU.
NOTE: the current implementation does not allows a concurrent CPU migration
and CGroups change.
sched/tune: fix accounting for runnable tasks (4/5)
This fixes accounting for exiting tasks by adding a dedicated call early
in the do_exit() syscall, which disables SchedTune accounting as soon as a
task is flagged PF_EXITING.
This flag is set before the multiple dequeue/enqueue dance triggered
by cgroup_exit() which is useful only to inject useless tasks movements
thus increasing possibilities for race conditions with the migration code.
The schedtune_exit_task() call does the last dequeue of a task from its
current boost group. This is a solution more aligned with what happens in
mainline kernels (>v4.4) where the exit_cgroup does not move anymore a dying
task to the root control group.
sched/tune: fix accounting for runnable tasks (5/5)
To avoid accounting issues at startup, this patch disable the SchedTune
accounting until the required data structures have been properly
initialized.
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
[jstultz: fwdported to 4.4]
Signed-off-by: John Stultz <john.stultz@linaro.org>
2016-07-28 18:44:40 +01:00
|
|
|
rq = lock_rq_of(tsk, &irq_flags);
|
|
|
|
rcu_read_lock();
|
|
|
|
|
|
|
|
cpu = cpu_of(rq);
|
|
|
|
st = task_schedtune(tsk);
|
|
|
|
idx = st->idx;
|
|
|
|
schedtune_tasks_update(tsk, cpu, idx, DEQUEUE_TASK);
|
|
|
|
|
|
|
|
rcu_read_unlock();
|
|
|
|
unlock_rq_of(rq, tsk, &irq_flags);
|
2015-07-07 15:33:20 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
int schedtune_cpu_boost(int cpu)
|
|
|
|
{
|
|
|
|
struct boost_groups *bg;
|
|
|
|
|
|
|
|
bg = &per_cpu(cpu_boost_groups, cpu);
|
|
|
|
return bg->boost_max;
|
|
|
|
}
|
|
|
|
|
sched/fair: add boosted task utilization
The task utilization signal, which is derived from PELT signals and
properly scaled to be architecture and frequency invariant, is used by
EAS as an estimation of the task requirements in terms of CPU bandwidth.
When the energy aware scheduler is in use, this signal affects the CPU
selection. Thus, a convenient way to bias that decision, which is also
little intrusive, is to boost the task utilization signal each time it
is required to support them.
This patch introduces the new function:
boosted_task_util(task)
which returns a boosted value for the utilization of the specified task.
The margin added to the original utilization is:
1. computed based on the "boosting strategy" in use
2. proportional to boost value defined either by the sysctl interface,
when global boosting is in use, or the "taskgroup" value, when
per-task boosting is enabled.
The boosted signal is used by EAS
a. transparently, via its integration into the task_fits() function
b. explicitly, in the energy-aware wakeup path
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
2016-01-14 18:31:53 +00:00
|
|
|
int schedtune_task_boost(struct task_struct *p)
|
|
|
|
{
|
|
|
|
struct schedtune *st;
|
|
|
|
int task_boost;
|
|
|
|
|
2017-03-27 18:20:20 +01:00
|
|
|
if (!unlikely(schedtune_initialized))
|
|
|
|
return 0;
|
|
|
|
|
sched/fair: add boosted task utilization
The task utilization signal, which is derived from PELT signals and
properly scaled to be architecture and frequency invariant, is used by
EAS as an estimation of the task requirements in terms of CPU bandwidth.
When the energy aware scheduler is in use, this signal affects the CPU
selection. Thus, a convenient way to bias that decision, which is also
little intrusive, is to boost the task utilization signal each time it
is required to support them.
This patch introduces the new function:
boosted_task_util(task)
which returns a boosted value for the utilization of the specified task.
The margin added to the original utilization is:
1. computed based on the "boosting strategy" in use
2. proportional to boost value defined either by the sysctl interface,
when global boosting is in use, or the "taskgroup" value, when
per-task boosting is enabled.
The boosted signal is used by EAS
a. transparently, via its integration into the task_fits() function
b. explicitly, in the energy-aware wakeup path
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
2016-01-14 18:31:53 +00:00
|
|
|
/* Get task boost value */
|
|
|
|
rcu_read_lock();
|
|
|
|
st = task_schedtune(p);
|
|
|
|
task_boost = st->boost;
|
|
|
|
rcu_read_unlock();
|
|
|
|
|
|
|
|
return task_boost;
|
|
|
|
}
|
|
|
|
|
2016-07-14 13:09:03 -07:00
|
|
|
int schedtune_prefer_idle(struct task_struct *p)
|
|
|
|
{
|
|
|
|
struct schedtune *st;
|
|
|
|
int prefer_idle;
|
|
|
|
|
2017-03-27 18:20:20 +01:00
|
|
|
if (!unlikely(schedtune_initialized))
|
|
|
|
return 0;
|
|
|
|
|
2016-07-14 13:09:03 -07:00
|
|
|
/* Get prefer_idle value */
|
|
|
|
rcu_read_lock();
|
|
|
|
st = task_schedtune(p);
|
|
|
|
prefer_idle = st->prefer_idle;
|
|
|
|
rcu_read_unlock();
|
|
|
|
|
|
|
|
return prefer_idle;
|
|
|
|
}
|
|
|
|
|
2015-06-23 09:17:54 +01:00
|
|
|
static u64
|
2016-07-14 13:09:03 -07:00
|
|
|
prefer_idle_read(struct cgroup_subsys_state *css, struct cftype *cft)
|
|
|
|
{
|
|
|
|
struct schedtune *st = css_st(css);
|
|
|
|
|
|
|
|
return st->prefer_idle;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
prefer_idle_write(struct cgroup_subsys_state *css, struct cftype *cft,
|
|
|
|
u64 prefer_idle)
|
|
|
|
{
|
|
|
|
struct schedtune *st = css_st(css);
|
|
|
|
st->prefer_idle = prefer_idle;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2016-07-28 17:28:55 +01:00
|
|
|
static s64
|
2015-06-23 09:17:54 +01:00
|
|
|
boost_read(struct cgroup_subsys_state *css, struct cftype *cft)
|
|
|
|
{
|
|
|
|
struct schedtune *st = css_st(css);
|
|
|
|
|
|
|
|
return st->boost;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
boost_write(struct cgroup_subsys_state *css, struct cftype *cft,
|
2016-07-28 17:28:55 +01:00
|
|
|
s64 boost)
|
2015-06-23 09:17:54 +01:00
|
|
|
{
|
|
|
|
struct schedtune *st = css_st(css);
|
sched/tune: fix PB and PC cuts indexes definition
The current definition of the Performance Boost (PB) and Performance Constraint
(PC) regions is has two main issues:
1) in the computation of the boost index we overflow the thresholds_gains
table for boost=100
2) the two cuts had _NOT_ the same ratio
The last point means that when boost=0 we do _not_ have a "standard" EAS
behaviour, i.e. accepting all candidate which decrease energy regardless
of their impact on performances. Instead, we accept only schedule candidate
which are in the Optimal region, i.e. decrease energy while increasing
performances.
This behaviour can have a negative impact also on CPU selection policies
which tries to spread tasks to reduce latencies. Indeed, for example
we could end up rejecting a schedule candidate which want to move a task
from a congested CPU to an idle one while, specifically in the case where
the target CPU will be running on a lower OPP.
This patch fixes these two issues by properly clamping the boost value
in the appropriate range to compute the threshold indexes as well as
by using the same threshold index for both cuts.
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
Signed-off-by: Srinath Sridharan <srinathsr@google.com>
sched/tune: fix update of threshold index for boost groups
When SchedTune is configured to work with CGroup mode, each time we update
the boost value of a group we do not update the threshed indexes for the
definition of the Performance Boost (PC) and Performance Constraint (PC)
region. This means that while the OPP boosting and CPU biasing selection
is working as expected, the __schedtune_accept_deltas function is always
using the initial values for these cuts.
This patch ensure that each time a new boost value is configured for a
boost group, the cuts for the PB and PC region are properly updated too.
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
Signed-off-by: Srinath Sridharan <srinathsr@google.com>
sched/tune: update PC and PB cuts definition
The current definition of Performance Boost (PB) and Performance
Constraint (PC) cuts defines two "dead regions":
- up to 20% boost: we are in energy-reduction only mode, i.e.
accept all candidate which reduce energy
- over 70% boost: we are in performance-increase only mode, i.e.
accept only sched candidate which do not reduce performances
This patch uses a more fine grained configuration where these two "dead
regions" are reduced to: up to 10% and over 90%.
This should allow to have some boosting benefits starting from 10% boost
values as well as not being to much permissive starting from boost values
of 80%.
Suggested-by: Leo Yan <leo.yan@linaro.org>
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
Signed-off-by: Srinath Sridharan <srinathsr@google.com>
bug: 28312446
Change-Id: Ia326c66521e38c98e7a7eddbbb7c437875efa1ba
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
2016-07-29 15:32:26 +01:00
|
|
|
unsigned threshold_idx;
|
|
|
|
int boost_pct;
|
2015-06-23 09:17:54 +01:00
|
|
|
|
2016-07-28 17:28:55 +01:00
|
|
|
if (boost < -100 || boost > 100)
|
2015-06-23 09:17:54 +01:00
|
|
|
return -EINVAL;
|
sched/tune: fix PB and PC cuts indexes definition
The current definition of the Performance Boost (PB) and Performance Constraint
(PC) regions is has two main issues:
1) in the computation of the boost index we overflow the thresholds_gains
table for boost=100
2) the two cuts had _NOT_ the same ratio
The last point means that when boost=0 we do _not_ have a "standard" EAS
behaviour, i.e. accepting all candidate which decrease energy regardless
of their impact on performances. Instead, we accept only schedule candidate
which are in the Optimal region, i.e. decrease energy while increasing
performances.
This behaviour can have a negative impact also on CPU selection policies
which tries to spread tasks to reduce latencies. Indeed, for example
we could end up rejecting a schedule candidate which want to move a task
from a congested CPU to an idle one while, specifically in the case where
the target CPU will be running on a lower OPP.
This patch fixes these two issues by properly clamping the boost value
in the appropriate range to compute the threshold indexes as well as
by using the same threshold index for both cuts.
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
Signed-off-by: Srinath Sridharan <srinathsr@google.com>
sched/tune: fix update of threshold index for boost groups
When SchedTune is configured to work with CGroup mode, each time we update
the boost value of a group we do not update the threshed indexes for the
definition of the Performance Boost (PC) and Performance Constraint (PC)
region. This means that while the OPP boosting and CPU biasing selection
is working as expected, the __schedtune_accept_deltas function is always
using the initial values for these cuts.
This patch ensure that each time a new boost value is configured for a
boost group, the cuts for the PB and PC region are properly updated too.
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
Signed-off-by: Srinath Sridharan <srinathsr@google.com>
sched/tune: update PC and PB cuts definition
The current definition of Performance Boost (PB) and Performance
Constraint (PC) cuts defines two "dead regions":
- up to 20% boost: we are in energy-reduction only mode, i.e.
accept all candidate which reduce energy
- over 70% boost: we are in performance-increase only mode, i.e.
accept only sched candidate which do not reduce performances
This patch uses a more fine grained configuration where these two "dead
regions" are reduced to: up to 10% and over 90%.
This should allow to have some boosting benefits starting from 10% boost
values as well as not being to much permissive starting from boost values
of 80%.
Suggested-by: Leo Yan <leo.yan@linaro.org>
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
Signed-off-by: Srinath Sridharan <srinathsr@google.com>
bug: 28312446
Change-Id: Ia326c66521e38c98e7a7eddbbb7c437875efa1ba
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
2016-07-29 15:32:26 +01:00
|
|
|
boost_pct = boost;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Update threshold params for Performance Boost (B)
|
|
|
|
* and Performance Constraint (C) regions.
|
|
|
|
* The current implementatio uses the same cuts for both
|
|
|
|
* B and C regions.
|
|
|
|
*/
|
|
|
|
threshold_idx = clamp(boost_pct, 0, 99) / 10;
|
|
|
|
st->perf_boost_idx = threshold_idx;
|
|
|
|
st->perf_constrain_idx = threshold_idx;
|
2015-06-23 09:17:54 +01:00
|
|
|
|
|
|
|
st->boost = boost;
|
sched/tune: fix PB and PC cuts indexes definition
The current definition of the Performance Boost (PB) and Performance Constraint
(PC) regions is has two main issues:
1) in the computation of the boost index we overflow the thresholds_gains
table for boost=100
2) the two cuts had _NOT_ the same ratio
The last point means that when boost=0 we do _not_ have a "standard" EAS
behaviour, i.e. accepting all candidate which decrease energy regardless
of their impact on performances. Instead, we accept only schedule candidate
which are in the Optimal region, i.e. decrease energy while increasing
performances.
This behaviour can have a negative impact also on CPU selection policies
which tries to spread tasks to reduce latencies. Indeed, for example
we could end up rejecting a schedule candidate which want to move a task
from a congested CPU to an idle one while, specifically in the case where
the target CPU will be running on a lower OPP.
This patch fixes these two issues by properly clamping the boost value
in the appropriate range to compute the threshold indexes as well as
by using the same threshold index for both cuts.
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
Signed-off-by: Srinath Sridharan <srinathsr@google.com>
sched/tune: fix update of threshold index for boost groups
When SchedTune is configured to work with CGroup mode, each time we update
the boost value of a group we do not update the threshed indexes for the
definition of the Performance Boost (PC) and Performance Constraint (PC)
region. This means that while the OPP boosting and CPU biasing selection
is working as expected, the __schedtune_accept_deltas function is always
using the initial values for these cuts.
This patch ensure that each time a new boost value is configured for a
boost group, the cuts for the PB and PC region are properly updated too.
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
Signed-off-by: Srinath Sridharan <srinathsr@google.com>
sched/tune: update PC and PB cuts definition
The current definition of Performance Boost (PB) and Performance
Constraint (PC) cuts defines two "dead regions":
- up to 20% boost: we are in energy-reduction only mode, i.e.
accept all candidate which reduce energy
- over 70% boost: we are in performance-increase only mode, i.e.
accept only sched candidate which do not reduce performances
This patch uses a more fine grained configuration where these two "dead
regions" are reduced to: up to 10% and over 90%.
This should allow to have some boosting benefits starting from 10% boost
values as well as not being to much permissive starting from boost values
of 80%.
Suggested-by: Leo Yan <leo.yan@linaro.org>
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
Signed-off-by: Srinath Sridharan <srinathsr@google.com>
bug: 28312446
Change-Id: Ia326c66521e38c98e7a7eddbbb7c437875efa1ba
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
2016-07-29 15:32:26 +01:00
|
|
|
if (css == &root_schedtune.css) {
|
2015-06-23 09:17:54 +01:00
|
|
|
sysctl_sched_cfs_boost = boost;
|
sched/tune: fix PB and PC cuts indexes definition
The current definition of the Performance Boost (PB) and Performance Constraint
(PC) regions is has two main issues:
1) in the computation of the boost index we overflow the thresholds_gains
table for boost=100
2) the two cuts had _NOT_ the same ratio
The last point means that when boost=0 we do _not_ have a "standard" EAS
behaviour, i.e. accepting all candidate which decrease energy regardless
of their impact on performances. Instead, we accept only schedule candidate
which are in the Optimal region, i.e. decrease energy while increasing
performances.
This behaviour can have a negative impact also on CPU selection policies
which tries to spread tasks to reduce latencies. Indeed, for example
we could end up rejecting a schedule candidate which want to move a task
from a congested CPU to an idle one while, specifically in the case where
the target CPU will be running on a lower OPP.
This patch fixes these two issues by properly clamping the boost value
in the appropriate range to compute the threshold indexes as well as
by using the same threshold index for both cuts.
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
Signed-off-by: Srinath Sridharan <srinathsr@google.com>
sched/tune: fix update of threshold index for boost groups
When SchedTune is configured to work with CGroup mode, each time we update
the boost value of a group we do not update the threshed indexes for the
definition of the Performance Boost (PC) and Performance Constraint (PC)
region. This means that while the OPP boosting and CPU biasing selection
is working as expected, the __schedtune_accept_deltas function is always
using the initial values for these cuts.
This patch ensure that each time a new boost value is configured for a
boost group, the cuts for the PB and PC region are properly updated too.
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
Signed-off-by: Srinath Sridharan <srinathsr@google.com>
sched/tune: update PC and PB cuts definition
The current definition of Performance Boost (PB) and Performance
Constraint (PC) cuts defines two "dead regions":
- up to 20% boost: we are in energy-reduction only mode, i.e.
accept all candidate which reduce energy
- over 70% boost: we are in performance-increase only mode, i.e.
accept only sched candidate which do not reduce performances
This patch uses a more fine grained configuration where these two "dead
regions" are reduced to: up to 10% and over 90%.
This should allow to have some boosting benefits starting from 10% boost
values as well as not being to much permissive starting from boost values
of 80%.
Suggested-by: Leo Yan <leo.yan@linaro.org>
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
Signed-off-by: Srinath Sridharan <srinathsr@google.com>
bug: 28312446
Change-Id: Ia326c66521e38c98e7a7eddbbb7c437875efa1ba
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
2016-07-29 15:32:26 +01:00
|
|
|
perf_boost_idx = threshold_idx;
|
|
|
|
perf_constrain_idx = threshold_idx;
|
|
|
|
}
|
2015-06-23 09:17:54 +01:00
|
|
|
|
2016-01-14 12:31:35 +00:00
|
|
|
/* Update CPU boost */
|
|
|
|
schedtune_boostgroup_update(st->idx, st->boost);
|
|
|
|
|
2015-06-22 13:49:07 +01:00
|
|
|
trace_sched_tune_config(st->boost);
|
2015-06-23 09:17:54 +01:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2016-08-31 16:54:12 -07:00
|
|
|
static void schedtune_attach(struct cgroup_taskset *tset)
|
|
|
|
{
|
|
|
|
struct task_struct *task;
|
|
|
|
struct cgroup_subsys_state *css;
|
|
|
|
struct schedtune *st;
|
|
|
|
bool colocate;
|
|
|
|
|
|
|
|
cgroup_taskset_first(tset, &css);
|
|
|
|
st = css_st(css);
|
|
|
|
|
|
|
|
colocate = st->colocate;
|
|
|
|
|
|
|
|
cgroup_taskset_for_each(task, css, tset)
|
|
|
|
sync_cgroup_colocation(task, colocate);
|
|
|
|
}
|
|
|
|
|
2015-06-23 09:17:54 +01:00
|
|
|
static struct cftype files[] = {
|
|
|
|
{
|
|
|
|
.name = "boost",
|
2016-07-28 17:28:55 +01:00
|
|
|
.read_s64 = boost_read,
|
|
|
|
.write_s64 = boost_write,
|
2015-06-23 09:17:54 +01:00
|
|
|
},
|
2016-07-14 13:09:03 -07:00
|
|
|
{
|
|
|
|
.name = "prefer_idle",
|
|
|
|
.read_u64 = prefer_idle_read,
|
|
|
|
.write_u64 = prefer_idle_write,
|
2015-06-23 09:17:54 +01:00
|
|
|
},
|
2016-08-31 16:54:12 -07:00
|
|
|
#ifdef CONFIG_SCHED_HMP
|
|
|
|
{
|
|
|
|
.name = "sched_boost_no_override",
|
|
|
|
.read_u64 = sched_boost_override_read,
|
|
|
|
.write_u64 = sched_boost_override_write,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.name = "sched_boost_enabled",
|
|
|
|
.read_u64 = sched_boost_enabled_read,
|
|
|
|
.write_u64 = sched_boost_enabled_write,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.name = "colocate",
|
|
|
|
.read_u64 = sched_colocate_read,
|
|
|
|
.write_u64 = sched_colocate_write,
|
|
|
|
},
|
|
|
|
#endif
|
2015-06-23 09:17:54 +01:00
|
|
|
{ } /* terminate */
|
|
|
|
};
|
|
|
|
|
|
|
|
static int
|
|
|
|
schedtune_boostgroup_init(struct schedtune *st)
|
|
|
|
{
|
|
|
|
struct boost_groups *bg;
|
|
|
|
int cpu;
|
|
|
|
|
2015-06-23 09:17:54 +01:00
|
|
|
/* Keep track of allocated boost groups */
|
|
|
|
allocated_group[st->idx] = st;
|
|
|
|
|
2015-06-23 09:17:54 +01:00
|
|
|
/* Initialize the per CPU boost groups */
|
|
|
|
for_each_possible_cpu(cpu) {
|
|
|
|
bg = &per_cpu(cpu_boost_groups, cpu);
|
2016-01-14 12:31:35 +00:00
|
|
|
bg->group[st->idx].boost = 0;
|
|
|
|
bg->group[st->idx].tasks = 0;
|
2016-11-08 14:53:44 -08:00
|
|
|
raw_spin_lock_init(&bg->lock);
|
2015-06-23 09:17:54 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct cgroup_subsys_state *
|
|
|
|
schedtune_css_alloc(struct cgroup_subsys_state *parent_css)
|
|
|
|
{
|
|
|
|
struct schedtune *st;
|
|
|
|
int idx;
|
|
|
|
|
2016-07-29 15:19:41 +01:00
|
|
|
if (!parent_css)
|
2015-06-23 09:17:54 +01:00
|
|
|
return &root_schedtune.css;
|
|
|
|
|
|
|
|
/* Allow only single level hierachies */
|
|
|
|
if (parent_css != &root_schedtune.css) {
|
|
|
|
pr_err("Nested SchedTune boosting groups not allowed\n");
|
|
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Allow only a limited number of boosting groups */
|
|
|
|
for (idx = 1; idx < BOOSTGROUPS_COUNT; ++idx)
|
|
|
|
if (!allocated_group[idx])
|
|
|
|
break;
|
|
|
|
if (idx == BOOSTGROUPS_COUNT) {
|
|
|
|
pr_err("Trying to create more than %d SchedTune boosting groups\n",
|
|
|
|
BOOSTGROUPS_COUNT);
|
|
|
|
return ERR_PTR(-ENOSPC);
|
|
|
|
}
|
|
|
|
|
|
|
|
st = kzalloc(sizeof(*st), GFP_KERNEL);
|
|
|
|
if (!st)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
/* Initialize per CPUs boost group support */
|
|
|
|
st->idx = idx;
|
2016-08-31 16:54:12 -07:00
|
|
|
init_sched_boost(st);
|
2015-06-23 09:17:54 +01:00
|
|
|
if (schedtune_boostgroup_init(st))
|
|
|
|
goto release;
|
|
|
|
|
|
|
|
return &st->css;
|
|
|
|
|
|
|
|
release:
|
|
|
|
kfree(st);
|
|
|
|
out:
|
|
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
schedtune_boostgroup_release(struct schedtune *st)
|
|
|
|
{
|
2016-01-14 12:31:35 +00:00
|
|
|
/* Reset this boost group */
|
|
|
|
schedtune_boostgroup_update(st->idx, 0);
|
|
|
|
|
2015-06-23 09:17:54 +01:00
|
|
|
/* Keep track of allocated boost groups */
|
|
|
|
allocated_group[st->idx] = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* cgroup css release: tear down the boost group and free its memory */
static void
schedtune_css_free(struct cgroup_subsys_state *css)
{
	struct schedtune *st = css_st(css);

	schedtune_boostgroup_release(st);
	kfree(st);
}
|
|
|
|
|
|
|
|
struct cgroup_subsys schedtune_cgrp_subsys = {
|
|
|
|
.css_alloc = schedtune_css_alloc,
|
|
|
|
.css_free = schedtune_css_free,
|
FIXUP: sched/tune: fix accounting for runnable tasks
Contains:
sched/tune: fix accounting for runnable tasks (1/5)
The accounting for tasks into boost groups of different CPUs is currently
broken mainly because:
a) we do not properly track the change of boost group of a RUNNABLE task
b) there are race conditions between migration code and accounting code
This patch provides a fixes to ensure enqueue/dequeue
accounting also for throttled tasks.
Without this patch is can happen that a task is enqueued into a throttled
RQ thus not being accounted for the boosting of the corresponding RQ.
We could argue that a throttled task should not boost a CPU, however:
a) properly implementing CPU boosting considering throttled tasks will
increase a lot the complexity of the solution
b) it's not easy to quantify the benefits introduced by such a more
complex solution
Since task throttling requires the usage of the CFS bandwidth controller,
which is not widely used on mobile systems (at least not by Android kernels
so far), for the time being we go for the simple solution and boost also
for throttled RQs.
sched/tune: fix accounting for runnable tasks (2/5)
This patch provides the code required to enforce proper locking.
A per boost group spinlock has been added to grant atomic
accounting of tasks as well as to serialise enqueue/dequeue operations,
triggered by tasks migrations, with cgroups's attach/detach operations.
sched/tune: fix accounting for runnable tasks (3/5)
This patch adds cgroups {allow,can,cancel}_attach callbacks.
Since a task can be migrated between boost groups while it's running,
the CGroups's attach callbacks have been added to properly migrate
boost contributions of RUNNABLE tasks.
The RQ's lock is used to serialise enqueue/dequeue operations, triggered
by tasks migrations, with cgroups's attach/detach operations. While the
SchedTune's CPU lock is used to grant atrocity of the accounting within
the CPU.
NOTE: the current implementation does not allows a concurrent CPU migration
and CGroups change.
sched/tune: fix accounting for runnable tasks (4/5)
This fixes accounting for exiting tasks by adding a dedicated call early
in the do_exit() syscall, which disables SchedTune accounting as soon as a
task is flagged PF_EXITING.
This flag is set before the multiple dequeue/enqueue dance triggered
by cgroup_exit() which is useful only to inject useless tasks movements
thus increasing possibilities for race conditions with the migration code.
The schedtune_exit_task() call does the last dequeue of a task from its
current boost group. This is a solution more aligned with what happens in
mainline kernels (>v4.4) where the exit_cgroup does not move anymore a dying
task to the root control group.
sched/tune: fix accounting for runnable tasks (5/5)
To avoid accounting issues at startup, this patch disable the SchedTune
accounting until the required data structures have been properly
initialized.
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
[jstultz: fwdported to 4.4]
Signed-off-by: John Stultz <john.stultz@linaro.org>
2016-07-28 18:44:40 +01:00
|
|
|
.can_attach = schedtune_can_attach,
|
|
|
|
.cancel_attach = schedtune_cancel_attach,
|
2015-06-23 09:17:54 +01:00
|
|
|
.legacy_cftypes = files,
|
|
|
|
.early_init = 1,
|
2016-08-31 16:54:12 -07:00
|
|
|
.attach = schedtune_attach,
|
2015-06-23 09:17:54 +01:00
|
|
|
};
|
|
|
|
|
2016-07-29 15:19:41 +01:00
|
|
|
static inline void
|
|
|
|
schedtune_init_cgroups(void)
|
|
|
|
{
|
|
|
|
struct boost_groups *bg;
|
|
|
|
int cpu;
|
|
|
|
|
|
|
|
/* Initialize the per CPU boost groups */
|
|
|
|
for_each_possible_cpu(cpu) {
|
|
|
|
bg = &per_cpu(cpu_boost_groups, cpu);
|
|
|
|
memset(bg, 0, sizeof(struct boost_groups));
|
2016-11-25 13:38:45 +08:00
|
|
|
raw_spin_lock_init(&bg->lock);
|
2016-07-29 15:19:41 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
pr_info("schedtune: configured to support %d boost groups\n",
|
|
|
|
BOOSTGROUPS_COUNT);
|
2016-08-24 11:02:29 +01:00
|
|
|
|
|
|
|
schedtune_initialized = true;
|
2016-07-29 15:19:41 +01:00
|
|
|
}
|
|
|
|
|
2016-01-12 18:12:13 +00:00
|
|
|
#else /* CONFIG_CGROUP_SCHEDTUNE */
|
|
|
|
|
|
|
|
int
|
|
|
|
schedtune_accept_deltas(int nrg_delta, int cap_delta,
|
|
|
|
struct task_struct *task)
|
|
|
|
{
|
|
|
|
/* Optimal (O) region */
|
2016-01-20 14:06:05 +00:00
|
|
|
if (nrg_delta < 0 && cap_delta > 0) {
|
|
|
|
trace_sched_tune_filter(nrg_delta, cap_delta, 0, 0, 1, 0);
|
2016-01-12 18:12:13 +00:00
|
|
|
return INT_MAX;
|
2016-01-20 14:06:05 +00:00
|
|
|
}
|
2016-01-12 18:12:13 +00:00
|
|
|
|
|
|
|
/* Suboptimal (S) region */
|
2016-01-20 14:06:05 +00:00
|
|
|
if (nrg_delta > 0 && cap_delta < 0) {
|
|
|
|
trace_sched_tune_filter(nrg_delta, cap_delta, 0, 0, -1, 5);
|
2016-01-12 18:12:13 +00:00
|
|
|
return -INT_MAX;
|
2016-01-20 14:06:05 +00:00
|
|
|
}
|
2016-01-12 18:12:13 +00:00
|
|
|
|
|
|
|
return __schedtune_accept_deltas(nrg_delta, cap_delta,
|
|
|
|
perf_boost_idx, perf_constrain_idx);
|
|
|
|
}
|
|
|
|
|
2015-06-23 09:17:54 +01:00
|
|
|
#endif /* CONFIG_CGROUP_SCHEDTUNE */
|
|
|
|
|
2015-06-22 18:11:44 +01:00
|
|
|
int
|
|
|
|
sysctl_sched_cfs_boost_handler(struct ctl_table *table, int write,
|
|
|
|
void __user *buffer, size_t *lenp,
|
|
|
|
loff_t *ppos)
|
|
|
|
{
|
|
|
|
int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
|
sched/tune: fix PB and PC cuts indexes definition
The current definition of the Performance Boost (PB) and Performance Constraint
(PC) regions is has two main issues:
1) in the computation of the boost index we overflow the thresholds_gains
table for boost=100
2) the two cuts had _NOT_ the same ratio
The last point means that when boost=0 we do _not_ have a "standard" EAS
behaviour, i.e. accepting all candidate which decrease energy regardless
of their impact on performances. Instead, we accept only schedule candidate
which are in the Optimal region, i.e. decrease energy while increasing
performances.
This behaviour can have a negative impact also on CPU selection policies
which tries to spread tasks to reduce latencies. Indeed, for example
we could end up rejecting a schedule candidate which want to move a task
from a congested CPU to an idle one while, specifically in the case where
the target CPU will be running on a lower OPP.
This patch fixes these two issues by properly clamping the boost value
in the appropriate range to compute the threshold indexes as well as
by using the same threshold index for both cuts.
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
Signed-off-by: Srinath Sridharan <srinathsr@google.com>
sched/tune: fix update of threshold index for boost groups
When SchedTune is configured to work with CGroup mode, each time we update
the boost value of a group we do not update the threshed indexes for the
definition of the Performance Boost (PC) and Performance Constraint (PC)
region. This means that while the OPP boosting and CPU biasing selection
is working as expected, the __schedtune_accept_deltas function is always
using the initial values for these cuts.
This patch ensure that each time a new boost value is configured for a
boost group, the cuts for the PB and PC region are properly updated too.
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
Signed-off-by: Srinath Sridharan <srinathsr@google.com>
sched/tune: update PC and PB cuts definition
The current definition of Performance Boost (PB) and Performance
Constraint (PC) cuts defines two "dead regions":
- up to 20% boost: we are in energy-reduction only mode, i.e.
accept all candidate which reduce energy
- over 70% boost: we are in performance-increase only mode, i.e.
accept only sched candidate which do not reduce performances
This patch uses a more fine grained configuration where these two "dead
regions" are reduced to: up to 10% and over 90%.
This should allow to have some boosting benefits starting from 10% boost
values as well as not being to much permissive starting from boost values
of 80%.
Suggested-by: Leo Yan <leo.yan@linaro.org>
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
Signed-off-by: Srinath Sridharan <srinathsr@google.com>
bug: 28312446
Change-Id: Ia326c66521e38c98e7a7eddbbb7c437875efa1ba
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
2016-07-29 15:32:26 +01:00
|
|
|
unsigned threshold_idx;
|
|
|
|
int boost_pct;
|
2015-06-22 18:11:44 +01:00
|
|
|
|
|
|
|
if (ret || !write)
|
|
|
|
return ret;
|
|
|
|
|
sched/tune: fix PB and PC cuts indexes definition
The current definition of the Performance Boost (PB) and Performance Constraint
(PC) regions is has two main issues:
1) in the computation of the boost index we overflow the thresholds_gains
table for boost=100
2) the two cuts had _NOT_ the same ratio
The last point means that when boost=0 we do _not_ have a "standard" EAS
behaviour, i.e. accepting all candidate which decrease energy regardless
of their impact on performances. Instead, we accept only schedule candidate
which are in the Optimal region, i.e. decrease energy while increasing
performances.
This behaviour can have a negative impact also on CPU selection policies
which tries to spread tasks to reduce latencies. Indeed, for example
we could end up rejecting a schedule candidate which want to move a task
from a congested CPU to an idle one while, specifically in the case where
the target CPU will be running on a lower OPP.
This patch fixes these two issues by properly clamping the boost value
in the appropriate range to compute the threshold indexes as well as
by using the same threshold index for both cuts.
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
Signed-off-by: Srinath Sridharan <srinathsr@google.com>
sched/tune: fix update of threshold index for boost groups
When SchedTune is configured to work with CGroup mode, each time we update
the boost value of a group we do not update the threshed indexes for the
definition of the Performance Boost (PC) and Performance Constraint (PC)
region. This means that while the OPP boosting and CPU biasing selection
is working as expected, the __schedtune_accept_deltas function is always
using the initial values for these cuts.
This patch ensure that each time a new boost value is configured for a
boost group, the cuts for the PB and PC region are properly updated too.
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
Signed-off-by: Srinath Sridharan <srinathsr@google.com>
sched/tune: update PC and PB cuts definition
The current definition of Performance Boost (PB) and Performance
Constraint (PC) cuts defines two "dead regions":
- up to 20% boost: we are in energy-reduction only mode, i.e.
accept all candidate which reduce energy
- over 70% boost: we are in performance-increase only mode, i.e.
accept only sched candidate which do not reduce performances
This patch uses a more fine grained configuration where these two "dead
regions" are reduced to: up to 10% and over 90%.
This should allow to have some boosting benefits starting from 10% boost
values as well as not being to much permissive starting from boost values
of 80%.
Suggested-by: Leo Yan <leo.yan@linaro.org>
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
Signed-off-by: Srinath Sridharan <srinathsr@google.com>
bug: 28312446
Change-Id: Ia326c66521e38c98e7a7eddbbb7c437875efa1ba
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
2016-07-29 15:32:26 +01:00
|
|
|
if (sysctl_sched_cfs_boost < -100 || sysctl_sched_cfs_boost > 100)
|
|
|
|
return -EINVAL;
|
|
|
|
boost_pct = sysctl_sched_cfs_boost;
|
2016-01-12 18:12:13 +00:00
|
|
|
|
sched/tune: fix PB and PC cuts indexes definition
The current definition of the Performance Boost (PB) and Performance Constraint
(PC) regions is has two main issues:
1) in the computation of the boost index we overflow the thresholds_gains
table for boost=100
2) the two cuts had _NOT_ the same ratio
The last point means that when boost=0 we do _not_ have a "standard" EAS
behaviour, i.e. accepting all candidate which decrease energy regardless
of their impact on performances. Instead, we accept only schedule candidate
which are in the Optimal region, i.e. decrease energy while increasing
performances.
This behaviour can have a negative impact also on CPU selection policies
which tries to spread tasks to reduce latencies. Indeed, for example
we could end up rejecting a schedule candidate which want to move a task
from a congested CPU to an idle one while, specifically in the case where
the target CPU will be running on a lower OPP.
This patch fixes these two issues by properly clamping the boost value
in the appropriate range to compute the threshold indexes as well as
by using the same threshold index for both cuts.
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
Signed-off-by: Srinath Sridharan <srinathsr@google.com>
sched/tune: fix update of threshold index for boost groups
When SchedTune is configured to work with CGroup mode, each time we update
the boost value of a group we do not update the threshed indexes for the
definition of the Performance Boost (PC) and Performance Constraint (PC)
region. This means that while the OPP boosting and CPU biasing selection
is working as expected, the __schedtune_accept_deltas function is always
using the initial values for these cuts.
This patch ensure that each time a new boost value is configured for a
boost group, the cuts for the PB and PC region are properly updated too.
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
Signed-off-by: Srinath Sridharan <srinathsr@google.com>
sched/tune: update PC and PB cuts definition
The current definition of Performance Boost (PB) and Performance
Constraint (PC) cuts defines two "dead regions":
- up to 20% boost: we are in energy-reduction only mode, i.e.
accept all candidate which reduce energy
- over 70% boost: we are in performance-increase only mode, i.e.
accept only sched candidate which do not reduce performances
This patch uses a more fine grained configuration where these two "dead
regions" are reduced to: up to 10% and over 90%.
This should allow to have some boosting benefits starting from 10% boost
values as well as not being to much permissive starting from boost values
of 80%.
Suggested-by: Leo Yan <leo.yan@linaro.org>
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
Signed-off-by: Srinath Sridharan <srinathsr@google.com>
bug: 28312446
Change-Id: Ia326c66521e38c98e7a7eddbbb7c437875efa1ba
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>
2016-07-29 15:32:26 +01:00
|
|
|
/*
|
|
|
|
* Update threshold params for Performance Boost (B)
|
|
|
|
* and Performance Constraint (C) regions.
|
|
|
|
* The current implementatio uses the same cuts for both
|
|
|
|
* B and C regions.
|
|
|
|
*/
|
|
|
|
threshold_idx = clamp(boost_pct, 0, 99) / 10;
|
|
|
|
perf_boost_idx = threshold_idx;
|
|
|
|
perf_constrain_idx = threshold_idx;
|
2016-01-12 18:12:13 +00:00
|
|
|
|
2015-06-22 18:11:44 +01:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2016-01-12 18:12:13 +00:00
|
|
|
#ifdef CONFIG_SCHED_DEBUG
static void
schedtune_test_nrg(unsigned long delta_pwr)
{
	unsigned long dpwr;
	unsigned long npwr;
	int i;

	/*
	 * Sanity-check the normalization constants: normalize a set of
	 * fixed system energy values (max power successively halved)
	 * and report each result.
	 */
	pr_info("schedtune: verify normalization constants...\n");
	for (i = 0; i < 6; ++i) {
		/* Test value: max power scaled down by 2^i */
		dpwr = delta_pwr >> i;

		/* Normalize on max energy for target platform */
		npwr = reciprocal_divide(dpwr << SCHED_LOAD_SHIFT,
					 schedtune_target_nrg.rdiv);

		pr_info("schedtune: max_pwr/2^%d: %4lu => norm_pwr: %5lu\n",
			i, dpwr, npwr);
	}
}
#else
#define schedtune_test_nrg(delta_pwr)
#endif
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Compute the min/max power consumption of a cluster and all its CPUs
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
schedtune_add_cluster_nrg(
|
|
|
|
struct sched_domain *sd,
|
|
|
|
struct sched_group *sg,
|
|
|
|
struct target_nrg *ste)
|
|
|
|
{
|
|
|
|
struct sched_domain *sd2;
|
|
|
|
struct sched_group *sg2;
|
|
|
|
|
|
|
|
struct cpumask *cluster_cpus;
|
|
|
|
char str[32];
|
|
|
|
|
|
|
|
unsigned long min_pwr;
|
|
|
|
unsigned long max_pwr;
|
|
|
|
int cpu;
|
|
|
|
|
|
|
|
/* Get Cluster energy using EM data for the first CPU */
|
|
|
|
cluster_cpus = sched_group_cpus(sg);
|
|
|
|
snprintf(str, 32, "CLUSTER[%*pbl]",
|
|
|
|
cpumask_pr_args(cluster_cpus));
|
|
|
|
|
|
|
|
min_pwr = sg->sge->idle_states[sg->sge->nr_idle_states - 1].power;
|
|
|
|
max_pwr = sg->sge->cap_states[sg->sge->nr_cap_states - 1].power;
|
|
|
|
pr_info("schedtune: %-17s min_pwr: %5lu max_pwr: %5lu\n",
|
|
|
|
str, min_pwr, max_pwr);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Keep track of this cluster's energy in the computation of the
|
|
|
|
* overall system energy
|
|
|
|
*/
|
|
|
|
ste->min_power += min_pwr;
|
|
|
|
ste->max_power += max_pwr;
|
|
|
|
|
|
|
|
/* Get CPU energy using EM data for each CPU in the group */
|
|
|
|
for_each_cpu(cpu, cluster_cpus) {
|
|
|
|
/* Get a SD view for the specific CPU */
|
|
|
|
for_each_domain(cpu, sd2) {
|
|
|
|
/* Get the CPU group */
|
|
|
|
sg2 = sd2->groups;
|
|
|
|
min_pwr = sg2->sge->idle_states[sg2->sge->nr_idle_states - 1].power;
|
|
|
|
max_pwr = sg2->sge->cap_states[sg2->sge->nr_cap_states - 1].power;
|
|
|
|
|
|
|
|
ste->min_power += min_pwr;
|
|
|
|
ste->max_power += max_pwr;
|
|
|
|
|
|
|
|
snprintf(str, 32, "CPU[%d]", cpu);
|
|
|
|
pr_info("schedtune: %-17s min_pwr: %5lu max_pwr: %5lu\n",
|
|
|
|
str, min_pwr, max_pwr);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Assume we have EM data only at the CPU and
|
|
|
|
* the upper CLUSTER level
|
|
|
|
*/
|
|
|
|
BUG_ON(!cpumask_equal(
|
|
|
|
sched_group_cpus(sg),
|
|
|
|
sched_group_cpus(sd2->parent->groups)
|
|
|
|
));
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Initialize the constants required to compute normalized energy.
|
|
|
|
* The values of these constants depends on the EM data for the specific
|
|
|
|
* target system and topology.
|
|
|
|
* Thus, this function is expected to be called by the code
|
|
|
|
* that bind the EM to the topology information.
|
|
|
|
*/
|
|
|
|
static int
|
2016-07-29 15:19:41 +01:00
|
|
|
schedtune_init(void)
|
2016-01-12 18:12:13 +00:00
|
|
|
{
|
|
|
|
struct target_nrg *ste = &schedtune_target_nrg;
|
|
|
|
unsigned long delta_pwr = 0;
|
|
|
|
struct sched_domain *sd;
|
|
|
|
struct sched_group *sg;
|
|
|
|
|
|
|
|
pr_info("schedtune: init normalization constants...\n");
|
|
|
|
ste->max_power = 0;
|
|
|
|
ste->min_power = 0;
|
|
|
|
|
|
|
|
rcu_read_lock();
|
|
|
|
|
|
|
|
/*
|
|
|
|
* When EAS is in use, we always have a pointer to the highest SD
|
|
|
|
* which provides EM data.
|
|
|
|
*/
|
|
|
|
sd = rcu_dereference(per_cpu(sd_ea, cpumask_first(cpu_online_mask)));
|
|
|
|
if (!sd) {
|
Merge branch 'v4.4-16.09-android-tmp' into lsk-v4.4-16.09-android
* v4.4-16.09-android-tmp:
unsafe_[get|put]_user: change interface to use a error target label
usercopy: remove page-spanning test for now
usercopy: fix overlap check for kernel text
mm/slub: support left redzone
Linux 4.4.21
lib/mpi: mpi_write_sgl(): fix skipping of leading zero limbs
regulator: anatop: allow regulator to be in bypass mode
hwrng: exynos - Disable runtime PM on probe failure
cpufreq: Fix GOV_LIMITS handling for the userspace governor
metag: Fix atomic_*_return inline asm constraints
scsi: fix upper bounds check of sense key in scsi_sense_key_string()
ALSA: timer: fix NULL pointer dereference on memory allocation failure
ALSA: timer: fix division by zero after SNDRV_TIMER_IOCTL_CONTINUE
ALSA: timer: fix NULL pointer dereference in read()/ioctl() race
ALSA: hda - Enable subwoofer on Dell Inspiron 7559
ALSA: hda - Add headset mic quirk for Dell Inspiron 5468
ALSA: rawmidi: Fix possible deadlock with virmidi registration
ALSA: fireworks: accessing to user space outside spinlock
ALSA: firewire-tascam: accessing to user space outside spinlock
ALSA: usb-audio: Add sample rate inquiry quirk for B850V3 CP2114
crypto: caam - fix IV loading for authenc (giv)decryption
uprobes: Fix the memcg accounting
x86/apic: Do not init irq remapping if ioapic is disabled
vhost/scsi: fix reuse of &vq->iov[out] in response
bcache: RESERVE_PRIO is too small by one when prio_buckets() is a power of two.
ubifs: Fix assertion in layout_in_gaps()
ovl: fix workdir creation
ovl: listxattr: use strnlen()
ovl: remove posix_acl_default from workdir
ovl: don't copy up opaqueness
wrappers for ->i_mutex access
lustre: remove unused declaration
timekeeping: Avoid taking lock in NMI path with CONFIG_DEBUG_TIMEKEEPING
timekeeping: Cap array access in timekeeping_debug
xfs: fix superblock inprogress check
ASoC: atmel_ssc_dai: Don't unconditionally reset SSC on stream startup
drm/msm: fix use of copy_from_user() while holding spinlock
drm: Reject page_flip for !DRIVER_MODESET
drm/radeon: fix radeon_move_blit on 32bit systems
s390/sclp_ctl: fix potential information leak with /dev/sclp
rds: fix an infoleak in rds_inc_info_copy
powerpc/tm: Avoid SLB faults in treclaim/trecheckpoint when RI=0
nvme: Call pci_disable_device on the error path.
cgroup: reduce read locked section of cgroup_threadgroup_rwsem during fork
block: make sure a big bio is split into at most 256 bvecs
block: Fix race triggered by blk_set_queue_dying()
ext4: avoid modifying checksum fields directly during checksum verification
ext4: avoid deadlock when expanding inode size
ext4: properly align shifted xattrs when expanding inodes
ext4: fix xattr shifting when expanding inodes part 2
ext4: fix xattr shifting when expanding inodes
ext4: validate that metadata blocks do not overlap superblock
net: Use ns_capable_noaudit() when determining net sysctl permissions
kernel: Add noaudit variant of ns_capable()
KEYS: Fix ASN.1 indefinite length object parsing
drivers:hv: Lock access to hyperv_mmio resource tree
cxlflash: Move to exponential back-off when cmd_room is not available
netfilter: x_tables: check for size overflow
drm/amdgpu/cz: enable/disable vce dpm even if vce pg is disabled
cred: Reject inodes with invalid ids in set_create_file_as()
fs: Check for invalid i_uid in may_follow_link()
IB/IPoIB: Do not set skb truesize since using one linearskb
udp: properly support MSG_PEEK with truncated buffers
crypto: nx-842 - Mask XERS0 bit in return value
cxlflash: Fix to avoid virtual LUN failover failure
cxlflash: Fix to escalate LINK_RESET also on port 1
tipc: fix nl compat regression for link statistics
tipc: fix an infoleak in tipc_nl_compat_link_dump
netfilter: x_tables: check for size overflow
Bluetooth: Add support for Intel Bluetooth device 8265 [8087:0a2b]
drm/i915: Check VBT for port presence in addition to the strap on VLV/CHV
drm/i915: Only ignore eDP ports that are connected
Input: xpad - move pending clear to the correct location
net: thunderx: Fix link status reporting
x86/hyperv: Avoid reporting bogus NMI status for Gen2 instances
crypto: vmx - IV size failing on skcipher API
tda10071: Fix dependency to REGMAP_I2C
crypto: vmx - Fix ABI detection
crypto: vmx - comply with ABIs that specify vrsave as reserved.
HID: core: prevent out-of-bound readings
lpfc: Fix DMA faults observed upon plugging loopback connector
block: fix blk_rq_get_max_sectors for driver private requests
irqchip/gicv3-its: numa: Enable workaround for Cavium thunderx erratum 23144
clocksource: Allow unregistering the watchdog
btrfs: Continue write in case of can_not_nocow
blk-mq: End unstarted requests on dying queue
cxlflash: Fix to resolve dead-lock during EEH recovery
drm/radeon/mst: fix regression in lane/link handling.
ecryptfs: fix handling of directory opening
ALSA: hda: add AMD Polaris-10/11 AZ PCI IDs with proper driver caps
drm: Balance error path for GEM handle allocation
ntp: Fix ADJ_SETOFFSET being used w/ ADJ_NANO
time: Verify time values in adjtimex ADJ_SETOFFSET to avoid overflow
Input: xpad - correctly handle concurrent LED and FF requests
net: thunderx: Fix receive packet stats
net: thunderx: Fix for multiqset not configured upon interface toggle
perf/x86/cqm: Fix CQM memory leak and notifier leak
perf/x86/cqm: Fix CQM handling of grouping events into a cache_group
s390/crypto: provide correct file mode at device register.
proc: revert /proc/<pid>/maps [stack:TID] annotation
intel_idle: Support for Intel Xeon Phi Processor x200 Product Family
cxlflash: Fix to avoid unnecessary scan with internal LUNs
Drivers: hv: vmbus: don't manipulate with clocksources on crash
Drivers: hv: vmbus: avoid scheduling in interrupt context in vmbus_initiate_unload()
Drivers: hv: vmbus: avoid infinite loop in init_vp_index()
arcmsr: fixes not release allocated resource
arcmsr: fixed getting wrong configuration data
s390/pci_dma: fix DMA table corruption with > 4 TB main memory
net/mlx5e: Don't modify CQ before it was created
net/mlx5e: Don't try to modify CQ moderation if it is not supported
mmc: sdhci: Do not BUG on invalid vdd
UVC: Add support for R200 depth camera
sched/numa: Fix use-after-free bug in the task_numa_compare
ALSA: hda - add codec support for Kabylake display audio codec
drm/i915: Fix hpd live status bits for g4x
tipc: fix nullptr crash during subscription cancel
arm64: Add workaround for Cavium erratum 27456
net: thunderx: Fix for Qset error due to CQ full
drm/radeon: fix dp link rate selection (v2)
drm/amdgpu: fix dp link rate selection (v2)
qla2xxx: Use ATIO type to send correct tmr response
mmc: sdhci: 64-bit DMA actually has 4-byte alignment
drm/atomic: Do not unset crtc when an encoder is stolen
drm/i915/skl: Add missing SKL ids
drm/i915/bxt: update list of PCIIDs
hrtimer: Catch illegal clockids
i40e/i40evf: Fix RSS rx-flow-hash configuration through ethtool
mpt3sas: Fix for Asynchronous completion of timedout IO and task abort of timedout IO.
mpt3sas: A correction in unmap_resources
net: cavium: liquidio: fix check for in progress flag
arm64: KVM: Configure TCR_EL2.PS at runtime
irqchip/gic-v3: Make sure read from ICC_IAR1_EL1 is visible on redestributor
pwm: lpc32xx: fix and simplify duty cycle and period calculations
pwm: lpc32xx: correct number of PWM channels from 2 to 1
pwm: fsl-ftm: Fix clock enable/disable when using PM
megaraid_sas: Add an i/o barrier
megaraid_sas: Fix SMAP issue
megaraid_sas: Do not allow PCI access during OCR
s390/cio: update measurement characteristics
s390/cio: ensure consistent measurement state
s390/cio: fix measurement characteristics memleak
qeth: initialize net_device with carrier off
lpfc: Fix external loopback failure.
lpfc: Fix mbox reuse in PLOGI completion
lpfc: Fix RDP Speed reporting.
lpfc: Fix crash in fcp command completion path.
lpfc: Fix driver crash when module parameter lpfc_fcp_io_channel set to 16
lpfc: Fix RegLogin failed error seen on Lancer FC during port bounce
lpfc: Fix the FLOGI discovery logic to comply with T11 standards
lpfc: Fix FCF Infinite loop in lpfc_sli4_fcf_rr_next_index_get.
cxl: Enable PCI device ID for future IBM CXL adapter
cxl: fix build for GCC 4.6.x
cxlflash: Enable device id for future IBM CXL adapter
cxlflash: Resolve oops in wait_port_offline
cxlflash: Fix to resolve cmd leak after host reset
cxl: Fix DSI misses when the context owning task exits
cxl: Fix possible idr warning when contexts are released
Drivers: hv: vmbus: fix rescind-offer handling for device without a driver
Drivers: hv: vmbus: serialize process_chn_event() and vmbus_close_internal()
Drivers: hv: vss: run only on supported host versions
drivers/hv: cleanup synic msrs if vmbus connect failed
Drivers: hv: util: catch allocation errors
tools: hv: report ENOSPC errors in hv_fcopy_daemon
Drivers: hv: utils: run polling callback always in interrupt context
Drivers: hv: util: Increase the timeout for util services
lightnvm: fix missing grown bad block type
lightnvm: fix locking and mempool in rrpc_lun_gc
lightnvm: unlock rq and free ppa_list on submission fail
lightnvm: add check after mempool allocation
lightnvm: fix incorrect nr_free_blocks stat
lightnvm: fix bio submission issue
cxlflash: a couple off by one bugs
fm10k: Cleanup exception handling for mailbox interrupt
fm10k: Cleanup MSI-X interrupts in case of failure
fm10k: reinitialize queuing scheme after calling init_hw
fm10k: always check init_hw for errors
fm10k: reset max_queues on init_hw_vf failure
fm10k: Fix handling of NAPI budget when multiple queues are enabled per vector
fm10k: Correct MTU for jumbo frames
fm10k: do not assume VF always has 1 queue
clk: xgene: Fix divider with non-zero shift value
e1000e: fix division by zero on jumbo MTUs
e1000: fix data race between tx_ring->next_to_clean
ixgbe: Fix handling of NAPI budget when multiple queues are enabled per vector
igb: fix NULL derefs due to skipped SR-IOV enabling
igb: use the correct i210 register for EEMNGCTL
igb: don't unmap NULL hw_addr
i40e: Fix Rx hash reported to the stack by our driver
i40e: clean whole mac filter list
i40evf: check rings before freeing resources
i40e: don't add zero MAC filter
i40e: properly delete VF MAC filters
i40e: Fix memory leaks, sideband filter programming
i40e: fix: do not sleep in netdev_ops
i40e/i40evf: Fix RS bit update in Tx path and disable force WB workaround
i40evf: handle many MAC filters correctly
i40e: Workaround fix for mss < 256 issue
UPSTREAM: audit: fix a double fetch in audit_log_single_execve_arg()
UPSTREAM: ARM: 8494/1: mm: Enable PXN when running non-LPAE kernel on LPAE processor
FIXUP: sched/tune: update accouting before CPU capacity
FIXUP: sched/tune: add fixes missing from a previous patch
arm: Fix #if/#ifdef typo in topology.c
arm: Fix build error "conflicting types for 'scale_cpu_capacity'"
sched/walt: use do_div instead of division operator
DEBUG: cpufreq: fix cpu_capacity tracing build for non-smp systems
sched/walt: include missing header for arm_timer_read_counter()
cpufreq: Kconfig: Fixup incorrect selection by CPU_FREQ_DEFAULT_GOV_SCHED
sched/fair: Avoid redundant idle_cpu() call in update_sg_lb_stats()
FIXUP: sched: scheduler-driven cpu frequency selection
sched/rt: Add Kconfig option to enable panicking for RT throttling
sched/rt: print RT tasks when RT throttling is activated
UPSTREAM: sched: Fix a race between __kthread_bind() and sched_setaffinity()
sched/fair: Favor higher cpus only for boosted tasks
vmstat: make vmstat_updater deferrable again and shut down on idle
sched/fair: call OPP update when going idle after migration
sched/cpufreq_sched: fix thermal capping events
sched/fair: Picking cpus with low OPPs for tasks that prefer idle CPUs
FIXUP: sched/tune: do initialization as a postcore_initicall
DEBUG: sched: add tracepoint for RD overutilized
sched/tune: Introducing a new schedtune attribute prefer_idle
sched: use util instead of capacity to select busy cpu
arch_timer: add error handling when the MPM global timer is cleared
FIXUP: sched: Fix double-release of spinlock in move_queued_task
FIXUP: sched/fair: Fix hang during suspend in sched_group_energy
FIXUP: sched: fix SchedFreq integration for both PELT and WALT
sched: EAS: Avoid causing spikes to max-freq unnecessarily
FIXUP: sched: fix set_cfs_cpu_capacity when WALT is in use
sched/walt: Accounting for number of irqs pending on each core
sched: Introduce Window Assisted Load Tracking (WALT)
sched/tune: fix PB and PC cuts indexes definition
sched/fair: optimize idle cpu selection for boosted tasks
FIXUP: sched/tune: fix accounting for runnable tasks
sched/tune: use a single initialisation function
sched/{fair,tune}: simplify fair.c code
FIXUP: sched/tune: fix payoff calculation for boost region
sched/tune: Add support for negative boost values
FIX: sched/tune: move schedtune_nornalize_energy into fair.c
FIX: sched/tune: update usage of boosted task utilisation on CPU selection
sched/fair: add tunable to set initial task load
sched/fair: add tunable to force selection at cpu granularity
sched: EAS: take cstate into account when selecting idle core
sched/cpufreq_sched: Consolidated update
FIXUP: sched: fix build for non-SMP target
DEBUG: sched/tune: add tracepoint on P-E space filtering
DEBUG: sched/tune: add tracepoint for energy_diff() values
DEBUG: sched/tune: add tracepoint for task boost signal
arm: topology: Define TC2 energy and provide it to the scheduler
CHROMIUM: sched: update the average of nr_running
DEBUG: schedtune: add tracepoint for schedtune_tasks_update() values
DEBUG: schedtune: add tracepoint for CPU boost signal
DEBUG: schedtune: add tracepoint for SchedTune configuration update
DEBUG: sched: add energy procfs interface
DEBUG: sched,cpufreq: add cpu_capacity change tracepoint
DEBUG: sched: add tracepoint for CPU load/util signals
DEBUG: sched: add tracepoint for task load/util signals
DEBUG: sched: add tracepoint for cpu/freq scale invariance
sched/fair: filter energy_diff() based on energy_payoff value
sched/tune: add support to compute normalized energy
sched/fair: keep track of energy/capacity variations
sched/fair: add boosted task utilization
sched/{fair,tune}: track RUNNABLE tasks impact on per CPU boost value
sched/tune: compute and keep track of per CPU boost value
sched/tune: add initial support for CGroups based boosting
sched/fair: add boosted CPU usage
sched/fair: add function to convert boost value into "margin"
sched/tune: add sysctl interface to define a boost value
sched/tune: add detailed documentation
fixup! sched/fair: jump to max OPP when crossing UP threshold
fixup! sched: scheduler-driven cpu frequency selection
sched: rt scheduler sets capacity requirement
sched: deadline: use deadline bandwidth in scale_rt_capacity
sched: remove call of sched_avg_update from sched_rt_avg_update
sched/cpufreq_sched: add trace events
sched/fair: jump to max OPP when crossing UP threshold
sched/fair: cpufreq_sched triggers for load balancing
sched/{core,fair}: trigger OPP change request on fork()
sched/fair: add triggers for OPP change requests
sched: scheduler-driven cpu frequency selection
cpufreq: introduce cpufreq_driver_is_slow
sched: Consider misfit tasks when load-balancing
sched: Add group_misfit_task load-balance type
sched: Add per-cpu max capacity to sched_group_capacity
sched: Do eas idle balance regardless of the rq avg idle value
arm64: Enable max freq invariant scheduler load-tracking and capacity support
arm: Enable max freq invariant scheduler load-tracking and capacity support
sched: Update max cpu capacity in case of max frequency constraints
cpufreq: Max freq invariant scheduler load-tracking and cpu capacity support
arm64, topology: Updates to use DT bindings for EAS costing data
sched: Support for extracting EAS energy costs from DT
Documentation: DT bindings for energy model cost data required by EAS
sched: Disable energy-unfriendly nohz kicks
sched: Consider a not over-utilized energy-aware system as balanced
sched: Energy-aware wake-up task placement
sched: Determine the current sched_group idle-state
sched, cpuidle: Track cpuidle state index in the scheduler
sched: Add over-utilization/tipping point indicator
sched: Estimate energy impact of scheduling decisions
sched: Extend sched_group_energy to test load-balancing decisions
sched: Calculate energy consumption of sched_group
sched: Highest energy aware balancing sched_domain level pointer
sched: Relocated cpu_util() and change return type
sched: Compute cpu capacity available at current frequency
arm64: Cpu invariant scheduler load-tracking and capacity support
arm: Cpu invariant scheduler load-tracking and capacity support
sched: Introduce SD_SHARE_CAP_STATES sched_domain flag
sched: Initialize energy data structures
sched: Introduce energy data structures
sched: Make energy awareness a sched feature
sched: Documentation for scheduler energy cost model
sched: Prevent unnecessary active balance of single task in sched group
sched: Enable idle balance to pull single task towards cpu with higher capacity
sched: Consider spare cpu capacity at task wake-up
sched: Add cpu capacity awareness to wakeup balancing
sched: Store system-wide maximum cpu capacity in root domain
arm: Update arch_scale_cpu_capacity() to reflect change to define
arm64: Enable frequency invariant scheduler load-tracking support
arm: Enable frequency invariant scheduler load-tracking support
cpufreq: Frequency invariant scheduler load-tracking support
sched/fair: Fix new task's load avg removed from source CPU in wake_up_new_task()
FROMLIST: pstore: drop pmsg bounce buffer
UPSTREAM: usercopy: remove page-spanning test for now
UPSTREAM: usercopy: force check_object_size() inline
BACKPORT: usercopy: fold builtin_const check into inline function
UPSTREAM: x86/uaccess: force copy_*_user() to be inlined
UPSTREAM: HID: core: prevent out-of-bound readings
Android: Fix build breakages.
UPSTREAM: tty: Prevent ldisc drivers from re-using stale tty fields
UPSTREAM: netfilter: nfnetlink: correctly validate length of batch messages
cpuset: Make cpusets restore on hotplug
UPSTREAM: mm/slub: support left redzone
UPSTREAM: Make the hardened user-copy code depend on having a hardened allocator
Android: MMC/UFS IO Latency Histograms.
UPSTREAM: usercopy: fix overlap check for kernel text
UPSTREAM: usercopy: avoid potentially undefined behavior in pointer math
UPSTREAM: unsafe_[get|put]_user: change interface to use a error target label
BACKPORT: arm64: mm: fix location of _etext
BACKPORT: ARM: 8583/1: mm: fix location of _etext
BACKPORT: Don't show empty tag stats for unprivileged uids
UPSTREAM: tcp: fix use after free in tcp_xmit_retransmit_queue()
ANDROID: base-cfg: drop SECCOMP_FILTER config
UPSTREAM: [media] xc2028: unlock on error in xc2028_set_config()
UPSTREAM: [media] xc2028: avoid use after free
ANDROID: base-cfg: enable SECCOMP config
ANDROID: rcu_sync: Export rcu_sync_lockdep_assert
RFC: FROMLIST: cgroup: reduce read locked section of cgroup_threadgroup_rwsem during fork
RFC: FROMLIST: cgroup: avoid synchronize_sched() in __cgroup_procs_write()
RFC: FROMLIST: locking/percpu-rwsem: Optimize readers and reduce global impact
net: ipv6: Fix ping to link-local addresses.
ipv6: fix endianness error in icmpv6_err
ANDROID: dm: android-verity: Allow android-verity to be compiled as an independent module
backporting: a brief introduce of backported feautures on 4.4
Linux 4.4.20
sysfs: correctly handle read offset on PREALLOC attrs
hwmon: (iio_hwmon) fix memory leak in name attribute
ALSA: line6: Fix POD sysfs attributes segfault
ALSA: line6: Give up on the lock while URBs are released.
ALSA: line6: Remove double line6_pcm_release() after failed acquire.
ACPI / SRAT: fix SRAT parsing order with both LAPIC and X2APIC present
ACPI / sysfs: fix error code in get_status()
ACPI / drivers: replace acpi_probe_lock spinlock with mutex
ACPI / drivers: fix typo in ACPI_DECLARE_PROBE_ENTRY macro
staging: comedi: ni_mio_common: fix wrong insn_write handler
staging: comedi: ni_mio_common: fix AO inttrig backwards compatibility
staging: comedi: comedi_test: fix timer race conditions
staging: comedi: daqboard2000: bug fix board type matching code
USB: serial: option: add WeTelecom 0x6802 and 0x6803 products
USB: serial: option: add WeTelecom WM-D200
USB: serial: mos7840: fix non-atomic allocation in write path
USB: serial: mos7720: fix non-atomic allocation in write path
USB: fix typo in wMaxPacketSize validation
usb: chipidea: udc: don't touch DP when controller is in host mode
USB: avoid left shift by -1
dmaengine: usb-dmac: check CHCR.DE bit in usb_dmac_isr_channel()
crypto: qat - fix aes-xts key sizes
crypto: nx - off by one bug in nx_of_update_msc()
Input: i8042 - set up shared ps2_cmd_mutex for AUX ports
Input: i8042 - break load dependency between atkbd/psmouse and i8042
Input: tegra-kbc - fix inverted reset logic
btrfs: properly track when rescan worker is running
btrfs: waiting on qgroup rescan should not always be interruptible
fs/seq_file: fix out-of-bounds read
gpio: Fix OF build problem on UM
usb: renesas_usbhs: gadget: fix return value check in usbhs_mod_gadget_probe()
megaraid_sas: Fix probing cards without io port
mpt3sas: Fix resume on WarpDrive flash cards
cdc-acm: fix wrong pipe type on rx interrupt xfers
i2c: cros-ec-tunnel: Fix usage of cros_ec_cmd_xfer()
mfd: cros_ec: Add cros_ec_cmd_xfer_status() helper
aacraid: Check size values after double-fetch from user
ARC: Elide redundant setup of DMA callbacks
ARC: Call trace_hardirqs_on() before enabling irqs
ARC: use correct offset in pt_regs for saving/restoring user mode r25
ARC: build: Better way to detect ISA compatible toolchain
drm/i915: fix aliasing_ppgtt leak
drm/amdgpu: record error code when ring test failed
drm/amd/amdgpu: sdma resume fail during S4 on CI
drm/amdgpu: skip TV/CV in display parsing
drm/amdgpu: avoid a possible array overflow
drm/amdgpu: fix amdgpu_move_blit on 32bit systems
drm/amdgpu: Change GART offset to 64-bit
iio: fix sched WARNING "do not call blocking ops when !TASK_RUNNING"
sched/nohz: Fix affine unpinned timers mess
sched/cputime: Fix NO_HZ_FULL getrusage() monotonicity regression
of: fix reference counting in of_graph_get_endpoint_by_regs
arm64: dts: rockchip: add reset saradc node for rk3368 SoCs
mac80211: fix purging multicast PS buffer queue
s390/dasd: fix hanging device after clear subchannel
EDAC: Increment correct counter in edac_inc_ue_error()
pinctrl/amd: Remove the default de-bounce time
iommu/arm-smmu: Don't BUG() if we find aborting STEs with disable_bypass
iommu/arm-smmu: Fix CMDQ error handling
iommu/dma: Don't put uninitialised IOVA domains
xhci: Make sure xhci handles USB_SPEED_SUPER_PLUS devices.
USB: serial: ftdi_sio: add PIDs for Ivium Technologies devices
USB: serial: ftdi_sio: add device ID for WICED USB UART dev board
USB: serial: option: add support for Telit LE920A4
USB: serial: option: add D-Link DWM-156/A3
USB: serial: fix memleak in driver-registration error path
xhci: don't dereference a xhci member after removing xhci
usb: xhci: Fix panic if disconnect
xhci: always handle "Command Ring Stopped" events
usb/gadget: fix gadgetfs aio support.
usb: gadget: fsl_qe_udc: off by one in setup_received_handle()
USB: validate wMaxPacketValue entries in endpoint descriptors
usb: renesas_usbhs: Use dmac only if the pipe type is bulk
usb: renesas_usbhs: clear the BRDYSTS in usbhsg_ep_enable()
USB: hub: change the locking in hub_activate
USB: hub: fix up early-exit pathway in hub_activate
usb: hub: Fix unbalanced reference count/memory leak/deadlocks
usb: define USB_SPEED_SUPER_PLUS speed for SuperSpeedPlus USB3.1 devices
usb: dwc3: gadget: increment request->actual once
usb: dwc3: pci: add Intel Kabylake PCI ID
usb: misc: usbtest: add fix for driver hang
usb: ehci: change order of register cleanup during shutdown
crypto: caam - defer aead_set_sh_desc in case of zero authsize
crypto: caam - fix echainiv(authenc) encrypt shared descriptor
crypto: caam - fix non-hmac hashes
genirq/msi: Make sure PCI MSIs are activated early
genirq/msi: Remove unused MSI_FLAG_IDENTITY_MAP
um: Don't discard .text.exit section
ACPI / CPPC: Prevent cpc_desc_ptr points to the invalid data
ACPI: CPPC: Return error if _CPC is invalid on a CPU
mmc: sdhci-acpi: Reduce Baytrail eMMC/SD/SDIO hangs
PCI: Limit config space size for Netronome NFP4000
PCI: Add Netronome NFP4000 PF device ID
PCI: Limit config space size for Netronome NFP6000 family
PCI: Add Netronome vendor and device IDs
PCI: Support PCIe devices with short cfg_size
NVMe: Don't unmap controller registers on reset
ALSA: hda - Manage power well properly for resume
libnvdimm, nd_blk: mask off reserved status bits
perf intel-pt: Fix occasional decoding errors when tracing system-wide
vfio/pci: Fix NULL pointer oops in error interrupt setup handling
virtio: fix memory leak in virtqueue_add()
parisc: Fix order of EREFUSED define in errno.h
arm64: Define AT_VECTOR_SIZE_ARCH for ARCH_DLINFO
ALSA: usb-audio: Add quirk for ELP HD USB Camera
ALSA: usb-audio: Add a sample rate quirk for Creative Live! Cam Socialize HD (VF0610)
powerpc/eeh: eeh_pci_enable(): fix checking of post-request state
SUNRPC: allow for upcalls for same uid but different gss service
SUNRPC: Handle EADDRNOTAVAIL on connection failures
tools/testing/nvdimm: fix SIGTERM vs hotplug crash
uprobes/x86: Fix RIP-relative handling of EVEX-encoded instructions
x86/mm: Disable preemption during CR3 read+write
hugetlb: fix nr_pmds accounting with shared page tables
mm: SLUB hardened usercopy support
mm: SLAB hardened usercopy support
s390/uaccess: Enable hardened usercopy
sparc/uaccess: Enable hardened usercopy
powerpc/uaccess: Enable hardened usercopy
ia64/uaccess: Enable hardened usercopy
arm64/uaccess: Enable hardened usercopy
ARM: uaccess: Enable hardened usercopy
x86/uaccess: Enable hardened usercopy
x86: remove more uaccess_32.h complexity
x86: remove pointless uaccess_32.h complexity
x86: fix SMAP in 32-bit environments
Use the new batched user accesses in generic user string handling
Add 'unsafe' user access functions for batched accesses
x86: reorganize SMAP handling in user space accesses
mm: Hardened usercopy
mm: Implement stack frame object validation
mm: Add is_migrate_cma_page
Linux 4.4.19
Documentation/module-signing.txt: Note need for version info if reusing a key
module: Invalidate signatures on force-loaded modules
dm flakey: error READ bios during the down_interval
rtc: s3c: Add s3c_rtc_{enable/disable}_clk in s3c_rtc_setfreq()
lpfc: fix oops in lpfc_sli4_scmd_to_wqidx_distr() from lpfc_send_taskmgmt()
ACPI / EC: Work around method reentrancy limit in ACPICA for _Qxx
x86/platform/intel_mid_pci: Rework IRQ0 workaround
PCI: Mark Atheros AR9485 and QCA9882 to avoid bus reset
MIPS: hpet: Increase HPET_MIN_PROG_DELTA and decrease HPET_MIN_CYCLES
MIPS: Don't register r4k sched clock when CPUFREQ enabled
MIPS: mm: Fix definition of R6 cache instruction
SUNRPC: Don't allocate a full sockaddr_storage for tracing
Input: elan_i2c - properly wake up touchpad on ASUS laptops
target: Fix ordered task CHECK_CONDITION early exception handling
target: Fix max_unmap_lba_count calc overflow
target: Fix race between iscsi-target connection shutdown + ABORT_TASK
target: Fix missing complete during ABORT_TASK + CMD_T_FABRIC_STOP
target: Fix ordered task target_setup_cmd_from_cdb exception hang
iscsi-target: Fix panic when adding second TCP connection to iSCSI session
ubi: Fix race condition between ubi device creation and udev
ubi: Fix early logging
ubi: Make volume resize power cut aware
of: fix memory leak related to safe_name()
IB/mlx4: Fix memory leak if QP creation failed
IB/mlx4: Fix error flow when sending mads under SRIOV
IB/mlx4: Fix the SQ size of an RC QP
IB/IWPM: Fix a potential skb leak
IB/IPoIB: Don't update neigh validity for unresolved entries
IB/SA: Use correct free function
IB/mlx5: Return PORT_ERR in Active to Initializing tranisition
IB/mlx5: Fix post send fence logic
IB/mlx5: Fix entries check in mlx5_ib_resize_cq
IB/mlx5: Fix returned values of query QP
IB/mlx5: Fix entries checks in mlx5_ib_create_cq
IB/mlx5: Fix MODIFY_QP command input structure
ALSA: hda - Fix headset mic detection problem for two dell machines
ALSA: hda: add AMD Bonaire AZ PCI ID with proper driver caps
ALSA: hda/realtek - Can't adjust speaker's volume on a Dell AIO
ALSA: hda: Fix krealloc() with __GFP_ZERO usage
mm/hugetlb: avoid soft lockup in set_max_huge_pages()
mtd: nand: fix bug writing 1 byte less than page size
block: fix bdi vs gendisk lifetime mismatch
block: add missing group association in bio-cloning functions
metag: Fix __cmpxchg_u32 asm constraint for CMP
ftrace/recordmcount: Work around for addition of metag magic but not relocations
balloon: check the number of available pages in leak balloon
drm/i915/dp: Revert "drm/i915/dp: fall back to 18 bpp when sink capability is unknown"
drm/i915: Never fully mask the the EI up rps interrupt on SNB/IVB
drm/edid: Add 6 bpc quirk for display AEO model 0.
drm: Restore double clflush on the last partial cacheline
drm/nouveau/fbcon: fix font width not divisible by 8
drm/nouveau/gr/nv3x: fix instobj write offsets in gr setup
drm/nouveau: check for supported chipset before booting fbdev off the hw
drm/radeon: support backlight control for UNIPHY3
drm/radeon: fix firmware info version checks
drm/radeon: Poll for both connect/disconnect on analog connectors
drm/radeon: add a delay after ATPX dGPU power off
drm/amdgpu/gmc7: add missing mullins case
drm/amdgpu: fix firmware info version checks
drm/amdgpu: Disable RPM helpers while reprobing connectors on resume
drm/amdgpu: support backlight control for UNIPHY3
drm/amdgpu: Poll for both connect/disconnect on analog connectors
drm/amdgpu: add a delay after ATPX dGPU power off
w1:omap_hdq: fix regression
netlabel: add address family checks to netlbl_{sock,req}_delattr()
ARM: dts: sunxi: Add a startup delay for fixed regulator enabled phys
audit: fix a double fetch in audit_log_single_execve_arg()
iommu/amd: Update Alias-DTE in update_device_table()
iommu/amd: Init unity mappings only for dma_ops domains
iommu/amd: Handle IOMMU_DOMAIN_DMA in ops->domain_free call-back
iommu/vt-d: Return error code in domain_context_mapping_one()
iommu/exynos: Suppress unbinding to prevent system failure
drm/i915: Don't complain about lack of ACPI video bios
nfsd: don't return an unhashed lock stateid after taking mutex
nfsd: Fix race between FREE_STATEID and LOCK
nfs: don't create zero-length requests
MIPS: KVM: Propagate kseg0/mapped tlb fault errors
MIPS: KVM: Fix gfn range check in kseg0 tlb faults
MIPS: KVM: Add missing gfn range check
MIPS: KVM: Fix mapped fault broken commpage handling
random: add interrupt callback to VMBus IRQ handler
random: print a warning for the first ten uninitialized random users
random: initialize the non-blocking pool via add_hwgenerator_randomness()
CIFS: Fix a possible invalid memory access in smb2_query_symlink()
cifs: fix crash due to race in hmac(md5) handling
cifs: Check for existing directory when opening file with O_CREAT
fs/cifs: make share unaccessible at root level mountable
jbd2: make journal y2038 safe
ARC: mm: don't loose PTE_SPECIAL in pte_modify()
remoteproc: Fix potential race condition in rproc_add
ovl: disallow overlayfs as upperdir
HID: uhid: fix timeout when probe races with IO
EDAC: Correct channel count limit
Bluetooth: Fix l2cap_sock_setsockopt() with optname BT_RCVMTU
spi: pxa2xx: Clear all RFT bits in reset_sccr1() on Intel Quark
i2c: efm32: fix a failure path in efm32_i2c_probe()
s5p-mfc: Add release callback for memory region devs
s5p-mfc: Set device name for reserved memory region devs
hp-wmi: Fix wifi cannot be hard-unblocked
dm: set DMF_SUSPENDED* _before_ clearing DMF_NOFLUSH_SUSPENDING
sur40: fix occasional oopses on device close
sur40: lower poll interval to fix occasional FPS drops to ~56 FPS
Fix RC5 decoding with Fintek CIR chipset
vb2: core: Skip planes array verification if pb is NULL
videobuf2-v4l2: Verify planes array in buffer dequeueing
media: dvb_ringbuffer: Add memory barriers
media: usbtv: prevent access to free'd resources
mfd: qcom_rpm: Parametrize also ack selector size
mfd: qcom_rpm: Fix offset error for msm8660
intel_pstate: Fix MSR_CONFIG_TDP_x addressing in core_get_max_pstate()
s390/cio: allow to reset channel measurement block
KVM: nVMX: Fix memory corruption when using VMCS shadowing
KVM: VMX: handle PML full VMEXIT that occurs during event delivery
KVM: MTRR: fix kvm_mtrr_check_gfn_range_consistency page fault
KVM: PPC: Book3S HV: Save/restore TM state in H_CEDE
KVM: PPC: Book3S HV: Pull out TM state save/restore into separate procedures
arm64: mm: avoid fdt_check_header() before the FDT is fully mapped
arm64: dts: rockchip: fixes the gic400 2nd region size for rk3368
pinctrl: cherryview: prevent concurrent access to GPIO controllers
Bluetooth: hci_intel: Fix null gpio desc pointer dereference
gpio: intel-mid: Remove potentially harmful code
gpio: pca953x: Fix NBANK calculation for PCA9536
tty/serial: atmel: fix RS485 half duplex with DMA
serial: samsung: Fix ERR pointer dereference on deferred probe
tty: serial: msm: Don't read off end of tx fifo
arm64: Fix incorrect per-cpu usage for boot CPU
arm64: debug: unmask PSTATE.D earlier
arm64: kernel: Save and restore UAO and addr_limit on exception entry
USB: usbfs: fix potential infoleak in devio
usb: renesas_usbhs: fix NULL pointer dereference in xfer_work()
USB: serial: option: add support for Telit LE910 PID 0x1206
usb: dwc3: fix for the isoc transfer EP_BUSY flag
usb: quirks: Add no-lpm quirk for Elan
usb: renesas_usbhs: protect the CFIFOSEL setting in usbhsg_ep_enable()
usb: f_fs: off by one bug in _ffs_func_bind()
usb: gadget: avoid exposing kernel stack
UPSTREAM: usb: gadget: configfs: add mutex lock before unregister gadget
ANDROID: dm-verity: adopt changes made to dm callbacks
UPSTREAM: ecryptfs: fix handling of directory opening
ANDROID: net: core: fix UID-based routing
ANDROID: net: fib: remove duplicate assignment
FROMLIST: proc: Fix timerslack_ns CAP_SYS_NICE check when adjusting self
ANDROID: dm verity fec: pack the fec_header structure
ANDROID: dm: android-verity: Verify header before fetching table
ANDROID: dm: allow adb disable-verity only in userdebug
ANDROID: dm: mount as linear target if eng build
ANDROID: dm: use default verity public key
ANDROID: dm: fix signature verification flag
ANDROID: dm: use name_to_dev_t
ANDROID: dm: rename dm-linear methods for dm-android-verity
ANDROID: dm: Minor cleanup
ANDROID: dm: Mounting root as linear device when verity disabled
ANDROID: dm-android-verity: Rebase on top of 4.1
ANDROID: dm: Add android verity target
ANDROID: dm: fix dm_substitute_devices()
ANDROID: dm: Rebase on top of 4.1
CHROMIUM: dm: boot time specification of dm=
Implement memory_state_time, used by qcom,cpubw
Revert "panic: Add board ID to panic output"
usb: gadget: f_accessory: remove duplicate endpoint alloc
BACKPORT: brcmfmac: defer DPC processing during probe
FROMLIST: proc: Add LSM hook checks to /proc/<tid>/timerslack_ns
FROMLIST: proc: Relax /proc/<tid>/timerslack_ns capability requirements
UPSTREAM: ppp: defer netns reference release for ppp channel
cpuset: Add allow_attach hook for cpusets on android.
UPSTREAM: KEYS: Fix ASN.1 indefinite length object parsing
ANDROID: sdcardfs: fix itnull.cocci warnings
android-recommended.cfg: enable fstack-protector-strong
Linux 4.4.18
mm: memcontrol: fix memcg id ref counter on swap charge move
mm: memcontrol: fix swap counter leak on swapout from offline cgroup
mm: memcontrol: fix cgroup creation failure after many small jobs
ext4: fix reference counting bug on block allocation error
ext4: short-cut orphan cleanup on error
ext4: validate s_reserved_gdt_blocks on mount
ext4: don't call ext4_should_journal_data() on the journal inode
ext4: fix deadlock during page writeback
ext4: check for extents that wrap around
crypto: scatterwalk - Fix test in scatterwalk_done
crypto: gcm - Filter out async ghash if necessary
fs/dcache.c: avoid soft-lockup in dput()
fuse: fix wrong assignment of ->flags in fuse_send_init()
fuse: fuse_flush must check mapping->flags for errors
fuse: fsync() did not return IO errors
sysv, ipc: fix security-layer leaking
block: fix use-after-free in seq file
x86/syscalls/64: Add compat_sys_keyctl for 32-bit userspace
drm/i915: Pretend cursor is always on for ILK-style WM calculations (v2)
x86/mm/pat: Fix BUG_ON() in mmap_mem() on QEMU/i386
x86/pat: Document the PAT initialization sequence
x86/xen, pat: Remove PAT table init code from Xen
x86/mtrr: Fix PAT init handling when MTRR is disabled
x86/mtrr: Fix Xorg crashes in Qemu sessions
x86/mm/pat: Replace cpu_has_pat with boot_cpu_has()
x86/mm/pat: Add pat_disable() interface
x86/mm/pat: Add support of non-default PAT MSR setting
devpts: clean up interface to pty drivers
random: strengthen input validation for RNDADDTOENTCNT
apparmor: fix ref count leak when profile sha1 hash is read
Revert "s390/kdump: Clear subchannel ID to signal non-CCW/SCSI IPL"
KEYS: 64-bit MIPS needs to use compat_sys_keyctl for 32-bit userspace
arm: oabi compat: add missing access checks
cdc_ncm: do not call usbnet_link_change from cdc_ncm_bind
i2c: i801: Allow ACPI SystemIO OpRegion to conflict with PCI BAR
x86/mm/32: Enable full randomization on i386 and X86_32
HID: sony: do not bail out when the sixaxis refuses the output report
PNP: Add Broadwell to Intel MCH size workaround
PNP: Add Haswell-ULT to Intel MCH size workaround
scsi: ignore errors from scsi_dh_add_device()
ipath: Restrict use of the write() interface
tcp: consider recv buf for the initial window scale
qed: Fix setting/clearing bit in completion bitmap
net/irda: fix NULL pointer dereference on memory allocation failure
net: bgmac: Fix infinite loop in bgmac_dma_tx_add()
bonding: set carrier off for devices created through netlink
ipv4: reject RTNH_F_DEAD and RTNH_F_LINKDOWN from user space
tcp: enable per-socket rate limiting of all 'challenge acks'
tcp: make challenge acks less predictable
arm64: relocatable: suppress R_AARCH64_ABS64 relocations in vmlinux
arm64: vmlinux.lds: make __rela_offset and __dynsym_offset ABSOLUTE
Linux 4.4.17
vfs: fix deadlock in file_remove_privs() on overlayfs
intel_th: Fix a deadlock in modprobing
intel_th: pci: Add Kaby Lake PCH-H support
net: mvneta: set real interrupt per packet for tx_done
libceph: apply new_state before new_up_client on incrementals
libata: LITE-ON CX1-JB256-HP needs lower max_sectors
i2c: mux: reg: wrong condition checked for of_address_to_resource return value
posix_cpu_timer: Exit early when process has been reaped
media: fix airspy usb probe error path
ipr: Clear interrupt on croc/crocodile when running with LSI
SCSI: fix new bug in scsi_dev_info_list string matching
RDS: fix rds_tcp_init() error path
can: fix oops caused by wrong rtnl dellink usage
can: fix handling of unmodifiable configuration options fix
can: c_can: Update D_CAN TX and RX functions to 32 bit - fix Altera Cyclone access
can: at91_can: RX queue could get stuck at high bus load
perf/x86: fix PEBS issues on Intel Atom/Core2
ovl: handle ATTR_KILL*
sched/fair: Fix effective_load() to consistently use smoothed load
mmc: block: fix packed command header endianness
block: fix use-after-free in sys_ioprio_get()
qeth: delete napi struct when removing a qeth device
platform/chrome: cros_ec_dev - double fetch bug in ioctl
clk: rockchip: initialize flags of clk_init_data in mmc-phase clock
spi: sun4i: fix FIFO limit
spi: sunxi: fix transfer timeout
namespace: update event counter when umounting a deleted dentry
9p: use file_dentry()
ext4: verify extent header depth
ecryptfs: don't allow mmap when the lower fs doesn't support it
Revert "ecryptfs: forbid opening files without mmap handler"
locks: use file_inode()
power_supply: power_supply_read_temp only if use_cnt > 0
cgroup: set css->id to -1 during init
pinctrl: imx: Do not treat a PIN without MUX register as an error
pinctrl: single: Fix missing flush of posted write for a wakeirq
pvclock: Add CPU barriers to get correct version value
Input: tsc200x - report proper input_dev name
Input: xpad - validate USB endpoint count during probe
Input: wacom_w8001 - w8001_MAX_LENGTH should be 13
Input: xpad - fix oops when attaching an unknown Xbox One gamepad
Input: elantech - add more IC body types to the list
Input: vmmouse - remove port reservation
ALSA: timer: Fix leak in events via snd_timer_user_tinterrupt
ALSA: timer: Fix leak in events via snd_timer_user_ccallback
ALSA: timer: Fix leak in SNDRV_TIMER_IOCTL_PARAMS
xenbus: don't bail early from xenbus_dev_request_and_reply()
xenbus: don't BUG() on user mode induced condition
xen/pciback: Fix conf_space read/write overlap check.
ARC: unwind: ensure that .debug_frame is generated (vs. .eh_frame)
arc: unwind: warn only once if DW2_UNWIND is disabled
kernel/sysrq, watchdog, sched/core: Reset watchdog on all CPUs while processing sysrq-w
pps: do not crash when failed to register
vmlinux.lds: account for destructor sections
mm, meminit: ensure node is online before checking whether pages are uninitialised
mm, meminit: always return a valid node from early_pfn_to_nid
mm, compaction: prevent VM_BUG_ON when terminating freeing scanner
fs/nilfs2: fix potential underflow in call to crc32_le
mm, compaction: abort free scanner if split fails
mm, sl[au]b: add __GFP_ATOMIC to the GFP reclaim mask
dmaengine: at_xdmac: double FIFO flush needed to compute residue
dmaengine: at_xdmac: fix residue corruption
dmaengine: at_xdmac: align descriptors on 64 bits
x86/quirks: Add early quirk to reset Apple AirPort card
x86/quirks: Reintroduce scanning of secondary buses
x86/quirks: Apply nvidia_bugs quirk only on root bus
USB: OHCI: Don't mark EDs as ED_OPER if scheduling fails
Conflicts:
arch/arm/kernel/topology.c
arch/arm64/include/asm/arch_gicv3.h
arch/arm64/kernel/topology.c
block/bio.c
drivers/cpufreq/Kconfig
drivers/md/Makefile
drivers/media/dvb-core/dvb_ringbuffer.c
drivers/media/tuners/tuner-xc2028.c
drivers/misc/Kconfig
drivers/misc/Makefile
drivers/mmc/core/host.c
drivers/scsi/ufs/ufshcd.c
drivers/scsi/ufs/ufshcd.h
drivers/usb/dwc3/gadget.c
drivers/usb/gadget/configfs.c
fs/ecryptfs/file.c
include/linux/mmc/core.h
include/linux/mmc/host.h
include/linux/mmzone.h
include/linux/sched.h
include/linux/sched/sysctl.h
include/trace/events/power.h
include/trace/events/sched.h
init/Kconfig
kernel/cpuset.c
kernel/exit.c
kernel/sched/Makefile
kernel/sched/core.c
kernel/sched/cputime.c
kernel/sched/fair.c
kernel/sched/features.h
kernel/sched/rt.c
kernel/sched/sched.h
kernel/sched/stop_task.c
kernel/sched/tune.c
lib/Kconfig.debug
mm/Makefile
mm/vmstat.c
Change-Id: I243a43231ca56a6362076fa6301827e1b0493be5
Signed-off-by: Runmin Wang <runminw@codeaurora.org>
2016-12-12 15:32:39 -08:00
|
|
|
if (energy_aware())
|
|
|
|
pr_warn("schedtune: no energy model data\n");
|
2016-01-12 18:12:13 +00:00
|
|
|
goto nodata;
|
|
|
|
}
|
|
|
|
|
|
|
|
sg = sd->groups;
|
|
|
|
do {
|
|
|
|
schedtune_add_cluster_nrg(sd, sg, ste);
|
|
|
|
} while (sg = sg->next, sg != sd->groups);
|
|
|
|
|
|
|
|
rcu_read_unlock();
|
|
|
|
|
|
|
|
pr_info("schedtune: %-17s min_pwr: %5lu max_pwr: %5lu\n",
|
|
|
|
"SYSTEM", ste->min_power, ste->max_power);
|
|
|
|
|
|
|
|
/* Compute normalization constants */
|
|
|
|
delta_pwr = ste->max_power - ste->min_power;
|
|
|
|
ste->rdiv = reciprocal_value(delta_pwr);
|
|
|
|
pr_info("schedtune: using normalization constants mul: %u sh1: %u sh2: %u\n",
|
|
|
|
ste->rdiv.m, ste->rdiv.sh1, ste->rdiv.sh2);
|
|
|
|
|
|
|
|
schedtune_test_nrg(delta_pwr);
|
2016-07-29 15:19:41 +01:00
|
|
|
|
|
|
|
#ifdef CONFIG_CGROUP_SCHEDTUNE
|
|
|
|
schedtune_init_cgroups();
|
|
|
|
#else
|
|
|
|
pr_info("schedtune: configured to support global boosting only\n");
|
|
|
|
#endif
|
|
|
|
|
2016-10-13 17:31:24 +01:00
|
|
|
schedtune_spc_rdiv = reciprocal_value(100);
|
|
|
|
|
2015-06-22 18:11:44 +01:00
|
|
|
return 0;
|
2016-01-12 18:12:13 +00:00
|
|
|
|
|
|
|
nodata:
|
2016-10-13 17:34:47 +01:00
|
|
|
pr_warning("schedtune: disabled!\n");
|
2016-01-12 18:12:13 +00:00
|
|
|
rcu_read_unlock();
|
|
|
|
return -EINVAL;
|
2015-06-22 18:11:44 +01:00
|
|
|
}
|
2016-07-29 16:09:03 +01:00
|
|
|
postcore_initcall(schedtune_init);
|