From 0fb5a62a228abfec36b5d8602ac8ba12ea30a544 Mon Sep 17 00:00:00 2001 From: Mahesh Sivasubramanian Date: Tue, 5 Jan 2016 09:21:15 -0700 Subject: [PATCH 1/4] lpm-levels: Use residency instead of power and energy values The driver looks through all the enabled modes and does energy calculation runtime before it selects a low power mode to enter. With residency values, the cpu/cluster can choose the low power mode as soon as it comes across the low power mode which meets the residency constraint. Change-Id: I71235c212b26c3688a8ac5426942f2b88b681d47 Signed-off-by: Mahesh Sivasubramanian --- drivers/cpuidle/lpm-levels-of.c | 168 +++++++++++++++++++++++++++++--- drivers/cpuidle/lpm-levels.c | 47 ++------- drivers/cpuidle/lpm-levels.h | 7 +- 3 files changed, 165 insertions(+), 57 deletions(-) diff --git a/drivers/cpuidle/lpm-levels-of.c b/drivers/cpuidle/lpm-levels-of.c index f4ae70ac9315..29aca0e50df3 100644 --- a/drivers/cpuidle/lpm-levels-of.c +++ b/drivers/cpuidle/lpm-levels-of.c @@ -38,34 +38,96 @@ static const struct lpm_type_str lpm_types[] = { {SUSPEND, "suspend_enabled"}, }; +static DEFINE_PER_CPU(uint32_t *, max_residency); static struct lpm_level_avail *cpu_level_available[NR_CPUS]; static struct platform_device *lpm_pdev; -static void *get_avail_val(struct kobject *kobj, struct kobj_attribute *attr) +static void *get_enabled_ptr(struct kobj_attribute *attr, + struct lpm_level_avail *avail) { void *arg = NULL; - struct lpm_level_avail *avail = NULL; - if (!strcmp(attr->attr.name, lpm_types[IDLE].str)) { - avail = container_of(attr, struct lpm_level_avail, - idle_enabled_attr); + if (!strcmp(attr->attr.name, lpm_types[IDLE].str)) arg = (void *) &avail->idle_enabled; - } else if (!strcmp(attr->attr.name, lpm_types[SUSPEND].str)) { - avail = container_of(attr, struct lpm_level_avail, - suspend_enabled_attr); + else if (!strcmp(attr->attr.name, lpm_types[SUSPEND].str)) arg = (void *) &avail->suspend_enabled; - } return arg; } +static struct lpm_level_avail *get_avail_ptr(struct kobject *kobj, + struct kobj_attribute *attr) +{ + struct lpm_level_avail *avail = NULL; + + if (!strcmp(attr->attr.name, lpm_types[IDLE].str)) + avail = container_of(attr, struct lpm_level_avail, + idle_enabled_attr); + else if (!strcmp(attr->attr.name, lpm_types[SUSPEND].str)) + avail = container_of(attr, struct lpm_level_avail, + suspend_enabled_attr); + + return avail; +} + +static void set_optimum_cpu_residency(struct lpm_cpu *cpu, int cpu_id, + bool probe_time) +{ + int i, j; + bool mode_avail; + uint32_t *residency = per_cpu(max_residency, cpu_id); + + for (i = 0; i < cpu->nlevels; i++) { + struct power_params *pwr = &cpu->levels[i].pwr; + + residency[i] = ~0; + for (j = i + 1; j < cpu->nlevels; j++) { + mode_avail = probe_time || + lpm_cpu_mode_allow(cpu_id, j, true); + + if (mode_avail && + (residency[i] > pwr->residencies[j]) && + (pwr->residencies[j] != 0)) + residency[i] = pwr->residencies[j]; + } + } +} + +static void set_optimum_cluster_residency(struct lpm_cluster *cluster, + bool probe_time) +{ + int i, j; + bool mode_avail; + + for (i = 0; i < cluster->nlevels; i++) { + struct power_params *pwr = &cluster->levels[i].pwr; + + pwr->max_residency = ~0; + for (j = 0; j < cluster->nlevels; j++) { + if (i >= j) + mode_avail = probe_time || + lpm_cluster_mode_allow(cluster, i, + true); + if (mode_avail && + (pwr->max_residency > pwr->residencies[j]) && + (pwr->residencies[j] != 0)) + pwr->max_residency = pwr->residencies[j]; + } + } +} + +uint32_t *get_per_cpu_max_residency(int cpu) +{ + return 
per_cpu(max_residency, cpu); +} + ssize_t lpm_enable_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { int ret = 0; struct kernel_param kp; - kp.arg = get_avail_val(kobj, attr); + kp.arg = get_enabled_ptr(attr, get_avail_ptr(kobj, attr)); ret = param_get_bool(buf, &kp); if (ret > 0) { strlcat(buf, "\n", PAGE_SIZE); @@ -80,15 +142,25 @@ ssize_t lpm_enable_store(struct kobject *kobj, struct kobj_attribute *attr, { int ret = 0; struct kernel_param kp; + struct lpm_level_avail *avail; - kp.arg = get_avail_val(kobj, attr); + avail = get_avail_ptr(kobj, attr); + if (WARN_ON(!avail)) + return -EINVAL; + kp.arg = get_enabled_ptr(attr, avail); ret = param_set_bool(buf, &kp); + if (avail->cpu_node) + set_optimum_cpu_residency(avail->data, avail->idx, false); + else + set_optimum_cluster_residency(avail->data, false); + return ret ? ret : len; } static int create_lvl_avail_nodes(const char *name, - struct kobject *parent, struct lpm_level_avail *avail) + struct kobject *parent, struct lpm_level_avail *avail, + void *data, int index, bool cpu_node) { struct attribute_group *attr_group = NULL; struct attribute **attr = NULL; @@ -139,6 +211,9 @@ static int create_lvl_avail_nodes(const char *name, avail->idle_enabled = true; avail->suspend_enabled = true; avail->kobj = kobj; + avail->data = data; + avail->idx = index; + avail->cpu_node = cpu_node; return ret; @@ -181,7 +256,8 @@ static int create_cpu_lvl_nodes(struct lpm_cluster *p, struct kobject *parent) for (i = 0; i < p->cpu->nlevels; i++) { ret = create_lvl_avail_nodes(p->cpu->levels[i].name, - cpu_kobj[cpu_idx], &level_list[i]); + cpu_kobj[cpu_idx], &level_list[i], + (void *)p->cpu, cpu, true); if (ret) goto release_kobj; } @@ -215,7 +291,8 @@ int create_cluster_lvl_nodes(struct lpm_cluster *p, struct kobject *kobj) for (i = 0; i < p->nlevels; i++) { ret = create_lvl_avail_nodes(p->levels[i].level_name, - cluster_kobj, &p->levels[i].available); + cluster_kobj, &p->levels[i].available, + (void *)p, 0, false); if (ret) return ret; } @@ -421,6 +498,9 @@ static int parse_power_params(struct device_node *node, key = "qcom,time-overhead"; ret = of_property_read_u32(node, key, &pwr->time_overhead_us); + if (ret) + goto fail; + fail: if (ret) pr_err("%s(): %s Error reading %s\n", __func__, node->name, @@ -615,11 +695,31 @@ static int get_cpumask_for_node(struct device_node *node, struct cpumask *mask) return 0; } +static int calculate_residency(struct power_params *base_pwr, + struct power_params *next_pwr) +{ + int32_t residency = (int32_t)(next_pwr->energy_overhead - + base_pwr->energy_overhead) - + ((int32_t)(next_pwr->ss_power * next_pwr->time_overhead_us) + - (int32_t)(base_pwr->ss_power * base_pwr->time_overhead_us)); + + residency /= (int32_t)(base_pwr->ss_power - next_pwr->ss_power); + + if (residency < 0) { + __WARN_printf("%s: Incorrect power attributes for LPM\n", + __func__); + return next_pwr->time_overhead_us; + } + + return residency < next_pwr->time_overhead_us ? 
+ next_pwr->time_overhead_us : residency; +} + static int parse_cpu_levels(struct device_node *node, struct lpm_cluster *c) { struct device_node *n; int ret = -ENOMEM; - int i; + int i, j; char *key; c->cpu = devm_kzalloc(&lpm_pdev->dev, sizeof(*c->cpu), GFP_KERNEL); @@ -676,6 +776,22 @@ static int parse_cpu_levels(struct device_node *node, struct lpm_cluster *c) else if (ret) goto failed; } + for (i = 0; i < c->cpu->nlevels; i++) { + for (j = 0; j < c->cpu->nlevels; j++) { + if (i >= j) { + c->cpu->levels[i].pwr.residencies[j] = 0; + continue; + } + + c->cpu->levels[i].pwr.residencies[j] = + calculate_residency(&c->cpu->levels[i].pwr, + &c->cpu->levels[j].pwr); + + pr_err("%s: idx %d %u\n", __func__, j, + c->cpu->levels[i].pwr.residencies[j]); + } + } + return 0; failed: for (i = 0; i < c->cpu->nlevels; i++) { @@ -732,6 +848,7 @@ struct lpm_cluster *parse_cluster(struct device_node *node, struct device_node *n; char *key; int ret = 0; + int i, j; c = devm_kzalloc(&lpm_pdev->dev, sizeof(*c), GFP_KERNEL); if (!c) @@ -789,6 +906,16 @@ struct lpm_cluster *parse_cluster(struct device_node *node, goto failed_parse_cluster; c->aff_level = 1; + + for_each_cpu(i, &c->child_cpus) { + per_cpu(max_residency, i) = devm_kzalloc( + &lpm_pdev->dev, + sizeof(uint32_t) * c->cpu->nlevels, + GFP_KERNEL); + if (!per_cpu(max_residency, i)) + return ERR_PTR(-ENOMEM); + set_optimum_cpu_residency(c->cpu, i, true); + } } } @@ -797,6 +924,17 @@ struct lpm_cluster *parse_cluster(struct device_node *node, else c->last_level = c->nlevels-1; + for (i = 0; i < c->nlevels; i++) { + for (j = 0; j < c->nlevels; j++) { + if (i >= j) { + c->levels[i].pwr.residencies[j] = 0; + continue; + } + c->levels[i].pwr.residencies[j] = calculate_residency( + &c->levels[i].pwr, &c->levels[j].pwr); + } + } + set_optimum_cluster_residency(c, true); return c; failed_parse_cluster: diff --git a/drivers/cpuidle/lpm-levels.c b/drivers/cpuidle/lpm-levels.c index 4f880fdd1478..6294c5eb14a7 100644 --- a/drivers/cpuidle/lpm-levels.c +++ b/drivers/cpuidle/lpm-levels.c @@ -419,18 +419,15 @@ static int cpu_power_select(struct cpuidle_device *dev, struct lpm_cpu *cpu) { int best_level = -1; - uint32_t best_level_pwr = ~0U; uint32_t latency_us = pm_qos_request_for_cpu(PM_QOS_CPU_DMA_LATENCY, dev->cpu); uint32_t sleep_us = (uint32_t)(ktime_to_us(tick_nohz_get_sleep_length())); uint32_t modified_time_us = 0; uint32_t next_event_us = 0; - uint32_t pwr; int i; uint32_t lvl_latency_us = 0; - uint32_t lvl_overhead_us = 0; - uint32_t lvl_overhead_energy = 0; + uint32_t *residency = get_per_cpu_max_residency(dev->cpu); if (!cpu) return -EINVAL; @@ -454,12 +451,8 @@ static int cpu_power_select(struct cpuidle_device *dev, lvl_latency_us = pwr_params->latency_us; - lvl_overhead_us = pwr_params->time_overhead_us; - - lvl_overhead_energy = pwr_params->energy_overhead; - if (latency_us < lvl_latency_us) - continue; + break; if (next_event_us) { if (next_event_us < lvl_latency_us) @@ -470,32 +463,15 @@ static int cpu_power_select(struct cpuidle_device *dev, next_wakeup_us = next_event_us - lvl_latency_us; } - if (next_wakeup_us <= pwr_params->time_overhead_us) - continue; - - /* - * If wakeup time greater than overhead by a factor of 1000 - * assume that core steady state power dominates the power - * equation - */ - if ((next_wakeup_us >> 10) > lvl_overhead_us) { - pwr = pwr_params->ss_power; - } else { - pwr = pwr_params->ss_power; - pwr -= (lvl_overhead_us * pwr_params->ss_power) / - next_wakeup_us; - pwr += pwr_params->energy_overhead / next_wakeup_us; - } - - 
if (best_level_pwr >= pwr) { + if (next_wakeup_us <= residency[i]) { best_level = i; - best_level_pwr = pwr; if (next_event_us && next_event_us < sleep_us && (mode != MSM_PM_SLEEP_MODE_WAIT_FOR_INTERRUPT)) modified_time_us = next_event_us - lvl_latency_us; else modified_time_us = 0; + break; } } @@ -554,8 +530,6 @@ static int cluster_select(struct lpm_cluster *cluster, bool from_idle) { int best_level = -1; int i; - uint32_t best_level_pwr = ~0U; - uint32_t pwr; struct cpumask mask; uint32_t latency_us = ~0U; uint32_t sleep_us; @@ -607,18 +581,9 @@ static int cluster_select(struct lpm_cluster *cluster, bool from_idle) if (level->notify_rpm && msm_rpm_waiting_for_ack()) continue; - if ((sleep_us >> 10) > pwr_params->time_overhead_us) { - pwr = pwr_params->ss_power; - } else { - pwr = pwr_params->ss_power; - pwr -= (pwr_params->time_overhead_us * - pwr_params->ss_power) / sleep_us; - pwr += pwr_params->energy_overhead / sleep_us; - } - - if (best_level_pwr >= pwr) { + if (sleep_us <= pwr_params->max_residency) { best_level = i; - best_level_pwr = pwr; + break; } } diff --git a/drivers/cpuidle/lpm-levels.h b/drivers/cpuidle/lpm-levels.h index 8e05336be21a..f6979c4d4d9f 100644 --- a/drivers/cpuidle/lpm-levels.h +++ b/drivers/cpuidle/lpm-levels.h @@ -27,6 +27,8 @@ struct power_params { uint32_t ss_power; /* Steady state power */ uint32_t energy_overhead; /* Enter + exit over head */ uint32_t time_overhead_us; /* Enter + exit overhead */ + uint32_t residencies[NR_LPM_LEVELS]; + uint32_t max_residency; }; struct lpm_cpu_level { @@ -55,6 +57,9 @@ struct lpm_level_avail { struct kobject *kobj; struct kobj_attribute idle_enabled_attr; struct kobj_attribute suspend_enabled_attr; + void *data; + int idx; + bool cpu_node; }; struct lpm_cluster_level { @@ -119,7 +124,7 @@ bool lpm_cpu_mode_allow(unsigned int cpu, unsigned int mode, bool from_idle); bool lpm_cluster_mode_allow(struct lpm_cluster *cluster, unsigned int mode, bool from_idle); - +uint32_t *get_per_cpu_max_residency(int cpu); extern struct lpm_cluster *lpm_root_node; #ifdef CONFIG_SMP From 7aef3b1f9b17b0e5bd23d37f9f7a0a16b1075e77 Mon Sep 17 00:00:00 2001 From: "Raju P.L.S.S.S.N" Date: Thu, 23 Jun 2016 19:07:20 +0530 Subject: [PATCH 2/4] lpm-levels: Fix low power mode selection The low power mode selection logic has problems while selecting best mode both in cpu and cluster selection. When latency requirement is not met, the level selection loop breaks without selecting another possible shallow power mode that meets both latency and residency requirements. The residencies are initialized to ~0 (0xFFFFFFFF) when a particular mode is disabled in idle context. This results in wrong selection of shallower mode in suspend context instead of selecting best possible deepest low power mode (assuming the mode is enabled in suspend context but disabled in idle context). This can have impact on power. The change addresses this by selecting a possible shallower power mode that can meet the latency criteria. It also initializes the residencies to 0 when a mode is not allowed in idle context. This results in selection of deepest low power mode in suspend context and saves power. 
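For illustration, a minimal user-space sketch of the selection rule the cpu
path follows after this change. The level table, residency numbers and the
helper name are hypothetical stand-ins for the driver's real per-level data,
and the real selection also honours the next-event time and per-mode enable
checks, which the sketch omits:

    #include <stdio.h>
    #include <stdint.h>

    #define NLEVELS 3

    /* Hypothetical per-level data: worst-case enter/exit latency and the
     * break-even (max) residency computed at probe time. Levels are ordered
     * shallow to deep, so latency rises with the index. */
    static const uint32_t lvl_latency_us[NLEVELS]   = {   1,  100, 500 };
    static const uint32_t max_residency_us[NLEVELS] = { 150, 4000, ~0u };

    /* Walk levels from shallow to deep. Stop as soon as the latency budget
     * is violated, but keep the last level that met it; stop early once the
     * expected sleep time already fits the level's max residency. */
    static int select_level(uint32_t latency_budget_us, uint32_t sleep_us)
    {
        int best = -1;
        int i;

        for (i = 0; i < NLEVELS; i++) {
            if (latency_budget_us < lvl_latency_us[i])
                break;        /* deeper levels only have higher latency */

            best = i;         /* latency is met, keep as candidate */

            if (sleep_us <= max_residency_us[i])
                break;        /* a deeper level would not pay off */
        }
        return best;
    }

    int main(void)
    {
        /* Tight latency budget: the shallow level is kept rather than
         * giving up, which is the behaviour this patch fixes. */
        printf("%d\n", select_level(50, 10000));   /* 0 */
        printf("%d\n", select_level(1000, 10000)); /* 2 */
        printf("%d\n", select_level(1000, 2000));  /* 1 */
        return 0;
    }
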
Change-Id: Iae8b6ad74ef0f28d9c542315745577b1a5924664 Signed-off-by: Raju P.L.S.S.S.N --- drivers/cpuidle/lpm-levels-of.c | 24 ++++++++++++++++++++---- drivers/cpuidle/lpm-levels.c | 29 +++++++++++++++-------------- 2 files changed, 35 insertions(+), 18 deletions(-) diff --git a/drivers/cpuidle/lpm-levels-of.c b/drivers/cpuidle/lpm-levels-of.c index 29aca0e50df3..8316d8aa9227 100644 --- a/drivers/cpuidle/lpm-levels-of.c +++ b/drivers/cpuidle/lpm-levels-of.c @@ -80,6 +80,14 @@ static void set_optimum_cpu_residency(struct lpm_cpu *cpu, int cpu_id, for (i = 0; i < cpu->nlevels; i++) { struct power_params *pwr = &cpu->levels[i].pwr; + mode_avail = probe_time || + lpm_cpu_mode_allow(cpu_id, i, true); + + if (!mode_avail) { + residency[i] = 0; + continue; + } + residency[i] = ~0; for (j = i + 1; j < cpu->nlevels; j++) { mode_avail = probe_time || @@ -102,11 +110,19 @@ static void set_optimum_cluster_residency(struct lpm_cluster *cluster, for (i = 0; i < cluster->nlevels; i++) { struct power_params *pwr = &cluster->levels[i].pwr; + mode_avail = probe_time || + lpm_cluster_mode_allow(cluster, i, + true); + + if (!mode_avail) { + pwr->max_residency = 0; + continue; + } + pwr->max_residency = ~0; - for (j = 0; j < cluster->nlevels; j++) { - if (i >= j) - mode_avail = probe_time || - lpm_cluster_mode_allow(cluster, i, + for (j = i+1; j < cluster->nlevels; j++) { + mode_avail = probe_time || + lpm_cluster_mode_allow(cluster, j, true); if (mode_avail && (pwr->max_residency > pwr->residencies[j]) && diff --git a/drivers/cpuidle/lpm-levels.c b/drivers/cpuidle/lpm-levels.c index 6294c5eb14a7..389e49ef8fca 100644 --- a/drivers/cpuidle/lpm-levels.c +++ b/drivers/cpuidle/lpm-levels.c @@ -456,23 +456,24 @@ static int cpu_power_select(struct cpuidle_device *dev, if (next_event_us) { if (next_event_us < lvl_latency_us) - continue; + break; if (((next_event_us - lvl_latency_us) < sleep_us) || (next_event_us < sleep_us)) next_wakeup_us = next_event_us - lvl_latency_us; } - if (next_wakeup_us <= residency[i]) { - best_level = i; - if (next_event_us && next_event_us < sleep_us && + best_level = i; + + if (next_event_us && next_event_us < sleep_us && (mode != MSM_PM_SLEEP_MODE_WAIT_FOR_INTERRUPT)) - modified_time_us - = next_event_us - lvl_latency_us; - else - modified_time_us = 0; + modified_time_us + = next_event_us - lvl_latency_us; + else + modified_time_us = 0; + + if (next_wakeup_us <= residency[i]) break; - } } if (modified_time_us) @@ -570,10 +571,10 @@ static int cluster_select(struct lpm_cluster *cluster, bool from_idle) continue; if (from_idle && latency_us < pwr_params->latency_us) - continue; + break; if (sleep_us < pwr_params->time_overhead_us) - continue; + break; if (suspend_in_progress && from_idle && level->notify_rpm) continue; @@ -581,10 +582,10 @@ static int cluster_select(struct lpm_cluster *cluster, bool from_idle) if (level->notify_rpm && msm_rpm_waiting_for_ack()) continue; - if (sleep_us <= pwr_params->max_residency) { - best_level = i; + best_level = i; + + if (sleep_us <= pwr_params->max_residency) break; - } } return best_level; From 201164f28bac7f0ab17f8bfd9ae5820bfbd1a59f Mon Sep 17 00:00:00 2001 From: "Raju P.L.S.S.S.N" Date: Tue, 12 Jul 2016 19:12:47 +0530 Subject: [PATCH 3/4] lpm-levels: Compute minimum residencies for LPM levels Minimum residency of current level is maximum residency of previous level + 1. If none of the previous levels are enabled, minimum residency of currenct level is time_overhead of current level. 
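As a worked example (all numbers hypothetical): take three levels where
level 0's max residency is 150us and level 1's is 4000us. Then level 0's
minimum residency is its own time_overhead, level 1's is 151us and level 2's
is 4001us. If level 1 is disabled in idle context, level 2's minimum
residency becomes 151us (level 0's max residency + 1); if no shallower level
is enabled at all, it falls back to level 2's own time_overhead.
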
Minimum residencies are used for New LPM level selection using prediction logic. Change-Id: I5e69a847d78d14ecbb7caeac383287a1897f7ce8 Signed-off-by: Raju P.L.S.S.S.N --- drivers/cpuidle/lpm-levels-of.c | 42 +++++++++++++++++++++++++++++---- drivers/cpuidle/lpm-levels.h | 2 ++ 2 files changed, 39 insertions(+), 5 deletions(-) diff --git a/drivers/cpuidle/lpm-levels-of.c b/drivers/cpuidle/lpm-levels-of.c index 8316d8aa9227..b40231dd8dd1 100644 --- a/drivers/cpuidle/lpm-levels-of.c +++ b/drivers/cpuidle/lpm-levels-of.c @@ -39,6 +39,7 @@ static const struct lpm_type_str lpm_types[] = { }; static DEFINE_PER_CPU(uint32_t *, max_residency); +static DEFINE_PER_CPU(uint32_t *, min_residency); static struct lpm_level_avail *cpu_level_available[NR_CPUS]; static struct platform_device *lpm_pdev; @@ -75,7 +76,8 @@ static void set_optimum_cpu_residency(struct lpm_cpu *cpu, int cpu_id, { int i, j; bool mode_avail; - uint32_t *residency = per_cpu(max_residency, cpu_id); + uint32_t *maximum_residency = per_cpu(max_residency, cpu_id); + uint32_t *minimum_residency = per_cpu(min_residency, cpu_id); for (i = 0; i < cpu->nlevels; i++) { struct power_params *pwr = &cpu->levels[i].pwr; @@ -84,19 +86,28 @@ static void set_optimum_cpu_residency(struct lpm_cpu *cpu, int cpu_id, lpm_cpu_mode_allow(cpu_id, i, true); if (!mode_avail) { - residency[i] = 0; + maximum_residency[i] = 0; + minimum_residency[i] = 0; continue; } - residency[i] = ~0; + maximum_residency[i] = ~0; for (j = i + 1; j < cpu->nlevels; j++) { mode_avail = probe_time || lpm_cpu_mode_allow(cpu_id, j, true); if (mode_avail && - (residency[i] > pwr->residencies[j]) && + (maximum_residency[i] > pwr->residencies[j]) && (pwr->residencies[j] != 0)) - residency[i] = pwr->residencies[j]; + maximum_residency[i] = pwr->residencies[j]; + } + + minimum_residency[i] = pwr->time_overhead_us; + for (j = i-1; j >= 0; j--) { + if (probe_time || lpm_cpu_mode_allow(cpu_id, j, true)) { + minimum_residency[i] = maximum_residency[j] + 1; + break; + } } } } @@ -116,6 +127,7 @@ static void set_optimum_cluster_residency(struct lpm_cluster *cluster, if (!mode_avail) { pwr->max_residency = 0; + pwr->min_residency = 0; continue; } @@ -129,6 +141,16 @@ static void set_optimum_cluster_residency(struct lpm_cluster *cluster, (pwr->residencies[j] != 0)) pwr->max_residency = pwr->residencies[j]; } + + pwr->min_residency = pwr->time_overhead_us; + for (j = i-1; j >= 0; j--) { + if (probe_time || + lpm_cluster_mode_allow(cluster, j, true)) { + pwr->min_residency = + cluster->levels[j].pwr.max_residency + 1; + break; + } + } } } @@ -137,6 +159,10 @@ uint32_t *get_per_cpu_max_residency(int cpu) return per_cpu(max_residency, cpu); } +uint32_t *get_per_cpu_min_residency(int cpu) +{ + return per_cpu(min_residency, cpu); +} ssize_t lpm_enable_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { @@ -930,6 +956,12 @@ struct lpm_cluster *parse_cluster(struct device_node *node, GFP_KERNEL); if (!per_cpu(max_residency, i)) return ERR_PTR(-ENOMEM); + per_cpu(min_residency, i) = devm_kzalloc( + &lpm_pdev->dev, + sizeof(uint32_t) * c->cpu->nlevels, + GFP_KERNEL); + if (!per_cpu(min_residency, i)) + return ERR_PTR(-ENOMEM); set_optimum_cpu_residency(c->cpu, i, true); } } diff --git a/drivers/cpuidle/lpm-levels.h b/drivers/cpuidle/lpm-levels.h index f6979c4d4d9f..63fe0a0fbc08 100644 --- a/drivers/cpuidle/lpm-levels.h +++ b/drivers/cpuidle/lpm-levels.h @@ -28,6 +28,7 @@ struct power_params { uint32_t energy_overhead; /* Enter + exit over head */ uint32_t time_overhead_us; /* Enter + exit 
overhead */ uint32_t residencies[NR_LPM_LEVELS]; + uint32_t min_residency; uint32_t max_residency; }; @@ -125,6 +126,7 @@ bool lpm_cpu_mode_allow(unsigned int cpu, bool lpm_cluster_mode_allow(struct lpm_cluster *cluster, unsigned int mode, bool from_idle); uint32_t *get_per_cpu_max_residency(int cpu); +uint32_t *get_per_cpu_min_residency(int cpu); extern struct lpm_cluster *lpm_root_node; #ifdef CONFIG_SMP From 09372fb316e83ad623f867c0bc7928d2d4f6dade Mon Sep 17 00:00:00 2001 From: Srinivas Rao L Date: Thu, 14 Apr 2016 19:12:57 +0530 Subject: [PATCH 4/4] cpuidle: lpm-levels: Consider history during LPM selection Consider recent history (residencies) of the low power modes per core while the next low power mode to enter is selected. If most of the history says the pattern of residencies is repeating with minimal deviation then use the average of these for predicting the next mode to enter. If the pattern is not repeating then if more than 50 percent of the samples out of history have exited a low power mode earlier than the minumim residency of that mode, restrict it and also low power modes deeper than that. In any of the above case, trigger a hrtimer to wakeup cpu with timeout as predicted+delta or max residency of the mode selected if a deeper state can be selected after waking up incase if prediction goes wrong. Change-Id: I902a06939e19ac51dfd8c2db6b727b203ebfda14 Signed-off-by: Srinivas Rao L --- drivers/cpuidle/lpm-levels.c | 293 ++++++++++++++++++++- include/trace/events/trace_msm_low_power.h | 50 +++- 2 files changed, 333 insertions(+), 10 deletions(-) diff --git a/drivers/cpuidle/lpm-levels.c b/drivers/cpuidle/lpm-levels.c index 389e49ef8fca..ced95aa2b649 100644 --- a/drivers/cpuidle/lpm-levels.c +++ b/drivers/cpuidle/lpm-levels.c @@ -1,4 +1,6 @@ /* Copyright (c) 2012-2016, The Linux Foundation. All rights reserved. 
+ * Copyright (C) 2006-2007 Adam Belay + * Copyright (C) 2009 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -83,9 +85,37 @@ struct lpm_debug { struct lpm_cluster *lpm_root_node; +#define MAXSAMPLES 5 + +static bool lpm_prediction; +module_param_named(lpm_prediction, + lpm_prediction, bool, S_IRUGO | S_IWUSR | S_IWGRP); + +static uint32_t ref_stddev = 100; +module_param_named( + ref_stddev, ref_stddev, uint, S_IRUGO | S_IWUSR | S_IWGRP +); + +static uint32_t tmr_add = 100; +module_param_named( + tmr_add, tmr_add, uint, S_IRUGO | S_IWUSR | S_IWGRP +); + +struct lpm_history { + uint32_t resi[MAXSAMPLES]; + int mode[MAXSAMPLES]; + int nsamp; + uint32_t hptr; + uint32_t hinvalid; + uint32_t htmr_wkup; +}; + +static DEFINE_PER_CPU(struct lpm_history, hist); + static DEFINE_PER_CPU(struct lpm_cluster*, cpu_cluster); static bool suspend_in_progress; static struct hrtimer lpm_hrtimer; +static struct hrtimer histtimer; static struct lpm_debug *lpm_debug; static phys_addr_t lpm_debug_phys; static const int num_dbg_elements = 0x100; @@ -327,10 +357,37 @@ static enum hrtimer_restart lpm_hrtimer_cb(struct hrtimer *h) return HRTIMER_NORESTART; } +static void histtimer_cancel(void) +{ + if (!lpm_prediction) + return; + + hrtimer_try_to_cancel(&histtimer); +} + +static enum hrtimer_restart histtimer_fn(struct hrtimer *h) +{ + int cpu = raw_smp_processor_id(); + struct lpm_history *history = &per_cpu(hist, cpu); + + history->hinvalid = 1; + return HRTIMER_NORESTART; +} + +static void histtimer_start(uint32_t time_us) +{ + uint64_t time_ns = time_us * NSEC_PER_USEC; + ktime_t hist_ktime = ns_to_ktime(time_ns); + + histtimer.function = histtimer_fn; + hrtimer_start(&histtimer, hist_ktime, HRTIMER_MODE_REL_PINNED); +} + static void msm_pm_set_timer(uint32_t modified_time_us) { u64 modified_time_ns = modified_time_us * NSEC_PER_USEC; ktime_t modified_ktime = ns_to_ktime(modified_time_ns); + lpm_hrtimer.function = lpm_hrtimer_cb; hrtimer_start(&lpm_hrtimer, modified_ktime, HRTIMER_MODE_REL_PINNED); } @@ -415,6 +472,143 @@ static int set_device_mode(struct lpm_cluster *cluster, int ndevice, return -EINVAL; } +static uint64_t lpm_cpuidle_predict(struct cpuidle_device *dev, + struct lpm_cpu *cpu, int *idx_restrict, + uint32_t *idx_restrict_time) +{ + int i, j, divisor; + uint64_t max, avg, stddev; + int64_t thresh = LLONG_MAX; + struct lpm_history *history = &per_cpu(hist, dev->cpu); + uint32_t *min_residency = get_per_cpu_min_residency(dev->cpu); + + if (!lpm_prediction) + return 0; + + /* + * Samples are marked invalid when woken-up due to timer, + * so donot predict. + */ + if (history->hinvalid) { + history->hinvalid = 0; + history->htmr_wkup = 1; + return 0; + } + + /* + * Predict only when all the samples are collected. + */ + if (history->nsamp < MAXSAMPLES) + return 0; + + /* + * Check if the samples are not much deviated, if so use the + * average of those as predicted sleep time. Else if any + * specific mode has more premature exits return the index of + * that mode. 
+ */ + +again: + max = avg = divisor = stddev = 0; + for (i = 0; i < MAXSAMPLES; i++) { + int64_t value = history->resi[i]; + + if (value <= thresh) { + avg += value; + divisor++; + if (value > max) + max = value; + } + } + do_div(avg, divisor); + + for (i = 0; i < MAXSAMPLES; i++) { + int64_t value = history->resi[i]; + + if (value <= thresh) { + int64_t diff = value - avg; + + stddev += diff * diff; + } + } + do_div(stddev, divisor); + stddev = int_sqrt(stddev); + + /* + * If the deviation is less, return the average, else + * ignore one maximum sample and retry + */ + if (((avg > stddev * 6) && (divisor >= (MAXSAMPLES - 1))) + || stddev <= ref_stddev) { + return avg; + } else if (divisor > (MAXSAMPLES - 1)) { + thresh = max - 1; + goto again; + } + + /* + * Find the number of premature exits for each of the mode, + * excluding clockgating mode, and they are more than fifty + * percent restrict that and deeper modes. + */ + if (history->htmr_wkup != 1) { + for (j = 1; j < cpu->nlevels; j++) { + uint32_t failed = 0; + uint64_t total = 0; + + for (i = 0; i < MAXSAMPLES; i++) { + if ((history->mode[i] == j) && + (history->resi[i] < min_residency[j])) { + failed++; + total += history->resi[i]; + } + } + if (failed > (MAXSAMPLES/2)) { + *idx_restrict = j; + do_div(total, failed); + *idx_restrict_time = total; + break; + } + } + } + return 0; +} + +static inline void invalidate_predict_history(struct cpuidle_device *dev) +{ + struct lpm_history *history = &per_cpu(hist, dev->cpu); + + if (!lpm_prediction) + return; + + if (history->hinvalid) { + history->hinvalid = 0; + history->htmr_wkup = 1; + } +} + +static void clear_predict_history(void) +{ + struct lpm_history *history; + int i; + unsigned int cpu; + + if (!lpm_prediction) + return; + + for_each_possible_cpu(cpu) { + history = &per_cpu(hist, cpu); + for (i = 0; i < MAXSAMPLES; i++) { + history->resi[i] = 0; + history->mode[i] = -1; + history->hptr = 0; + history->nsamp = 0; + } + } +} + +static void update_history(struct cpuidle_device *dev, int idx); + static int cpu_power_select(struct cpuidle_device *dev, struct lpm_cpu *cpu) { @@ -425,9 +619,13 @@ static int cpu_power_select(struct cpuidle_device *dev, (uint32_t)(ktime_to_us(tick_nohz_get_sleep_length())); uint32_t modified_time_us = 0; uint32_t next_event_us = 0; - int i; + int i, idx_restrict; uint32_t lvl_latency_us = 0; - uint32_t *residency = get_per_cpu_max_residency(dev->cpu); + uint64_t predicted = 0; + uint32_t htime = 0, idx_restrict_time = 0; + uint32_t next_wakeup_us = sleep_us; + uint32_t *min_residency = get_per_cpu_min_residency(dev->cpu); + uint32_t *max_residency = get_per_cpu_max_residency(dev->cpu); if (!cpu) return -EINVAL; @@ -435,12 +633,13 @@ static int cpu_power_select(struct cpuidle_device *dev, if (sleep_disabled) return 0; + idx_restrict = cpu->nlevels + 1; + next_event_us = (uint32_t)(ktime_to_us(get_next_event_time(dev->cpu))); for (i = 0; i < cpu->nlevels; i++) { struct lpm_cpu_level *level = &cpu->levels[i]; struct power_params *pwr_params = &level->pwr; - uint32_t next_wakeup_us = sleep_us; enum msm_pm_sleep_mode mode = level->mode; bool allow; @@ -463,24 +662,64 @@ static int cpu_power_select(struct cpuidle_device *dev, next_wakeup_us = next_event_us - lvl_latency_us; } + if (!i) { + /* + * If the next_wake_us itself is not sufficient for + * deeper low power modes than clock gating do not + * call prediction. 
+ */ + if (next_wakeup_us > max_residency[i]) { + predicted = lpm_cpuidle_predict(dev, cpu, + &idx_restrict, &idx_restrict_time); + if (predicted < min_residency[i]) + predicted = 0; + } else + invalidate_predict_history(dev); + } + + if (i >= idx_restrict) + break; + best_level = i; if (next_event_us && next_event_us < sleep_us && - (mode != MSM_PM_SLEEP_MODE_WAIT_FOR_INTERRUPT)) + (mode != MSM_PM_SLEEP_MODE_WAIT_FOR_INTERRUPT)) modified_time_us = next_event_us - lvl_latency_us; else modified_time_us = 0; - if (next_wakeup_us <= residency[i]) + if (predicted ? (predicted <= max_residency[i]) + : (next_wakeup_us <= max_residency[i])) break; } if (modified_time_us) msm_pm_set_timer(modified_time_us); + /* + * Start timer to avoid staying in shallower mode forever + * incase of misprediciton + */ + if ((predicted || (idx_restrict != (cpu->nlevels + 1))) + && ((best_level >= 0) + && (best_level < (cpu->nlevels-1)))) { + htime = predicted + tmr_add; + if (htime == tmr_add) + htime = idx_restrict_time; + else if (htime > max_residency[best_level]) + htime = max_residency[best_level]; + + if ((next_wakeup_us > htime) && + ((next_wakeup_us - htime) > max_residency[best_level])) + histtimer_start(htime); + } + trace_cpu_power_select(best_level, sleep_us, latency_us, next_event_us); + trace_cpu_pred_select(idx_restrict_time ? 2 : (predicted ? 1 : 0), + predicted, htime); + return best_level; } @@ -641,6 +880,7 @@ static int cluster_configure(struct lpm_cluster *cluster, int idx, } us = us + 1; + clear_predict_history(); do_div(us, USEC_PER_SEC/SCLK_HZ); msm_mpm_enter_sleep(us, from_idle, cpumask); } @@ -975,6 +1215,39 @@ static int lpm_cpuidle_select(struct cpuidle_driver *drv, return idx; } +static void update_history(struct cpuidle_device *dev, int idx) +{ + struct lpm_history *history = &per_cpu(hist, dev->cpu); + uint32_t tmr = 0; + + if (!lpm_prediction) + return; + + if (history->htmr_wkup) { + if (!history->hptr) + history->hptr = MAXSAMPLES-1; + else + history->hptr--; + + history->resi[history->hptr] += dev->last_residency; + history->htmr_wkup = 0; + tmr = 1; + } else + history->resi[history->hptr] = dev->last_residency; + + history->mode[history->hptr] = idx; + + trace_cpu_pred_hist(history->mode[history->hptr], + history->resi[history->hptr], history->hptr, tmr); + + if (history->nsamp < MAXSAMPLES) + history->nsamp++; + + (history->hptr)++; + if (history->hptr >= MAXSAMPLES) + history->hptr = 0; +} + static int lpm_cpuidle_enter(struct cpuidle_device *dev, struct cpuidle_driver *drv, int idx) { @@ -1009,12 +1282,13 @@ exit: cluster_unprepare(cluster, cpumask, idx, true, end_time); cpu_unprepare(cluster, idx, true); sched_set_cpu_cstate(smp_processor_id(), 0, 0, 0); - - trace_cpu_idle_exit(idx, success); end_time = ktime_to_ns(ktime_get()) - start_time; - dev->last_residency = do_div(end_time, 1000); + do_div(end_time, 1000); + dev->last_residency = end_time; + update_history(dev, idx); + trace_cpu_idle_exit(idx, success); local_irq_enable(); - + histtimer_cancel(); return idx; } @@ -1286,6 +1560,7 @@ static int lpm_probe(struct platform_device *pdev) */ suspend_set_ops(&lpm_suspend_ops); hrtimer_init(&lpm_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hrtimer_init(&histtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); ret = remote_spin_lock_init(&scm_handoff_lock, SCM_HANDOFF_LOCK_ID); if (ret) { diff --git a/include/trace/events/trace_msm_low_power.h b/include/trace/events/trace_msm_low_power.h index 691df1b2689b..e14cab59e90a 100644 --- a/include/trace/events/trace_msm_low_power.h +++ 
b/include/trace/events/trace_msm_low_power.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2012, 2014-2015, The Linux Foundation. All rights reserved. +/* Copyright (c) 2012, 2014-2016, The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -43,6 +43,54 @@ TRACE_EVENT(cpu_power_select, __entry->next_event_us) ); +TRACE_EVENT(cpu_pred_select, + + TP_PROTO(u32 predtype, u64 predicted, u32 tmr_time), + + TP_ARGS(predtype, predicted, tmr_time), + + TP_STRUCT__entry( + __field(u32, predtype) + __field(u64, predicted) + __field(u32, tmr_time) + ), + + TP_fast_assign( + __entry->predtype = predtype; + __entry->predicted = predicted; + __entry->tmr_time = tmr_time; + ), + + TP_printk("pred:%u time:%lu tmr_time:%u", + __entry->predtype, (unsigned long)__entry->predicted, + __entry->tmr_time) +); + +TRACE_EVENT(cpu_pred_hist, + + TP_PROTO(int idx, u32 resi, u32 sample, u32 tmr), + + TP_ARGS(idx, resi, sample, tmr), + + TP_STRUCT__entry( + __field(int, idx) + __field(u32, resi) + __field(u32, sample) + __field(u32, tmr) + ), + + TP_fast_assign( + __entry->idx = idx; + __entry->resi = resi; + __entry->sample = sample; + __entry->tmr = tmr; + ), + + TP_printk("idx:%d resi:%u sample:%u tmr:%u", + __entry->idx, __entry->resi, + __entry->sample, __entry->tmr) +); + TRACE_EVENT(cpu_idle_enter, TP_PROTO(int index),