diff --git a/drivers/devfreq/Kconfig b/drivers/devfreq/Kconfig
index 12a8e7120c6b..eefaec6b9fcc 100644
--- a/drivers/devfreq/Kconfig
+++ b/drivers/devfreq/Kconfig
@@ -80,6 +80,18 @@ config DEVFREQ_GOV_CPUFREQ
 	  CPU frequency to device frequency mapping table(s). This governor
 	  can be useful for controlling devices such as DDR, cache, CCI, etc.
 
+config DEVFREQ_GOV_MSM_BW_HWMON
+	tristate "HW monitor based governor for device BW"
+	depends on ARCH_MSM_KRAIT || ARCH_MSM_BIMC_BWMON
+	help
+	  HW monitor based governor for device to DDR bandwidth voting.
+
+	  When this governor is used for Krait CPUs, it sets the Krait CPU BW
+	  vote by using L2 PM counters to monitor the Krait's use of DDR.
+	  Since this uses some of the Krait PM counters it can conflict with
+	  existing profiling tools. This governor is unlikely to be useful
+	  for non-MSM devices.
+
 config DEVFREQ_GOV_MSM_CACHE_HWMON
 	tristate "HW monitor based governor for cache frequency"
 	help
diff --git a/drivers/devfreq/Makefile b/drivers/devfreq/Makefile
index 5ce0b6b48fac..d95ced388a92 100644
--- a/drivers/devfreq/Makefile
+++ b/drivers/devfreq/Makefile
@@ -7,6 +7,7 @@ obj-$(CONFIG_DEVFREQ_GOV_USERSPACE)	+= governor_userspace.o
 obj-$(CONFIG_DEVFREQ_GOV_QCOM_ADRENO_TZ)	+= governor_msm_adreno_tz.o
 obj-$(CONFIG_DEVFREQ_GOV_CPUFREQ)	+= governor_cpufreq.o
 obj-$(CONFIG_ARCH_MSM_KRAIT)		+= krait-l2pm.o
+obj-$(CONFIG_DEVFREQ_GOV_MSM_BW_HWMON)	+= governor_bw_hwmon.o
 obj-$(CONFIG_DEVFREQ_GOV_MSM_CACHE_HWMON)	+= governor_cache_hwmon.o
 
 # DEVFREQ Drivers
diff --git a/drivers/devfreq/governor_bw_hwmon.c b/drivers/devfreq/governor_bw_hwmon.c
new file mode 100644
index 000000000000..fe733318d165
--- /dev/null
+++ b/drivers/devfreq/governor_bw_hwmon.c
@@ -0,0 +1,517 @@
+/*
+ * Copyright (c) 2013-2014, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#define pr_fmt(fmt) "bw-hwmon: " fmt
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "governor.h"
+#include "governor_bw_hwmon.h"
+
+/**
+ * struct hwmon_node - governor-side state for one registered HW monitor
+ * @tolerance_percent:	passed to the monitor's meas_bw_and_set_irq() hook
+ * @guard_band_mbps:	headroom added to every measurement
+ * @decay_rate:		weight, in percent, given to a new lower sample
+ * @io_percent:		percentage used to scale the result into *freq
+ * @bw_step:		granularity the AB vote is rounded up to
+ * @prev_ab:		result computed for the previous sample
+ * @dev_ab:		where the device takes its AB vote, or NULL
+ * @prev_ts:		timestamp of the previous measurement
+ * @mon_started:	true while the HW monitor is running
+ * @list:		entry in hwmon_list
+ * @orig_data:		devfreq->data saved while this governor replaces it
+ * @hw:			the registered HW monitor
+ * @gov:		governor this monitor is bound to
+ * @attr_grp:		sysfs group exposing the tunables
+ */
+struct hwmon_node {
+	unsigned int tolerance_percent;
+	unsigned int guard_band_mbps;
+	unsigned int decay_rate;
+	unsigned int io_percent;
+	unsigned int bw_step;
+	unsigned long prev_ab;
+	unsigned long *dev_ab;
+	ktime_t prev_ts;
+	bool mon_started;
+	struct list_head list;
+	void *orig_data;
+	struct bw_hwmon *hw;
+	struct devfreq_governor *gov;
+	struct attribute_group *attr_grp;
+};
+
+static LIST_HEAD(hwmon_list);
+static DEFINE_MUTEX(list_lock);
+
+static int use_cnt;
+static DEFINE_MUTEX(state_lock);
+
+/* Generate a sysfs show handler that prints hwmon_node field @name. */
+#define show_attr(name) \
+static ssize_t show_##name(struct device *dev, \
+			struct device_attribute *attr, char *buf) \
+{ \
+	struct devfreq *df = to_devfreq(dev); \
+	struct hwmon_node *hw = df->data; \
+	return snprintf(buf, PAGE_SIZE, "%u\n", hw->name); \
+}
+
+/*
+ * Generate a sysfs store handler that parses an unsigned integer, clamps it
+ * to [_min, _max] and writes it to hwmon_node field @name.
+ */
+#define store_attr(name, _min, _max) \
+static ssize_t store_##name(struct device *dev, \
+			struct device_attribute *attr, const char *buf, \
+			size_t count) \
+{ \
+	struct devfreq *df = to_devfreq(dev); \
+	struct hwmon_node *hw = df->data; \
+	int ret; \
+	unsigned int val; \
+	ret = sscanf(buf, "%u", &val); \
+	if (ret != 1) \
+		return -EINVAL; \
+	val = max(val, _min); \
+	val = min(val, _max); \
+	hw->name = val; \
+	return count; \
+}
+
+/* Emit the show/store handlers and the 0644 DEVICE_ATTR for one tunable. */
+#define gov_attr(__attr, min, max) \
+show_attr(__attr) \
+store_attr(__attr, min, max) \
+static DEVICE_ATTR(__attr, 0644, show_##__attr, store_##__attr)
+
+/* Limits applied to the devfreq polling interval (polling_ms). */
+#define MIN_MS	10U
+#define MAX_MS	500U
+
+/*
+ * Take one measurement covering the time since node->prev_ts and return the
+ * value reported by the monitor's meas_bw_and_set_irq() hook.
+ */
+static unsigned long measure_bw_and_set_irq(struct hwmon_node *node)
+{
+	ktime_t ts;
+	unsigned int us;
+	unsigned long mbps;
+	struct bw_hwmon *hw = node->hw;
+
+	/*
+	 * Since we are stopping the counters, we don't want this short work
+	 * to be interrupted by other tasks and cause the measurements to be
+	 * wrong. Not blocking interrupts to avoid affecting interrupt
+	 * latency and since they should be short anyway because they run in
+	 * atomic context.
+	 */
+	preempt_disable();
+
+	ts = ktime_get();
+	us = ktime_to_us(ktime_sub(ts, node->prev_ts));
+	if (!us)
+		us = 1;
+
+	mbps = hw->meas_bw_and_set_irq(hw, node->tolerance_percent, us);
+	node->prev_ts = ts;
+
+	preempt_enable();
+
+	dev_dbg(hw->df->dev.parent, "BW MBps = %6lu, period = %u\n", mbps, us);
+
+	return mbps;
+}
+
+/*
+ * Turn a raw measurement into the next vote.
+ *
+ * The guard band is added first. A sample above the previous result is used
+ * as-is; otherwise the result is the decay_rate-percent weighted mix of the
+ * sample and the previous result. *ab, if non-NULL, gets the result rounded
+ * up to bw_step; *freq gets the result scaled by 100 / io_percent.
+ */
+static void compute_bw(struct hwmon_node *node, int mbps,
+			unsigned long *freq, unsigned long *ab)
+{
+	unsigned long new_bw;
+
+	mbps += node->guard_band_mbps;
+
+	if (mbps > node->prev_ab) {
+		new_bw = mbps;
+	} else {
+		new_bw = mbps * node->decay_rate
+			+ node->prev_ab * (100 - node->decay_rate);
+		new_bw /= 100;
+	}
+
+	node->prev_ab = new_bw;
+	if (ab)
+		*ab = roundup(new_bw, node->bw_step);
+	*freq = (new_bw * 100) / node->io_percent;
+}
+
+/*
+ * Look up the HW monitor registered for @df.
+ *
+ * A monitor matches by device, by OF node or, if it registered neither, by
+ * governor. An unset (NULL) dev or of_node never matches, so a device that
+ * has no OF node cannot pick up an unrelated monitor.
+ */
+static struct hwmon_node *find_hwmon_node(struct devfreq *df)
+{
+	struct hwmon_node *node, *found = NULL;
+	struct device *parent = df->dev.parent;
+
+	mutex_lock(&list_lock);
+	list_for_each_entry(node, &hwmon_list, list) {
+		struct bw_hwmon *hw = node->hw;
+
+		if ((hw->dev && hw->dev == parent) ||
+		    (hw->of_node && hw->of_node == parent->of_node) ||
+		    (!hw->dev && !hw->of_node && node->gov == df->governor)) {
+			found = node;
+			break;
+		}
+	}
+	mutex_unlock(&list_lock);
+
+	return found;
+}
+
+/* Update requests arriving this soon after a sample are not acted on. */
+#define TOO_SOON_US (1 * USEC_PER_MSEC)
+
+/**
+ * update_bw_hwmon() - re-evaluate the vote outside the polling schedule
+ * @hwmon:	HW monitor requesting the update
+ *
+ * Return: -EINVAL if @hwmon is NULL, -ENODEV if it is not attached to a
+ * devfreq node, -EBUSY if monitoring has not started, 0 otherwise (a failed
+ * update_devfreq() is only logged).
+ */
+int update_bw_hwmon(struct bw_hwmon *hwmon)
+{
+	struct devfreq *df;
+	struct hwmon_node *node;
+	ktime_t ts;
+	unsigned int us;
+	int ret;
+
+	if (!hwmon)
+		return -EINVAL;
+	df = hwmon->df;
+	if (!df)
+		return -ENODEV;
+	node = find_hwmon_node(df);
+	if (!node)
+		return -ENODEV;
+
+	if (!node->mon_started)
+		return -EBUSY;
+
+	dev_dbg(df->dev.parent, "Got update request\n");
+	devfreq_monitor_stop(df);
+
+	/*
+	 * Don't recalc bandwidth if the interrupt comes right after a
+	 * previous bandwidth calculation. This is done for two reasons:
+	 *
+	 * 1. Sampling the BW during a very short duration can result in a
+	 *    very inaccurate measurement due to very short bursts.
+	 * 2. This can only happen if the limit was hit very close to the end
+	 *    of the previous sample period. Which means the current BW
+	 *    estimate is not very off and doesn't need to be readjusted.
+	 */
+	ts = ktime_get();
+	us = ktime_to_us(ktime_sub(ts, node->prev_ts));
+	if (us > TOO_SOON_US) {
+		mutex_lock(&df->lock);
+		ret = update_devfreq(df);
+		if (ret)
+			dev_err(df->dev.parent,
+				"Unable to update freq on request!\n");
+		mutex_unlock(&df->lock);
+	}
+
+	devfreq_monitor_start(df);
+
+	return 0;
+}
+
+/*
+ * Bind the HW monitor matching @df, start it and the devfreq polling, and
+ * create the sysfs tunables. Everything is rolled back on failure.
+ */
+static int start_monitoring(struct devfreq *df)
+{
+	int ret = 0;
+	unsigned long mbps;
+	struct device *dev = df->dev.parent;
+	struct hwmon_node *node;
+	struct bw_hwmon *hw;
+	struct devfreq_dev_status stat;
+
+	node = find_hwmon_node(df);
+	if (!node) {
+		dev_err(dev, "Unable to find HW monitor!\n");
+		return -ENODEV;
+	}
+	hw = node->hw;
+
+	stat.private_data = NULL;
+	if (df->profile->get_dev_status)
+		ret = df->profile->get_dev_status(df->dev.parent, &stat);
+	if (ret || !stat.private_data)
+		dev_warn(dev, "Device doesn't take AB votes!\n");
+	else
+		node->dev_ab = stat.private_data;
+
+	hw->df = df;
+	node->orig_data = df->data;
+	df->data = node;
+
+	node->prev_ts = ktime_get();
+	node->prev_ab = 0;
+	mbps = (df->previous_freq * node->io_percent) / 100;
+	ret = hw->start_hwmon(hw, mbps);
+	if (ret) {
+		dev_err(dev, "Unable to start HW monitor!\n");
+		goto err_start;
+	}
+
+	devfreq_monitor_start(df);
+	node->mon_started = true;
+
+	ret = sysfs_create_group(&df->dev.kobj, node->attr_grp);
+	if (ret)
+		goto err_sysfs;
+
+	return 0;
+
+err_sysfs:
+	node->mon_started = false;
+	devfreq_monitor_stop(df);
+	hw->stop_hwmon(hw);
+err_start:
+	df->data = node->orig_data;
+	node->orig_data = NULL;
+	hw->df = NULL;
+	node->dev_ab = NULL;
+	return ret;
+}
+
+/* Undo start_monitoring() and restore the df->data it replaced. */
+static void stop_monitoring(struct devfreq *df)
+{
+	struct hwmon_node *node = df->data;
+	struct bw_hwmon *hw = node->hw;
+
+	sysfs_remove_group(&df->dev.kobj, node->attr_grp);
+	node->mon_started = false;
+	devfreq_monitor_stop(df);
+	hw->stop_hwmon(hw);
+	df->data = node->orig_data;
+	node->orig_data = NULL;
+	hw->df = NULL;
+	/*
+	 * Not all governors know about this additional extended device
+	 * configuration. To avoid leaving the extended configuration at a
+	 * stale state, set it to 0 and let the next governor take it from
+	 * there.
+	 */
+	if (node->dev_ab)
+		*node->dev_ab = 0;
+	node->dev_ab = NULL;
+}
+
+/* get_target_freq hook: sample the monitor and compute the new vote. */
+static int devfreq_bw_hwmon_get_freq(struct devfreq *df,
+					unsigned long *freq,
+					u32 *flag)
+{
+	unsigned long mbps;
+	struct hwmon_node *node = df->data;
+
+	mbps = measure_bw_and_set_irq(node);
+	compute_bw(node, mbps, freq, node->dev_ab);
+
+	return 0;
+}
+
+gov_attr(tolerance_percent, 0U, 30U);
+gov_attr(guard_band_mbps, 0U, 2000U);
+gov_attr(decay_rate, 0U, 100U);
+gov_attr(io_percent, 1U, 100U);
+gov_attr(bw_step, 50U, 1000U);
+
+static struct attribute *dev_attr[] = {
+	&dev_attr_tolerance_percent.attr,
+	&dev_attr_guard_band_mbps.attr,
+	&dev_attr_decay_rate.attr,
+	&dev_attr_io_percent.attr,
+	&dev_attr_bw_step.attr,
+	NULL,
+};
+
+static struct attribute_group dev_attr_group = {
+	.name = "bw_hwmon",
+	.attrs = dev_attr,
+};
+
+/* Governor event hook; polling intervals are clamped to MIN_MS..MAX_MS. */
+static int devfreq_bw_hwmon_ev_handler(struct devfreq *df,
+					unsigned int event, void *data)
+{
+	int ret;
+	unsigned int sample_ms;
+
+	switch (event) {
+	case DEVFREQ_GOV_START:
+		sample_ms = df->profile->polling_ms;
+		sample_ms = max(MIN_MS, sample_ms);
+		sample_ms = min(MAX_MS, sample_ms);
+		df->profile->polling_ms = sample_ms;
+
+		ret = start_monitoring(df);
+		if (ret)
+			return ret;
+
+		dev_dbg(df->dev.parent,
+			"Enabled dev BW HW monitor governor\n");
+		break;
+
+	case DEVFREQ_GOV_STOP:
+		stop_monitoring(df);
+		dev_dbg(df->dev.parent,
+			"Disabled dev BW HW monitor governor\n");
+		break;
+
+	case DEVFREQ_GOV_INTERVAL:
+		sample_ms = *(unsigned int *)data;
+		sample_ms = max(MIN_MS, sample_ms);
+		sample_ms = min(MAX_MS, sample_ms);
+		devfreq_interval_update(df, &sample_ms);
+		break;
+	}
+
+	return 0;
+}
+
+static struct devfreq_governor devfreq_gov_bw_hwmon = {
+	.name = "bw_hwmon",
+	.get_target_freq = devfreq_bw_hwmon_get_freq,
+	.event_handler = devfreq_bw_hwmon_ev_handler,
+};
+
+/**
+ * register_bw_hwmon() - register a HW monitor with this governor
+ * @dev:	device the bookkeeping allocations are devm-bound to
+ * @hwmon:	monitor to register; needs at least one of gov, dev, of_node
+ *
+ * If @hwmon->gov is set, its hooks are filled in and it is added as a
+ * governor of its own. Otherwise the shared "bw_hwmon" governor is used and
+ * added to devfreq on first use.
+ *
+ * Return: 0 on success, -EINVAL if none of gov/dev/of_node is set, -ENOMEM
+ * on allocation failure, or the error from devfreq_add_governor().
+ */
+int register_bw_hwmon(struct device *dev, struct bw_hwmon *hwmon)
+{
+	int ret = 0;
+	struct hwmon_node *node;
+	struct attribute_group *attr_grp;
+
+	if (!hwmon->gov && !hwmon->dev && !hwmon->of_node)
+		return -EINVAL;
+
+	node = devm_kzalloc(dev, sizeof(*node), GFP_KERNEL);
+	if (!node) {
+		dev_err(dev, "Unable to register gov. Out of memory!\n");
+		return -ENOMEM;
+	}
+
+	if (hwmon->gov) {
+		attr_grp = devm_kzalloc(dev, sizeof(*attr_grp), GFP_KERNEL);
+		if (!attr_grp)
+			return -ENOMEM;
+
+		hwmon->gov->get_target_freq = devfreq_bw_hwmon_get_freq;
+		hwmon->gov->event_handler = devfreq_bw_hwmon_ev_handler;
+		attr_grp->name = hwmon->gov->name;
+		attr_grp->attrs = dev_attr;
+
+		node->gov = hwmon->gov;
+		node->attr_grp = attr_grp;
+	} else {
+		node->gov = &devfreq_gov_bw_hwmon;
+		node->attr_grp = &dev_attr_group;
+	}
+
+	node->tolerance_percent = 10;
+	node->guard_band_mbps = 100;
+	node->decay_rate = 90;
+	node->io_percent = 16;
+	node->bw_step = 190;
+	node->hw = hwmon;
+
+	mutex_lock(&list_lock);
+	list_add_tail(&node->list, &hwmon_list);
+	mutex_unlock(&list_lock);
+
+	if (hwmon->gov) {
+		ret = devfreq_add_governor(hwmon->gov);
+	} else {
+		mutex_lock(&state_lock);
+		if (!use_cnt)
+			ret = devfreq_add_governor(&devfreq_gov_bw_hwmon);
+		if (!ret)
+			use_cnt++;
+		mutex_unlock(&state_lock);
+	}
+
+	if (ret) {
+		/*
+		 * node is devm-managed and may be freed once the caller
+		 * bails out, so it must not stay on hwmon_list.
+		 */
+		mutex_lock(&list_lock);
+		list_del(&node->list);
+		mutex_unlock(&list_lock);
+		dev_err(dev, "BW HWmon governor registration failed!\n");
+		return ret;
+	}
+
+	dev_info(dev, "BW HWmon governor registered.\n");
+
+	return 0;
+}
+
+MODULE_DESCRIPTION("HW monitor based dev DDR bandwidth voting driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/devfreq/governor_bw_hwmon.h b/drivers/devfreq/governor_bw_hwmon.h
new file mode 100644
index 000000000000..3a4c766b9070
--- /dev/null
+++ b/drivers/devfreq/governor_bw_hwmon.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2014, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _GOVERNOR_BW_HWMON_H
+#define _GOVERNOR_BW_HWMON_H
+
+#include
+#include
+
+/**
+ * struct bw_hwmon - dev BW HW monitor info
+ * @start_hwmon:		Start the HW monitoring of the dev BW
+ * @stop_hwmon:			Stop the HW monitoring of dev BW
+ * @meas_bw_and_set_irq:	Return the measured bandwidth and set up the
+ *				IRQ to fire if the usage exceeds current
+ *				measurement by @tol percent.
+ * @dev:			Pointer to device that this HW monitor can
+ *				monitor.
+ * @of_node:			OF node of device that this HW monitor can
+ *				monitor.
+ * @gov:			devfreq_governor struct that should be used
+ *				when registering this HW monitor with devfreq.
+ *				Only the name field is expected to be
+ *				initialized.
+ * @df:				Devfreq node that this HW monitor is being
+ *				used for. NULL when not actively in use and
+ *				non-NULL when in use.
+ *
+ * One of dev, of_node or gov needs to be specified for a
+ * successful registration.
+ *
+ */
+struct bw_hwmon {
+	int (*start_hwmon)(struct bw_hwmon *hw, unsigned long mbps);
+	void (*stop_hwmon)(struct bw_hwmon *hw);
+	unsigned long (*meas_bw_and_set_irq)(struct bw_hwmon *hw,
+					unsigned int tol, unsigned int us);
+	struct device *dev;
+	struct device_node *of_node;
+	struct devfreq_governor *gov;
+
+	struct devfreq *df;
+};
+
+#ifdef CONFIG_DEVFREQ_GOV_MSM_BW_HWMON
+int register_bw_hwmon(struct device *dev, struct bw_hwmon *hwmon);
+int update_bw_hwmon(struct bw_hwmon *hwmon);
+#else
+static inline int register_bw_hwmon(struct device *dev,
+					struct bw_hwmon *hwmon)
+{
+	return 0;
+}
+static inline int update_bw_hwmon(struct bw_hwmon *hwmon)
+{
+	return 0;
+}
+#endif
+
+#endif /* _GOVERNOR_BW_HWMON_H */