From e549f7bdb0d3901171d48ffe32a8f4321183a6aa Mon Sep 17 00:00:00 2001 From: Rohit Gupta Date: Fri, 15 Jan 2016 16:12:41 -0800 Subject: [PATCH 1/2] PM / devfreq: bimc-bwmon: Add support for version 4 The version 4 of the BIMC BWMON hardware now has provisions for counting bytes transferred at a high sampling rate. Modify the existing driver and governor algorithm to take advantage of that. Change-Id: I5080297aef7e310d5c1a19098c177ddecb729c25 Signed-off-by: Rohit Gupta Signed-off-by: David Keitel --- .../bindings/devfreq/bimc-bwmon.txt | 7 +- drivers/devfreq/bimc-bwmon.c | 266 ++++++++++++++++-- 2 files changed, 249 insertions(+), 24 deletions(-) diff --git a/Documentation/devicetree/bindings/devfreq/bimc-bwmon.txt b/Documentation/devicetree/bindings/devfreq/bimc-bwmon.txt index d96c174d2581..c77f84b61eaf 100644 --- a/Documentation/devicetree/bindings/devfreq/bimc-bwmon.txt +++ b/Documentation/devicetree/bindings/devfreq/bimc-bwmon.txt @@ -5,8 +5,8 @@ can be used to measure the bandwidth of read/write traffic from the BIMC master ports. For example, the CPU subsystem sits on one BIMC master port. Required properties: -- compatible: Must be "qcom,bimc-bwmon", "qcom,bimc-bwmon2" or - "qcom,bimc-bwmon3" +- compatible: Must be "qcom,bimc-bwmon", "qcom,bimc-bwmon2", + "qcom,bimc-bwmon3" or "qcom,bimc-bwmon4" - reg: Pairs of physical base addresses and region sizes of memory mapped registers. - reg-names: Names of the bases for the above registers. Expected @@ -14,6 +14,8 @@ Required properties: - interrupts: Lists the threshold IRQ. - qcom,mport: The hardware master port that this device can monitor - qcom,target-dev: The DT device that corresponds to this master port +- qcom,hw-timer-hz: Hardware sampling rate in Hz. This field must be + specified for "qcom,bimc-bwmon4" Example: qcom,cpu-bwmon { @@ -23,4 +25,5 @@ Example: interrupts = <0 183 1>; qcom,mport = <0>; qcom,target-dev = <&cpubw>; + qcom,hw-timer-hz = <19200000>; }; diff --git a/drivers/devfreq/bimc-bwmon.c b/drivers/devfreq/bimc-bwmon.c index 2b0bacdb5f6a..707a244e62e9 100644 --- a/drivers/devfreq/bimc-bwmon.c +++ b/drivers/devfreq/bimc-bwmon.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2015, The Linux Foundation. All rights reserved. + * Copyright (c) 2014-2016, The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -40,10 +41,24 @@ #define MON_MASK(m) ((m)->base + 0x298) #define MON_MATCH(m) ((m)->base + 0x29C) +#define MON2_EN(m) ((m)->base + 0x2A0) +#define MON2_CLEAR(m) ((m)->base + 0x2A4) +#define MON2_SW(m) ((m)->base + 0x2A8) +#define MON2_THRES_HI(m) ((m)->base + 0x2AC) +#define MON2_THRES_MED(m) ((m)->base + 0x2B0) +#define MON2_THRES_LO(m) ((m)->base + 0x2B4) +#define MON2_ZONE_ACTIONS(m) ((m)->base + 0x2B8) +#define MON2_ZONE_CNT_THRES(m) ((m)->base + 0x2BC) +#define MON2_BYTE_CNT(m) ((m)->base + 0x2D0) +#define MON2_WIN_TIMER(m) ((m)->base + 0x2D4) +#define MON2_ZONE_CNT(m) ((m)->base + 0x2D8) +#define MON2_ZONE_MAX(m, zone) ((m)->base + 0x2E0 + 0x4 * zone) + struct bwmon_spec { bool wrap_on_thres; bool overflow; bool throt_adj; + bool hw_sampling; }; struct bwmon { @@ -54,24 +69,37 @@ struct bwmon { const struct bwmon_spec *spec; struct device *dev; struct bw_hwmon hw; + u32 hw_timer_hz; u32 throttle_adj; + u32 sample_size_ms; + u32 intr_status; }; #define to_bwmon(ptr) container_of(ptr, struct bwmon, hw) +#define has_hw_sampling(m) (m->spec->hw_sampling) #define ENABLE_MASK BIT(0) #define THROTTLE_MASK 0x1F #define THROTTLE_SHIFT 16 +#define INT_ENABLE_V1 0x1 +#define INT_STATUS_MASK 0x03 +#define INT_STATUS_MASK_HWS 0xF0 static DEFINE_SPINLOCK(glb_lock); static void mon_enable(struct bwmon *m) { - writel_relaxed((ENABLE_MASK | m->throttle_adj), MON_EN(m)); + if (has_hw_sampling(m)) + writel_relaxed((ENABLE_MASK | m->throttle_adj), MON2_EN(m)); + else + writel_relaxed((ENABLE_MASK | m->throttle_adj), MON_EN(m)); } static void mon_disable(struct bwmon *m) { - writel_relaxed(m->throttle_adj, MON_EN(m)); + if (has_hw_sampling(m)) + writel_relaxed(m->throttle_adj, MON2_EN(m)); + else + writel_relaxed(m->throttle_adj, MON_EN(m)); /* * mon_disable() and mon_irq_clear(), * If latter goes first and count happen to trigger irq, we would @@ -80,17 +108,46 @@ static void mon_disable(struct bwmon *m) mb(); } -static void mon_clear(struct bwmon *m) +#define MON_CLEAR_BIT 0x1 +#define MON_CLEAR_ALL_BIT 0x2 +static void mon_clear(struct bwmon *m, bool clear_all) { - writel_relaxed(0x1, MON_CLEAR(m)); + if (!has_hw_sampling(m)) { + writel_relaxed(MON_CLEAR_BIT, MON_CLEAR(m)); + goto out; + } + + if (clear_all) + writel_relaxed(MON_CLEAR_ALL_BIT, MON2_CLEAR(m)); + else + writel_relaxed(MON_CLEAR_BIT, MON2_CLEAR(m)); + /* * The counter clear and IRQ clear bits are not in the same 4KB * region. So, we need to make sure the counter clear is completed * before we try to clear the IRQ or do any other counter operations. */ +out: mb(); } +#define SAMPLE_WIN_LIM 0xFFFFF +static void mon_set_hw_sampling_window(struct bwmon *m, unsigned int sample_ms) +{ + u32 rate; + + if (unlikely(sample_ms != m->sample_size_ms)) { + rate = mult_frac(sample_ms, m->hw_timer_hz, MSEC_PER_SEC); + m->sample_size_ms = sample_ms; + if (unlikely(rate > SAMPLE_WIN_LIM)) { + rate = SAMPLE_WIN_LIM; + pr_warn("Sample window %u larger than hw limit: %u\n", + rate, SAMPLE_WIN_LIM); + } + writel_relaxed(rate, MON2_SW(m)); + } +} + static void mon_irq_enable(struct bwmon *m) { u32 val; @@ -99,11 +156,11 @@ static void mon_irq_enable(struct bwmon *m) val = readl_relaxed(GLB_INT_EN(m)); val |= 1 << m->mport; writel_relaxed(val, GLB_INT_EN(m)); - spin_unlock(&glb_lock); val = readl_relaxed(MON_INT_EN(m)); - val |= 0x1; + val |= has_hw_sampling(m) ? INT_STATUS_MASK_HWS : INT_ENABLE_V1; writel_relaxed(val, MON_INT_EN(m)); + spin_unlock(&glb_lock); /* * make Sure irq enable complete for local and global * to avoid race with other monitor calls @@ -119,11 +176,11 @@ static void mon_irq_disable(struct bwmon *m) val = readl_relaxed(GLB_INT_EN(m)); val &= ~(1 << m->mport); writel_relaxed(val, GLB_INT_EN(m)); - spin_unlock(&glb_lock); val = readl_relaxed(MON_INT_EN(m)); - val &= ~0x1; + val &= has_hw_sampling(m) ? ~INT_STATUS_MASK_HWS : ~INT_ENABLE_V1; writel_relaxed(val, MON_INT_EN(m)); + spin_unlock(&glb_lock); /* * make Sure irq disable complete for local and global * to avoid race with other monitor calls @@ -140,12 +197,18 @@ static unsigned int mon_irq_status(struct bwmon *m) dev_dbg(m->dev, "IRQ status p:%x, g:%x\n", mval, readl_relaxed(GLB_INT_STATUS(m))); + mval &= has_hw_sampling(m) ? INT_STATUS_MASK_HWS : INT_STATUS_MASK; + return mval; } static void mon_irq_clear(struct bwmon *m) { - writel_relaxed(0x3, MON_INT_CLR(m)); + u32 intclr; + + intclr = has_hw_sampling(m) ? INT_STATUS_MASK_HWS : INT_STATUS_MASK; + + writel_relaxed(intclr, MON_INT_CLR(m)); mb(); writel_relaxed(1 << m->mport, GLB_INT_CLR(m)); mb(); @@ -171,6 +234,90 @@ static u32 mon_get_throttle_adj(struct bw_hwmon *hw) return m->throttle_adj >> THROTTLE_SHIFT; } +#define ZONE1_SHIFT 8 +#define ZONE2_SHIFT 16 +#define ZONE3_SHIFT 24 +#define ZONE0_ACTION 0x01 /* Increment zone 0 count */ +#define ZONE1_ACTION 0x09 /* Increment zone 1 & clear lower zones */ +#define ZONE2_ACTION 0x25 /* Increment zone 2 & clear lower zones */ +#define ZONE3_ACTION 0x95 /* Increment zone 3 & clear lower zones */ +static u32 calc_zone_actions(void) +{ + u32 zone_actions; + + zone_actions = ZONE0_ACTION; + zone_actions |= ZONE1_ACTION << ZONE1_SHIFT; + zone_actions |= ZONE2_ACTION << ZONE2_SHIFT; + zone_actions |= ZONE3_ACTION << ZONE3_SHIFT; + + return zone_actions; +} + +#define ZONE_CNT_LIM 0xFFU +#define UP_CNT_1 1 +static u32 calc_zone_counts(struct bw_hwmon *hw) +{ + u32 zone_counts; + + zone_counts = ZONE_CNT_LIM; + zone_counts |= min(hw->down_cnt, ZONE_CNT_LIM) << ZONE1_SHIFT; + zone_counts |= ZONE_CNT_LIM << ZONE2_SHIFT; + zone_counts |= UP_CNT_1 << ZONE3_SHIFT; + + return zone_counts; +} + +static unsigned int mbps_to_mb(unsigned long mbps, unsigned int ms) +{ + mbps *= ms; + mbps = DIV_ROUND_UP(mbps, MSEC_PER_SEC); + return mbps; +} + +/* + * Define the 4 zones using HI, MED & LO thresholds: + * Zone 0: byte count < THRES_LO + * Zone 1: THRES_LO < byte count < THRES_MED + * Zone 2: THRES_MED < byte count < THRES_HI + * Zone 3: byte count > THRES_HI + */ +#define THRES_LIM 0x7FFU +static void set_zone_thres(struct bwmon *m, unsigned int sample_ms) +{ + struct bw_hwmon *hw = &(m->hw); + u32 hi, med, lo; + + hi = mbps_to_mb(hw->up_wake_mbps, sample_ms); + med = mbps_to_mb(hw->down_wake_mbps, sample_ms); + lo = 0; + + if (unlikely((hi > THRES_LIM) || (med > hi) || (lo > med))) { + pr_warn("Zone thres larger than hw limit: hi:%u med:%u lo:%u\n", + hi, med, lo); + hi = min(hi, THRES_LIM); + med = min(med, hi - 1); + lo = min(lo, med-1); + } + + writel_relaxed(hi, MON2_THRES_HI(m)); + writel_relaxed(med, MON2_THRES_MED(m)); + writel_relaxed(lo, MON2_THRES_LO(m)); + dev_dbg(m->dev, "Thres: hi:%u med:%u lo:%u\n", hi, med, lo); +} + +static void mon_set_zones(struct bwmon *m, unsigned int sample_ms) +{ + struct bw_hwmon *hw = &(m->hw); + u32 zone_cnt_thres = calc_zone_counts(hw); + + mon_set_hw_sampling_window(m, sample_ms); + set_zone_thres(m, sample_ms); + /* Set the zone count thresholds for interrupts */ + writel_relaxed(zone_cnt_thres, MON2_ZONE_CNT_THRES(m)); + + dev_dbg(m->dev, "Zone Count Thres: %0x\n", zone_cnt_thres); +} + static void mon_set_limit(struct bwmon *m, u32 count) { writel_relaxed(count, MON_THRES(m)); @@ -203,6 +350,41 @@ static unsigned long mon_get_count(struct bwmon *m) return count; } +static unsigned int get_zone(struct bwmon *m) +{ + u32 zone_counts; + u32 zone; + + zone = get_bitmask_order((m->intr_status & INT_STATUS_MASK_HWS) >> 4); + if (zone) { + zone--; + } else { + zone_counts = readl_relaxed(MON2_ZONE_CNT(m)); + if (zone_counts) { + zone = get_bitmask_order(zone_counts) - 1; + zone /= 8; + } + } + + m->intr_status = 0; + return zone; +} + +static unsigned long mon_get_zone_stats(struct bwmon *m) +{ + unsigned int zone; + unsigned long count = 0; + + zone = get_zone(m); + + count = readl_relaxed(MON2_ZONE_MAX(m, zone)); + count *= SZ_1M; + + dev_dbg(m->dev, "Zone%d Max byte count: %08lx\n", zone, count); + + return count; +} + /* ********** CPUBW specific code ********** */ /* Returns MBps of read/writes for the sampling window. */ @@ -222,8 +404,8 @@ static unsigned long get_bytes_and_clear(struct bw_hwmon *hw) unsigned long count; mon_disable(m); - count = mon_get_count(m); - mon_clear(m); + count = has_hw_sampling(m) ? mon_get_zone_stats(m) : mon_get_count(m); + mon_clear(m, false); mon_irq_clear(m); mon_enable(m); @@ -238,7 +420,7 @@ static unsigned long set_thres(struct bw_hwmon *hw, unsigned long bytes) mon_disable(m); count = mon_get_count(m); - mon_clear(m); + mon_clear(m, false); mon_irq_clear(m); if (likely(!m->spec->wrap_on_thres)) @@ -252,11 +434,26 @@ static unsigned long set_thres(struct bw_hwmon *hw, unsigned long bytes) return count; } +static unsigned long set_hw_events(struct bw_hwmon *hw, unsigned sample_ms) +{ + struct bwmon *m = to_bwmon(hw); + + mon_disable(m); + mon_clear(m, false); + mon_irq_clear(m); + + mon_set_zones(m, sample_ms); + mon_enable(m); + + return 0; +} + static irqreturn_t bwmon_intr_handler(int irq, void *dev) { struct bwmon *m = dev; - if (!mon_irq_status(m)) + m->intr_status = mon_irq_status(m); + if (!m->intr_status) return IRQ_NONE; if (bw_hwmon_sample_end(&m->hw) > 0) @@ -277,6 +474,7 @@ static int start_bw_hwmon(struct bw_hwmon *hw, unsigned long mbps) { struct bwmon *m = to_bwmon(hw); u32 limit; + u32 zone_actions = calc_zone_actions(); int ret; ret = request_threaded_irq(m->irq, bwmon_intr_handler, @@ -291,10 +489,16 @@ static int start_bw_hwmon(struct bw_hwmon *hw, unsigned long mbps) mon_disable(m); + mon_clear(m, true); limit = mbps_to_bytes(mbps, hw->df->profile->polling_ms, 0); - mon_set_limit(m, limit); + if (has_hw_sampling(m)) { + mon_set_zones(m, hw->df->profile->polling_ms); + /* Set the zone actions to increment appropriate counters */ + writel_relaxed(zone_actions, MON2_ZONE_ACTIONS(m)); + } else { + mon_set_limit(m, limit); + } - mon_clear(m); mon_irq_clear(m); mon_irq_enable(m); mon_enable(m); @@ -309,7 +513,7 @@ static void stop_bw_hwmon(struct bw_hwmon *hw) mon_irq_disable(m); free_irq(m->irq, m); mon_disable(m); - mon_clear(m); + mon_clear(m, true); mon_irq_clear(m); } @@ -330,7 +534,7 @@ static int resume_bw_hwmon(struct bw_hwmon *hw) struct bwmon *m = to_bwmon(hw); int ret; - mon_clear(m); + mon_clear(m, false); ret = request_threaded_irq(m->irq, bwmon_intr_handler, bwmon_intr_thread, IRQF_ONESHOT | IRQF_SHARED, @@ -350,15 +554,21 @@ static int resume_bw_hwmon(struct bw_hwmon *hw) /*************************************************************************/ static const struct bwmon_spec spec[] = { - { .wrap_on_thres = true, .overflow = false, .throt_adj = false}, - { .wrap_on_thres = false, .overflow = true, .throt_adj = false}, - { .wrap_on_thres = false, .overflow = true, .throt_adj = true}, + { .wrap_on_thres = true, .overflow = false, .throt_adj = false, + .hw_sampling = false}, + { .wrap_on_thres = false, .overflow = true, .throt_adj = false, + .hw_sampling = false}, + { .wrap_on_thres = false, .overflow = true, .throt_adj = true, + .hw_sampling = false}, + { .wrap_on_thres = false, .overflow = true, .throt_adj = true, + .hw_sampling = true}, }; static struct of_device_id match_table[] = { { .compatible = "qcom,bimc-bwmon", .data = &spec[0] }, { .compatible = "qcom,bimc-bwmon2", .data = &spec[1] }, { .compatible = "qcom,bimc-bwmon3", .data = &spec[2] }, + { .compatible = "qcom,bimc-bwmon4", .data = &spec[3] }, {} }; @@ -390,6 +600,16 @@ static int bimc_bwmon_driver_probe(struct platform_device *pdev) } m->spec = id->data; + if (has_hw_sampling(m)) { + ret = of_property_read_u32(dev->of_node, + "qcom,hw-timer-hz", &data); + if (ret) { + dev_err(dev, "HW sampling rate not specified!\n"); + return ret; + } + m->hw_timer_hz = data; + } + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "base"); if (!res) { dev_err(dev, "base not found!\n"); @@ -426,7 +646,9 @@ static int bimc_bwmon_driver_probe(struct platform_device *pdev) m->hw.suspend_hwmon = &suspend_bw_hwmon; m->hw.resume_hwmon = &resume_bw_hwmon; m->hw.get_bytes_and_clear = &get_bytes_and_clear; - m->hw.set_thres = &set_thres; + m->hw.set_thres = &set_thres; + if (has_hw_sampling(m)) + m->hw.set_hw_events = &set_hw_events; if (m->spec->throt_adj) { m->hw.set_throttle_adj = &mon_set_throttle_adj; m->hw.get_throttle_adj = &mon_get_throttle_adj; From 17c9994611b15495678453a67506373eb6fd3c7d Mon Sep 17 00:00:00 2001 From: David Keitel Date: Mon, 12 Sep 2016 12:35:48 -0700 Subject: [PATCH 2/2] ARM: dts: msm: enable bwmon4 for cpubw monitor for msmcobalt Add the qcom,hw-timer-hz property and enable qcom,bimc-bwmon4 device for msmcobalt to be able to use the BWMON2 hardware instead of the legacy BWMON hardware. Change-Id: I1d1c2e5d2fcf705d8ffd527ec7434d90cc004834 Signed-off-by: David Keitel --- arch/arm/boot/dts/qcom/msmcobalt.dtsi | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/qcom/msmcobalt.dtsi b/arch/arm/boot/dts/qcom/msmcobalt.dtsi index f830b2172050..726bdeaa9760 100644 --- a/arch/arm/boot/dts/qcom/msmcobalt.dtsi +++ b/arch/arm/boot/dts/qcom/msmcobalt.dtsi @@ -542,12 +542,13 @@ }; qcom,cpu-bwmon { - compatible = "qcom,bimc-bwmon3"; + compatible = "qcom,bimc-bwmon4"; reg = <0x01008000 0x300>, <0x01001000 0x200>; reg-names = "base", "global_base"; interrupts = <0 183 4>; qcom,mport = <0>; qcom,target-dev = <&cpubw>; + qcom,hw-timer-hz = <19200000>; }; mincpubw: qcom,mincpubw {