Merge branch 'for-3.11' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq
Pull workqueue changes from Tejun Heo:
 "Surprisingly, Lai and I didn't break too many things implementing
  custom pools and stuff last time around and there aren't any
  follow-up changes necessary at this point.

  The only change in this pull request is Viresh's patches to make
  some per-cpu workqueues behave as unbound workqueues depending on a
  boot param whose default can be configured via a config option.

  This leads to higher processing overhead / lower bandwidth as more
  work items are bounced across CPUs; however, it can lead to
  noticeable powersave in certain configurations - ~10% w/ idlish
  constant workload on a big.LITTLE configuration according to Viresh.

  This is because per-cpu workqueues interfere with how the scheduler
  perceives whether or not each CPU is idle by forcing pinned tasks on
  them, which makes the scheduler's power-aware scheduling decisions
  less effective.

  Its effectiveness is likely less pronounced on homogeneous
  configurations and this type of optimization can probably be made
  automatic; however, the changes are pretty minimal and the affected
  workqueues are clearly marked, so it's an easy gain for some
  configurations for the time being with pretty unintrusive changes."

* 'for-3.11' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq:
  fbcon: queue work on power efficient wq
  block: queue work on power efficient wq
  PHYLIB: queue work on system_power_efficient_wq
  workqueue: Add system wide power_efficient workqueues
  workqueues: Introduce new flag WQ_POWER_EFFICIENT for power oriented workqueues
commit f317ff9eed
9 changed files with 113 additions and 12 deletions
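For driver authors the opt-in surface added by this merge is small: either queue existing work items on one of the two new system workqueues, or pass WQ_POWER_EFFICIENT to alloc_workqueue(). A minimal sketch using the APIs introduced below (the "frob" driver, its work item, and its callback are hypothetical names for illustration):

	#include <linux/workqueue.h>

	static void frob_fn(struct work_struct *work)
	{
		/* deferred housekeeping; in power-efficient mode this may
		 * run on any CPU instead of the submitting one */
	}

	static DECLARE_WORK(frob_work, frob_fn);

	static void frob_kick(void)
	{
		/* was: schedule_work(&frob_work), which queues on the
		 * per-cpu system_wq and so runs on the submitting CPU */
		queue_work(system_power_efficient_wq, &frob_work);
	}

With power-efficient mode off this behaves exactly like schedule_work(); with it on, the workqueue is unbound and the scheduler picks the CPU.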
Documentation/kernel-parameters.txt
@@ -3341,6 +3341,21 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			that this also can be controlled per-workqueue for
 			workqueues visible under /sys/bus/workqueue/.
 
+	workqueue.power_efficient
+			Per-cpu workqueues are generally preferred because
+			they show better performance thanks to cache
+			locality; unfortunately, per-cpu workqueues tend to
+			be more power hungry than unbound workqueues.
+
+			Enabling this makes the per-cpu workqueues which
+			were observed to contribute significantly to power
+			consumption unbound, leading to measurably lower
+			power usage at the cost of small performance
+			overhead.
+
+			The default value of this parameter is determined by
+			the config option CONFIG_WQ_POWER_EFFICIENT_DEFAULT.
+
 	x2apic_phys	[X86-64,APIC] Use x2apic physical mode instead of
 			default x2apic cluster mode on platforms
 			supporting x2apic.
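Since kernel/workqueue.c is built in, the parameter lives under the "workqueue." prefix on the kernel command line:

	workqueue.power_efficient=1

and because it is registered read-only (mode 0444, see the kernel/workqueue.c hunk below), the effective value can be inspected at runtime via:

	/sys/module/workqueue/parameters/power_efficient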
block/blk-core.c
@@ -3180,7 +3180,8 @@ int __init blk_dev_init(void)
 
 	/* used for unplugging and affects IO latency/throughput - HIGHPRI */
 	kblockd_workqueue = alloc_workqueue("kblockd",
-					    WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
+					    WQ_MEM_RECLAIM | WQ_HIGHPRI |
+					    WQ_POWER_EFFICIENT, 0);
 	if (!kblockd_workqueue)
 		panic("Failed to create kblockd\n");
 
block/blk-ioc.c
@@ -144,7 +144,8 @@ void put_io_context(struct io_context *ioc)
 	if (atomic_long_dec_and_test(&ioc->refcount)) {
 		spin_lock_irqsave(&ioc->lock, flags);
 		if (!hlist_empty(&ioc->icq_list))
-			schedule_work(&ioc->release_work);
+			queue_work(system_power_efficient_wq,
+				   &ioc->release_work);
 		else
 			free_ioc = true;
 		spin_unlock_irqrestore(&ioc->lock, flags);
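This conversion pattern repeats throughout the series and is behavior-preserving by default: schedule_work() is a thin inline wrapper that queues on system_wq (its definition in include/linux/workqueue.h at the time, quoted for reference):

	static inline bool schedule_work(struct work_struct *work)
	{
		return queue_work(system_wq, work);
	}

So queue_work(system_power_efficient_wq, w) is identical to schedule_work(w) unless power-efficient mode is enabled, in which case the work item may run on any CPU.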
block/genhd.c
@@ -1489,9 +1489,11 @@ static void __disk_unblock_events(struct gendisk *disk, bool check_now)
 	intv = disk_events_poll_jiffies(disk);
 	set_timer_slack(&ev->dwork.timer, intv / 4);
 	if (check_now)
-		queue_delayed_work(system_freezable_wq, &ev->dwork, 0);
+		queue_delayed_work(system_freezable_power_efficient_wq,
+				   &ev->dwork, 0);
 	else if (intv)
-		queue_delayed_work(system_freezable_wq, &ev->dwork, intv);
+		queue_delayed_work(system_freezable_power_efficient_wq,
+				   &ev->dwork, intv);
 out_unlock:
 	spin_unlock_irqrestore(&ev->lock, flags);
 }
@@ -1534,7 +1536,8 @@ void disk_flush_events(struct gendisk *disk, unsigned int mask)
 	spin_lock_irq(&ev->lock);
 	ev->clearing |= mask;
 	if (!ev->block)
-		mod_delayed_work(system_freezable_wq, &ev->dwork, 0);
+		mod_delayed_work(system_freezable_power_efficient_wq,
+				 &ev->dwork, 0);
 	spin_unlock_irq(&ev->lock);
 }
 
@@ -1627,7 +1630,8 @@ static void disk_check_events(struct disk_events *ev,
 
 	intv = disk_events_poll_jiffies(disk);
 	if (!ev->block && intv)
-		queue_delayed_work(system_freezable_wq, &ev->dwork, intv);
+		queue_delayed_work(system_freezable_power_efficient_wq,
+				   &ev->dwork, intv);
 
 	spin_unlock_irq(&ev->lock);
 
drivers/net/phy/phy.c
@@ -439,7 +439,7 @@ void phy_start_machine(struct phy_device *phydev,
 {
 	phydev->adjust_state = handler;
 
-	schedule_delayed_work(&phydev->state_queue, HZ);
+	queue_delayed_work(system_power_efficient_wq, &phydev->state_queue, HZ);
 }
 
 /**
@@ -500,7 +500,7 @@ static irqreturn_t phy_interrupt(int irq, void *phy_dat)
 	disable_irq_nosync(irq);
 	atomic_inc(&phydev->irq_disable);
 
-	schedule_work(&phydev->phy_queue);
+	queue_work(system_power_efficient_wq, &phydev->phy_queue);
 
 	return IRQ_HANDLED;
 }
@@ -655,7 +655,7 @@ static void phy_change(struct work_struct *work)
 
 	/* reschedule state queue work to run as soon as possible */
 	cancel_delayed_work_sync(&phydev->state_queue);
-	schedule_delayed_work(&phydev->state_queue, 0);
+	queue_delayed_work(system_power_efficient_wq, &phydev->state_queue, 0);
 
 	return;
 
@@ -918,7 +918,8 @@ void phy_state_machine(struct work_struct *work)
 	if (err < 0)
 		phy_error(phydev);
 
-	schedule_delayed_work(&phydev->state_queue, PHY_STATE_TIME * HZ);
+	queue_delayed_work(system_power_efficient_wq, &phydev->state_queue,
+			   PHY_STATE_TIME * HZ);
 }
 
 static inline void mmd_phy_indirect(struct mii_bus *bus, int prtad, int devad,
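The delayed-work conversions in the PHY library above follow the same pattern; schedule_delayed_work() is likewise just a wrapper over system_wq (quoted for reference from include/linux/workqueue.h):

	static inline bool schedule_delayed_work(struct delayed_work *dwork,
						 unsigned long delay)
	{
		return queue_delayed_work(system_wq, dwork, delay);
	}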
drivers/video/console/fbcon.c
@@ -404,7 +404,7 @@ static void cursor_timer_handler(unsigned long dev_addr)
 	struct fb_info *info = (struct fb_info *) dev_addr;
 	struct fbcon_ops *ops = info->fbcon_par;
 
-	schedule_work(&info->queue);
+	queue_work(system_power_efficient_wq, &info->queue);
 	mod_timer(&ops->cursor_timer, jiffies + HZ/5);
 }
 
include/linux/workqueue.h
@@ -303,6 +303,33 @@ enum {
 	WQ_CPU_INTENSIVE	= 1 << 5, /* cpu intensive workqueue */
 	WQ_SYSFS		= 1 << 6, /* visible in sysfs, see wq_sysfs_register() */
 
+	/*
+	 * Per-cpu workqueues are generally preferred because they tend to
+	 * show better performance thanks to cache locality.  Per-cpu
+	 * workqueues exclude the scheduler from choosing the CPU to
+	 * execute the worker threads, which has an unfortunate side effect
+	 * of increasing power consumption.
+	 *
+	 * The scheduler considers a CPU idle if it doesn't have any task
+	 * to execute and tries to keep idle cores idle to conserve power;
+	 * however, for example, a per-cpu work item scheduled from an
+	 * interrupt handler on an idle CPU will force the scheduler to
+	 * execute the work item on that CPU, breaking the idleness, which
+	 * in turn may lead to more scheduling choices which are
+	 * sub-optimal in terms of power consumption.
+	 *
+	 * Workqueues marked with WQ_POWER_EFFICIENT are per-cpu by default
+	 * but become unbound if the workqueue.power_efficient kernel param
+	 * is specified.  Per-cpu workqueues which were observed to
+	 * contribute significantly to power consumption are marked with
+	 * this flag, and enabling the power_efficient mode leads to
+	 * noticeable power saving at the cost of a small performance
+	 * disadvantage.
+	 *
+	 * http://thread.gmane.org/gmane.linux.kernel/1480396
+	 */
+	WQ_POWER_EFFICIENT	= 1 << 7,
+
 	__WQ_DRAINING		= 1 << 16, /* internal: workqueue is draining */
 	__WQ_ORDERED		= 1 << 17, /* internal: workqueue is ordered */
 
@@ -333,11 +360,19 @@ enum {
  *
  * system_freezable_wq is equivalent to system_wq except that it's
  * freezable.
+ *
+ * *_power_efficient_wq are inclined towards saving power and converted
+ * into WQ_UNBOUND variants if 'wq_power_efficient' is enabled; otherwise,
+ * they are the same as their non-power-efficient counterparts - e.g.
+ * system_power_efficient_wq is identical to system_wq if
+ * 'wq_power_efficient' is disabled.  See WQ_POWER_EFFICIENT for more info.
  */
 extern struct workqueue_struct *system_wq;
 extern struct workqueue_struct *system_long_wq;
 extern struct workqueue_struct *system_unbound_wq;
 extern struct workqueue_struct *system_freezable_wq;
+extern struct workqueue_struct *system_power_efficient_wq;
+extern struct workqueue_struct *system_freezable_power_efficient_wq;
 
 static inline struct workqueue_struct * __deprecated __system_nrt_wq(void)
 {
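Besides using the two new system workqueues, a subsystem can mark a dedicated workqueue with the new flag, as kblockd does above. A minimal sketch, assuming a hypothetical "foo" driver (all foo_* names are illustrative): the flag is inert until power-efficient mode is enabled, at which point allocation promotes the workqueue to WQ_UNBOUND (see the __alloc_workqueue_key() hunk below):

	static struct workqueue_struct *foo_wq;

	static int __init foo_init(void)
	{
		/* per-cpu by default; becomes unbound when
		 * workqueue.power_efficient is enabled */
		foo_wq = alloc_workqueue("foo_power_efficient",
					 WQ_POWER_EFFICIENT, 0);
		return foo_wq ? 0 : -ENOMEM;
	}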
kernel/power/Kconfig
@@ -262,6 +262,26 @@ config PM_GENERIC_DOMAINS
 	bool
 	depends on PM
 
+config WQ_POWER_EFFICIENT_DEFAULT
+	bool "Enable workqueue power-efficient mode by default"
+	depends on PM
+	default n
+	help
+	  Per-cpu workqueues are generally preferred because they show
+	  better performance thanks to cache locality; unfortunately,
+	  per-cpu workqueues tend to be more power hungry than unbound
+	  workqueues.
+
+	  Enabling the workqueue.power_efficient kernel parameter makes
+	  the per-cpu workqueues which were observed to contribute
+	  significantly to power consumption unbound, leading to measurably
+	  lower power usage at the cost of small performance overhead.
+
+	  This config option determines whether workqueue.power_efficient
+	  is enabled by default.
+
+	  If in doubt, say N.
+
 config PM_GENERIC_DOMAINS_SLEEP
 	def_bool y
 	depends on PM_SLEEP && PM_GENERIC_DOMAINS
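To flip the default at build time instead of on the command line, a config fragment suffices (the option only appears when PM is enabled; an explicit workqueue.power_efficient=0 at boot still overrides it):

	CONFIG_WQ_POWER_EFFICIENT_DEFAULT=y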
kernel/workqueue.c
@@ -272,6 +272,15 @@ static cpumask_var_t *wq_numa_possible_cpumask;
 static bool wq_disable_numa;
 module_param_named(disable_numa, wq_disable_numa, bool, 0444);
 
+/* see the comment above the definition of WQ_POWER_EFFICIENT */
+#ifdef CONFIG_WQ_POWER_EFFICIENT_DEFAULT
+static bool wq_power_efficient = true;
+#else
+static bool wq_power_efficient;
+#endif
+
+module_param_named(power_efficient, wq_power_efficient, bool, 0444);
+
 static bool wq_numa_enabled;		/* unbound NUMA affinity enabled */
 
 /* buf for wq_update_unbound_numa_attrs(), protected by CPU hotplug exclusion */
@@ -305,6 +314,10 @@ struct workqueue_struct *system_unbound_wq __read_mostly;
 EXPORT_SYMBOL_GPL(system_unbound_wq);
 struct workqueue_struct *system_freezable_wq __read_mostly;
 EXPORT_SYMBOL_GPL(system_freezable_wq);
+struct workqueue_struct *system_power_efficient_wq __read_mostly;
+EXPORT_SYMBOL_GPL(system_power_efficient_wq);
+struct workqueue_struct *system_freezable_power_efficient_wq __read_mostly;
+EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq);
 
 static int worker_thread(void *__worker);
 static void copy_workqueue_attrs(struct workqueue_attrs *to,
@@ -4086,6 +4099,10 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
 	struct workqueue_struct *wq;
 	struct pool_workqueue *pwq;
 
+	/* see the comment above the definition of WQ_POWER_EFFICIENT */
+	if ((flags & WQ_POWER_EFFICIENT) && wq_power_efficient)
+		flags |= WQ_UNBOUND;
+
 	/* allocate wq and format name */
 	if (flags & WQ_UNBOUND)
 		tbl_size = wq_numa_tbl_len * sizeof(wq->numa_pwq_tbl[0]);
@@ -4985,8 +5002,15 @@ static int __init init_workqueues(void)
 					    WQ_UNBOUND_MAX_ACTIVE);
 	system_freezable_wq = alloc_workqueue("events_freezable",
 					      WQ_FREEZABLE, 0);
+	system_power_efficient_wq = alloc_workqueue("events_power_efficient",
+					      WQ_POWER_EFFICIENT, 0);
+	system_freezable_power_efficient_wq = alloc_workqueue("events_freezable_power_efficient",
+					      WQ_FREEZABLE | WQ_POWER_EFFICIENT,
+					      0);
 	BUG_ON(!system_wq || !system_highpri_wq || !system_long_wq ||
-	       !system_unbound_wq || !system_freezable_wq);
+	       !system_unbound_wq || !system_freezable_wq ||
+	       !system_power_efficient_wq ||
+	       !system_freezable_power_efficient_wq);
 	return 0;
 }
 early_initcall(init_workqueues);