From 5b616a05de88d4be0136156a26fae9da855939f6 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@canonical.com>
Date: Wed, 30 Mar 2016 08:46:31 +0800
Subject: [PATCH 001/424] block: partition: initialize percpuref before sending
 out KOBJ_ADD

commit b30a337ca27c4f40439e4bfb290cba5f88d73bb7 upstream.

The initialization of partition's percpu_ref should have been done before
sending out KOBJ_ADD uevent, which may cause userspace to read partition
table. So the uninitialized percpu_ref may be accessed in data path.

This patch fixes this issue reported by Naveen.

Reported-by: Naveen Kaje <nkaje@codeaurora.org>
Tested-by: Naveen Kaje <nkaje@codeaurora.org>
Fixes: 6c71013ecb7e2(block: partition: convert percpu ref)
Signed-off-by: Ming Lei <ming.lei@canonical.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 block/partition-generic.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/block/partition-generic.c b/block/partition-generic.c
index 746935a5973c..a241e3900bc9 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -349,15 +349,20 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
 			goto out_del;
 	}
 
+	err = hd_ref_init(p);
+	if (err) {
+		if (flags & ADDPART_FLAG_WHOLEDISK)
+			goto out_remove_file;
+		goto out_del;
+	}
+
 	/* everything is up and running, commence */
 	rcu_assign_pointer(ptbl->part[partno], p);
 
 	/* suppress uevent if the disk suppresses it */
 	if (!dev_get_uevent_suppress(ddev))
 		kobject_uevent(&pdev->kobj, KOBJ_ADD);
-
-	if (!hd_ref_init(p))
-		return p;
+	return p;
 
 out_free_info:
 	free_part_info(p);
@@ -366,6 +371,8 @@ out_free_stats:
 out_free:
 	kfree(p);
 	return ERR_PTR(err);
+out_remove_file:
+	device_remove_file(pdev, &dev_attr_whole_disk);
 out_del:
 	kobject_put(p->holder_dir);
 	device_del(pdev);

From 9fed24fe30c1217c640d2b38403034c2c7fdce12 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@canonical.com>
Date: Fri, 15 Apr 2016 18:51:28 +0800
Subject: [PATCH 002/424] block: loop: fix filesystem corruption in case of
 aio/dio

commit a7297a6a3a3322b054592e8e988981d2f5f29cc4 upstream.

Starting from commit e36f620428(block: split bios to max possible length),
block core starts to split bio in the middle of bvec.

Unfortunately loop dio/aio doesn't consider this situation, and
always treat 'iter.iov_offset' as zero. Then filesystem corruption
is observed.

This patch figures out the offset of the base bvevc via
'bio->bi_iter.bi_bvec_done' and fixes the issue by passing the offset
to iov iterator.

Fixes: e36f6204288088f (block: split bios to max possible length)
Cc: Keith Busch <keith.busch@intel.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Ming Lei <ming.lei@canonical.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/block/loop.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 423f4ca7d712..80cf8add46ff 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -488,6 +488,12 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
 	bvec = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
 	iov_iter_bvec(&iter, ITER_BVEC | rw, bvec,
 		      bio_segments(bio), blk_rq_bytes(cmd->rq));
+	/*
+	 * This bio may be started from the middle of the 'bvec'
+	 * because of bio splitting, so offset from the bvec must
+	 * be passed to iov iterator
+	 */
+	iter.iov_offset = bio->bi_iter.bi_bvec_done;
 
 	cmd->iocb.ki_pos = pos;
 	cmd->iocb.ki_filp = file;

From adbe236b953f4537f9e5ce86d1c7ace613dec38c Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Wed, 6 Apr 2016 10:05:16 +0200
Subject: [PATCH 003/424] x86/mce: Avoid using object after free in genpool

commit a3125494cff084b098c80bb36fbe2061ffed9d52 upstream.

When we loop over all queued machine check error records to pass them
to the registered notifiers we use llist_for_each_entry(). But the loop
calls gen_pool_free() for the entry in the body of the loop - and then
the iterator looks at node->next after the free.

Use llist_for_each_entry_safe() instead.

Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Gong Chen <gong.chen@linux.intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-edac <linux-edac@vger.kernel.org>
Link: http://lkml.kernel.org/r/0205920@agluck-desk.sc.intel.com
Link: http://lkml.kernel.org/r/1459929916-12852-4-git-send-email-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/mcheck/mce-genpool.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce-genpool.c b/arch/x86/kernel/cpu/mcheck/mce-genpool.c
index 0a850100c594..2658e2af74ec 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-genpool.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-genpool.c
@@ -29,7 +29,7 @@ static char gen_pool_buf[MCE_POOLSZ];
 void mce_gen_pool_process(void)
 {
 	struct llist_node *head;
-	struct mce_evt_llist *node;
+	struct mce_evt_llist *node, *tmp;
 	struct mce *mce;
 
 	head = llist_del_all(&mce_event_llist);
@@ -37,7 +37,7 @@ void mce_gen_pool_process(void)
 		return;
 
 	head = llist_reverse_order(head);
-	llist_for_each_entry(node, head, llnode) {
+	llist_for_each_entry_safe(node, tmp, head, llnode) {
 		mce = &node->mce;
 		atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce);
 		gen_pool_free(mce_evt_pool, (unsigned long)node, sizeof(*node));

From 1c8497d2035d95e4e26bdf6cec34150bfb972776 Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Wed, 30 Mar 2016 12:24:47 -0700
Subject: [PATCH 004/424] kvm: x86: do not leak guest xcr0 into host interrupt
 handlers

commit fc5b7f3bf1e1414bd4e91db6918c85ace0c873a5 upstream.

An interrupt handler that uses the fpu can kill a KVM VM, if it runs
under the following conditions:
 - the guest's xcr0 register is loaded on the cpu
 - the guest's fpu context is not loaded
 - the host is using eagerfpu

Note that the guest's xcr0 register and fpu context are not loaded as
part of the atomic world switch into "guest mode". They are loaded by
KVM while the cpu is still in "host mode".

Usage of the fpu in interrupt context is gated by irq_fpu_usable(). The
interrupt handler will look something like this:

if (irq_fpu_usable()) {
        kernel_fpu_begin();

        [... code that uses the fpu ...]

        kernel_fpu_end();
}

As long as the guest's fpu is not loaded and the host is using eager
fpu, irq_fpu_usable() returns true (interrupted_kernel_fpu_idle()
returns true). The interrupt handler proceeds to use the fpu with
the guest's xcr0 live.

kernel_fpu_begin() saves the current fpu context. If this uses
XSAVE[OPT], it may leave the xsave area in an undesirable state.
According to the SDM, during XSAVE bit i of XSTATE_BV is not modified
if bit i is 0 in xcr0. So it's possible that XSTATE_BV[i] == 1 and
xcr0[i] == 0 following an XSAVE.

kernel_fpu_end() restores the fpu context. Now if any bit i in
XSTATE_BV == 1 while xcr0[i] == 0, XRSTOR generates a #GP. The
fault is trapped and SIGSEGV is delivered to the current process.

Only pre-4.2 kernels appear to be vulnerable to this sequence of
events. Commit 653f52c ("kvm,x86: load guest FPU context more eagerly")
from 4.2 forces the guest's fpu to always be loaded on eagerfpu hosts.

This patch fixes the bug by keeping the host's xcr0 loaded outside
of the interrupts-disabled region where KVM switches into guest mode.

Suggested-by: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: David Matlack <dmatlack@google.com>
[Move load after goto cancel_injection. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kvm/x86.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7eb4ebd3ebea..605cea75eb0d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -697,7 +697,6 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
 		if ((xcr0 & XFEATURE_MASK_AVX512) != XFEATURE_MASK_AVX512)
 			return 1;
 	}
-	kvm_put_guest_xcr0(vcpu);
 	vcpu->arch.xcr0 = xcr0;
 
 	if ((xcr0 ^ old_xcr0) & XFEATURE_MASK_EXTEND)
@@ -6495,8 +6494,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	kvm_x86_ops->prepare_guest_switch(vcpu);
 	if (vcpu->fpu_active)
 		kvm_load_guest_fpu(vcpu);
-	kvm_load_guest_xcr0(vcpu);
-
 	vcpu->mode = IN_GUEST_MODE;
 
 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
@@ -6519,6 +6516,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		goto cancel_injection;
 	}
 
+	kvm_load_guest_xcr0(vcpu);
+
 	if (req_immediate_exit)
 		smp_send_reschedule(vcpu->cpu);
 
@@ -6568,6 +6567,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	vcpu->mode = OUTSIDE_GUEST_MODE;
 	smp_wmb();
 
+	kvm_put_guest_xcr0(vcpu);
+
 	/* Interrupt is enabled by handle_external_intr() */
 	kvm_x86_ops->handle_external_intr(vcpu);
 
@@ -7215,7 +7216,6 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
 	 * and assume host would use all available bits.
 	 * Guest xcr0 would be loaded later.
 	 */
-	kvm_put_guest_xcr0(vcpu);
 	vcpu->guest_fpu_loaded = 1;
 	__kernel_fpu_begin();
 	__copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state);
@@ -7224,8 +7224,6 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
 
 void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 {
-	kvm_put_guest_xcr0(vcpu);
-
 	if (!vcpu->guest_fpu_loaded) {
 		vcpu->fpu_counter = 0;
 		return;

From 5716a93fef70b4d305e9b3afea50c3027d22cc3c Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Wed, 6 Apr 2016 09:37:22 +0100
Subject: [PATCH 005/424] KVM: arm/arm64: Handle forward time correction
 gracefully

commit 1c5631c73fc2261a5df64a72c155cb53dcdc0c45 upstream.

On a host that runs NTP, corrections can have a direct impact on
the background timer that we program on the behalf of a vcpu.

In particular, NTP performing a forward correction will result in
a timer expiring sooner than expected from a guest point of view.
Not a big deal, we kick the vcpu anyway.

But on wake-up, the vcpu thread is going to perform a check to
find out whether or not it should block. And at that point, the
timer check is going to say "timer has not expired yet, go back
to sleep". This results in the timer event being lost forever.

There are multiple ways to handle this. One would be record that
the timer has expired and let kvm_cpu_has_pending_timer return
true in that case, but that would be fairly invasive. Another is
to check for the "short sleep" condition in the hrtimer callback,
and restart the timer for the remaining time when the condition
is detected.

This patch implements the latter, with a bit of refactoring in
order to avoid too much code duplication.

Reported-by: Alexander Graf <agraf@suse.de>
Reviewed-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 virt/kvm/arm/arch_timer.c | 49 +++++++++++++++++++++++++++++++--------
 1 file changed, 39 insertions(+), 10 deletions(-)

diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index ea6064696fe4..a7b9022b5c8f 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -86,6 +86,8 @@ static void kvm_timer_inject_irq_work(struct work_struct *work)
 	vcpu = container_of(work, struct kvm_vcpu, arch.timer_cpu.expired);
 	vcpu->arch.timer_cpu.armed = false;
 
+	WARN_ON(!kvm_timer_should_fire(vcpu));
+
 	/*
 	 * If the vcpu is blocked we want to wake it up so that it will see
 	 * the timer has expired when entering the guest.
@@ -93,10 +95,46 @@ static void kvm_timer_inject_irq_work(struct work_struct *work)
 	kvm_vcpu_kick(vcpu);
 }
 
+static u64 kvm_timer_compute_delta(struct kvm_vcpu *vcpu)
+{
+	cycle_t cval, now;
+
+	cval = vcpu->arch.timer_cpu.cntv_cval;
+	now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
+
+	if (now < cval) {
+		u64 ns;
+
+		ns = cyclecounter_cyc2ns(timecounter->cc,
+					 cval - now,
+					 timecounter->mask,
+					 &timecounter->frac);
+		return ns;
+	}
+
+	return 0;
+}
+
 static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt)
 {
 	struct arch_timer_cpu *timer;
+	struct kvm_vcpu *vcpu;
+	u64 ns;
+
 	timer = container_of(hrt, struct arch_timer_cpu, timer);
+	vcpu = container_of(timer, struct kvm_vcpu, arch.timer_cpu);
+
+	/*
+	 * Check that the timer has really expired from the guest's
+	 * PoV (NTP on the host may have forced it to expire
+	 * early). If we should have slept longer, restart it.
+	 */
+	ns = kvm_timer_compute_delta(vcpu);
+	if (unlikely(ns)) {
+		hrtimer_forward_now(hrt, ns_to_ktime(ns));
+		return HRTIMER_RESTART;
+	}
+
 	queue_work(wqueue, &timer->expired);
 	return HRTIMER_NORESTART;
 }
@@ -170,8 +208,6 @@ static int kvm_timer_update_state(struct kvm_vcpu *vcpu)
 void kvm_timer_schedule(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-	u64 ns;
-	cycle_t cval, now;
 
 	BUG_ON(timer_is_armed(timer));
 
@@ -191,14 +227,7 @@ void kvm_timer_schedule(struct kvm_vcpu *vcpu)
 		return;
 
 	/*  The timer has not yet expired, schedule a background timer */
-	cval = timer->cntv_cval;
-	now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
-
-	ns = cyclecounter_cyc2ns(timecounter->cc,
-				 cval - now,
-				 timecounter->mask,
-				 &timecounter->frac);
-	timer_arm(timer, ns);
+	timer_arm(timer, kvm_timer_compute_delta(vcpu));
 }
 
 void kvm_timer_unschedule(struct kvm_vcpu *vcpu)

From 4bb48b5f95a9e40451e259e295d03cd301740440 Mon Sep 17 00:00:00 2001
From: Lokesh Vutla <lokeshvutla@ti.com>
Date: Tue, 8 Mar 2016 12:24:35 +0530
Subject: [PATCH 006/424] ARM: dts: AM43x-epos: Fix clk parent for synctimer

commit cfe1580a6415bc37fd62d79eb8102a618f7650b2 upstream.

commit 55ee7017ee31 ("arm: omap2: board-generic: use omap4_local_timer_init
for AM437x") makes synctimer32k as the clocksource on AM43xx. By default
the synctimer32k is clocked by 32K RTC OSC on AM43xx. But this 32K RTC OSC
is not available on epos boards which makes it fail to boot.

Synctimer32k can also be clocked by a peripheral PLL, so making this as
clock parent for synctimer3k on epos boards.

Fixes: 55ee7017ee31 ("arm: omap2: board-generic: use omap4_local_timer_init for AM437x")
Reported-by: Nishanth Menon <nm@ti.com>
Signed-off-by: Lokesh Vutla <lokeshvutla@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/boot/dts/am43x-epos-evm.dts | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/arm/boot/dts/am43x-epos-evm.dts b/arch/arm/boot/dts/am43x-epos-evm.dts
index 47954ed990f8..00707aac72fc 100644
--- a/arch/arm/boot/dts/am43x-epos-evm.dts
+++ b/arch/arm/boot/dts/am43x-epos-evm.dts
@@ -792,3 +792,8 @@
 	tx-num-evt = <32>;
 	rx-num-evt = <32>;
 };
+
+&synctimer_32kclk {
+	assigned-clocks = <&mux_synctimer32k_ck>;
+	assigned-clock-parents = <&clkdiv32k_ick>;
+};

From c6cf3b71df047f61b568795fe926f107806eae82 Mon Sep 17 00:00:00 2001
From: Patrick Uiterwijk <patrick@puiterwijk.org>
Date: Tue, 29 Mar 2016 16:57:40 +0000
Subject: [PATCH 007/424] ARM: mvebu: Correct unit address for linksys

commit 199831c77c50e6913e893b6bc268ba9f4a9a2bf8 upstream.

The USB2 port for Armada 38x is defined to be at 58000, not at
50000.

Fixes: 2d0a7addbd10 ("ARM: Kirkwood: Add support for many Synology NAS devices")
Signed-off-by: Patrick Uiterwijk <patrick@puiterwijk.org>
Acked-by: Imre Kaloz <kaloz@openwrt.org>
Signed-off-by: Gregory CLEMENT <gregory.clement@free-electrons.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/boot/dts/armada-385-linksys.dtsi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/armada-385-linksys.dtsi b/arch/arm/boot/dts/armada-385-linksys.dtsi
index 3710755c6d76..85d2c377c332 100644
--- a/arch/arm/boot/dts/armada-385-linksys.dtsi
+++ b/arch/arm/boot/dts/armada-385-linksys.dtsi
@@ -117,7 +117,7 @@
 			};
 
 			/* USB part of the eSATA/USB 2.0 port */
-			usb@50000 {
+			usb@58000 {
 				status = "okay";
 			};
 

From 81b5ed00246258100df11c87f118a27f0ebceba3 Mon Sep 17 00:00:00 2001
From: Nishanth Menon <nm@ti.com>
Date: Fri, 11 Mar 2016 10:12:28 -0600
Subject: [PATCH 008/424] ARM: OMAP2: Fix up interconnect barrier
 initialization for DRA7

commit 456e8d53482537616899a146b706eccd095404e6 upstream.

The following commits:
commit 3fa609755c11 ("ARM: omap2: restore OMAP4 barrier behaviour")
commit f746929ffdc8 ("Revert "ARM: OMAP4: remove dead kconfig option OMAP4_ERRATA_I688"")
and
commit ea827ad5ffbb ("ARM: DRA7: Provide proper IO map table")
came in around the same time, unfortunately this seem to have missed
initializing the barrier for DRA7 platforms - omap5_map_io was reused
for dra7 till it was split out by the last patch. barrier_init
needs to be hence carried forward as it is valid for DRA7 family of
processors as they are for OMAP5.

Fixes: ea827ad5ffbb7 ("ARM: DRA7: Provide proper IO map table")
Reported-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Reported-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
Cc: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Nishanth Menon <nm@ti.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mach-omap2/io.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm/mach-omap2/io.c b/arch/arm/mach-omap2/io.c
index 3eaeaca5da05..3a911d8dea8b 100644
--- a/arch/arm/mach-omap2/io.c
+++ b/arch/arm/mach-omap2/io.c
@@ -368,6 +368,7 @@ void __init omap5_map_io(void)
 void __init dra7xx_map_io(void)
 {
 	iotable_init(dra7xx_io_desc, ARRAY_SIZE(dra7xx_io_desc));
+	omap_barriers_init();
 }
 #endif
 /*

From 882e790b572a7dadf5323e373d0139bfbc6dce15 Mon Sep 17 00:00:00 2001
From: Lokesh Vutla <lokeshvutla@ti.com>
Date: Sat, 26 Mar 2016 23:08:55 -0600
Subject: [PATCH 009/424] ARM: OMAP2+: hwmod: Fix updating of sysconfig
 register

commit 3ca4a238106dedc285193ee47f494a6584b6fd2f upstream.

Commit 127500ccb766f ("ARM: OMAP2+: Only write the sysconfig on idle
when necessary") talks about verification of sysconfig cache value before
updating it, only during idle path. But the patch is adding the
verification in the enable path. So, adding the check in a proper place
as per the commit description.

Not keeping this check during enable path as there is a chance of losing
context and it is safe to do on idle as the context of the register will
never be lost while the device is active.

Signed-off-by: Lokesh Vutla <lokeshvutla@ti.com>
Acked-by: Tero Kristo <t-kristo@ti.com>
Cc: Jon Hunter <jonathanh@nvidia.com>
Fixes: commit 127500ccb766 "ARM: OMAP2+: Only write the sysconfig on idle when necessary"
[paul@pwsan.com: appears to have been caused by my own mismerge of the
 originally posted patch]
Signed-off-by: Paul Walmsley <paul@pwsan.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mach-omap2/omap_hwmod.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c
index 8e0bd5939e5a..147c90e70b2e 100644
--- a/arch/arm/mach-omap2/omap_hwmod.c
+++ b/arch/arm/mach-omap2/omap_hwmod.c
@@ -1416,9 +1416,7 @@ static void _enable_sysc(struct omap_hwmod *oh)
 	    (sf & SYSC_HAS_CLOCKACTIVITY))
 		_set_clockactivity(oh, oh->class->sysc->clockact, &v);
 
-	/* If the cached value is the same as the new value, skip the write */
-	if (oh->_sysc_cache != v)
-		_write_sysconfig(v, oh);
+	_write_sysconfig(v, oh);
 
 	/*
 	 * Set the autoidle bit only after setting the smartidle bit
@@ -1481,7 +1479,9 @@ static void _idle_sysc(struct omap_hwmod *oh)
 		_set_master_standbymode(oh, idlemode, &v);
 	}
 
-	_write_sysconfig(v, oh);
+	/* If the cached value is the same as the new value, skip the write */
+	if (oh->_sysc_cache != v)
+		_write_sysconfig(v, oh);
 }
 
 /**

From 6905c7a4aa1ef675825bc2ab56fd965a573ffb74 Mon Sep 17 00:00:00 2001
From: Jerome Marchand <jmarchan@redhat.com>
Date: Wed, 6 Apr 2016 14:06:48 +0100
Subject: [PATCH 010/424] assoc_array: don't call compare_object() on a node

commit 8d4a2ec1e0b41b0cf9a0c5cd4511da7f8e4f3de2 upstream.

Changes since V1: fixed the description and added KASan warning.

In assoc_array_insert_into_terminal_node(), we call the
compare_object() method on all non-empty slots, even when they're
not leaves, passing a pointer to an unexpected structure to
compare_object(). Currently it causes an out-of-bound read access
in keyring_compare_object detected by KASan (see below). The issue
is easily reproduced with keyutils testsuite.
Only call compare_object() when the slot is a leave.

KASan warning:
==================================================================
BUG: KASAN: slab-out-of-bounds in keyring_compare_object+0x213/0x240 at addr ffff880060a6f838
Read of size 8 by task keyctl/1655
=============================================================================
BUG kmalloc-192 (Not tainted): kasan: bad access detected
-----------------------------------------------------------------------------

Disabling lock debugging due to kernel taint
INFO: Allocated in assoc_array_insert+0xfd0/0x3a60 age=69 cpu=1 pid=1647
	___slab_alloc+0x563/0x5c0
	__slab_alloc+0x51/0x90
	kmem_cache_alloc_trace+0x263/0x300
	assoc_array_insert+0xfd0/0x3a60
	__key_link_begin+0xfc/0x270
	key_create_or_update+0x459/0xaf0
	SyS_add_key+0x1ba/0x350
	entry_SYSCALL_64_fastpath+0x12/0x76
INFO: Slab 0xffffea0001829b80 objects=16 used=8 fp=0xffff880060a6f550 flags=0x3fff8000004080
INFO: Object 0xffff880060a6f740 @offset=5952 fp=0xffff880060a6e5d1

Bytes b4 ffff880060a6f730: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
Object ffff880060a6f740: d1 e5 a6 60 00 88 ff ff 0e 00 00 00 00 00 00 00  ...`............
Object ffff880060a6f750: 02 cf 8e 60 00 88 ff ff 02 c0 8e 60 00 88 ff ff  ...`.......`....
Object ffff880060a6f760: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
Object ffff880060a6f770: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
Object ffff880060a6f780: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
Object ffff880060a6f790: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
Object ffff880060a6f7a0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
Object ffff880060a6f7b0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
Object ffff880060a6f7c0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
Object ffff880060a6f7d0: 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
Object ffff880060a6f7e0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
Object ffff880060a6f7f0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
CPU: 0 PID: 1655 Comm: keyctl Tainted: G    B           4.5.0-rc4-kasan+ #291
Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
 0000000000000000 000000001b2800b4 ffff880060a179e0 ffffffff81b60491
 ffff88006c802900 ffff880060a6f740 ffff880060a17a10 ffffffff815e2969
 ffff88006c802900 ffffea0001829b80 ffff880060a6f740 ffff880060a6e650
Call Trace:
 [<ffffffff81b60491>] dump_stack+0x85/0xc4
 [<ffffffff815e2969>] print_trailer+0xf9/0x150
 [<ffffffff815e9454>] object_err+0x34/0x40
 [<ffffffff815ebe50>] kasan_report_error+0x230/0x550
 [<ffffffff819949be>] ? keyring_get_key_chunk+0x13e/0x210
 [<ffffffff815ec62d>] __asan_report_load_n_noabort+0x5d/0x70
 [<ffffffff81994cc3>] ? keyring_compare_object+0x213/0x240
 [<ffffffff81994cc3>] keyring_compare_object+0x213/0x240
 [<ffffffff81bc238c>] assoc_array_insert+0x86c/0x3a60
 [<ffffffff81bc1b20>] ? assoc_array_cancel_edit+0x70/0x70
 [<ffffffff8199797d>] ? __key_link_begin+0x20d/0x270
 [<ffffffff8199786c>] __key_link_begin+0xfc/0x270
 [<ffffffff81993389>] key_create_or_update+0x459/0xaf0
 [<ffffffff8128ce0d>] ? trace_hardirqs_on+0xd/0x10
 [<ffffffff81992f30>] ? key_type_lookup+0xc0/0xc0
 [<ffffffff8199e19d>] ? lookup_user_key+0x13d/0xcd0
 [<ffffffff81534763>] ? memdup_user+0x53/0x80
 [<ffffffff819983ea>] SyS_add_key+0x1ba/0x350
 [<ffffffff81998230>] ? key_get_type_from_user.constprop.6+0xa0/0xa0
 [<ffffffff828bcf4e>] ? retint_user+0x18/0x23
 [<ffffffff8128cc7e>] ? trace_hardirqs_on_caller+0x3fe/0x580
 [<ffffffff81004017>] ? trace_hardirqs_on_thunk+0x17/0x19
 [<ffffffff828bc432>] entry_SYSCALL_64_fastpath+0x12/0x76
Memory state around the buggy address:
 ffff880060a6f700: fc fc fc fc fc fc fc fc 00 00 00 00 00 00 00 00
 ffff880060a6f780: 00 00 00 00 00 00 00 00 00 00 00 fc fc fc fc fc
>ffff880060a6f800: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
                                        ^
 ffff880060a6f880: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
 ffff880060a6f900: fc fc fc fc fc fc 00 00 00 00 00 00 00 00 00 00
==================================================================

Signed-off-by: Jerome Marchand <jmarchan@redhat.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 lib/assoc_array.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/lib/assoc_array.c b/lib/assoc_array.c
index 03dd576e6773..59fd7c0b119c 100644
--- a/lib/assoc_array.c
+++ b/lib/assoc_array.c
@@ -524,7 +524,9 @@ static bool assoc_array_insert_into_terminal_node(struct assoc_array_edit *edit,
 			free_slot = i;
 			continue;
 		}
-		if (ops->compare_object(assoc_array_ptr_to_leaf(ptr), index_key)) {
+		if (assoc_array_ptr_is_leaf(ptr) &&
+		    ops->compare_object(assoc_array_ptr_to_leaf(ptr),
+					index_key)) {
 			pr_devel("replace in slot %d\n", i);
 			edit->leaf_p = &node->slots[i];
 			edit->dead_leaf = node->slots[i];

From 6a20c0a043a73e39b5cd952d7eaf7fd7831e73ac Mon Sep 17 00:00:00 2001
From: Rafal Redzimski <rafal.f.redzimski@intel.com>
Date: Fri, 8 Apr 2016 16:25:05 +0300
Subject: [PATCH 011/424] usb: xhci: applying XHCI_PME_STUCK_QUIRK to Intel BXT
 B0 host

commit 0d46faca6f887a849efb07c1655b5a9f7c288b45 upstream.

Broxton B0 also requires XHCI_PME_STUCK_QUIRK.
Adding PCI device ID for Broxton B and adding to quirk.

Signed-off-by: Rafal Redzimski <rafal.f.redzimski@intel.com>
Signed-off-by: Robert Dobrowolski <robert.dobrowolski@linux.intel.com>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-pci.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index c2d65206ec6c..6a120a71ca3d 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -48,6 +48,7 @@
 #define PCI_DEVICE_ID_INTEL_SUNRISEPOINT_H_XHCI		0xa12f
 #define PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_XHCI	0x9d2f
 #define PCI_DEVICE_ID_INTEL_BROXTON_M_XHCI		0x0aa8
+#define PCI_DEVICE_ID_INTEL_BROXTON_B_XHCI		0x1aa8
 
 static const char hcd_name[] = "xhci_hcd";
 
@@ -156,7 +157,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
 		(pdev->device == PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_XHCI ||
 		 pdev->device == PCI_DEVICE_ID_INTEL_SUNRISEPOINT_H_XHCI ||
 		 pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI ||
-		 pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_M_XHCI)) {
+		 pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_M_XHCI ||
+		 pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_B_XHCI)) {
 		xhci->quirks |= XHCI_PME_STUCK_QUIRK;
 	}
 	if (pdev->vendor == PCI_VENDOR_ID_ETRON &&

From ba7aa9a970dc12054252042e2b30e1dedcdc5968 Mon Sep 17 00:00:00 2001
From: Mathias Nyman <mathias.nyman@linux.intel.com>
Date: Fri, 8 Apr 2016 16:25:06 +0300
Subject: [PATCH 012/424] xhci: resume USB 3 roothub first

commit 671ffdff5b13314b1fc65d62cf7604b873fb5dc4 upstream.

Give USB3 devices a better chance to enumerate at USB 3 speeds if
they are connected to a suspended host.
Solves an issue with NEC uPD720200 host hanging when partially
enumerating a USB3 device as USB2 after host controller runtime resume.

Tested-by: Mike Murdoch <main.haarp@gmail.com>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 776d59c32bc5..0be5beaf3f85 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -1103,8 +1103,8 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated)
 		/* Resume root hubs only when have pending events. */
 		status = readl(&xhci->op_regs->status);
 		if (status & STS_EINT) {
-			usb_hcd_resume_root_hub(hcd);
 			usb_hcd_resume_root_hub(xhci->shared_hcd);
+			usb_hcd_resume_root_hub(hcd);
 		}
 	}
 
@@ -1119,10 +1119,10 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated)
 
 	/* Re-enable port polling. */
 	xhci_dbg(xhci, "%s: starting port polling.\n", __func__);
-	set_bit(HCD_FLAG_POLL_RH, &hcd->flags);
-	usb_hcd_poll_rh_status(hcd);
 	set_bit(HCD_FLAG_POLL_RH, &xhci->shared_hcd->flags);
 	usb_hcd_poll_rh_status(xhci->shared_hcd);
+	set_bit(HCD_FLAG_POLL_RH, &hcd->flags);
+	usb_hcd_poll_rh_status(hcd);
 
 	return retval;
 }

From bb6adb50beb03da007c63e86866f6be81d671075 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Fri, 8 Apr 2016 16:25:09 +0300
Subject: [PATCH 013/424] usb: xhci: fix wild pointers in xhci_mem_cleanup

commit 71504062a7c34838c3fccd92c447f399d3cb5797 upstream.

This patch fixes some wild pointers produced by xhci_mem_cleanup.
These wild pointers will cause system crash if xhci_mem_cleanup()
is called twice.

Reported-and-tested-by: Pengcheng Li <lpc.li@hisilicon.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-mem.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index c48cbe731356..d8dbd7e5194b 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -1875,6 +1875,12 @@ no_bw:
 	kfree(xhci->rh_bw);
 	kfree(xhci->ext_caps);
 
+	xhci->usb2_ports = NULL;
+	xhci->usb3_ports = NULL;
+	xhci->port_array = NULL;
+	xhci->rh_bw = NULL;
+	xhci->ext_caps = NULL;
+
 	xhci->page_size = 0;
 	xhci->page_shift = 0;
 	xhci->bus_state[0].bus_suspended = 0;

From 0eb1e16bf9feb36441440b0bd9fb0ced0fcdfdb6 Mon Sep 17 00:00:00 2001
From: Mathias Nyman <mathias.nyman@linux.intel.com>
Date: Fri, 8 Apr 2016 16:25:10 +0300
Subject: [PATCH 014/424] xhci: fix 10 second timeout on removal of PCI
 hotpluggable xhci controllers

commit 98d74f9ceaefc2b6c4a6440050163a83be0abede upstream.

PCI hotpluggable xhci controllers such as some Alpine Ridge solutions will
remove the xhci controller from the PCI bus when the last USB device is
disconnected.

Add a flag to indicate that the host is being removed to avoid queueing
configure_endpoint commands for the dropped endpoints.
For PCI hotplugged controllers this will prevent 5 second command timeouts
For static xhci controllers the configure_endpoint command is not needed
in the removal case as everything will be returned, freed, and the
controller is reset.

For now the flag is only set for PCI connected host controllers.

Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-pci.c  | 1 +
 drivers/usb/host/xhci-ring.c | 3 ++-
 drivers/usb/host/xhci.c      | 8 +++++---
 drivers/usb/host/xhci.h      | 1 +
 4 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index 6a120a71ca3d..ea4fb4b0cd44 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -301,6 +301,7 @@ static void xhci_pci_remove(struct pci_dev *dev)
 	struct xhci_hcd *xhci;
 
 	xhci = hcd_to_xhci(pci_get_drvdata(dev));
+	xhci->xhc_state |= XHCI_STATE_REMOVING;
 	if (xhci->shared_hcd) {
 		usb_remove_hcd(xhci->shared_hcd);
 		usb_put_hcd(xhci->shared_hcd);
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index db0f0831b94f..2b63969c2bbf 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -4008,7 +4008,8 @@ static int queue_command(struct xhci_hcd *xhci, struct xhci_command *cmd,
 	int reserved_trbs = xhci->cmd_ring_reserved_trbs;
 	int ret;
 
-	if (xhci->xhc_state) {
+	if ((xhci->xhc_state & XHCI_STATE_DYING) ||
+		(xhci->xhc_state & XHCI_STATE_HALTED)) {
 		xhci_dbg(xhci, "xHCI dying or halted, can't queue_command\n");
 		return -ESHUTDOWN;
 	}
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 0be5beaf3f85..ec9e758d5fcd 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -146,7 +146,8 @@ static int xhci_start(struct xhci_hcd *xhci)
 				"waited %u microseconds.\n",
 				XHCI_MAX_HALT_USEC);
 	if (!ret)
-		xhci->xhc_state &= ~(XHCI_STATE_HALTED | XHCI_STATE_DYING);
+		/* clear state flags. Including dying, halted or removing */
+		xhci->xhc_state = 0;
 
 	return ret;
 }
@@ -2753,7 +2754,8 @@ int xhci_check_bandwidth(struct usb_hcd *hcd, struct usb_device *udev)
 	if (ret <= 0)
 		return ret;
 	xhci = hcd_to_xhci(hcd);
-	if (xhci->xhc_state & XHCI_STATE_DYING)
+	if ((xhci->xhc_state & XHCI_STATE_DYING) ||
+		(xhci->xhc_state & XHCI_STATE_REMOVING))
 		return -ENODEV;
 
 	xhci_dbg(xhci, "%s called for udev %p\n", __func__, udev);
@@ -3800,7 +3802,7 @@ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev,
 
 	mutex_lock(&xhci->mutex);
 
-	if (xhci->xhc_state)	/* dying or halted */
+	if (xhci->xhc_state)	/* dying, removing or halted */
 		goto out;
 
 	if (!udev->slot_id) {
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index 0b9451250e33..99ac2289dbf3 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -1596,6 +1596,7 @@ struct xhci_hcd {
  */
 #define XHCI_STATE_DYING	(1 << 0)
 #define XHCI_STATE_HALTED	(1 << 1)
+#define XHCI_STATE_REMOVING	(1 << 2)
 	/* Statistics */
 	int			error_bitmask;
 	unsigned int		quirks;

From 95b9219e05dafdb76b0707e815e5314cc0cf91af Mon Sep 17 00:00:00 2001
From: Robert Dobrowolski <robert.dobrowolski@linux.intel.com>
Date: Thu, 24 Mar 2016 03:30:07 -0700
Subject: [PATCH 015/424] usb: hcd: out of bounds access in for_each_companion

commit e86103a75705c7c530768f4ffaba74cf382910f2 upstream.

On BXT platform Host Controller and Device Controller figure as
same PCI device but with different device function. HCD should
not pass data to Device Controller but only to Host Controllers.
Checking if companion device is Host Controller, otherwise skip.

Signed-off-by: Robert Dobrowolski <robert.dobrowolski@linux.intel.com>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/hcd-pci.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/usb/core/hcd-pci.c b/drivers/usb/core/hcd-pci.c
index 9eb1cff28bd4..b8b580e5ae6e 100644
--- a/drivers/usb/core/hcd-pci.c
+++ b/drivers/usb/core/hcd-pci.c
@@ -74,6 +74,15 @@ static void for_each_companion(struct pci_dev *pdev, struct usb_hcd *hcd,
 		if (companion->bus != pdev->bus ||
 				PCI_SLOT(companion->devfn) != slot)
 			continue;
+
+		/*
+		 * Companion device should be either UHCI,OHCI or EHCI host
+		 * controller, otherwise skip.
+		 */
+		if (companion->class != CL_UHCI && companion->class != CL_OHCI &&
+				companion->class != CL_EHCI)
+			continue;
+
 		companion_hcd = pci_get_drvdata(companion);
 		if (!companion_hcd || !companion_hcd->self.root_hub)
 			continue;

From e4c7ab76586146820b394e0176f286f5a2e70cb3 Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Thu, 14 Apr 2016 17:01:17 +0200
Subject: [PATCH 016/424] usb: gadget: f_fs: Fix use-after-free

commit 38740a5b87d53ceb89eb2c970150f6e94e00373a upstream.

When using asynchronous read or write operations on the USB endpoints the
issuer of the IO request is notified by calling the ki_complete() callback
of the submitted kiocb when the URB has been completed.

Calling this ki_complete() callback will free kiocb. Make sure that the
structure is no longer accessed beyond that point, otherwise undefined
behaviour might occur.

Fixes: 2e4c7553cd6f ("usb: gadget: f_fs: add aio support")
Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/function/f_fs.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index cf43e9e18368..79d895c2dd71 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -646,6 +646,7 @@ static void ffs_user_copy_worker(struct work_struct *work)
 						   work);
 	int ret = io_data->req->status ? io_data->req->status :
 					 io_data->req->actual;
+	bool kiocb_has_eventfd = io_data->kiocb->ki_flags & IOCB_EVENTFD;
 
 	if (io_data->read && ret > 0) {
 		use_mm(io_data->mm);
@@ -657,13 +658,11 @@ static void ffs_user_copy_worker(struct work_struct *work)
 
 	io_data->kiocb->ki_complete(io_data->kiocb, ret, ret);
 
-	if (io_data->ffs->ffs_eventfd &&
-	    !(io_data->kiocb->ki_flags & IOCB_EVENTFD))
+	if (io_data->ffs->ffs_eventfd && !kiocb_has_eventfd)
 		eventfd_signal(io_data->ffs->ffs_eventfd, 1);
 
 	usb_ep_free_request(io_data->ep, io_data->req);
 
-	io_data->kiocb->private = NULL;
 	if (io_data->read)
 		kfree(io_data->to_free);
 	kfree(io_data->buf);

From 9d58f322ee18ffaca1e0b67d90ab811ad75e62a6 Mon Sep 17 00:00:00 2001
From: Mike Snitzer <snitzer@redhat.com>
Date: Tue, 12 Apr 2016 12:14:46 -0400
Subject: [PATCH 017/424] dm cache metadata: fix READ_LOCK macros and cleanup
 WRITE_LOCK macros

commit 9567366fefddeaea4ed1d713270535d93a3b3c76 upstream.

The READ_LOCK macro was incorrectly returning -EINVAL if
dm_bm_is_read_only() was true -- it will always be true once the cache
metadata transitions to read-only by dm_cache_metadata_set_read_only().

Wrap READ_LOCK and WRITE_LOCK multi-statement macros in do {} while(0).
Also, all accesses of the 'cmd' argument passed to these related macros
are now encapsulated in parenthesis.

A follow-up patch can be developed to eliminate the use of macros in
favor of pure C code.  Avoiding that now given that this needs to apply
to stable@.

Reported-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Fixes: d14fcf3dd79 ("dm cache: make sure every metadata function checks fail_io")
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/dm-cache-metadata.c | 64 +++++++++++++++++++++-------------
 1 file changed, 40 insertions(+), 24 deletions(-)

diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c
index 27f2ef300f8b..65ce6985f87a 100644
--- a/drivers/md/dm-cache-metadata.c
+++ b/drivers/md/dm-cache-metadata.c
@@ -867,39 +867,55 @@ static int blocks_are_unmapped_or_clean(struct dm_cache_metadata *cmd,
 	return 0;
 }
 
-#define WRITE_LOCK(cmd)	\
-	down_write(&cmd->root_lock); \
-	if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) { \
-		up_write(&cmd->root_lock); \
-		return -EINVAL; \
+static bool cmd_write_lock(struct dm_cache_metadata *cmd)
+{
+	down_write(&cmd->root_lock);
+	if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) {
+		up_write(&cmd->root_lock);
+		return false;
 	}
+	return true;
+}
 
-#define WRITE_LOCK_VOID(cmd) \
-	down_write(&cmd->root_lock); \
-	if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) { \
-		up_write(&cmd->root_lock); \
-		return; \
-	}
+#define WRITE_LOCK(cmd)				\
+	do {					\
+		if (!cmd_write_lock((cmd)))	\
+			return -EINVAL;		\
+	} while(0)
+
+#define WRITE_LOCK_VOID(cmd)			\
+	do {					\
+		if (!cmd_write_lock((cmd)))	\
+			return;			\
+	} while(0)
 
 #define WRITE_UNLOCK(cmd) \
-	up_write(&cmd->root_lock)
+	up_write(&(cmd)->root_lock)
 
-#define READ_LOCK(cmd) \
-	down_read(&cmd->root_lock); \
-	if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) { \
-		up_read(&cmd->root_lock); \
-		return -EINVAL; \
+static bool cmd_read_lock(struct dm_cache_metadata *cmd)
+{
+	down_write(&cmd->root_lock);
+	if (cmd->fail_io) {
+		up_write(&cmd->root_lock);
+		return false;
 	}
+	return true;
+}
 
-#define READ_LOCK_VOID(cmd)	\
-	down_read(&cmd->root_lock); \
-	if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) { \
-		up_read(&cmd->root_lock); \
-		return; \
-	}
+#define READ_LOCK(cmd)				\
+	do {					\
+		if (!cmd_read_lock((cmd)))	\
+			return -EINVAL;		\
+	} while(0)
+
+#define READ_LOCK_VOID(cmd)			\
+	do {					\
+		if (!cmd_read_lock((cmd)))	\
+			return;			\
+	} while(0)
 
 #define READ_UNLOCK(cmd) \
-	up_read(&cmd->root_lock)
+	up_read(&(cmd)->root_lock)
 
 int dm_cache_resize(struct dm_cache_metadata *cmd, dm_cblock_t new_cache_size)
 {

From be5cbaf31cd318f8aaeeff901f6d27232dfa965f Mon Sep 17 00:00:00 2001
From: Ahmed Samy <f.fallen45@gmail.com>
Date: Sun, 17 Apr 2016 05:37:09 +0000
Subject: [PATCH 018/424] dm cache metadata: fix cmd_read_lock() acquiring
 write lock

commit 6545b60baaf880b0cd29a5e89dbe745a06027e89 upstream.

Commit 9567366fefdd ("dm cache metadata: fix READ_LOCK macros and
cleanup WRITE_LOCK macros") uses down_write() instead of down_read() in
cmd_read_lock(), yet up_read() is used to release the lock in
READ_UNLOCK().  Fix it.

Fixes: 9567366fefdd ("dm cache metadata: fix READ_LOCK macros and cleanup WRITE_LOCK macros")
Signed-off-by: Ahmed Samy <f.fallen45@gmail.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/dm-cache-metadata.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c
index 65ce6985f87a..3970cda10080 100644
--- a/drivers/md/dm-cache-metadata.c
+++ b/drivers/md/dm-cache-metadata.c
@@ -894,9 +894,9 @@ static bool cmd_write_lock(struct dm_cache_metadata *cmd)
 
 static bool cmd_read_lock(struct dm_cache_metadata *cmd)
 {
-	down_write(&cmd->root_lock);
+	down_read(&cmd->root_lock);
 	if (cmd->fail_io) {
-		up_write(&cmd->root_lock);
+		up_read(&cmd->root_lock);
 		return false;
 	}
 	return true;

From ed643d220692bfc2bfec9fe618d102f13a5dae9f Mon Sep 17 00:00:00 2001
From: Rui Salvaterra <rsalvaterra@gmail.com>
Date: Sat, 9 Apr 2016 22:05:34 +0100
Subject: [PATCH 019/424] lib: lz4: fixed zram with lz4 on big endian machines

commit 3e26a691fe3fe1e02a76e5bab0c143ace4b137b4 upstream.

Based on Sergey's test patch [1], this fixes zram with lz4 compression
on big endian cpus.

Note that the 64-bit preprocessor test is not a cleanup, it's part of
the fix, since those identifiers are bogus (for example, __ppc64__
isn't defined anywhere else in the kernel, which means we'd fall into
the 32-bit definitions on ppc64).

Tested on ppc64 with no regression on x86_64.

[1] http://marc.info/?l=linux-kernel&m=145994470805853&w=4

Suggested-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Signed-off-by: Rui Salvaterra <rsalvaterra@gmail.com>
Reviewed-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 lib/lz4/lz4defs.h | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/lib/lz4/lz4defs.h b/lib/lz4/lz4defs.h
index abcecdc2d0f2..0710a62ad2f6 100644
--- a/lib/lz4/lz4defs.h
+++ b/lib/lz4/lz4defs.h
@@ -11,8 +11,7 @@
 /*
  * Detects 64 bits mode
  */
-#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) \
-	|| defined(__ppc64__) || defined(__LP64__))
+#if defined(CONFIG_64BIT)
 #define LZ4_ARCH64 1
 #else
 #define LZ4_ARCH64 0
@@ -35,6 +34,10 @@ typedef struct _U64_S { u64 v; } U64_S;
 
 #define PUT4(s, d) (A32(d) = A32(s))
 #define PUT8(s, d) (A64(d) = A64(s))
+
+#define LZ4_READ_LITTLEENDIAN_16(d, s, p)	\
+	(d = s - A16(p))
+
 #define LZ4_WRITE_LITTLEENDIAN_16(p, v)	\
 	do {	\
 		A16(p) = v; \
@@ -51,10 +54,13 @@ typedef struct _U64_S { u64 v; } U64_S;
 #define PUT8(s, d) \
 	put_unaligned(get_unaligned((const u64 *) s), (u64 *) d)
 
-#define LZ4_WRITE_LITTLEENDIAN_16(p, v)	\
-	do {	\
-		put_unaligned(v, (u16 *)(p)); \
-		p += 2; \
+#define LZ4_READ_LITTLEENDIAN_16(d, s, p)	\
+	(d = s - get_unaligned_le16(p))
+
+#define LZ4_WRITE_LITTLEENDIAN_16(p, v)			\
+	do {						\
+		put_unaligned_le16(v, (u16 *)(p));	\
+		p += 2;					\
 	} while (0)
 #endif
 
@@ -140,9 +146,6 @@ typedef struct _U64_S { u64 v; } U64_S;
 
 #endif
 
-#define LZ4_READ_LITTLEENDIAN_16(d, s, p) \
-	(d = s - get_unaligned_le16(p))
-
 #define LZ4_WILDCOPY(s, d, e)		\
 	do {				\
 		LZ4_COPYPACKET(s, d);	\

From 9b3bd581a0492bdfe788539ca65a14da570faad1 Mon Sep 17 00:00:00 2001
From: Seth Forshee <seth.forshee@canonical.com>
Date: Wed, 9 Mar 2016 09:18:07 -0600
Subject: [PATCH 020/424] debugfs: Make automount point inodes permanently
 empty

commit 87243deb88671f70def4c52dfa7ca7830707bd31 upstream.

Starting with 4.1 the tracing subsystem has its own filesystem
which is automounted in the tracing subdirectory of debugfs.
Prior to this debugfs could be bind mounted in a cloned mount
namespace, but if tracefs has been mounted under debugfs this
now fails because there is a locked child mount. This creates
a regression for container software which bind mounts debugfs
to satisfy the assumption of some userspace software.

In other pseudo filesystems such as proc and sysfs we're already
creating mountpoints like this in such a way that no dirents can
be created in the directories, allowing them to be exceptions to
some MNT_LOCKED tests. In fact we're already do this for the
tracefs mountpoint in sysfs.

Do the same in debugfs_create_automount(), since the intention
here is clearly to create a mountpoint. This fixes the regression,
as locked child mounts on permanently empty directories do not
cause a bind mount to fail.

Signed-off-by: Seth Forshee <seth.forshee@canonical.com>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/debugfs/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index b7fcc0de0b2f..0f5d05bf2131 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -457,7 +457,7 @@ struct dentry *debugfs_create_automount(const char *name,
 	if (unlikely(!inode))
 		return failed_creating(dentry);
 
-	inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
+	make_empty_dir_inode(inode);
 	inode->i_flags |= S_AUTOMOUNT;
 	inode->i_private = data;
 	dentry->d_fsdata = (void *)f;

From 42e6f01a44fe4aab28819b5efa48fbe9da3059e5 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 8 Apr 2016 16:22:17 +0300
Subject: [PATCH 021/424] dmaengine: dw: fix master selection

commit 3fe6409c23e2bee4b2b1b6d671d2da8daa15271c upstream.

The commit 895005202987 ("dmaengine: dw: apply both HS interfaces and remove
slave_id usage") cleaned up the code to avoid usage of depricated slave_id
member of generic slave configuration.

Meanwhile it broke the master selection by removing important call to
dwc_set_masters() in ->device_alloc_chan_resources() which copied masters from
custom slave configuration to the internal channel structure.

Everything works until now since there is no customized connection of
DesignWare DMA IP to the bus, i.e. one bus and one or more masters are in use.
The configurations where 2 masters are connected to the different masters are
not working anymore. We are expecting one user of such configuration and need
to select masters properly. Besides that it is obviously a performance
regression since only one master is in use in multi-master configuration.

Select masters in accordance with what user asked for. Keep this patch in a form
more suitable for back porting.

We are safe to take necessary data in ->device_alloc_chan_resources() because
we don't support generic slave configuration embedded into custom one, and thus
the only way to provide such is to use the parameter to a filter function which
is called exactly before channel resource allocation.

While here, replase BUG_ON to less noisy dev_warn() and prevent channel
allocation in case of error.

Fixes: 895005202987 ("dmaengine: dw: apply both HS interfaces and remove slave_id usage")
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/dma/dw/core.c | 34 +++++++++++++++++++---------------
 1 file changed, 19 insertions(+), 15 deletions(-)

diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c
index 4f099ea29f83..c66133b5e852 100644
--- a/drivers/dma/dw/core.c
+++ b/drivers/dma/dw/core.c
@@ -130,26 +130,14 @@ static void dwc_desc_put(struct dw_dma_chan *dwc, struct dw_desc *desc)
 static void dwc_initialize(struct dw_dma_chan *dwc)
 {
 	struct dw_dma *dw = to_dw_dma(dwc->chan.device);
-	struct dw_dma_slave *dws = dwc->chan.private;
 	u32 cfghi = DWC_CFGH_FIFO_MODE;
 	u32 cfglo = DWC_CFGL_CH_PRIOR(dwc->priority);
 
 	if (dwc->initialized == true)
 		return;
 
-	if (dws) {
-		/*
-		 * We need controller-specific data to set up slave
-		 * transfers.
-		 */
-		BUG_ON(!dws->dma_dev || dws->dma_dev != dw->dma.dev);
-
-		cfghi |= DWC_CFGH_DST_PER(dws->dst_id);
-		cfghi |= DWC_CFGH_SRC_PER(dws->src_id);
-	} else {
-		cfghi |= DWC_CFGH_DST_PER(dwc->dst_id);
-		cfghi |= DWC_CFGH_SRC_PER(dwc->src_id);
-	}
+	cfghi |= DWC_CFGH_DST_PER(dwc->dst_id);
+	cfghi |= DWC_CFGH_SRC_PER(dwc->src_id);
 
 	channel_writel(dwc, CFG_LO, cfglo);
 	channel_writel(dwc, CFG_HI, cfghi);
@@ -936,7 +924,7 @@ bool dw_dma_filter(struct dma_chan *chan, void *param)
 	struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
 	struct dw_dma_slave *dws = param;
 
-	if (!dws || dws->dma_dev != chan->device->dev)
+	if (dws->dma_dev != chan->device->dev)
 		return false;
 
 	/* We have to copy data since dws can be temporary storage */
@@ -1160,6 +1148,14 @@ static int dwc_alloc_chan_resources(struct dma_chan *chan)
 	 * doesn't mean what you think it means), and status writeback.
 	 */
 
+	/*
+	 * We need controller-specific data to set up slave transfers.
+	 */
+	if (chan->private && !dw_dma_filter(chan, chan->private)) {
+		dev_warn(chan2dev(chan), "Wrong controller-specific data\n");
+		return -EINVAL;
+	}
+
 	/* Enable controller here if needed */
 	if (!dw->in_use)
 		dw_dma_on(dw);
@@ -1221,6 +1217,14 @@ static void dwc_free_chan_resources(struct dma_chan *chan)
 	spin_lock_irqsave(&dwc->lock, flags);
 	list_splice_init(&dwc->free_list, &list);
 	dwc->descs_allocated = 0;
+
+	/* Clear custom channel configuration */
+	dwc->src_id = 0;
+	dwc->dst_id = 0;
+
+	dwc->src_master = 0;
+	dwc->dst_master = 0;
+
 	dwc->initialized = false;
 
 	/* Disable interrupts */

From 34c1b030296c6815c05e416c7a647b68e695004a Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 18 Mar 2016 14:26:32 +0200
Subject: [PATCH 022/424] dmaengine: hsu: correct use of channel status
 register

commit 4f4bc0abff79dc9d7ccbd3143adbf8ad1f4fe6ab upstream.

There is a typo in documentation regarding to descriptor empty bit (DESCE)
which is set to 1 when descriptor is empty. Thus, status register at the end of
a transfer usually returns all DESCE bits set and thus it will never be zero.

Moreover, there are 2 bits (CDESC) that encode current descriptor, on which
interrupt has been asserted. In case when we have few descriptors programmed we
might have non-zero value.

Remove DESCE and CDESC bits from DMA channel status register (HSU_CH_SR) when
reading it.

Fixes: 2b49e0c56741 ("dmaengine: append hsu DMA driver")
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/dma/hsu/hsu.c | 2 +-
 drivers/dma/hsu/hsu.h | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/dma/hsu/hsu.c b/drivers/dma/hsu/hsu.c
index 823ad728aecf..efc02b98e6ba 100644
--- a/drivers/dma/hsu/hsu.c
+++ b/drivers/dma/hsu/hsu.c
@@ -135,7 +135,7 @@ static u32 hsu_dma_chan_get_sr(struct hsu_dma_chan *hsuc)
 	sr = hsu_chan_readl(hsuc, HSU_CH_SR);
 	spin_unlock_irqrestore(&hsuc->vchan.lock, flags);
 
-	return sr;
+	return sr & ~(HSU_CH_SR_DESCE_ANY | HSU_CH_SR_CDESC_ANY);
 }
 
 irqreturn_t hsu_dma_irq(struct hsu_dma_chip *chip, unsigned short nr)
diff --git a/drivers/dma/hsu/hsu.h b/drivers/dma/hsu/hsu.h
index f06579c6d548..26da2865b025 100644
--- a/drivers/dma/hsu/hsu.h
+++ b/drivers/dma/hsu/hsu.h
@@ -41,6 +41,9 @@
 #define HSU_CH_SR_DESCTO(x)	BIT(8 + (x))
 #define HSU_CH_SR_DESCTO_ANY	(BIT(11) | BIT(10) | BIT(9) | BIT(8))
 #define HSU_CH_SR_CHE		BIT(15)
+#define HSU_CH_SR_DESCE(x)	BIT(16 + (x))
+#define HSU_CH_SR_DESCE_ANY	(BIT(19) | BIT(18) | BIT(17) | BIT(16))
+#define HSU_CH_SR_CDESC_ANY	(BIT(31) | BIT(30))
 
 /* Bits in HSU_CH_CR */
 #define HSU_CH_CR_CHA		BIT(0)

From 94d75e190f199dfce1094496927418cb00810683 Mon Sep 17 00:00:00 2001
From: Robert Jarzmik <robert.jarzmik@free.fr>
Date: Mon, 15 Feb 2016 21:57:48 +0100
Subject: [PATCH 023/424] dmaengine: pxa_dma: fix the maximum requestor line

commit 6bab1c6afdca0371cfa957079b36b78d12dd2cf5 upstream.

The current number of requestor lines is limited to 31. This was an
error of a previous commit, as this number is platform dependent, and is
actually :
 - for pxa25x: 40 requestor lines
 - for pxa27x: 75 requestor lines
 - for pxa3xx: 100 requestor lines

The previous testing did not reveal the faulty constant as on pxa[23]xx
platforms, only camera, MSL and USB are above requestor 32, and in these
only the camera has a driver using dma.

Fixes: e87ffbdf0697 ("dmaengine: pxa_dma: fix the no-requestor case")
Signed-off-by: Robert Jarzmik <robert.jarzmik@free.fr>
Acked-by: Vinod Koul <vinod.koul@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/dma/pxa_dma.c | 39 ++++++++++++++++++++++++++++-----------
 1 file changed, 28 insertions(+), 11 deletions(-)

diff --git a/drivers/dma/pxa_dma.c b/drivers/dma/pxa_dma.c
index a59061e4221a..55f5d33f6dc7 100644
--- a/drivers/dma/pxa_dma.c
+++ b/drivers/dma/pxa_dma.c
@@ -122,6 +122,7 @@ struct pxad_chan {
 struct pxad_device {
 	struct dma_device		slave;
 	int				nr_chans;
+	int				nr_requestors;
 	void __iomem			*base;
 	struct pxad_phy			*phys;
 	spinlock_t			phy_lock;	/* Phy association */
@@ -473,7 +474,7 @@ static void pxad_free_phy(struct pxad_chan *chan)
 		return;
 
 	/* clear the channel mapping in DRCMR */
-	if (chan->drcmr <= DRCMR_CHLNUM) {
+	if (chan->drcmr <= pdev->nr_requestors) {
 		reg = pxad_drcmr(chan->drcmr);
 		writel_relaxed(0, chan->phy->base + reg);
 	}
@@ -509,6 +510,7 @@ static bool is_running_chan_misaligned(struct pxad_chan *chan)
 
 static void phy_enable(struct pxad_phy *phy, bool misaligned)
 {
+	struct pxad_device *pdev;
 	u32 reg, dalgn;
 
 	if (!phy->vchan)
@@ -518,7 +520,8 @@ static void phy_enable(struct pxad_phy *phy, bool misaligned)
 		"%s(); phy=%p(%d) misaligned=%d\n", __func__,
 		phy, phy->idx, misaligned);
 
-	if (phy->vchan->drcmr <= DRCMR_CHLNUM) {
+	pdev = to_pxad_dev(phy->vchan->vc.chan.device);
+	if (phy->vchan->drcmr <= pdev->nr_requestors) {
 		reg = pxad_drcmr(phy->vchan->drcmr);
 		writel_relaxed(DRCMR_MAPVLD | phy->idx, phy->base + reg);
 	}
@@ -914,6 +917,7 @@ static void pxad_get_config(struct pxad_chan *chan,
 {
 	u32 maxburst = 0, dev_addr = 0;
 	enum dma_slave_buswidth width = DMA_SLAVE_BUSWIDTH_UNDEFINED;
+	struct pxad_device *pdev = to_pxad_dev(chan->vc.chan.device);
 
 	*dcmd = 0;
 	if (dir == DMA_DEV_TO_MEM) {
@@ -922,7 +926,7 @@ static void pxad_get_config(struct pxad_chan *chan,
 		dev_addr = chan->cfg.src_addr;
 		*dev_src = dev_addr;
 		*dcmd |= PXA_DCMD_INCTRGADDR;
-		if (chan->drcmr <= DRCMR_CHLNUM)
+		if (chan->drcmr <= pdev->nr_requestors)
 			*dcmd |= PXA_DCMD_FLOWSRC;
 	}
 	if (dir == DMA_MEM_TO_DEV) {
@@ -931,7 +935,7 @@ static void pxad_get_config(struct pxad_chan *chan,
 		dev_addr = chan->cfg.dst_addr;
 		*dev_dst = dev_addr;
 		*dcmd |= PXA_DCMD_INCSRCADDR;
-		if (chan->drcmr <= DRCMR_CHLNUM)
+		if (chan->drcmr <= pdev->nr_requestors)
 			*dcmd |= PXA_DCMD_FLOWTRG;
 	}
 	if (dir == DMA_MEM_TO_MEM)
@@ -1341,13 +1345,15 @@ static struct dma_chan *pxad_dma_xlate(struct of_phandle_args *dma_spec,
 
 static int pxad_init_dmadev(struct platform_device *op,
 			    struct pxad_device *pdev,
-			    unsigned int nr_phy_chans)
+			    unsigned int nr_phy_chans,
+			    unsigned int nr_requestors)
 {
 	int ret;
 	unsigned int i;
 	struct pxad_chan *c;
 
 	pdev->nr_chans = nr_phy_chans;
+	pdev->nr_requestors = nr_requestors;
 	INIT_LIST_HEAD(&pdev->slave.channels);
 	pdev->slave.device_alloc_chan_resources = pxad_alloc_chan_resources;
 	pdev->slave.device_free_chan_resources = pxad_free_chan_resources;
@@ -1382,7 +1388,7 @@ static int pxad_probe(struct platform_device *op)
 	const struct of_device_id *of_id;
 	struct mmp_dma_platdata *pdata = dev_get_platdata(&op->dev);
 	struct resource *iores;
-	int ret, dma_channels = 0;
+	int ret, dma_channels = 0, nb_requestors = 0;
 	const enum dma_slave_buswidth widths =
 		DMA_SLAVE_BUSWIDTH_1_BYTE   | DMA_SLAVE_BUSWIDTH_2_BYTES |
 		DMA_SLAVE_BUSWIDTH_4_BYTES;
@@ -1399,13 +1405,23 @@ static int pxad_probe(struct platform_device *op)
 		return PTR_ERR(pdev->base);
 
 	of_id = of_match_device(pxad_dt_ids, &op->dev);
-	if (of_id)
+	if (of_id) {
 		of_property_read_u32(op->dev.of_node, "#dma-channels",
 				     &dma_channels);
-	else if (pdata && pdata->dma_channels)
+		ret = of_property_read_u32(op->dev.of_node, "#dma-requests",
+					   &nb_requestors);
+		if (ret) {
+			dev_warn(pdev->slave.dev,
+				 "#dma-requests set to default 32 as missing in OF: %d",
+				 ret);
+			nb_requestors = 32;
+		};
+	} else if (pdata && pdata->dma_channels) {
 		dma_channels = pdata->dma_channels;
-	else
+		nb_requestors = pdata->nb_requestors;
+	} else {
 		dma_channels = 32;	/* default 32 channel */
+	}
 
 	dma_cap_set(DMA_SLAVE, pdev->slave.cap_mask);
 	dma_cap_set(DMA_MEMCPY, pdev->slave.cap_mask);
@@ -1422,7 +1438,7 @@ static int pxad_probe(struct platform_device *op)
 	pdev->slave.residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
 
 	pdev->slave.dev = &op->dev;
-	ret = pxad_init_dmadev(op, pdev, dma_channels);
+	ret = pxad_init_dmadev(op, pdev, dma_channels, nb_requestors);
 	if (ret) {
 		dev_err(pdev->slave.dev, "unable to register\n");
 		return ret;
@@ -1441,7 +1457,8 @@ static int pxad_probe(struct platform_device *op)
 
 	platform_set_drvdata(op, pdev);
 	pxad_init_debugfs(pdev);
-	dev_info(pdev->slave.dev, "initialized %d channels\n", dma_channels);
+	dev_info(pdev->slave.dev, "initialized %d channels on %d requestors\n",
+		 dma_channels, nb_requestors);
 	return 0;
 }
 

From c0944355a74bc9c2b5b3cc5b627efe0c73e30bd9 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 16 Mar 2016 16:22:45 +0100
Subject: [PATCH 024/424] sched/cgroup: Fix/cleanup cgroup teardown/init

commit 2f5177f0fd7e531b26d54633be62d1d4cb94621c upstream.

The CPU controller hasn't kept up with the various changes in the whole
cgroup initialization / destruction sequence, and commit:

  2e91fa7f6d45 ("cgroup: keep zombies associated with their original cgroups")

caused it to explode.

The reason for this is that zombies do not inhibit css_offline() from
being called, but do stall css_released(). Now we tear down the cfs_rq
structures on css_offline() but zombies can run after that, leading to
use-after-free issues.

The solution is to move the tear-down to css_released(), which
guarantees nobody (including no zombies) is still using our cgroup.

Furthermore, a few simple cleanups are possible too. There doesn't
appear to be any point to us using css_online() (anymore?) so fold that
in css_alloc().

And since cgroup code guarantees an RCU grace period between
css_released() and css_free() we can forgo using call_rcu() and free the
stuff immediately.

Suggested-by: Tejun Heo <tj@kernel.org>
Reported-by: Kazuki Yamaguchi <k@rhe.jp>
Reported-by: Niklas Cassel <niklas.cassel@axis.com>
Tested-by: Niklas Cassel <niklas.cassel@axis.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Tejun Heo <tj@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 2e91fa7f6d45 ("cgroup: keep zombies associated with their original cgroups")
Link: http://lkml.kernel.org/r/20160316152245.GY6344@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/sched/core.c | 35 ++++++++++++++---------------------
 1 file changed, 14 insertions(+), 21 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 70e5e09341f1..55bebf924946 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7693,7 +7693,7 @@ void set_curr_task(int cpu, struct task_struct *p)
 /* task_group_lock serializes the addition/removal of task groups */
 static DEFINE_SPINLOCK(task_group_lock);
 
-static void free_sched_group(struct task_group *tg)
+static void sched_free_group(struct task_group *tg)
 {
 	free_fair_sched_group(tg);
 	free_rt_sched_group(tg);
@@ -7719,7 +7719,7 @@ struct task_group *sched_create_group(struct task_group *parent)
 	return tg;
 
 err:
-	free_sched_group(tg);
+	sched_free_group(tg);
 	return ERR_PTR(-ENOMEM);
 }
 
@@ -7739,17 +7739,16 @@ void sched_online_group(struct task_group *tg, struct task_group *parent)
 }
 
 /* rcu callback to free various structures associated with a task group */
-static void free_sched_group_rcu(struct rcu_head *rhp)
+static void sched_free_group_rcu(struct rcu_head *rhp)
 {
 	/* now it should be safe to free those cfs_rqs */
-	free_sched_group(container_of(rhp, struct task_group, rcu));
+	sched_free_group(container_of(rhp, struct task_group, rcu));
 }
 
-/* Destroy runqueue etc associated with a task group */
 void sched_destroy_group(struct task_group *tg)
 {
 	/* wait for possible concurrent references to cfs_rqs complete */
-	call_rcu(&tg->rcu, free_sched_group_rcu);
+	call_rcu(&tg->rcu, sched_free_group_rcu);
 }
 
 void sched_offline_group(struct task_group *tg)
@@ -8210,31 +8209,26 @@ cpu_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 	if (IS_ERR(tg))
 		return ERR_PTR(-ENOMEM);
 
+	sched_online_group(tg, parent);
+
 	return &tg->css;
 }
 
-static int cpu_cgroup_css_online(struct cgroup_subsys_state *css)
+static void cpu_cgroup_css_released(struct cgroup_subsys_state *css)
 {
 	struct task_group *tg = css_tg(css);
-	struct task_group *parent = css_tg(css->parent);
 
-	if (parent)
-		sched_online_group(tg, parent);
-	return 0;
+	sched_offline_group(tg);
 }
 
 static void cpu_cgroup_css_free(struct cgroup_subsys_state *css)
 {
 	struct task_group *tg = css_tg(css);
 
-	sched_destroy_group(tg);
-}
-
-static void cpu_cgroup_css_offline(struct cgroup_subsys_state *css)
-{
-	struct task_group *tg = css_tg(css);
-
-	sched_offline_group(tg);
+	/*
+	 * Relies on the RCU grace period between css_released() and this.
+	 */
+	sched_free_group(tg);
 }
 
 static void cpu_cgroup_fork(struct task_struct *task, void *private)
@@ -8594,9 +8588,8 @@ static struct cftype cpu_files[] = {
 
 struct cgroup_subsys cpu_cgrp_subsys = {
 	.css_alloc	= cpu_cgroup_css_alloc,
+	.css_released	= cpu_cgroup_css_released,
 	.css_free	= cpu_cgroup_css_free,
-	.css_online	= cpu_cgroup_css_online,
-	.css_offline	= cpu_cgroup_css_offline,
 	.fork		= cpu_cgroup_fork,
 	.can_attach	= cpu_cgroup_can_attach,
 	.attach		= cpu_cgroup_attach,

From bdb0618ad1b9ea6ec6926450c687d133ccddf28c Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Thu, 7 Jan 2016 16:07:20 +0000
Subject: [PATCH 025/424] arm64: Honour !PTE_WRITE in set_pte_at() for kernel
 mappings

commit ac15bd63bbb24238f763ec5b24ee175ec301e8cd upstream.

Currently, set_pte_at() only checks the software PTE_WRITE bit for user
mappings when it sets or clears the hardware PTE_RDONLY accordingly. The
kernel ptes are written directly without any modification, relying
solely on the protection bits in macros like PAGE_KERNEL. However,
modifying kernel pte attributes via pte_wrprotect() would be ignored by
set_pte_at(). Since pte_wrprotect() does not set PTE_RDONLY (it only
clears PTE_WRITE), the new permission is not taken into account.

This patch changes set_pte_at() to adjust the read-only permission for
kernel ptes as well. As a side effect, existing PROT_* definitions used
for kernel ioremap*() need to include PTE_DIRTY | PTE_WRITE.

(additionally, white space fix for PTE_KERNEL_ROX)

Acked-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Reported-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm64/include/asm/pgtable.h | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index eaa9cabf4066..298474933ef3 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -69,11 +69,11 @@ extern void __pgd_error(const char *file, int line, unsigned long val);
 #define PROT_DEFAULT		(PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
 #define PROT_SECT_DEFAULT	(PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
 
-#define PROT_DEVICE_nGnRnE	(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRnE))
-#define PROT_DEVICE_nGnRE	(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRE))
-#define PROT_NORMAL_NC		(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL_NC))
-#define PROT_NORMAL_WT		(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL_WT))
-#define PROT_NORMAL		(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL))
+#define PROT_DEVICE_nGnRnE	(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE))
+#define PROT_DEVICE_nGnRE	(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE))
+#define PROT_NORMAL_NC		(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC))
+#define PROT_NORMAL_WT		(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_WT))
+#define PROT_NORMAL		(PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL))
 
 #define PROT_SECT_DEVICE_nGnRE	(PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE))
 #define PROT_SECT_NORMAL	(PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
@@ -83,7 +83,7 @@ extern void __pgd_error(const char *file, int line, unsigned long val);
 
 #define PAGE_KERNEL		__pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE)
 #define PAGE_KERNEL_RO		__pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
-#define PAGE_KERNEL_ROX	__pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
+#define PAGE_KERNEL_ROX		__pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
 #define PAGE_KERNEL_EXEC	__pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE)
 #define PAGE_KERNEL_EXEC_CONT	__pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT)
 
@@ -155,6 +155,7 @@ extern struct page *empty_zero_page;
 #define pte_write(pte)		(!!(pte_val(pte) & PTE_WRITE))
 #define pte_exec(pte)		(!(pte_val(pte) & PTE_UXN))
 #define pte_cont(pte)		(!!(pte_val(pte) & PTE_CONT))
+#define pte_user(pte)		(!!(pte_val(pte) & PTE_USER))
 
 #ifdef CONFIG_ARM64_HW_AFDBM
 #define pte_hw_dirty(pte)	(pte_write(pte) && !(pte_val(pte) & PTE_RDONLY))
@@ -165,8 +166,6 @@ extern struct page *empty_zero_page;
 #define pte_dirty(pte)		(pte_sw_dirty(pte) || pte_hw_dirty(pte))
 
 #define pte_valid(pte)		(!!(pte_val(pte) & PTE_VALID))
-#define pte_valid_user(pte) \
-	((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER))
 #define pte_valid_not_user(pte) \
 	((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID)
 
@@ -264,13 +263,13 @@ extern void __sync_icache_dcache(pte_t pteval, unsigned long addr);
 static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
 			      pte_t *ptep, pte_t pte)
 {
-	if (pte_valid_user(pte)) {
-		if (!pte_special(pte) && pte_exec(pte))
-			__sync_icache_dcache(pte, addr);
+	if (pte_valid(pte)) {
 		if (pte_sw_dirty(pte) && pte_write(pte))
 			pte_val(pte) &= ~PTE_RDONLY;
 		else
 			pte_val(pte) |= PTE_RDONLY;
+		if (pte_user(pte) && pte_exec(pte) && !pte_special(pte))
+			__sync_icache_dcache(pte, addr);
 	}
 
 	/*

From 70d65587f0a82f50952cb29af133a5b6b8538611 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Wed, 9 Mar 2016 16:31:29 +0000
Subject: [PATCH 026/424] arm64: Update PTE_RDONLY in set_pte_at() for
 PROT_NONE permission

commit fdc69e7df3cb24f18a93192641786e5b7ecd1dfe upstream.

The set_pte_at() function must update the hardware PTE_RDONLY bit
depending on the state of the PTE_WRITE and PTE_DIRTY bits of the given
entry value. However, it currently only performs this for pte_valid()
entries, ignoring PTE_PROT_NONE. The side-effect is that PROT_NONE
mappings would not have the PTE_RDONLY bit set. Without
CONFIG_ARM64_HW_AFDBM, this is not an issue since such PROT_NONE pages
are not accessible anyway.

With commit 2f4b829c625e ("arm64: Add support for hardware updates of
the access and dirty pte bits"), the ptep_set_wrprotect() function was
re-written to cope with automatic hardware updates of the dirty state.
As an optimisation, only PTE_RDONLY is checked to assess the "dirty"
status. Since set_pte_at() does not set this bit for PROT_NONE mappings,
such pages may be considered "dirty" as a result of
ptep_set_wrprotect().

This patch updates the pte_valid() check to pte_present() in
set_pte_at(). It also adds PTE_PROT_NONE to the swap entry bits comment.

Fixes: 2f4b829c625e ("arm64: Add support for hardware updates of the access and dirty pte bits")
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Reported-by: Ganapatrao Kulkarni <gkulkarni@caviumnetworks.com>
Tested-by: Ganapatrao Kulkarni <gkulkarni@cavium.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm64/include/asm/pgtable.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 298474933ef3..c63868ae9a4a 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -263,7 +263,7 @@ extern void __sync_icache_dcache(pte_t pteval, unsigned long addr);
 static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
 			      pte_t *ptep, pte_t pte)
 {
-	if (pte_valid(pte)) {
+	if (pte_present(pte)) {
 		if (pte_sw_dirty(pte) && pte_write(pte))
 			pte_val(pte) &= ~PTE_RDONLY;
 		else
@@ -640,6 +640,7 @@ extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
  *	bits 0-1:	present (must be zero)
  *	bits 2-7:	swap type
  *	bits 8-57:	swap offset
+ *	bit  58:	PTE_PROT_NONE (must be zero)
  */
 #define __SWP_TYPE_SHIFT	2
 #define __SWP_TYPE_BITS		6

From 27b3cc048a5275c53e26c15ffcab3fcf9a03cda0 Mon Sep 17 00:00:00 2001
From: Jan Beulich <JBeulich@suse.com>
Date: Thu, 21 Apr 2016 00:27:04 -0600
Subject: [PATCH 027/424] x86/mm/xen: Suppress hugetlbfs in PV guests

commit 103f6112f253017d7062cd74d17f4a514ed4485c upstream.

Huge pages are not normally available to PV guests. Not suppressing
hugetlbfs use results in an endless loop of page faults when user mode
code tries to access a hugetlbfs mapped area (since the hypervisor
denies such PTEs to be created, but error indications can't be
propagated out of xen_set_pte_at(), just like for various of its
siblings), and - once killed in an oops like this:

  kernel BUG at .../fs/hugetlbfs/inode.c:428!
  invalid opcode: 0000 [#1] SMP
  ...
  RIP: e030:[<ffffffff811c333b>]  [<ffffffff811c333b>] remove_inode_hugepages+0x25b/0x320
  ...
  Call Trace:
   [<ffffffff811c3415>] hugetlbfs_evict_inode+0x15/0x40
   [<ffffffff81167b3d>] evict+0xbd/0x1b0
   [<ffffffff8116514a>] __dentry_kill+0x19a/0x1f0
   [<ffffffff81165b0e>] dput+0x1fe/0x220
   [<ffffffff81150535>] __fput+0x155/0x200
   [<ffffffff81079fc0>] task_work_run+0x60/0xa0
   [<ffffffff81063510>] do_exit+0x160/0x400
   [<ffffffff810637eb>] do_group_exit+0x3b/0xa0
   [<ffffffff8106e8bd>] get_signal+0x1ed/0x470
   [<ffffffff8100f854>] do_signal+0x14/0x110
   [<ffffffff810030e9>] prepare_exit_to_usermode+0xe9/0xf0
   [<ffffffff814178a5>] retint_user+0x8/0x13

This is CVE-2016-3961 / XSA-174.

Reported-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: David Vrabel <david.vrabel@citrix.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Juergen Gross <JGross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Luis R. Rodriguez <mcgrof@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Toshi Kani <toshi.kani@hp.com>
Cc: xen-devel <xen-devel@lists.xenproject.org>
Link: http://lkml.kernel.org/r/57188ED802000078000E431C@prv-mh.provo.novell.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/hugetlb.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h
index f8a29d2c97b0..e6a8613fbfb0 100644
--- a/arch/x86/include/asm/hugetlb.h
+++ b/arch/x86/include/asm/hugetlb.h
@@ -4,6 +4,7 @@
 #include <asm/page.h>
 #include <asm-generic/hugetlb.h>
 
+#define hugepages_supported() cpu_has_pse
 
 static inline int is_hugepage_only_range(struct mm_struct *mm,
 					 unsigned long addr,

From 5582eb00f5b2362234cccf542232101db61ffc8b Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Thu, 14 Apr 2016 10:21:52 -0700
Subject: [PATCH 028/424] x86 EDAC, sb_edac.c: Repair damage introduced when
 "fixing" channel address

commit ff15e95c82768d589957dbb17d7eb7dba7904659 upstream.

In commit:

  eb1af3b71f9d ("Fix computation of channel address")

I switched the "sck_way" variable from holding the log2 value read
from the h/w to instead be the actual number. Unfortunately it
is needed in log2 form when used to shift the address.

Tested-by: Patrick Geary <patrickg@supermicro.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Acked-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Cc: Aristeu Rozanski <arozansk@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-edac@vger.kernel.org
Fixes: eb1af3b71f9d ("Fix computation of channel address")
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/edac/sb_edac.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index cbee3179ec08..90c3fe99c786 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -1396,7 +1396,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
 	}
 
 	ch_way = TAD_CH(reg) + 1;
-	sck_way = 1 << TAD_SOCK(reg);
+	sck_way = TAD_SOCK(reg);
 
 	if (ch_way == 3)
 		idx = addr >> 6;
@@ -1435,7 +1435,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
 		switch(ch_way) {
 		case 2:
 		case 4:
-			sck_xch = 1 << sck_way * (ch_way >> 1);
+			sck_xch = (1 << sck_way) * (ch_way >> 1);
 			break;
 		default:
 			sprintf(msg, "Invalid mirror set. Can't decode addr");
@@ -1471,7 +1471,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
 
 	ch_addr = addr - offset;
 	ch_addr >>= (6 + shiftup);
-	ch_addr /= ch_way * sck_way;
+	ch_addr /= sck_xch;
 	ch_addr <<= (6 + shiftup);
 	ch_addr |= addr & ((1 << (6 + shiftup)) - 1);
 

From 66b7be5743d88c9b8fa69e5ba7d06d33d14de8c7 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Sun, 17 Apr 2016 09:39:41 +0200
Subject: [PATCH 029/424] ALSA: hda - Don't trust the reported actual power
 state

commit 50fd4987c4f3c3ebf0ce94d932732011bbdc7c71 upstream.

We've got a regression report that the recording on Mac with a cirrus
codec doesn't work any longer.  This turned out to be the missing
power up to D0 by power_save_node enablement.

After analyzing the traces, we found out that the culprit is that the
codec advertises the "actual" power state of a few nodes to be D0
while the "target" power state is D3.  This inconsistency is usually
OK, as it implies the power transition.  But in the case of cirrus
codec, this seems to be stuck to D3 while it's not actually D0.

This patch addresses the issue by checking the power state difference
more strictly.  It sends the power-state change verb unless both the
target and the actual power states show the given value.

We may introduce yet another flag indicating the possible broken
hardware power state, but it's anyway safer to set the proper power
state even in a transition (at least it's harmless as long as the
target state is same).  So this simpler change was applied now.

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=116171
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/hda/hda_generic.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c
index 5c4fa8eba1d0..367dbf0d285e 100644
--- a/sound/pci/hda/hda_generic.c
+++ b/sound/pci/hda/hda_generic.c
@@ -843,7 +843,7 @@ static hda_nid_t path_power_update(struct hda_codec *codec,
 				   bool allow_powerdown)
 {
 	hda_nid_t nid, changed = 0;
-	int i, state;
+	int i, state, power;
 
 	for (i = 0; i < path->depth; i++) {
 		nid = path->path[i];
@@ -855,7 +855,9 @@ static hda_nid_t path_power_update(struct hda_codec *codec,
 			state = AC_PWRST_D0;
 		else
 			state = AC_PWRST_D3;
-		if (!snd_hda_check_power_state(codec, nid, state)) {
+		power = snd_hda_codec_read(codec, nid, 0,
+					   AC_VERB_GET_POWER_STATE, 0);
+		if (power != (state | (state << 4))) {
 			snd_hda_codec_write(codec, nid, 0,
 					    AC_VERB_SET_POWER_STATE, state);
 			changed = nid;

From a38ae6bb1473a02c5e7501fefe2a5ff42ad736c5 Mon Sep 17 00:00:00 2001
From: Bastien Nocera <hadess@hadess.net>
Date: Mon, 18 Apr 2016 11:10:42 +0200
Subject: [PATCH 030/424] ALSA: hda/realtek - Add ALC3234 headset mode for
 Optiplex 9020m

commit afecb146d8d8a60a1dde9cdf570c278649617fde upstream.

The Optiplex 9020m with Haswell-DT processor needs a quirk for the
headset jack at the front of the machine to be able to use microphones.

A quirk for this model was originally added in 3127899, but c77900e
removed it in favour of a more generic version.

Unfortunately, pin configurations can changed based on firmware/BIOS
versions, and the generic version doesn't have any effect on newer
versions of the machine/firmware anymore.

With help from David Henningsson <diwic@ubuntu.com>

Signed-off-by: Bastien Nocera <hadess@hadess.net>
Tested-by: Bastien Nocera <hadess@hadess.net>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/hda/patch_realtek.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 1402ba954b3d..810bceee4fd2 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -5449,6 +5449,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x1028, 0x064a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x064b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x0665, "Dell XPS 13", ALC288_FIXUP_DELL_XPS_13),
+	SND_PCI_QUIRK(0x1028, 0x0669, "Dell Optiplex 9020m", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x069a, "Dell Vostro 5480", ALC290_FIXUP_SUBWOOFER_HSJACK),
 	SND_PCI_QUIRK(0x1028, 0x06c7, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x06d9, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),

From 0df9987a2ec6bc37440b9c4fa176f360039a8b8e Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 19 Apr 2016 22:07:50 +0200
Subject: [PATCH 031/424] ALSA: hda - Keep powering up ADCs on Cirrus codecs

commit de3df8a986b635082a1d94bae2c361d043c57106 upstream.

Although one weird behavior about the input path (inconsistent D0/D3
switch) on Cirrus CS420x codecs was fixed in the previous commit,
there is still an issue on some Mac machines: the capture stream
stalls when switching the ADCs on the fly.  More badly, this keeps
stuck until the next reboot.

The dynamic ADC switching is already a bit fragile and assuming
optimistically that the chip accepts the frequent power changes.  On
Cirrus codecs, this doesn't seem applicable.

As a quick workaround, we pin down the ADCs to keep up in D0 when
spec->dyn_adc_switch is set.  In this way, the ADCs are kept up only
for the system that were confirmed to be broken.

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=116171
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/hda/patch_cirrus.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c
index a47e8ae0eb30..80bbadc83721 100644
--- a/sound/pci/hda/patch_cirrus.c
+++ b/sound/pci/hda/patch_cirrus.c
@@ -361,6 +361,7 @@ static int cs_parse_auto_config(struct hda_codec *codec)
 {
 	struct cs_spec *spec = codec->spec;
 	int err;
+	int i;
 
 	err = snd_hda_parse_pin_defcfg(codec, &spec->gen.autocfg, NULL, 0);
 	if (err < 0)
@@ -370,6 +371,19 @@ static int cs_parse_auto_config(struct hda_codec *codec)
 	if (err < 0)
 		return err;
 
+	/* keep the ADCs powered up when it's dynamically switchable */
+	if (spec->gen.dyn_adc_switch) {
+		unsigned int done = 0;
+		for (i = 0; i < spec->gen.input_mux.num_items; i++) {
+			int idx = spec->gen.dyn_adc_idx[i];
+			if (done & (1 << idx))
+				continue;
+			snd_hda_gen_fix_pin_power(codec,
+						  spec->gen.adc_nids[idx]);
+			done |= 1 << idx;
+		}
+	}
+
 	return 0;
 }
 

From 79dc55bd02a8dc0b6adf7598c4f8a7356594c363 Mon Sep 17 00:00:00 2001
From: "Lu, Han" <han.lu@intel.com>
Date: Wed, 20 Apr 2016 10:08:43 +0800
Subject: [PATCH 032/424] ALSA: hda - add PCI ID for Intel Broxton-T

commit 9859a971ca228725425238756ee89c6133306ec8 upstream.

Add HD Audio Device PCI ID for the Intel Broxton-T platform.
It is an HDA Intel PCH controller.

Signed-off-by: Lu, Han <han.lu@intel.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/hda/hda_intel.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 2ff692dd2c5f..411630e9c034 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -2207,6 +2207,9 @@ static const struct pci_device_id azx_ids[] = {
 	/* Broxton-P(Apollolake) */
 	{ PCI_DEVICE(0x8086, 0x5a98),
 	  .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_BROXTON },
+	/* Broxton-T */
+	{ PCI_DEVICE(0x8086, 0x1a98),
+	  .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_BROXTON },
 	/* Haswell */
 	{ PCI_DEVICE(0x8086, 0x0a0c),
 	  .driver_data = AZX_DRIVER_HDMI | AZX_DCAPS_INTEL_HASWELL },

From 519aef523513a58f958e0aa432855e7b2a57a611 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 21 Apr 2016 17:37:54 +0200
Subject: [PATCH 033/424] ALSA: pcxhr: Fix missing mutex unlock

commit 67f3754b51f22b18c4820fb84062f658c30e8644 upstream.

The commit [9bef72bdb26e: ALSA: pcxhr: Use nonatomic PCM ops]
converted to non-atomic PCM ops, but shamelessly with an unbalanced
mutex locking, which leads to the hangup easily.  Fix it.

Fixes: 9bef72bdb26e ('ALSA: pcxhr: Use nonatomic PCM ops')
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=116441
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/pcxhr/pcxhr_core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/pci/pcxhr/pcxhr_core.c b/sound/pci/pcxhr/pcxhr_core.c
index c5194f5b150a..d7e71f309299 100644
--- a/sound/pci/pcxhr/pcxhr_core.c
+++ b/sound/pci/pcxhr/pcxhr_core.c
@@ -1341,5 +1341,6 @@ irqreturn_t pcxhr_threaded_irq(int irq, void *dev_id)
 	}
 
 	pcxhr_msg_thread(mgr);
+	mutex_unlock(&mgr->lock);
 	return IRQ_HANDLED;
 }

From 8dd069c221e299db24ea5937c6130433109d6499 Mon Sep 17 00:00:00 2001
From: Conrad Kostecki <ck+linuxkernel@bl4ckb0x.de>
Date: Tue, 26 Apr 2016 10:08:10 +0200
Subject: [PATCH 034/424] ALSA: hda - Add dock support for ThinkPad X260

commit 037e119738120c1cdc460c6ae33871c3000531f3 upstream.

Fixes audio output on a ThinkPad X260, when using Lenovo CES 2013
docking station series (basic, pro, ultra).

Signed-off-by: Conrad Kostecki <ck+linuxkernel@bl4ckb0x.de>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/hda/patch_realtek.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 810bceee4fd2..ac4490a96863 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -5584,6 +5584,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x17aa, 0x5034, "Thinkpad T450", ALC292_FIXUP_TPT440_DOCK),
 	SND_PCI_QUIRK(0x17aa, 0x5036, "Thinkpad T450s", ALC292_FIXUP_TPT440_DOCK),
 	SND_PCI_QUIRK(0x17aa, 0x503c, "Thinkpad L450", ALC292_FIXUP_TPT440_DOCK),
+	SND_PCI_QUIRK(0x17aa, 0x504a, "ThinkPad X260", ALC292_FIXUP_TPT440_DOCK),
 	SND_PCI_QUIRK(0x17aa, 0x504b, "Thinkpad", ALC293_FIXUP_LENOVO_SPK_NOISE),
 	SND_PCI_QUIRK(0x17aa, 0x5109, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
 	SND_PCI_QUIRK(0x17aa, 0x3bf8, "Quanta FL1", ALC269_FIXUP_PCM_44K),

From 4fad26a279caad671287e502e9b6d2487a56b270 Mon Sep 17 00:00:00 2001
From: Romain Perier <romain.perier@free-electrons.com>
Date: Thu, 14 Apr 2016 15:36:03 +0200
Subject: [PATCH 035/424] asm-generic/futex: Re-enable preemption in
 futex_atomic_cmpxchg_inatomic()

commit fba7cd681b6155e2d93e7862fcd6f970336b83c3 upstream.

The recent decoupling of pagefault disable and preempt disable added an
explicit preempt_disable/enable() pair to the futex_atomic_cmpxchg_inatomic()
implementation in asm-generic/futex.h. But it forgot to add preempt_enable()
calls to the error handling code pathes, which results in a preemption count
imbalance.

This is observable on boot when the test for atomic_cmpxchg() is calling
futex_atomic_cmpxchg_inatomic() on a NULL pointer.

Add the missing preempt_enable() calls to the error handling code pathes.

[ tglx: Massaged changelog ]

Fixes: d9b9ff8c1889 ("sched/preempt, futex: Disable preemption in UP futex_atomic_cmpxchg_inatomic() explicitly")
Signed-off-by: Romain Perier <romain.perier@free-electrons.com>
Cc: linux-arch@vger.kernel.org
Cc: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1460640963-690-1-git-send-email-romain.perier@free-electrons.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/asm-generic/futex.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/include/asm-generic/futex.h b/include/asm-generic/futex.h
index e56272c919b5..bf2d34c9d804 100644
--- a/include/asm-generic/futex.h
+++ b/include/asm-generic/futex.h
@@ -108,11 +108,15 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 	u32 val;
 
 	preempt_disable();
-	if (unlikely(get_user(val, uaddr) != 0))
+	if (unlikely(get_user(val, uaddr) != 0)) {
+		preempt_enable();
 		return -EFAULT;
+	}
 
-	if (val == oldval && unlikely(put_user(newval, uaddr) != 0))
+	if (val == oldval && unlikely(put_user(newval, uaddr) != 0)) {
+		preempt_enable();
 		return -EFAULT;
+	}
 
 	*uval = val;
 	preempt_enable();

From 61fc0ae42c498f8eb782733065d93da6817d28b4 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 15 Apr 2016 14:35:39 +0200
Subject: [PATCH 036/424] futex: Handle unlock_pi race gracefully

commit 89e9e66ba1b3bde9d8ea90566c2aee20697ad681 upstream.

If userspace calls UNLOCK_PI unconditionally without trying the TID -> 0
transition in user space first then the user space value might not have the
waiters bit set. This opens the following race:

CPU0	    	      	    CPU1
uval = get_user(futex)
			    lock(hb)
lock(hb)
			    futex |= FUTEX_WAITERS
			    ....
			    unlock(hb)

cmpxchg(futex, uval, newval)

So the cmpxchg fails and returns -EINVAL to user space, which is wrong because
the futex value is valid.

To handle this (yes, yet another) corner case gracefully, check for a flag
change and retry.

[ tglx: Massaged changelog and slightly reworked implementation ]

Fixes: ccf9e6a80d9e ("futex: Make unlock_pi more robust")
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Darren Hart <dvhart@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1460723739-5195-1-git-send-email-bigeasy@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/futex.c | 25 ++++++++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

diff --git a/kernel/futex.c b/kernel/futex.c
index 461c72b2dac2..eaa3a8dfd345 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1244,10 +1244,20 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this,
 	if (unlikely(should_fail_futex(true)))
 		ret = -EFAULT;
 
-	if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
+	if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) {
 		ret = -EFAULT;
-	else if (curval != uval)
-		ret = -EINVAL;
+	} else if (curval != uval) {
+		/*
+		 * If a unconditional UNLOCK_PI operation (user space did not
+		 * try the TID->0 transition) raced with a waiter setting the
+		 * FUTEX_WAITERS flag between get_user() and locking the hash
+		 * bucket lock, retry the operation.
+		 */
+		if ((FUTEX_TID_MASK & curval) == uval)
+			ret = -EAGAIN;
+		else
+			ret = -EINVAL;
+	}
 	if (ret) {
 		raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
 		return ret;
@@ -2537,6 +2547,15 @@ retry:
 		 */
 		if (ret == -EFAULT)
 			goto pi_faulted;
+		/*
+		 * A unconditional UNLOCK_PI op raced against a waiter
+		 * setting the FUTEX_WAITERS bit. Try again.
+		 */
+		if (ret == -EAGAIN) {
+			spin_unlock(&hb->lock);
+			put_futex_key(&key);
+			goto retry;
+		}
 		/*
 		 * wake_futex_pi has detected invalid state. Tell user
 		 * space.

From ad4b209d192624e8587f4988171d624346913ddd Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <dave@stgolabs.net>
Date: Wed, 20 Apr 2016 20:09:24 -0700
Subject: [PATCH 037/424] futex: Acknowledge a new waiter in counter before
 plist

commit fe1bce9e2107ba3a8faffe572483b6974201a0e6 upstream.

Otherwise an incoming waker on the dest hash bucket can miss
the waiter adding itself to the plist during the lockless
check optimization (small window but still the correct way
of doing this); similarly to the decrement counterpart.

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: bigeasy@linutronix.de
Cc: dvhart@infradead.org
Link: http://lkml.kernel.org/r/1461208164-29150-1-git-send-email-dave@stgolabs.net
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/futex.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/futex.c b/kernel/futex.c
index eaa3a8dfd345..9d8163afd87c 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1484,8 +1484,8 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
 	if (likely(&hb1->chain != &hb2->chain)) {
 		plist_del(&q->list, &hb1->chain);
 		hb_waiters_dec(hb1);
-		plist_add(&q->list, &hb2->chain);
 		hb_waiters_inc(hb2);
+		plist_add(&q->list, &hb2->chain);
 		q->lock_ptr = &hb2->lock;
 	}
 	get_futex_key_refs(key2);

From 06e38eaf1a24332b15748f33039d5bf15799c5cb Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Sun, 6 Mar 2016 16:06:06 -0500
Subject: [PATCH 038/424] drm/nouveau/core: use vzalloc for allocating ramht

commit 78a121d82da8aff3aca2a6a1c40f5061081760f0 upstream.

Most calls to nvkm_ramht_new use 0x8000 as the size. This results in a
fairly sizeable chunk of memory to be allocated, which may not be
available with kzalloc. Since this is done fairly rarely (once per
channel), use vzalloc instead.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Cc: Sven Joachim <svenjoac@gmx.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/nouveau/nvkm/core/ramht.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nvkm/core/ramht.c b/drivers/gpu/drm/nouveau/nvkm/core/ramht.c
index 3216e157a8a0..89da47234016 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/ramht.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/ramht.c
@@ -131,7 +131,7 @@ nvkm_ramht_del(struct nvkm_ramht **pramht)
 	struct nvkm_ramht *ramht = *pramht;
 	if (ramht) {
 		nvkm_gpuobj_del(&ramht->gpuobj);
-		kfree(*pramht);
+		vfree(*pramht);
 		*pramht = NULL;
 	}
 }
@@ -143,8 +143,8 @@ nvkm_ramht_new(struct nvkm_device *device, u32 size, u32 align,
 	struct nvkm_ramht *ramht;
 	int ret, i;
 
-	if (!(ramht = *pramht = kzalloc(sizeof(*ramht) + (size >> 3) *
-					sizeof(*ramht->data), GFP_KERNEL)))
+	if (!(ramht = *pramht = vzalloc(sizeof(*ramht) +
+					(size >> 3) * sizeof(*ramht->data))))
 		return -ENOMEM;
 
 	ramht->device = device;

From 20fd4b1bbfbea603fff1d756b39cffc67048aec5 Mon Sep 17 00:00:00 2001
From: John Keeping <john@metanate.com>
Date: Wed, 18 Nov 2015 11:17:25 +0000
Subject: [PATCH 039/424] drm/qxl: fix cursor position with non-zero hotspot

commit d59a1f71ff1aeda4b4630df92d3ad4e3b1dfc885 upstream.

The SPICE protocol considers the position of a cursor to be the location
of its active pixel on the display, so the cursor is drawn with its
top-left corner at "(x - hot_spot_x, y - hot_spot_y)" but the DRM cursor
position gives the location where the top-left corner should be drawn,
with the hotspot being a hint for drivers that need it.

This fixes the location of the window resize cursors when using Fluxbox
with the QXL DRM driver and both the QXL and modesetting X drivers.

Signed-off-by: John Keeping <john@metanate.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/1447845445-2116-1-git-send-email-john@metanate.com
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/qxl/qxl_display.c | 13 +++++++++----
 drivers/gpu/drm/qxl/qxl_drv.h     |  2 ++
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c
index 183aea1abebc..5edebf495c07 100644
--- a/drivers/gpu/drm/qxl/qxl_display.c
+++ b/drivers/gpu/drm/qxl/qxl_display.c
@@ -375,10 +375,15 @@ static int qxl_crtc_cursor_set2(struct drm_crtc *crtc,
 
 	qxl_bo_kunmap(user_bo);
 
+	qcrtc->cur_x += qcrtc->hot_spot_x - hot_x;
+	qcrtc->cur_y += qcrtc->hot_spot_y - hot_y;
+	qcrtc->hot_spot_x = hot_x;
+	qcrtc->hot_spot_y = hot_y;
+
 	cmd = (struct qxl_cursor_cmd *)qxl_release_map(qdev, release);
 	cmd->type = QXL_CURSOR_SET;
-	cmd->u.set.position.x = qcrtc->cur_x;
-	cmd->u.set.position.y = qcrtc->cur_y;
+	cmd->u.set.position.x = qcrtc->cur_x + qcrtc->hot_spot_x;
+	cmd->u.set.position.y = qcrtc->cur_y + qcrtc->hot_spot_y;
 
 	cmd->u.set.shape = qxl_bo_physical_address(qdev, cursor_bo, 0);
 
@@ -441,8 +446,8 @@ static int qxl_crtc_cursor_move(struct drm_crtc *crtc,
 
 	cmd = (struct qxl_cursor_cmd *)qxl_release_map(qdev, release);
 	cmd->type = QXL_CURSOR_MOVE;
-	cmd->u.position.x = qcrtc->cur_x;
-	cmd->u.position.y = qcrtc->cur_y;
+	cmd->u.position.x = qcrtc->cur_x + qcrtc->hot_spot_x;
+	cmd->u.position.y = qcrtc->cur_y + qcrtc->hot_spot_y;
 	qxl_release_unmap(qdev, release, &cmd->release_info);
 
 	qxl_push_cursor_ring_release(qdev, release, QXL_CMD_CURSOR, false);
diff --git a/drivers/gpu/drm/qxl/qxl_drv.h b/drivers/gpu/drm/qxl/qxl_drv.h
index 01a86948eb8c..3ab90179e9ab 100644
--- a/drivers/gpu/drm/qxl/qxl_drv.h
+++ b/drivers/gpu/drm/qxl/qxl_drv.h
@@ -135,6 +135,8 @@ struct qxl_crtc {
 	int index;
 	int cur_x;
 	int cur_y;
+	int hot_spot_x;
+	int hot_spot_y;
 };
 
 struct qxl_output {

From 67df493c5557c6f34f33c4a3d52784d7ba46312a Mon Sep 17 00:00:00 2001
From: Lyude <cpaul@redhat.com>
Date: Wed, 16 Mar 2016 15:18:04 -0400
Subject: [PATCH 040/424] drm/i915: Fix race condition in
 intel_dp_destroy_mst_connector()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 9e60290dbafdf577766e5fc5f2fdb3be450cf9a6 upstream.

After unplugging a DP MST display from the system, we have to go through
and destroy all of the DRM connectors associated with it since none of
them are valid anymore. Unfortunately, intel_dp_destroy_mst_connector()
doesn't do a good enough job of ensuring that throughout the destruction
process that no modesettings can be done with the connectors. As it is
right now, intel_dp_destroy_mst_connector() works like this:

* Take all modeset locks
* Clear the configuration of the crtc on the connector, if there is one
* Drop all modeset locks, this is required because of circular
  dependency issues that arise with trying to remove the connector from
  sysfs with modeset locks held
* Unregister the connector
* Take all modeset locks, again
* Do the rest of the required cleaning for destroying the connector
* Finally drop all modeset locks for good

This only works sometimes. During the destruction process, it's very
possible that a userspace application will attempt to do a modesetting
using the connector. When we drop the modeset locks, an ioctl handler
such as drm_mode_setcrtc has the oppurtunity to take all of the modeset
locks from us. When this happens, one thing leads to another and
eventually we end up committing a mode with the non-existent connector:

	[drm:intel_dp_link_training_clock_recovery [i915]] *ERROR* failed to enable link training
	[drm:intel_dp_aux_ch] dp_aux_ch timeout status 0x7cf0001f
	[drm:intel_dp_start_link_train [i915]] *ERROR* failed to start channel equalization
	[drm:intel_dp_aux_ch] dp_aux_ch timeout status 0x7cf0001f
	[drm:intel_mst_pre_enable_dp [i915]] *ERROR* failed to allocate vcpi

And in some cases, such as with the T460s using an MST dock, this
results in breaking modesetting and/or panicking the system.

To work around this, we now unregister the connector at the very
beginning of intel_dp_destroy_mst_connector(), grab all the modesetting
locks, and then hold them until we finish the rest of the function.

Signed-off-by: Lyude <cpaul@redhat.com>
Signed-off-by: Rob Clark <rclark@redhat.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/1458155884-13877-1-git-send-email-cpaul@redhat.com
(cherry picked from commit 1f7717552ef1306be3b7ed28c66c6eff550e3a23)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/i915/intel_dp_mst.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c
index 0639275fc471..06bd9257acdc 100644
--- a/drivers/gpu/drm/i915/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/intel_dp_mst.c
@@ -477,6 +477,8 @@ static void intel_dp_destroy_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
 	struct intel_connector *intel_connector = to_intel_connector(connector);
 	struct drm_device *dev = connector->dev;
 
+	intel_connector->unregister(intel_connector);
+
 	/* need to nuke the connector */
 	drm_modeset_lock_all(dev);
 	if (connector->state->crtc) {
@@ -490,11 +492,7 @@ static void intel_dp_destroy_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
 
 		WARN(ret, "Disabling mst crtc failed with %i\n", ret);
 	}
-	drm_modeset_unlock_all(dev);
 
-	intel_connector->unregister(intel_connector);
-
-	drm_modeset_lock_all(dev);
 	intel_connector_remove_from_fbdev(intel_connector);
 	drm_connector_cleanup(connector);
 	drm_modeset_unlock_all(dev);

From 67fb098f6f23ebab7b47ae517c161032dc161cd9 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Mon, 18 Apr 2016 11:19:19 -0400
Subject: [PATCH 041/424] Revert "drm/radeon: disable runtime pm on PX laptops
 without dGPU power control"

commit bfaddd9fc8ac048b99475f000dbef6f08297417f upstream.

This reverts commit e64c952efb8e0c15ae82cec8e455ab4910690ef1.

ATPX is the ACPI method for controlling AMD PowerXpress laptops.
There are flags to indicate which methods are supported.  If
the dGPU power down flag is not supported, the driver needs to
implement the dGPU power down manually.  We had previously
always forced the driver to assume the ATPX dGPU power down
was present, but this causes problems on boards where it is
not, leading to GPU hangs when attempting to power down the
dGPU.  Manual dGPU power down is not currently supported in
the Linux driver.  Some laptops indicate that the ATPX
dGPU power down method is not present, but it actually
apparently is.  I'm not sure if this is a bios bug and it should
be set or if there is a reason it was unset and the method should
not be used.  This is not an issue on other OSes since both the
ATPX and the manual driver power down methods are supported.

This is apparently fairly widespread, so just revert for now.

bugs:
https://bugzilla.kernel.org/show_bug.cgi?id=115321
https://bugzilla.kernel.org/show_bug.cgi?id=116581
https://bugzilla.kernel.org/show_bug.cgi?id=116251

Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/radeon/radeon_atpx_handler.c | 8 ++++----
 drivers/gpu/drm/radeon/radeon_device.c       | 8 +-------
 2 files changed, 5 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_atpx_handler.c b/drivers/gpu/drm/radeon/radeon_atpx_handler.c
index 9bc408c9f9f6..c4b4f298a283 100644
--- a/drivers/gpu/drm/radeon/radeon_atpx_handler.c
+++ b/drivers/gpu/drm/radeon/radeon_atpx_handler.c
@@ -62,10 +62,6 @@ bool radeon_has_atpx(void) {
 	return radeon_atpx_priv.atpx_detected;
 }
 
-bool radeon_has_atpx_dgpu_power_cntl(void) {
-	return radeon_atpx_priv.atpx.functions.power_cntl;
-}
-
 /**
  * radeon_atpx_call - call an ATPX method
  *
@@ -145,6 +141,10 @@ static void radeon_atpx_parse_functions(struct radeon_atpx_functions *f, u32 mas
  */
 static int radeon_atpx_validate(struct radeon_atpx *atpx)
 {
+	/* make sure required functions are enabled */
+	/* dGPU power control is required */
+	atpx->functions.power_cntl = true;
+
 	if (atpx->functions.px_params) {
 		union acpi_object *info;
 		struct atpx_px_params output;
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index f78f111e68de..c566993a2ec3 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -103,12 +103,6 @@ static const char radeon_family_name[][16] = {
 	"LAST",
 };
 
-#if defined(CONFIG_VGA_SWITCHEROO)
-bool radeon_has_atpx_dgpu_power_cntl(void);
-#else
-static inline bool radeon_has_atpx_dgpu_power_cntl(void) { return false; }
-#endif
-
 #define RADEON_PX_QUIRK_DISABLE_PX  (1 << 0)
 #define RADEON_PX_QUIRK_LONG_WAKEUP (1 << 1)
 
@@ -1439,7 +1433,7 @@ int radeon_device_init(struct radeon_device *rdev,
 	 * ignore it */
 	vga_client_register(rdev->pdev, rdev, NULL, radeon_vga_set_decode);
 
-	if ((rdev->flags & RADEON_IS_PX) && radeon_has_atpx_dgpu_power_cntl())
+	if (rdev->flags & RADEON_IS_PX)
 		runtime = true;
 	vga_switcheroo_register_client(rdev->pdev, &radeon_switcheroo_ops, runtime);
 	if (runtime)

From 54aeb5854ec03315a721268b8c207fcdcd7f298f Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Mon, 25 Apr 2016 13:12:18 -0400
Subject: [PATCH 042/424] Revert "drm/amdgpu: disable runtime pm on PX laptops
 without dGPU power control"

commit e9bef455af8eb0e837e179aab8988ae2649fd8d3 upstream.

This reverts commit bedf2a65c1aa8fb29ba8527fd00c0f68ec1f55f1.

See the radeon revert for an extended description.

Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c | 8 ++++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c       | 8 +-------
 2 files changed, 5 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
index 8ac49812a716..5a8fbadbd27b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
@@ -63,10 +63,6 @@ bool amdgpu_has_atpx(void) {
 	return amdgpu_atpx_priv.atpx_detected;
 }
 
-bool amdgpu_has_atpx_dgpu_power_cntl(void) {
-	return amdgpu_atpx_priv.atpx.functions.power_cntl;
-}
-
 /**
  * amdgpu_atpx_call - call an ATPX method
  *
@@ -146,6 +142,10 @@ static void amdgpu_atpx_parse_functions(struct amdgpu_atpx_functions *f, u32 mas
  */
 static int amdgpu_atpx_validate(struct amdgpu_atpx *atpx)
 {
+	/* make sure required functions are enabled */
+	/* dGPU power control is required */
+	atpx->functions.power_cntl = true;
+
 	if (atpx->functions.px_params) {
 		union acpi_object *info;
 		struct atpx_px_params output;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 9d88023df836..c961fe093e12 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -61,12 +61,6 @@ static const char *amdgpu_asic_name[] = {
 	"LAST",
 };
 
-#if defined(CONFIG_VGA_SWITCHEROO)
-bool amdgpu_has_atpx_dgpu_power_cntl(void);
-#else
-static inline bool amdgpu_has_atpx_dgpu_power_cntl(void) { return false; }
-#endif
-
 bool amdgpu_device_is_px(struct drm_device *dev)
 {
 	struct amdgpu_device *adev = dev->dev_private;
@@ -1475,7 +1469,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 
 	if (amdgpu_runtime_pm == 1)
 		runtime = true;
-	if (amdgpu_device_is_px(ddev) && amdgpu_has_atpx_dgpu_power_cntl())
+	if (amdgpu_device_is_px(ddev))
 		runtime = true;
 	vga_switcheroo_register_client(adev->pdev, &amdgpu_switcheroo_ops, runtime);
 	if (runtime)

From 61fe67520c4394c90f688c61c5c16dd63824cd42 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Fri, 22 Apr 2016 19:53:59 -0700
Subject: [PATCH 043/424] cpufreq: intel_pstate: Fix processing for turbo
 activation ratio

commit 1becf03545a0859ceaaf9e8c2d9861882a71cb01 upstream.

When the config TDP level is not nominal (level = 0), the MSR values for
reading level 1 and level 2 ratios contain power in low 14 bits and actual
ratio bits are at bits [23:16]. The current processing for level 1 and
level 2 is wrong as there is no shift done to get actual ratio.

Fixes: 6a35fc2d6c22 (cpufreq: intel_pstate: get P1 from TAR when available)
Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/cpufreq/intel_pstate.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 98fb8821382d..f53b02a6bc05 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -667,6 +667,11 @@ static int core_get_max_pstate(void)
 			if (err)
 				goto skip_tar;
 
+			/* For level 1 and 2, bits[23:16] contain the ratio */
+			if (tdp_ctrl)
+				tdp_ratio >>= 16;
+
+			tdp_ratio &= 0xff; /* ratios are only 8 bits long */
 			if (tdp_ratio - 1 == tar) {
 				max_pstate = tar;
 				pr_debug("max_pstate=TAC %x\n", max_pstate);

From d22ac3a9403bc2a60662ec117dc83f72564d61f9 Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Thu, 31 Mar 2016 11:48:31 +0200
Subject: [PATCH 044/424] s390/pci: add extra padding to function measurement
 block

commit 9d89d9e61d361f3adb75e1aebe4bb367faf16cfa upstream.

Newer machines might use a different (larger) format for function
measurement blocks. To ensure that we comply with the alignment
requirement on these machines and prevent memory corruption (when
firmware writes more data than we expect) add 16 padding bytes
at the end of the fmb.

Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/s390/include/asm/pci.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 2b2ced9dc00a..6dafabb6ae1a 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -45,7 +45,8 @@ struct zpci_fmb {
 	u64 rpcit_ops;
 	u64 dma_rbytes;
 	u64 dma_wbytes;
-} __packed __aligned(64);
+	u64 pad[2];
+} __packed __aligned(128);
 
 enum zpci_state {
 	ZPCI_FN_STATE_RESERVED,

From 0dec867402c0ce4eee7ca0055a99d634fe32a72b Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Thu, 10 Mar 2016 13:07:17 +0200
Subject: [PATCH 045/424] iwlwifi: pcie: lower the debug level for RSA
 semaphore access

commit 9fc515bc9e735c10cd327f05c20f5ef69474188d upstream.

IWL_INFO is not an error but still printed by default.
"can't access the RSA semaphore it is write protected" seems
worrisome but it is not really a problem.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/iwlwifi/pcie/trans.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/iwlwifi/pcie/trans.c b/drivers/net/wireless/iwlwifi/pcie/trans.c
index 8c7204738aa3..00e0332e2544 100644
--- a/drivers/net/wireless/iwlwifi/pcie/trans.c
+++ b/drivers/net/wireless/iwlwifi/pcie/trans.c
@@ -731,8 +731,8 @@ static int iwl_pcie_rsa_race_bug_wa(struct iwl_trans *trans)
 	 */
 	val = iwl_read_prph(trans, PREG_AUX_BUS_WPROT_0);
 	if (val & (BIT(1) | BIT(17))) {
-		IWL_INFO(trans,
-			 "can't access the RSA semaphore it is write protected\n");
+		IWL_DEBUG_INFO(trans,
+			       "can't access the RSA semaphore it is write protected\n");
 		return 0;
 	}
 

From 72b847aa95584d9e0718c9e3ee38a627bbb24c17 Mon Sep 17 00:00:00 2001
From: Matti Gottlieb <matti.gottlieb@intel.com>
Date: Tue, 15 Mar 2016 13:46:47 +0200
Subject: [PATCH 046/424] iwlwifi: mvm: fix memory leak in paging

commit 7fdf9663261cc77a516396fec82cee8a8ea07e76 upstream.

Currently paging download buffer is freed during the
the unloading of the opmode which happens when the driver
is unloaded.

This causes a memory leak since the paging download
buffer is allocated every time we enable the
interface, so the download buffer can be allocated many
times, but only be freed once.

Free paging download buffer during disabling of the
interface.

Signed-off-by: Matti Gottlieb <matti.gottlieb@intel.com>
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/iwlwifi/mvm/mac80211.c | 2 ++
 drivers/net/wireless/iwlwifi/mvm/ops.c      | 2 --
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/iwlwifi/mvm/mac80211.c
index e88afac51c5d..f96ab2f4b90e 100644
--- a/drivers/net/wireless/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/iwlwifi/mvm/mac80211.c
@@ -1557,6 +1557,8 @@ void __iwl_mvm_mac_stop(struct iwl_mvm *mvm)
 	/* the fw is stopped, the aux sta is dead: clean up driver state */
 	iwl_mvm_del_aux_sta(mvm);
 
+	iwl_free_fw_paging(mvm);
+
 	/*
 	 * Clear IN_HW_RESTART flag when stopping the hw (as restart_complete()
 	 * won't be called in this case).
diff --git a/drivers/net/wireless/iwlwifi/mvm/ops.c b/drivers/net/wireless/iwlwifi/mvm/ops.c
index c3adf2bcdc85..13c97f665ba8 100644
--- a/drivers/net/wireless/iwlwifi/mvm/ops.c
+++ b/drivers/net/wireless/iwlwifi/mvm/ops.c
@@ -645,8 +645,6 @@ static void iwl_op_mode_mvm_stop(struct iwl_op_mode *op_mode)
 	for (i = 0; i < NVM_MAX_NUM_SECTIONS; i++)
 		kfree(mvm->nvm_sections[i].data);
 
-	iwl_free_fw_paging(mvm);
-
 	iwl_mvm_tof_clean(mvm);
 
 	ieee80211_free_hw(mvm->hw);

From 1575fcd167e3452cadbcf7d04ad6277c875a482f Mon Sep 17 00:00:00 2001
From: Tom Lendacky <thomas.lendacky@amd.com>
Date: Wed, 13 Apr 2016 10:52:25 -0500
Subject: [PATCH 047/424] crypto: ccp - Prevent information leakage on export

commit f709b45ec461b548c41a00044dba1f1b572783bf upstream.

Prevent information from leaking to userspace by doing a memset to 0 of
the export state structure before setting the structure values and copying
it. This prevents un-initialized padding areas from being copied into the
export area.

Reported-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/crypto/ccp/ccp-crypto-aes-cmac.c | 3 +++
 drivers/crypto/ccp/ccp-crypto-sha.c      | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c
index 3d9acc53d247..60fc0fa26fd3 100644
--- a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c
+++ b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c
@@ -225,6 +225,9 @@ static int ccp_aes_cmac_export(struct ahash_request *req, void *out)
 	struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req);
 	struct ccp_aes_cmac_exp_ctx state;
 
+	/* Don't let anything leak to 'out' */
+	memset(&state, 0, sizeof(state));
+
 	state.null_msg = rctx->null_msg;
 	memcpy(state.iv, rctx->iv, sizeof(state.iv));
 	state.buf_count = rctx->buf_count;
diff --git a/drivers/crypto/ccp/ccp-crypto-sha.c b/drivers/crypto/ccp/ccp-crypto-sha.c
index 8ef06fad8b14..ab9945f2cb7a 100644
--- a/drivers/crypto/ccp/ccp-crypto-sha.c
+++ b/drivers/crypto/ccp/ccp-crypto-sha.c
@@ -212,6 +212,9 @@ static int ccp_sha_export(struct ahash_request *req, void *out)
 	struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req);
 	struct ccp_sha_exp_ctx state;
 
+	/* Don't let anything leak to 'out' */
+	memset(&state, 0, sizeof(state));
+
 	state.type = rctx->type;
 	state.msg_bits = rctx->msg_bits;
 	state.first = rctx->first;

From 9a10dfc8bf95270073c6c2e2be3de26a652d730a Mon Sep 17 00:00:00 2001
From: Xiaodong Liu <xiaodong.liu@intel.com>
Date: Tue, 12 Apr 2016 09:45:51 +0000
Subject: [PATCH 048/424] crypto: sha1-mb - use corrcet pointer while
 completing jobs

commit 0851561d9c965df086ef8a53f981f5f95a57c2c8 upstream.

In sha_complete_job, incorrect mcryptd_hash_request_ctx pointer is used
when check and complete other jobs. If the memory of first completed req
is freed, while still completing other jobs in the func, kernel will
crash since NULL pointer is assigned to RIP.

Signed-off-by: Xiaodong Liu <xiaodong.liu@intel.com>
Acked-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/crypto/sha-mb/sha1_mb.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/crypto/sha-mb/sha1_mb.c b/arch/x86/crypto/sha-mb/sha1_mb.c
index a841e9765bd6..8381c09d2870 100644
--- a/arch/x86/crypto/sha-mb/sha1_mb.c
+++ b/arch/x86/crypto/sha-mb/sha1_mb.c
@@ -453,10 +453,10 @@ static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx,
 
 			req = cast_mcryptd_ctx_to_req(req_ctx);
 			if (irqs_disabled())
-				rctx->complete(&req->base, ret);
+				req_ctx->complete(&req->base, ret);
 			else {
 				local_bh_disable();
-				rctx->complete(&req->base, ret);
+				req_ctx->complete(&req->base, ret);
 				local_bh_enable();
 			}
 		}

From 0dedb763d08e6fa0da9c1ee14fb5d8522a85e846 Mon Sep 17 00:00:00 2001
From: Jonas Eymann <J.Eymann@gmx.net>
Date: Tue, 19 Apr 2016 20:33:47 +0300
Subject: [PATCH 049/424] crypto: talitos - fix crash in talitos_cra_init()

commit 89d124cb61b39900959e2839ac06b6339b6a54cb upstream.

Conversion of talitos driver to the new AEAD interface
hasn't been properly tested.

AEAD algorithms crash in talitos_cra_init as follows:

[...]
[    1.141095] talitos ffe30000.crypto: hwrng
[    1.145381] Unable to handle kernel paging request for data at address 0x00000058
[    1.152913] Faulting instruction address: 0xc02accc0
[    1.157910] Oops: Kernel access of bad area, sig: 11 [#1]
[    1.163315] SMP NR_CPUS=2 P1020 RDB
[    1.166810] Modules linked in:
[    1.169875] CPU: 0 PID: 1007 Comm: cryptomgr_test Not tainted 4.4.6 #1
[    1.176415] task: db5ec200 ti: db4d6000 task.ti: db4d6000
[    1.181821] NIP: c02accc0 LR: c02acd18 CTR: c02acd04
[    1.186793] REGS: db4d7d30 TRAP: 0300   Not tainted  (4.4.6)
[    1.192457] MSR: 00029000 <CE,EE,ME>  CR: 95009359  XER: e0000000
[    1.198585] DEAR: 00000058 ESR: 00000000
GPR00: c017bdc0 db4d7de0 db5ec200 df424b48 00000000 00000000 df424bfc db75a600
GPR08: df424b48 00000000 db75a628 db4d6000 00000149 00000000 c0044cac db5acda0
GPR16: 00000000 00000000 00000000 00000000 00000000 00000000 00000400 df424940
GPR24: df424900 00003083 00000400 c0180000 db75a640 c03e9f84 df424b40 df424b48
[    1.230978] NIP [c02accc0] talitos_cra_init+0x28/0x6c
[    1.236039] LR [c02acd18] talitos_cra_init_aead+0x14/0x28
[    1.241443] Call Trace:
[    1.243894] [db4d7de0] [c03e9f84] 0xc03e9f84 (unreliable)
[    1.249322] [db4d7df0] [c017bdc0] crypto_create_tfm+0x5c/0xf0
[    1.255083] [db4d7e10] [c017beec] crypto_alloc_tfm+0x98/0xf8
[    1.260769] [db4d7e40] [c0186a20] alg_test_aead+0x28/0xc8
[    1.266181] [db4d7e60] [c0186718] alg_test+0x260/0x2e0
[    1.271333] [db4d7ee0] [c0183860] cryptomgr_test+0x30/0x54
[    1.276843] [db4d7ef0] [c0044d80] kthread+0xd4/0xd8
[    1.281741] [db4d7f40] [c000e4a4] ret_from_kernel_thread+0x5c/0x64
[    1.287930] Instruction dump:
[    1.290902] 38600000 4e800020 81230028 7c681b78 81490010 38e9ffc0 3929ffe8 554a073e
[    1.298691] 2b8a000a 7d474f9e 812a0008 91230030 <80e90058> 39270060 7c0004ac 7cc04828

Fixes: aeb4c132f33d ("crypto: talitos - Convert to new AEAD interface")
Signed-off-by: Jonas Eymann <J.Eymann@gmx.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Fix typo - replaced parameter of __crypto_ahash_alg(): s/tfm/alg
Remove checkpatch warnings.
Add commit message.

Signed-off-by: Horia Geant? <horia.geanta@nxp.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/talitos.c | 41 ++++++++++++++++++++++++++--------------
 1 file changed, 27 insertions(+), 14 deletions(-)

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index b6f9f42e2985..79dff3b2dfb7 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -2519,21 +2519,11 @@ struct talitos_crypto_alg {
 	struct talitos_alg_template algt;
 };
 
-static int talitos_cra_init(struct crypto_tfm *tfm)
+static int talitos_init_common(struct talitos_ctx *ctx,
+			       struct talitos_crypto_alg *talitos_alg)
 {
-	struct crypto_alg *alg = tfm->__crt_alg;
-	struct talitos_crypto_alg *talitos_alg;
-	struct talitos_ctx *ctx = crypto_tfm_ctx(tfm);
 	struct talitos_private *priv;
 
-	if ((alg->cra_flags & CRYPTO_ALG_TYPE_MASK) == CRYPTO_ALG_TYPE_AHASH)
-		talitos_alg = container_of(__crypto_ahash_alg(alg),
-					   struct talitos_crypto_alg,
-					   algt.alg.hash);
-	else
-		talitos_alg = container_of(alg, struct talitos_crypto_alg,
-					   algt.alg.crypto);
-
 	/* update context with ptr to dev */
 	ctx->dev = talitos_alg->dev;
 
@@ -2551,10 +2541,33 @@ static int talitos_cra_init(struct crypto_tfm *tfm)
 	return 0;
 }
 
+static int talitos_cra_init(struct crypto_tfm *tfm)
+{
+	struct crypto_alg *alg = tfm->__crt_alg;
+	struct talitos_crypto_alg *talitos_alg;
+	struct talitos_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	if ((alg->cra_flags & CRYPTO_ALG_TYPE_MASK) == CRYPTO_ALG_TYPE_AHASH)
+		talitos_alg = container_of(__crypto_ahash_alg(alg),
+					   struct talitos_crypto_alg,
+					   algt.alg.hash);
+	else
+		talitos_alg = container_of(alg, struct talitos_crypto_alg,
+					   algt.alg.crypto);
+
+	return talitos_init_common(ctx, talitos_alg);
+}
+
 static int talitos_cra_init_aead(struct crypto_aead *tfm)
 {
-	talitos_cra_init(crypto_aead_tfm(tfm));
-	return 0;
+	struct aead_alg *alg = crypto_aead_alg(tfm);
+	struct talitos_crypto_alg *talitos_alg;
+	struct talitos_ctx *ctx = crypto_aead_ctx(tfm);
+
+	talitos_alg = container_of(alg, struct talitos_crypto_alg,
+				   algt.alg.aead);
+
+	return talitos_init_common(ctx, talitos_alg);
 }
 
 static int talitos_cra_init_ahash(struct crypto_tfm *tfm)

From cd7803563938ce36988e6bc494b8f6610c1537af Mon Sep 17 00:00:00 2001
From: Horia Geant? <horia.geanta@nxp.com>
Date: Tue, 19 Apr 2016 20:33:48 +0300
Subject: [PATCH 050/424] crypto: talitos - fix AEAD tcrypt tests

commit 340ff60ae93a5db2b2be6f38868df9a1293b6007 upstream.

After conversion to new AEAD interface, tcrypt tests fail as follows:

[...]
[    1.145414] alg: aead: Test 1 failed on encryption for authenc-hmac-sha1-cbc-aes-talitos
[    1.153564] 00000000: 53 69 6e 67 6c 65 20 62 6c 6f 63 6b 20 6d 73 67
[    1.160041] 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[    1.166509] 00000020: 00 00 00 00
[...]

Fix them by providing the correct cipher in & cipher out pointers,
i.e. must skip over associated data in src and dst S/G.

While here, fix a problem with the HW S/G table index usage:
tbl_off must be updated after the pointer to the table entries is set.

Fixes: aeb4c132f33d ("crypto: talitos - Convert to new AEAD interface")
Reported-by: Jonas Eymann <J.Eymann@gmx.net>
Signed-off-by: Horia Geant? <horia.geanta@nxp.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/crypto/talitos.c | 46 ++++++++++++++++++++++++++--------------
 1 file changed, 30 insertions(+), 16 deletions(-)

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index 79dff3b2dfb7..a04fea4d0063 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -63,6 +63,14 @@ static void to_talitos_ptr(struct talitos_ptr *ptr, dma_addr_t dma_addr,
 		ptr->eptr = upper_32_bits(dma_addr);
 }
 
+static void copy_talitos_ptr(struct talitos_ptr *dst_ptr,
+			     struct talitos_ptr *src_ptr, bool is_sec1)
+{
+	dst_ptr->ptr = src_ptr->ptr;
+	if (!is_sec1)
+		dst_ptr->eptr = src_ptr->eptr;
+}
+
 static void to_talitos_ptr_len(struct talitos_ptr *ptr, unsigned int len,
 			       bool is_sec1)
 {
@@ -1083,21 +1091,20 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq,
 	sg_count = dma_map_sg(dev, areq->src, edesc->src_nents ?: 1,
 			      (areq->src == areq->dst) ? DMA_BIDIRECTIONAL
 							   : DMA_TO_DEVICE);
-
 	/* hmac data */
 	desc->ptr[1].len = cpu_to_be16(areq->assoclen);
 	if (sg_count > 1 &&
 	    (ret = sg_to_link_tbl_offset(areq->src, sg_count, 0,
 					 areq->assoclen,
 					 &edesc->link_tbl[tbl_off])) > 1) {
-		tbl_off += ret;
-
 		to_talitos_ptr(&desc->ptr[1], edesc->dma_link_tbl + tbl_off *
 			       sizeof(struct talitos_ptr), 0);
 		desc->ptr[1].j_extent = DESC_PTR_LNKTBL_JUMP;
 
 		dma_sync_single_for_device(dev, edesc->dma_link_tbl,
 					   edesc->dma_len, DMA_BIDIRECTIONAL);
+
+		tbl_off += ret;
 	} else {
 		to_talitos_ptr(&desc->ptr[1], sg_dma_address(areq->src), 0);
 		desc->ptr[1].j_extent = 0;
@@ -1126,11 +1133,13 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq,
 	if (edesc->desc.hdr & DESC_HDR_MODE1_MDEU_CICV)
 		sg_link_tbl_len += authsize;
 
-	if (sg_count > 1 &&
-	    (ret = sg_to_link_tbl_offset(areq->src, sg_count, areq->assoclen,
-					 sg_link_tbl_len,
-					 &edesc->link_tbl[tbl_off])) > 1) {
-		tbl_off += ret;
+	if (sg_count == 1) {
+		to_talitos_ptr(&desc->ptr[4], sg_dma_address(areq->src) +
+			       areq->assoclen, 0);
+	} else if ((ret = sg_to_link_tbl_offset(areq->src, sg_count,
+						areq->assoclen, sg_link_tbl_len,
+						&edesc->link_tbl[tbl_off])) >
+		   1) {
 		desc->ptr[4].j_extent |= DESC_PTR_LNKTBL_JUMP;
 		to_talitos_ptr(&desc->ptr[4], edesc->dma_link_tbl +
 					      tbl_off *
@@ -1138,8 +1147,10 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq,
 		dma_sync_single_for_device(dev, edesc->dma_link_tbl,
 					   edesc->dma_len,
 					   DMA_BIDIRECTIONAL);
-	} else
-		to_talitos_ptr(&desc->ptr[4], sg_dma_address(areq->src), 0);
+		tbl_off += ret;
+	} else {
+		copy_talitos_ptr(&desc->ptr[4], &edesc->link_tbl[tbl_off], 0);
+	}
 
 	/* cipher out */
 	desc->ptr[5].len = cpu_to_be16(cryptlen);
@@ -1151,11 +1162,13 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq,
 
 	edesc->icv_ool = false;
 
-	if (sg_count > 1 &&
-	    (sg_count = sg_to_link_tbl_offset(areq->dst, sg_count,
+	if (sg_count == 1) {
+		to_talitos_ptr(&desc->ptr[5], sg_dma_address(areq->dst) +
+			       areq->assoclen, 0);
+	} else if ((sg_count =
+			sg_to_link_tbl_offset(areq->dst, sg_count,
 					      areq->assoclen, cryptlen,
-					      &edesc->link_tbl[tbl_off])) >
-	    1) {
+					      &edesc->link_tbl[tbl_off])) > 1) {
 		struct talitos_ptr *tbl_ptr = &edesc->link_tbl[tbl_off];
 
 		to_talitos_ptr(&desc->ptr[5], edesc->dma_link_tbl +
@@ -1178,8 +1191,9 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq,
 					   edesc->dma_len, DMA_BIDIRECTIONAL);
 
 		edesc->icv_ool = true;
-	} else
-		to_talitos_ptr(&desc->ptr[5], sg_dma_address(areq->dst), 0);
+	} else {
+		copy_talitos_ptr(&desc->ptr[5], &edesc->link_tbl[tbl_off], 0);
+	}
 
 	/* iv out */
 	map_single_talitos_ptr(dev, &desc->ptr[6], ivsize, ctx->iv,

From 53f3e26b3d09ae40318877b74e0d6c1af767a07f Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Fri, 15 Apr 2016 12:06:13 +1000
Subject: [PATCH 051/424] powerpc: scan_features() updates incorrect bits for
 REAL_LE

commit 6997e57d693b07289694239e52a10d2f02c3a46f upstream.

The REAL_LE feature entry in the ibm_pa_feature struct is missing an MMU
feature value, meaning all the remaining elements initialise the wrong
values.

This means instead of checking for byte 5, bit 0, we check for byte 0,
bit 0, and then we incorrectly set the CPU feature bit as well as MMU
feature bit 1 and CPU user feature bits 0 and 2 (5).

Checking byte 0 bit 0 (IBM numbering), means we're looking at the
"Memory Management Unit (MMU)" feature - ie. does the CPU have an MMU.
In practice that bit is set on all platforms which have the property.

This means we set CPU_FTR_REAL_LE always. In practice that seems not to
matter because all the modern cpus which have this property also
implement REAL_LE, and we've never needed to disable it.

We're also incorrectly setting MMU feature bit 1, which is:

  #define MMU_FTR_TYPE_8xx		0x00000002

Luckily the only place that looks for MMU_FTR_TYPE_8xx is in Book3E
code, which can't run on the same cpus as scan_features(). So this also
doesn't matter in practice.

Finally in the CPU user feature mask, we're setting bits 0 and 2. Bit 2
is not currently used, and bit 0 is:

  #define PPC_FEATURE_PPC_LE		0x00000001

Which says the CPU supports the old style "PPC Little Endian" mode.
Again this should be harmless in practice as no 64-bit CPUs implement
that mode.

Fix the code by adding the missing initialisation of the MMU feature.

Also add a comment marking CPU user feature bit 2 (0x4) as reserved. It
would be unsafe to start using it as old kernels incorrectly set it.

Fixes: 44ae3ab3358e ("powerpc: Free up some CPU feature bits by moving out MMU-related features")
Signed-off-by: Anton Blanchard <anton@samba.org>
[mpe: Flesh out changelog, add comment reserving 0x4]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/include/uapi/asm/cputable.h | 1 +
 arch/powerpc/kernel/prom.c               | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/uapi/asm/cputable.h b/arch/powerpc/include/uapi/asm/cputable.h
index 43686043e297..2734c005da21 100644
--- a/arch/powerpc/include/uapi/asm/cputable.h
+++ b/arch/powerpc/include/uapi/asm/cputable.h
@@ -31,6 +31,7 @@
 #define PPC_FEATURE_PSERIES_PERFMON_COMPAT \
 					0x00000040
 
+/* Reserved - do not use		0x00000004 */
 #define PPC_FEATURE_TRUE_LE		0x00000002
 #define PPC_FEATURE_PPC_LE		0x00000001
 
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 7030b035905d..080c96b44a7f 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -158,7 +158,7 @@ static struct ibm_pa_feature {
 	{CPU_FTR_NOEXECUTE, 0, 0,	0, 6, 0},
 	{CPU_FTR_NODSISRALIGN, 0, 0,	1, 1, 1},
 	{0, MMU_FTR_CI_LARGE_PAGE, 0,	1, 2, 0},
-	{CPU_FTR_REAL_LE, PPC_FEATURE_TRUE_LE, 5, 0, 0},
+	{CPU_FTR_REAL_LE, 0, PPC_FEATURE_TRUE_LE, 5, 0, 0},
 	/*
 	 * If the kernel doesn't support TM (ie. CONFIG_PPC_TRANSACTIONAL_MEM=n),
 	 * we don't want to turn on CPU_FTR_TM here, so we use CPU_FTR_TM_COMP

From 08c9b94505bbe09ed42f658de2a4dbe274fa7468 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Fri, 15 Apr 2016 12:07:24 +1000
Subject: [PATCH 052/424] powerpc: Update cpu_user_features2 in scan_features()

commit beff82374b259d726e2625ec6c518a5f2613f0ae upstream.

scan_features() updates cpu_user_features but not cpu_user_features2.

Amongst other things, cpu_user_features2 contains the user TM feature
bits which we must keep in sync with the kernel TM feature bit.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/kernel/prom.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 080c96b44a7f..03fce77e441d 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -148,23 +148,24 @@ static struct ibm_pa_feature {
 	unsigned long	cpu_features;	/* CPU_FTR_xxx bit */
 	unsigned long	mmu_features;	/* MMU_FTR_xxx bit */
 	unsigned int	cpu_user_ftrs;	/* PPC_FEATURE_xxx bit */
+	unsigned int	cpu_user_ftrs2;	/* PPC_FEATURE2_xxx bit */
 	unsigned char	pabyte;		/* byte number in ibm,pa-features */
 	unsigned char	pabit;		/* bit number (big-endian) */
 	unsigned char	invert;		/* if 1, pa bit set => clear feature */
 } ibm_pa_features[] __initdata = {
-	{0, 0, PPC_FEATURE_HAS_MMU,	0, 0, 0},
-	{0, 0, PPC_FEATURE_HAS_FPU,	0, 1, 0},
-	{CPU_FTR_CTRL, 0, 0,		0, 3, 0},
-	{CPU_FTR_NOEXECUTE, 0, 0,	0, 6, 0},
-	{CPU_FTR_NODSISRALIGN, 0, 0,	1, 1, 1},
-	{0, MMU_FTR_CI_LARGE_PAGE, 0,	1, 2, 0},
-	{CPU_FTR_REAL_LE, 0, PPC_FEATURE_TRUE_LE, 5, 0, 0},
+	{0, 0, PPC_FEATURE_HAS_MMU, 0,		0, 0, 0},
+	{0, 0, PPC_FEATURE_HAS_FPU, 0,		0, 1, 0},
+	{CPU_FTR_CTRL, 0, 0, 0,			0, 3, 0},
+	{CPU_FTR_NOEXECUTE, 0, 0, 0,		0, 6, 0},
+	{CPU_FTR_NODSISRALIGN, 0, 0, 0,		1, 1, 1},
+	{0, MMU_FTR_CI_LARGE_PAGE, 0, 0,		1, 2, 0},
+	{CPU_FTR_REAL_LE, 0, PPC_FEATURE_TRUE_LE, 0, 5, 0, 0},
 	/*
 	 * If the kernel doesn't support TM (ie. CONFIG_PPC_TRANSACTIONAL_MEM=n),
 	 * we don't want to turn on CPU_FTR_TM here, so we use CPU_FTR_TM_COMP
 	 * which is 0 if the kernel doesn't support TM.
 	 */
-	{CPU_FTR_TM_COMP, 0, 0,		22, 0, 0},
+	{CPU_FTR_TM_COMP, 0, 0, 0,		22, 0, 0},
 };
 
 static void __init scan_features(unsigned long node, const unsigned char *ftrs,
@@ -195,10 +196,12 @@ static void __init scan_features(unsigned long node, const unsigned char *ftrs,
 		if (bit ^ fp->invert) {
 			cur_cpu_spec->cpu_features |= fp->cpu_features;
 			cur_cpu_spec->cpu_user_features |= fp->cpu_user_ftrs;
+			cur_cpu_spec->cpu_user_features2 |= fp->cpu_user_ftrs2;
 			cur_cpu_spec->mmu_features |= fp->mmu_features;
 		} else {
 			cur_cpu_spec->cpu_features &= ~fp->cpu_features;
 			cur_cpu_spec->cpu_user_features &= ~fp->cpu_user_ftrs;
+			cur_cpu_spec->cpu_user_features2 &= ~fp->cpu_user_ftrs2;
 			cur_cpu_spec->mmu_features &= ~fp->mmu_features;
 		}
 	}

From c89c3225062d64c63532c127c374ea962f336e6b Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Fri, 15 Apr 2016 12:08:19 +1000
Subject: [PATCH 053/424] powerpc: Update TM user feature bits in
 scan_features()

commit 4705e02498d6d5a7ab98dfee9595cd5e91db2017 upstream.

We need to update the user TM feature bits (PPC_FEATURE2_HTM and
PPC_FEATURE2_HTM) to mirror what we do with the kernel TM feature
bit.

At the moment, if firmware reports TM is not available we turn off
the kernel TM feature bit but leave the userspace ones on. Userspace
thinks it can execute TM instructions and it dies trying.

This (together with a QEMU patch) fixes PR KVM, which doesn't currently
support TM.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/kernel/prom.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 03fce77e441d..a15fe1d4e84a 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -161,11 +161,12 @@ static struct ibm_pa_feature {
 	{0, MMU_FTR_CI_LARGE_PAGE, 0, 0,		1, 2, 0},
 	{CPU_FTR_REAL_LE, 0, PPC_FEATURE_TRUE_LE, 0, 5, 0, 0},
 	/*
-	 * If the kernel doesn't support TM (ie. CONFIG_PPC_TRANSACTIONAL_MEM=n),
-	 * we don't want to turn on CPU_FTR_TM here, so we use CPU_FTR_TM_COMP
-	 * which is 0 if the kernel doesn't support TM.
+	 * If the kernel doesn't support TM (ie CONFIG_PPC_TRANSACTIONAL_MEM=n),
+	 * we don't want to turn on TM here, so we use the *_COMP versions
+	 * which are 0 if the kernel doesn't support TM.
 	 */
-	{CPU_FTR_TM_COMP, 0, 0, 0,		22, 0, 0},
+	{CPU_FTR_TM_COMP, 0, 0,
+	 PPC_FEATURE2_HTM_COMP|PPC_FEATURE2_HTM_NOSC_COMP, 22, 0, 0},
 };
 
 static void __init scan_features(unsigned long node, const unsigned char *ftrs,

From 56b8eaa38b04f147a6b825a73a31b826b6051604 Mon Sep 17 00:00:00 2001
From: Dmitry Ivanov <dmitrijs.ivanovs@ubnt.com>
Date: Wed, 6 Apr 2016 17:23:18 +0300
Subject: [PATCH 054/424] nl80211: check netlink protocol in socket release
 notification

commit 8f815cdde3e550e10c2736990d791f60c2ce43eb upstream.

A non-privileged user can create a netlink socket with the same port_id as
used by an existing open nl80211 netlink socket (e.g. as used by a hostapd
process) with a different protocol number.

Closing this socket will then lead to the notification going to nl80211's
socket release notification handler, and possibly cause an action such as
removing a virtual interface.

Fix this issue by checking that the netlink protocol is NETLINK_GENERIC.
Since generic netlink has no notifier chain of its own, we can't fix the
problem more generically.

Fixes: 026331c4d9b5 ("cfg80211/mac80211: allow registering for and sending action frames")
Signed-off-by: Dmitry Ivanov <dima@ubnt.com>
[rewrite commit message]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/wireless/nl80211.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 75b0d23ee882..5d89f13a98db 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -13161,7 +13161,7 @@ static int nl80211_netlink_notify(struct notifier_block * nb,
 	struct wireless_dev *wdev;
 	struct cfg80211_beacon_registration *reg, *tmp;
 
-	if (state != NETLINK_URELEASE)
+	if (state != NETLINK_URELEASE || notify->protocol != NETLINK_GENERIC)
 		return NOTIFY_DONE;
 
 	rcu_read_lock();

From 95415ac5786f483c7c69145ae644bc64c2240776 Mon Sep 17 00:00:00 2001
From: Dmitry Ivanov <dmitrijs.ivanovs@ubnt.com>
Date: Thu, 7 Apr 2016 09:31:38 +0200
Subject: [PATCH 055/424] netlink: don't send NETLINK_URELEASE for unbound
 sockets

commit e27260203912b40751fa353d009eaa5a642c739f upstream.

All existing users of NETLINK_URELEASE use it to clean up resources that
were previously allocated to a socket via some command. As a result, no
users require getting this notification for unbound sockets.

Sending it for unbound sockets, however, is a problem because any user
(including unprivileged users) can create a socket that uses the same ID
as an existing socket. Binding this new socket will fail, but if the
NETLINK_URELEASE notification is generated for such sockets, the users
thereof will be tricked into thinking the socket that they allocated the
resources for is closed.

In the nl80211 case, this will cause destruction of virtual interfaces
that still belong to an existing hostapd process; this is the case that
Dmitry noticed. In the NFC case, it will cause a poll abort. In the case
of netlink log/queue it will cause them to stop reporting events, as if
NFULNL_CFG_CMD_UNBIND/NFQNL_CFG_CMD_UNBIND had been called.

Fix this problem by checking that the socket is bound before generating
the NETLINK_URELEASE notification.

Signed-off-by: Dmitry Ivanov <dima@ubnt.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/netlink/af_netlink.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 59651af8cc27..992b35fb8615 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1305,7 +1305,7 @@ static int netlink_release(struct socket *sock)
 
 	skb_queue_purge(&sk->sk_write_queue);
 
-	if (nlk->portid) {
+	if (nlk->portid && nlk->bound) {
 		struct netlink_notify n = {
 						.net = sock_net(sk),
 						.protocol = sk->sk_protocol,

From 197b6c5f0d976420c3eeacc7589ebc5869d2d70f Mon Sep 17 00:00:00 2001
From: Vladis Dronov <vdronov@redhat.com>
Date: Thu, 31 Mar 2016 10:53:42 -0700
Subject: [PATCH 056/424] Input: gtco - fix crash on detecting device without
 endpoints

commit 162f98dea487206d9ab79fc12ed64700667a894d upstream.

The gtco driver expects at least one valid endpoint. If given malicious
descriptors that specify 0 for the number of endpoints, it will crash in
the probe function. Ensure there is at least one endpoint on the interface
before using it.

Also let's fix a minor coding style issue.

The full correct report of this issue can be found in the public
Red Hat Bugzilla:

https://bugzilla.redhat.com/show_bug.cgi?id=1283385

Reported-by: Ralf Spenneberg <ralf@spenneberg.net>
Signed-off-by: Vladis Dronov <vdronov@redhat.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/input/tablet/gtco.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/input/tablet/gtco.c b/drivers/input/tablet/gtco.c
index 3a7f3a4a4396..7c18249d6c8e 100644
--- a/drivers/input/tablet/gtco.c
+++ b/drivers/input/tablet/gtco.c
@@ -858,6 +858,14 @@ static int gtco_probe(struct usb_interface *usbinterface,
 		goto err_free_buf;
 	}
 
+	/* Sanity check that a device has an endpoint */
+	if (usbinterface->altsetting[0].desc.bNumEndpoints < 1) {
+		dev_err(&usbinterface->dev,
+			"Invalid number of endpoints\n");
+		error = -EINVAL;
+		goto err_free_urb;
+	}
+
 	/*
 	 * The endpoint is always altsetting 0, we know this since we know
 	 * this device only has one interrupt endpoint
@@ -879,7 +887,7 @@ static int gtco_probe(struct usb_interface *usbinterface,
 	 * HID report descriptor
 	 */
 	if (usb_get_extra_descriptor(usbinterface->cur_altsetting,
-				     HID_DEVICE_TYPE, &hid_desc) != 0){
+				     HID_DEVICE_TYPE, &hid_desc) != 0) {
 		dev_err(&usbinterface->dev,
 			"Can't retrieve exta USB descriptor to get hid report descriptor length\n");
 		error = -EIO;

From 506788dafb7d27c31703991f0b5f7b87bd9a942c Mon Sep 17 00:00:00 2001
From: Stephen Boyd <sboyd@codeaurora.org>
Date: Sun, 17 Apr 2016 05:21:42 -0700
Subject: [PATCH 057/424] Input: pmic8xxx-pwrkey - fix algorithm for converting
 trigger delay

commit eda5ecc0a6b865561997e177c393f0b0136fe3b7 upstream.

The trigger delay algorithm that converts from microseconds to
the register value looks incorrect. According to most of the PMIC
documentation, the equation is

	delay (Seconds) = (1 / 1024) * 2 ^ (x + 4)

except for one case where the documentation looks to have a
formatting issue and the equation looks like

	delay (Seconds) = (1 / 1024) * 2 x + 4

Most likely this driver was written with the improper
documentation to begin with. According to the downstream sources
the valid delays are from 2 seconds to 1/64 second, and the
latter equation just doesn't make sense for that. Let's fix the
algorithm and the range check to match the documentation and the
downstream sources.

Reported-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Fixes: 92d57a73e410 ("input: Add support for Qualcomm PMIC8XXX power key")
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Tested-by: John Stultz <john.stultz@linaro.org>
Acked-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/input/misc/pmic8xxx-pwrkey.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/input/misc/pmic8xxx-pwrkey.c b/drivers/input/misc/pmic8xxx-pwrkey.c
index 3f02e0e03d12..67aab86048ad 100644
--- a/drivers/input/misc/pmic8xxx-pwrkey.c
+++ b/drivers/input/misc/pmic8xxx-pwrkey.c
@@ -353,7 +353,8 @@ static int pmic8xxx_pwrkey_probe(struct platform_device *pdev)
 	if (of_property_read_u32(pdev->dev.of_node, "debounce", &kpd_delay))
 		kpd_delay = 15625;
 
-	if (kpd_delay > 62500 || kpd_delay == 0) {
+	/* Valid range of pwr key trigger delay is 1/64 sec to 2 seconds. */
+	if (kpd_delay > USEC_PER_SEC * 2 || kpd_delay < USEC_PER_SEC / 64) {
 		dev_err(&pdev->dev, "invalid power key trigger delay\n");
 		return -EINVAL;
 	}
@@ -385,8 +386,8 @@ static int pmic8xxx_pwrkey_probe(struct platform_device *pdev)
 	pwr->name = "pmic8xxx_pwrkey";
 	pwr->phys = "pmic8xxx_pwrkey/input0";
 
-	delay = (kpd_delay << 10) / USEC_PER_SEC;
-	delay = 1 + ilog2(delay);
+	delay = (kpd_delay << 6) / USEC_PER_SEC;
+	delay = ilog2(delay);
 
 	err = regmap_read(regmap, PON_CNTL_1, &pon_cntl);
 	if (err < 0) {

From 4e8d89e8bb8828faf3c955fe9a50e1ae54918326 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 16 Feb 2016 16:03:23 +0100
Subject: [PATCH 058/424] xen kconfig: don't "select INPUT_XEN_KBDDEV_FRONTEND"

commit 13aa38e291bdd4e4018f40dd2f75e464814dcbf3 upstream.

The Xen framebuffer driver selects the xen keyboard driver, so the latter
will be built-in if XEN_FBDEV_FRONTEND=y. However, when CONFIG_INPUT
is a loadable module, this configuration cannot work. On mainline kernels,
the symbol will be enabled but not used, while in combination with
a patch I have to detect such useless configurations, we get the
expected link failure:

drivers/input/built-in.o: In function `xenkbd_remove':
xen-kbdfront.c:(.text+0x2f0): undefined reference to `input_unregister_device'
xen-kbdfront.c:(.text+0x30e): undefined reference to `input_unregister_device'

This removes the extra "select", as it just causes more trouble than
it helps. In theory, some defconfig file might break if it has
XEN_FBDEV_FRONTEND in it but not INPUT_XEN_KBDDEV_FRONTEND. The Kconfig
fragment we ship in the kernel (kernel/configs/xen.config) however
already enables both, and anyone using an old .config file would
keep having both enabled.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Suggested-by: David Vrabel <david.vrabel@citrix.com>
Fixes: 36c1132e34bd ("xen kconfig: fix select INPUT_XEN_KBDDEV_FRONTEND")
Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/video/fbdev/Kconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig
index e6d16d65e4e6..f07a0974fda2 100644
--- a/drivers/video/fbdev/Kconfig
+++ b/drivers/video/fbdev/Kconfig
@@ -2249,7 +2249,6 @@ config XEN_FBDEV_FRONTEND
 	select FB_SYS_IMAGEBLIT
 	select FB_SYS_FOPS
 	select FB_DEFERRED_IO
-	select INPUT_XEN_KBDDEV_FRONTEND if INPUT_MISC
 	select XEN_XENBUS_FRONTEND
 	default y
 	help

From c7ce82609fda7214292998e3a38901d3944b6c16 Mon Sep 17 00:00:00 2001
From: Yingjoe Chen <yingjoe.chen@mediatek.com>
Date: Sat, 2 Apr 2016 14:57:49 +0800
Subject: [PATCH 059/424] pinctrl: mediatek: correct debounce time unit in
 mtk_gpio_set_debounce

commit 5fedbb923936174ab4d1d5cc92bca1cf6b2e0ca2 upstream.

The debounce time unit for gpio_chip.set_debounce is us but
mtk_gpio_set_debounce regard it as ms.
Fix this by correct debounce time array dbnc_arr so it can find correct
debounce setting. Debounce time for first debounce setting is 500us,
correct this as well.

While I'm at it, also change the debounce time array name to
"debounce_time" for readability.

Signed-off-by: Yingjoe Chen <yingjoe.chen@mediatek.com>
Reviewed-by: Daniel Kurtz <djkurtz@chromium.org>
Acked-by: Hongzhou Yang <hongzhou.yang@mediatek.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/pinctrl/mediatek/pinctrl-mtk-common.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c
index 5c717275a7fa..3d8019eb3d84 100644
--- a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c
+++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c
@@ -939,7 +939,8 @@ static int mtk_gpio_set_debounce(struct gpio_chip *chip, unsigned offset,
 	struct mtk_pinctrl *pctl = dev_get_drvdata(chip->dev);
 	int eint_num, virq, eint_offset;
 	unsigned int set_offset, bit, clr_bit, clr_offset, rst, i, unmask, dbnc;
-	static const unsigned int dbnc_arr[] = {0 , 1, 16, 32, 64, 128, 256};
+	static const unsigned int debounce_time[] = {500, 1000, 16000, 32000, 64000,
+						128000, 256000};
 	const struct mtk_desc_pin *pin;
 	struct irq_data *d;
 
@@ -957,9 +958,9 @@ static int mtk_gpio_set_debounce(struct gpio_chip *chip, unsigned offset,
 	if (!mtk_eint_can_en_debounce(pctl, eint_num))
 		return -ENOSYS;
 
-	dbnc = ARRAY_SIZE(dbnc_arr);
-	for (i = 0; i < ARRAY_SIZE(dbnc_arr); i++) {
-		if (debounce <= dbnc_arr[i]) {
+	dbnc = ARRAY_SIZE(debounce_time);
+	for (i = 0; i < ARRAY_SIZE(debounce_time); i++) {
+		if (debounce <= debounce_time[i]) {
 			dbnc = i;
 			break;
 		}

From ee6a1e9eefed56308fcbd5619cb02b926b7ec630 Mon Sep 17 00:00:00 2001
From: Keerthy <j-keerthy@ti.com>
Date: Thu, 14 Apr 2016 10:29:16 +0530
Subject: [PATCH 060/424] pinctrl: single: Fix pcs_parse_bits_in_pinctrl_entry
 to use __ffs than ffs

commit 56b367c0cd67d4c3006738e7dc9dda9273fd2bfe upstream.

pcs_parse_bits_in_pinctrl_entry uses ffs which gives bit indices
ranging from 1 to MAX. This leads to a corner case where we try to request
the pin number = MAX and fails.

bit_pos value is being calculted using ffs. pin_num_from_lsb uses
bit_pos value. pins array is populated with:

pin + pin_num_from_lsb.

The above is 1 more than usual bit indices as bit_pos uses ffs to compute
first set bit. Hence the last of the pins array is populated with the MAX
value and not MAX - 1 which causes error when we call pin_request.

mask_pos is rightly calculated as ((pcs->fmask) << (bit_pos - 1))
Consequently val_pos and submask are correct.

Hence use __ffs which gives (ffs(x) - 1) as the first bit set.

fixes: 4e7e8017a8 ("pinctrl: pinctrl-single: enhance to configure multiple pins of different modules")
Signed-off-by: Keerthy <j-keerthy@ti.com>
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/pinctrl/pinctrl-single.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c
index ef04b962c3d5..23b6b8c29a99 100644
--- a/drivers/pinctrl/pinctrl-single.c
+++ b/drivers/pinctrl/pinctrl-single.c
@@ -1273,9 +1273,9 @@ static int pcs_parse_bits_in_pinctrl_entry(struct pcs_device *pcs,
 
 		/* Parse pins in each row from LSB */
 		while (mask) {
-			bit_pos = ffs(mask);
+			bit_pos = __ffs(mask);
 			pin_num_from_lsb = bit_pos / pcs->bits_per_pin;
-			mask_pos = ((pcs->fmask) << (bit_pos - 1));
+			mask_pos = ((pcs->fmask) << bit_pos);
 			val_pos = val & mask_pos;
 			submask = mask & mask_pos;
 
@@ -1847,7 +1847,7 @@ static int pcs_probe(struct platform_device *pdev)
 	ret = of_property_read_u32(np, "pinctrl-single,function-mask",
 				   &pcs->fmask);
 	if (!ret) {
-		pcs->fshift = ffs(pcs->fmask) - 1;
+		pcs->fshift = __ffs(pcs->fmask);
 		pcs->fmax = pcs->fmask >> pcs->fshift;
 	} else {
 		/* If mask property doesn't exist, function mux is invalid. */

From 99067b8e854211316200b3d6375a664448c2fabd Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Fri, 8 Apr 2016 15:12:24 +0200
Subject: [PATCH 061/424] iommu/amd: Fix checking of pci dma aliases

commit e3156048346c28c695f5cf9db67a8cf88c90f947 upstream.

Commit 61289cb ('iommu/amd: Remove old alias handling code')
removed the old alias handling code from the AMD IOMMU
driver because this is now handled by the IOMMU core code.

But this also removed the handling of PCI aliases, which is
not handled by the core code. This caused issues with PCI
devices that have hidden PCIe-to-PCI bridges that rewrite
the request-id.

Fix this bug by re-introducing some of the removed functions
from commit 61289cbaf6c8 and add a alias field
'struct iommu_dev_data'. This field carrys the return value
of the get_alias() function and uses that instead of the
amd_iommu_alias_table[] array in the code.

Fixes: 61289cbaf6c8 ('iommu/amd: Remove old alias handling code')
Tested-by: Tomasz Golinski <tomaszg@math.uwb.edu.pl>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/iommu/amd_iommu.c | 87 ++++++++++++++++++++++++++++++++++-----
 1 file changed, 76 insertions(+), 11 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index fc836f523afa..b9319b76a8a1 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -91,6 +91,7 @@ struct iommu_dev_data {
 	struct list_head dev_data_list;	  /* For global dev_data_list */
 	struct protection_domain *domain; /* Domain the device is bound to */
 	u16 devid;			  /* PCI Device ID */
+	u16 alias;			  /* Alias Device ID */
 	bool iommu_v2;			  /* Device can make use of IOMMUv2 */
 	bool passthrough;		  /* Device is identity mapped */
 	struct {
@@ -125,6 +126,13 @@ static struct protection_domain *to_pdomain(struct iommu_domain *dom)
 	return container_of(dom, struct protection_domain, domain);
 }
 
+static inline u16 get_device_id(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+
+	return PCI_DEVID(pdev->bus->number, pdev->devfn);
+}
+
 static struct iommu_dev_data *alloc_dev_data(u16 devid)
 {
 	struct iommu_dev_data *dev_data;
@@ -162,6 +170,68 @@ out_unlock:
 	return dev_data;
 }
 
+static int __last_alias(struct pci_dev *pdev, u16 alias, void *data)
+{
+	*(u16 *)data = alias;
+	return 0;
+}
+
+static u16 get_alias(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	u16 devid, ivrs_alias, pci_alias;
+
+	devid = get_device_id(dev);
+	ivrs_alias = amd_iommu_alias_table[devid];
+	pci_for_each_dma_alias(pdev, __last_alias, &pci_alias);
+
+	if (ivrs_alias == pci_alias)
+		return ivrs_alias;
+
+	/*
+	 * DMA alias showdown
+	 *
+	 * The IVRS is fairly reliable in telling us about aliases, but it
+	 * can't know about every screwy device.  If we don't have an IVRS
+	 * reported alias, use the PCI reported alias.  In that case we may
+	 * still need to initialize the rlookup and dev_table entries if the
+	 * alias is to a non-existent device.
+	 */
+	if (ivrs_alias == devid) {
+		if (!amd_iommu_rlookup_table[pci_alias]) {
+			amd_iommu_rlookup_table[pci_alias] =
+				amd_iommu_rlookup_table[devid];
+			memcpy(amd_iommu_dev_table[pci_alias].data,
+			       amd_iommu_dev_table[devid].data,
+			       sizeof(amd_iommu_dev_table[pci_alias].data));
+		}
+
+		return pci_alias;
+	}
+
+	pr_info("AMD-Vi: Using IVRS reported alias %02x:%02x.%d "
+		"for device %s[%04x:%04x], kernel reported alias "
+		"%02x:%02x.%d\n", PCI_BUS_NUM(ivrs_alias), PCI_SLOT(ivrs_alias),
+		PCI_FUNC(ivrs_alias), dev_name(dev), pdev->vendor, pdev->device,
+		PCI_BUS_NUM(pci_alias), PCI_SLOT(pci_alias),
+		PCI_FUNC(pci_alias));
+
+	/*
+	 * If we don't have a PCI DMA alias and the IVRS alias is on the same
+	 * bus, then the IVRS table may know about a quirk that we don't.
+	 */
+	if (pci_alias == devid &&
+	    PCI_BUS_NUM(ivrs_alias) == pdev->bus->number) {
+		pdev->dev_flags |= PCI_DEV_FLAGS_DMA_ALIAS_DEVFN;
+		pdev->dma_alias_devfn = ivrs_alias & 0xff;
+		pr_info("AMD-Vi: Added PCI DMA alias %02x.%d for %s\n",
+			PCI_SLOT(ivrs_alias), PCI_FUNC(ivrs_alias),
+			dev_name(dev));
+	}
+
+	return ivrs_alias;
+}
+
 static struct iommu_dev_data *find_dev_data(u16 devid)
 {
 	struct iommu_dev_data *dev_data;
@@ -174,13 +244,6 @@ static struct iommu_dev_data *find_dev_data(u16 devid)
 	return dev_data;
 }
 
-static inline u16 get_device_id(struct device *dev)
-{
-	struct pci_dev *pdev = to_pci_dev(dev);
-
-	return PCI_DEVID(pdev->bus->number, pdev->devfn);
-}
-
 static struct iommu_dev_data *get_dev_data(struct device *dev)
 {
 	return dev->archdata.iommu;
@@ -308,6 +371,8 @@ static int iommu_init_device(struct device *dev)
 	if (!dev_data)
 		return -ENOMEM;
 
+	dev_data->alias = get_alias(dev);
+
 	if (pci_iommuv2_capable(pdev)) {
 		struct amd_iommu *iommu;
 
@@ -328,7 +393,7 @@ static void iommu_ignore_device(struct device *dev)
 	u16 devid, alias;
 
 	devid = get_device_id(dev);
-	alias = amd_iommu_alias_table[devid];
+	alias = get_alias(dev);
 
 	memset(&amd_iommu_dev_table[devid], 0, sizeof(struct dev_table_entry));
 	memset(&amd_iommu_dev_table[alias], 0, sizeof(struct dev_table_entry));
@@ -1017,7 +1082,7 @@ static int device_flush_dte(struct iommu_dev_data *dev_data)
 	int ret;
 
 	iommu = amd_iommu_rlookup_table[dev_data->devid];
-	alias = amd_iommu_alias_table[dev_data->devid];
+	alias = dev_data->alias;
 
 	ret = iommu_flush_dte(iommu, dev_data->devid);
 	if (!ret && alias != dev_data->devid)
@@ -1891,7 +1956,7 @@ static void do_attach(struct iommu_dev_data *dev_data,
 	bool ats;
 
 	iommu = amd_iommu_rlookup_table[dev_data->devid];
-	alias = amd_iommu_alias_table[dev_data->devid];
+	alias = dev_data->alias;
 	ats   = dev_data->ats.enabled;
 
 	/* Update data structures */
@@ -1925,7 +1990,7 @@ static void do_detach(struct iommu_dev_data *dev_data)
 		return;
 
 	iommu = amd_iommu_rlookup_table[dev_data->devid];
-	alias = amd_iommu_alias_table[dev_data->devid];
+	alias = dev_data->alias;
 
 	/* decrease reference counters */
 	dev_data->domain->dev_iommu[iommu->index] -= 1;

From e05cff2aa31766746f02c932e11b6b2ae357464c Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Thu, 10 Mar 2016 19:28:12 +0000
Subject: [PATCH 062/424] iommu/dma: Restore scatterlist offsets correctly

commit 07b48ac4bbe527e68cfc555f2b2b206908437141 upstream.

With the change to stashing just the IOVA-page-aligned remainder of the
CPU-page offset rather than the whole thing, the failure path in
__invalidate_sg() also needs tweaking to account for that in the case of
differing page sizes where the two offsets may not be equivalent.
Similarly in __finalise_sg(), lest the architecture-specific wrappers
later get the wrong address for cache maintenance on sync or unmap.

Fixes: 164afb1d85b8 ("iommu/dma: Use correct offset in map_sg")
Reported-by: Magnus Damm <damm+renesas@opensource.se>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/iommu/dma-iommu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 72d6182666cb..58f2fe687a24 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -403,7 +403,7 @@ static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents,
 		unsigned int s_length = sg_dma_len(s);
 		unsigned int s_dma_len = s->length;
 
-		s->offset = s_offset;
+		s->offset += s_offset;
 		s->length = s_length;
 		sg_dma_address(s) = dma_addr + s_offset;
 		dma_addr += s_dma_len;
@@ -422,7 +422,7 @@ static void __invalidate_sg(struct scatterlist *sg, int nents)
 
 	for_each_sg(sg, s, nents, i) {
 		if (sg_dma_address(s) != DMA_ERROR_CODE)
-			s->offset = sg_dma_address(s);
+			s->offset += sg_dma_address(s);
 		if (sg_dma_len(s))
 			s->length = sg_dma_len(s);
 		sg_dma_address(s) = DMA_ERROR_CODE;

From b7cf6750c05ac80df28d1d66ecd949011f7e0d4b Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Tue, 12 Apr 2016 19:25:52 +0800
Subject: [PATCH 063/424] drm/amdgpu: when suspending, if uvd/vce was running.
 need to cancel delay work.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 85cc88f02eb0ecf44493c1b2ebb6f206cd5fc321 upstream.

fix the issue that when resume back, uvd/vce
dpm was disabled and uvd/vce's performace
dropped.

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 1 +
 2 files changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index 53f987aeeacf..0d016ce541c2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -273,6 +273,8 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev)
 	memcpy(adev->uvd.cpu_addr, (adev->uvd.fw->data) + offset,
 		(adev->uvd.fw->size) - offset);
 
+	cancel_delayed_work_sync(&adev->uvd.idle_work);
+
 	size = amdgpu_bo_size(adev->uvd.vcpu_bo);
 	size -= le32_to_cpu(hdr->ucode_size_bytes);
 	ptr = adev->uvd.cpu_addr;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index a745eeeb5d82..bb0da76051a1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -220,6 +220,7 @@ int amdgpu_vce_suspend(struct amdgpu_device *adev)
 	if (i == AMDGPU_MAX_VCE_HANDLES)
 		return 0;
 
+	cancel_delayed_work_sync(&adev->vce.idle_work);
 	/* TODO: suspending running encoding sessions isn't supported */
 	return -EINVAL;
 }

From 57c17683f013be3aca2bf937516da9169e3b6727 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Mon, 18 Apr 2016 18:09:57 -0400
Subject: [PATCH 064/424] drm/amdgpu: use defines for CRTCs and AMFT blocks

commit 3ea25f858fd5aeee888059952bbb8e910541eebb upstream.

Prerequiste for the next patch which ups the limits.

Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index 064ebb347074..92f8b7bf7c64 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -308,8 +308,8 @@ struct amdgpu_mode_info {
 	struct atom_context *atom_context;
 	struct card_info *atom_card_info;
 	bool mode_config_initialized;
-	struct amdgpu_crtc *crtcs[6];
-	struct amdgpu_afmt *afmt[7];
+	struct amdgpu_crtc *crtcs[AMDGPU_MAX_CRTCS];
+	struct amdgpu_afmt *afmt[AMDGPU_MAX_AFMT_BLOCKS];
 	/* DVI-I properties */
 	struct drm_property *coherent_mode_property;
 	/* DAC enable load detect */

From 25d1be8d9fbc1a1a479483c29345f578687c478a Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Mon, 18 Apr 2016 18:25:34 -0400
Subject: [PATCH 065/424] drm/amdgpu: bump the afmt limit for CZ, ST, Polaris

commit 83c5cda2ccf40a7a7e4bb674321509b346e23d5a upstream.

Fixes array overflow on these chips.

Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index 92f8b7bf7c64..89df7871653d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -52,7 +52,7 @@ struct amdgpu_hpd;
 
 #define AMDGPU_MAX_HPD_PINS 6
 #define AMDGPU_MAX_CRTCS 6
-#define AMDGPU_MAX_AFMT_BLOCKS 7
+#define AMDGPU_MAX_AFMT_BLOCKS 9
 
 enum amdgpu_rmx_type {
 	RMX_OFF,

From fe98d45db9b84d436284ed156e4f5c2f78bb7999 Mon Sep 17 00:00:00 2001
From: Sonny Jiang <sonny.jiang@amd.com>
Date: Mon, 18 Apr 2016 16:05:04 -0400
Subject: [PATCH 066/424] amdgpu/uvd: add uvd fw version for amdgpu

commit 562e2689baebaa2ac25b7ec934385480ed1cb7d6 upstream.

Was previously always hardcoded to 0.

Signed-off-by: Sonny Jiang <sonny.jiang@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h     | 1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 3 +++
 3 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index bb1099c549df..053fc2f465df 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1673,6 +1673,7 @@ struct amdgpu_uvd {
 	struct amdgpu_bo	*vcpu_bo;
 	void			*cpu_addr;
 	uint64_t		gpu_addr;
+	unsigned		fw_version;
 	atomic_t		handles[AMDGPU_MAX_UVD_HANDLES];
 	struct drm_file		*filp[AMDGPU_MAX_UVD_HANDLES];
 	struct delayed_work	idle_work;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index e23843f4d877..4488e82f87b0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -303,7 +303,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 			fw_info.feature = adev->vce.fb_version;
 			break;
 		case AMDGPU_INFO_FW_UVD:
-			fw_info.ver = 0;
+			fw_info.ver = adev->uvd.fw_version;
 			fw_info.feature = 0;
 			break;
 		case AMDGPU_INFO_FW_GMC:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index 0d016ce541c2..3b35ad83867c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -156,6 +156,9 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
 	DRM_INFO("Found UVD firmware Version: %hu.%hu Family ID: %hu\n",
 		version_major, version_minor, family_id);
 
+	adev->uvd.fw_version = ((version_major << 24) | (version_minor << 16) |
+				(family_id << 8));
+
 	bo_size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8)
 		 +  AMDGPU_UVD_STACK_SIZE + AMDGPU_UVD_HEAP_SIZE;
 	r = amdgpu_bo_create(adev, bo_size, PAGE_SIZE, true,

From 038cf9c1977ed3bd71b107bbabe72cff8e5bb12f Mon Sep 17 00:00:00 2001
From: Grigori Goronzy <greg@chown.ath.cx>
Date: Tue, 22 Mar 2016 15:48:18 -0400
Subject: [PATCH 067/424] drm/amdgpu: fix regression on CIK (v2)

This fix was written against drm-next, but when it was
backported to 4.5 as a stable fix, the driver internal
structure change was missed.  Fix that up here to avoid
a hang due to waiting for the wrong sequence number.

v2: agd: fix up commit message

Signed-off-by: Grigori Goronzy <greg@chown.ath.cx>
Cc: stable@vger.kernel.org
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index aa491540ba85..946300764609 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -3628,7 +3628,7 @@ static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 					unsigned vm_id, uint64_t pd_addr)
 {
 	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
-	uint32_t seq = ring->fence_drv.sync_seq;
+	uint32_t seq = ring->fence_drv.sync_seq[ring->idx];
 	uint64_t addr = ring->fence_drv.gpu_addr;
 
 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));

From a37564fa5a495a9e3f59fb648315ad33085476cd Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Thu, 14 Apr 2016 14:15:16 -0400
Subject: [PATCH 068/424] drm/radeon: add a quirk for a XFX R9 270X

commit bcb31eba4a4ea356fd61cbd5dec5511c3883f57e upstream.

bug:
https://bugs.freedesktop.org/show_bug.cgi?id=76490

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/radeon/si_dpm.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c
index 7285adb27099..caa73de584a5 100644
--- a/drivers/gpu/drm/radeon/si_dpm.c
+++ b/drivers/gpu/drm/radeon/si_dpm.c
@@ -2931,6 +2931,7 @@ static struct si_dpm_quirk si_dpm_quirk_list[] = {
 	{ PCI_VENDOR_ID_ATI, 0x6811, 0x1462, 0x2015, 0, 120000 },
 	{ PCI_VENDOR_ID_ATI, 0x6811, 0x1043, 0x2015, 0, 120000 },
 	{ PCI_VENDOR_ID_ATI, 0x6811, 0x148c, 0x2015, 0, 120000 },
+	{ PCI_VENDOR_ID_ATI, 0x6810, 0x1682, 0x9275, 0, 120000 },
 	{ 0, 0, 0, 0 },
 };
 

From 2ae4d4093977f2f29af5c92d6e0627eca3a97e20 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Wed, 13 Apr 2016 12:08:27 -0400
Subject: [PATCH 069/424] drm/radeon: fix initial connector audio value

commit 7403c515c49c033fec33df0814fffdc977e6acdc upstream.

This got lost somewhere along the way.  This fixes
audio not working until set_property was called.

Noticed-by: Hyungwon Hwang <hyungwon.hwang7@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/radeon/radeon_connectors.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c
index 340f3f549f29..9cfc1c3e1965 100644
--- a/drivers/gpu/drm/radeon/radeon_connectors.c
+++ b/drivers/gpu/drm/radeon/radeon_connectors.c
@@ -1996,10 +1996,12 @@ radeon_add_atom_connector(struct drm_device *dev,
 						   rdev->mode_info.dither_property,
 						   RADEON_FMT_DITHER_DISABLE);
 
-			if (radeon_audio != 0)
+			if (radeon_audio != 0) {
 				drm_object_attach_property(&radeon_connector->base.base,
 							   rdev->mode_info.audio_property,
 							   RADEON_AUDIO_AUTO);
+				radeon_connector->audio = RADEON_AUDIO_AUTO;
+			}
 			if (ASIC_IS_DCE5(rdev))
 				drm_object_attach_property(&radeon_connector->base.base,
 							   rdev->mode_info.output_csc_property,
@@ -2124,6 +2126,7 @@ radeon_add_atom_connector(struct drm_device *dev,
 				drm_object_attach_property(&radeon_connector->base.base,
 							   rdev->mode_info.audio_property,
 							   RADEON_AUDIO_AUTO);
+				radeon_connector->audio = RADEON_AUDIO_AUTO;
 			}
 			if (connector_type == DRM_MODE_CONNECTOR_DVII) {
 				radeon_connector->dac_load_detect = true;
@@ -2179,6 +2182,7 @@ radeon_add_atom_connector(struct drm_device *dev,
 				drm_object_attach_property(&radeon_connector->base.base,
 							   rdev->mode_info.audio_property,
 							   RADEON_AUDIO_AUTO);
+				radeon_connector->audio = RADEON_AUDIO_AUTO;
 			}
 			if (ASIC_IS_DCE5(rdev))
 				drm_object_attach_property(&radeon_connector->base.base,
@@ -2231,6 +2235,7 @@ radeon_add_atom_connector(struct drm_device *dev,
 				drm_object_attach_property(&radeon_connector->base.base,
 							   rdev->mode_info.audio_property,
 							   RADEON_AUDIO_AUTO);
+				radeon_connector->audio = RADEON_AUDIO_AUTO;
 			}
 			if (ASIC_IS_DCE5(rdev))
 				drm_object_attach_property(&radeon_connector->base.base,

From 7946284184695f2ba338b5c8c45a40f0b732fb2a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Glisse?= <jglisse@redhat.com>
Date: Tue, 19 Apr 2016 09:07:50 -0400
Subject: [PATCH 070/424] drm/radeon: forbid mapping of userptr bo through
 radeon device file
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit b5dcec693f87cb8475f2291c0075b2422addd3d6 upstream.

Allowing userptr bo which are basicly a list of page from some vma
(so either anonymous page or file backed page) would lead to serious
corruption of kernel structures and counters (because we overwrite
the page->mapping field when mapping buffer).

This will already block if the buffer was populated before anyone does
try to mmap it because then TTM_PAGE_FLAG_SG would be set in in the
ttm_tt flags. But that flag is check before ttm_tt_populate in the ttm
vm fault handler.

So to be safe just add a check to verify_access() callback.

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Jérôme Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/radeon/radeon_ttm.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index e06ac546a90f..f342aad79cc6 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -235,6 +235,8 @@ static int radeon_verify_access(struct ttm_buffer_object *bo, struct file *filp)
 {
 	struct radeon_bo *rbo = container_of(bo, struct radeon_bo, tbo);
 
+	if (radeon_ttm_tt_has_userptr(bo->ttm))
+		return -EPERM;
 	return drm_vma_node_verify_access(&rbo->gem_base.vma_node, filp);
 }
 

From 4b6b0008f9dd3f18ac1d42c28e25dc09715ebc66 Mon Sep 17 00:00:00 2001
From: Vitaly Prosyak <vitaly.prosyak@amd.com>
Date: Thu, 14 Apr 2016 13:34:03 -0400
Subject: [PATCH 071/424] drm/radeon: fix vertical bars appear on monitor (v2)

commit 5d5b7803c49bbb01bdf4c6e95e8314d0515b9484 upstream.

When crtc/timing is disabled on boot the dig block
should be stopped in order ignore timing from crtc,
reset the steering fifo otherwise we get display
corruption or hung in dp sst mode.

v2: agd: fix coding style

Signed-off-by: Vitaly Prosyak <vitaly.prosyak@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/radeon/evergreen.c     | 154 ++++++++++++++++++++++++-
 drivers/gpu/drm/radeon/evergreen_reg.h |  46 ++++++++
 2 files changed, 199 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index 2ad462896896..32491355a1d4 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -2608,10 +2608,152 @@ static void evergreen_agp_enable(struct radeon_device *rdev)
 	WREG32(VM_CONTEXT1_CNTL, 0);
 }
 
+static const unsigned ni_dig_offsets[] =
+{
+	NI_DIG0_REGISTER_OFFSET,
+	NI_DIG1_REGISTER_OFFSET,
+	NI_DIG2_REGISTER_OFFSET,
+	NI_DIG3_REGISTER_OFFSET,
+	NI_DIG4_REGISTER_OFFSET,
+	NI_DIG5_REGISTER_OFFSET
+};
+
+static const unsigned ni_tx_offsets[] =
+{
+	NI_DCIO_UNIPHY0_UNIPHY_TX_CONTROL1,
+	NI_DCIO_UNIPHY1_UNIPHY_TX_CONTROL1,
+	NI_DCIO_UNIPHY2_UNIPHY_TX_CONTROL1,
+	NI_DCIO_UNIPHY3_UNIPHY_TX_CONTROL1,
+	NI_DCIO_UNIPHY4_UNIPHY_TX_CONTROL1,
+	NI_DCIO_UNIPHY5_UNIPHY_TX_CONTROL1
+};
+
+static const unsigned evergreen_dp_offsets[] =
+{
+	EVERGREEN_DP0_REGISTER_OFFSET,
+	EVERGREEN_DP1_REGISTER_OFFSET,
+	EVERGREEN_DP2_REGISTER_OFFSET,
+	EVERGREEN_DP3_REGISTER_OFFSET,
+	EVERGREEN_DP4_REGISTER_OFFSET,
+	EVERGREEN_DP5_REGISTER_OFFSET
+};
+
+
+/*
+ * Assumption is that EVERGREEN_CRTC_MASTER_EN enable for requested crtc
+ * We go from crtc to connector and it is not relible  since it
+ * should be an opposite direction .If crtc is enable then
+ * find the dig_fe which selects this crtc and insure that it enable.
+ * if such dig_fe is found then find dig_be which selects found dig_be and
+ * insure that it enable and in DP_SST mode.
+ * if UNIPHY_PLL_CONTROL1.enable then we should disconnect timing
+ * from dp symbols clocks .
+ */
+static bool evergreen_is_dp_sst_stream_enabled(struct radeon_device *rdev,
+					       unsigned crtc_id, unsigned *ret_dig_fe)
+{
+	unsigned i;
+	unsigned dig_fe;
+	unsigned dig_be;
+	unsigned dig_en_be;
+	unsigned uniphy_pll;
+	unsigned digs_fe_selected;
+	unsigned dig_be_mode;
+	unsigned dig_fe_mask;
+	bool is_enabled = false;
+	bool found_crtc = false;
+
+	/* loop through all running dig_fe to find selected crtc */
+	for (i = 0; i < ARRAY_SIZE(ni_dig_offsets); i++) {
+		dig_fe = RREG32(NI_DIG_FE_CNTL + ni_dig_offsets[i]);
+		if (dig_fe & NI_DIG_FE_CNTL_SYMCLK_FE_ON &&
+		    crtc_id == NI_DIG_FE_CNTL_SOURCE_SELECT(dig_fe)) {
+			/* found running pipe */
+			found_crtc = true;
+			dig_fe_mask = 1 << i;
+			dig_fe = i;
+			break;
+		}
+	}
+
+	if (found_crtc) {
+		/* loop through all running dig_be to find selected dig_fe */
+		for (i = 0; i < ARRAY_SIZE(ni_dig_offsets); i++) {
+			dig_be = RREG32(NI_DIG_BE_CNTL + ni_dig_offsets[i]);
+			/* if dig_fe_selected by dig_be? */
+			digs_fe_selected = NI_DIG_BE_CNTL_FE_SOURCE_SELECT(dig_be);
+			dig_be_mode = NI_DIG_FE_CNTL_MODE(dig_be);
+			if (dig_fe_mask &  digs_fe_selected &&
+			    /* if dig_be in sst mode? */
+			    dig_be_mode == NI_DIG_BE_DPSST) {
+				dig_en_be = RREG32(NI_DIG_BE_EN_CNTL +
+						   ni_dig_offsets[i]);
+				uniphy_pll = RREG32(NI_DCIO_UNIPHY0_PLL_CONTROL1 +
+						    ni_tx_offsets[i]);
+				/* dig_be enable and tx is running */
+				if (dig_en_be & NI_DIG_BE_EN_CNTL_ENABLE &&
+				    dig_en_be & NI_DIG_BE_EN_CNTL_SYMBCLK_ON &&
+				    uniphy_pll & NI_DCIO_UNIPHY0_PLL_CONTROL1_ENABLE) {
+					is_enabled = true;
+					*ret_dig_fe = dig_fe;
+					break;
+				}
+			}
+		}
+	}
+
+	return is_enabled;
+}
+
+/*
+ * Blank dig when in dp sst mode
+ * Dig ignores crtc timing
+ */
+static void evergreen_blank_dp_output(struct radeon_device *rdev,
+				      unsigned dig_fe)
+{
+	unsigned stream_ctrl;
+	unsigned fifo_ctrl;
+	unsigned counter = 0;
+
+	if (dig_fe >= ARRAY_SIZE(evergreen_dp_offsets)) {
+		DRM_ERROR("invalid dig_fe %d\n", dig_fe);
+		return;
+	}
+
+	stream_ctrl = RREG32(EVERGREEN_DP_VID_STREAM_CNTL +
+			     evergreen_dp_offsets[dig_fe]);
+	if (!(stream_ctrl & EVERGREEN_DP_VID_STREAM_CNTL_ENABLE)) {
+		DRM_ERROR("dig %d , should be enable\n", dig_fe);
+		return;
+	}
+
+	stream_ctrl &=~EVERGREEN_DP_VID_STREAM_CNTL_ENABLE;
+	WREG32(EVERGREEN_DP_VID_STREAM_CNTL +
+	       evergreen_dp_offsets[dig_fe], stream_ctrl);
+
+	stream_ctrl = RREG32(EVERGREEN_DP_VID_STREAM_CNTL +
+			     evergreen_dp_offsets[dig_fe]);
+	while (counter < 32 && stream_ctrl & EVERGREEN_DP_VID_STREAM_STATUS) {
+		msleep(1);
+		counter++;
+		stream_ctrl = RREG32(EVERGREEN_DP_VID_STREAM_CNTL +
+				     evergreen_dp_offsets[dig_fe]);
+	}
+	if (counter >= 32 )
+		DRM_ERROR("counter exceeds %d\n", counter);
+
+	fifo_ctrl = RREG32(EVERGREEN_DP_STEER_FIFO + evergreen_dp_offsets[dig_fe]);
+	fifo_ctrl |= EVERGREEN_DP_STEER_FIFO_RESET;
+	WREG32(EVERGREEN_DP_STEER_FIFO + evergreen_dp_offsets[dig_fe], fifo_ctrl);
+
+}
+
 void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save)
 {
 	u32 crtc_enabled, tmp, frame_count, blackout;
 	int i, j;
+	unsigned dig_fe;
 
 	if (!ASIC_IS_NODCE(rdev)) {
 		save->vga_render_control = RREG32(VGA_RENDER_CONTROL);
@@ -2651,7 +2793,17 @@ void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *sav
 					break;
 				udelay(1);
 			}
-
+			/*we should disable dig if it drives dp sst*/
+			/*but we are in radeon_device_init and the topology is unknown*/
+			/*and it is available after radeon_modeset_init*/
+			/*the following method radeon_atom_encoder_dpms_dig*/
+			/*does the job if we initialize it properly*/
+			/*for now we do it this manually*/
+			/**/
+			if (ASIC_IS_DCE5(rdev) &&
+			    evergreen_is_dp_sst_stream_enabled(rdev, i ,&dig_fe))
+				evergreen_blank_dp_output(rdev, dig_fe);
+			/*we could remove 6 lines below*/
 			/* XXX this is a hack to avoid strange behavior with EFI on certain systems */
 			WREG32(EVERGREEN_CRTC_UPDATE_LOCK + crtc_offsets[i], 1);
 			tmp = RREG32(EVERGREEN_CRTC_CONTROL + crtc_offsets[i]);
diff --git a/drivers/gpu/drm/radeon/evergreen_reg.h b/drivers/gpu/drm/radeon/evergreen_reg.h
index aa939dfed3a3..b436badf9efa 100644
--- a/drivers/gpu/drm/radeon/evergreen_reg.h
+++ b/drivers/gpu/drm/radeon/evergreen_reg.h
@@ -250,8 +250,43 @@
 
 /* HDMI blocks at 0x7030, 0x7c30, 0x10830, 0x11430, 0x12030, 0x12c30 */
 #define EVERGREEN_HDMI_BASE				0x7030
+/*DIG block*/
+#define NI_DIG0_REGISTER_OFFSET                 (0x7000  - 0x7000)
+#define NI_DIG1_REGISTER_OFFSET                 (0x7C00  - 0x7000)
+#define NI_DIG2_REGISTER_OFFSET                 (0x10800 - 0x7000)
+#define NI_DIG3_REGISTER_OFFSET                 (0x11400 - 0x7000)
+#define NI_DIG4_REGISTER_OFFSET                 (0x12000 - 0x7000)
+#define NI_DIG5_REGISTER_OFFSET                 (0x12C00 - 0x7000)
+
+
+#define NI_DIG_FE_CNTL                               0x7000
+#       define NI_DIG_FE_CNTL_SOURCE_SELECT(x)        ((x) & 0x3)
+#       define NI_DIG_FE_CNTL_SYMCLK_FE_ON            (1<<24)
+
+
+#define NI_DIG_BE_CNTL                    0x7140
+#       define NI_DIG_BE_CNTL_FE_SOURCE_SELECT(x)     (((x) >> 8 ) & 0x3F)
+#       define NI_DIG_FE_CNTL_MODE(x)                 (((x) >> 16) & 0x7 )
+
+#define NI_DIG_BE_EN_CNTL                              0x7144
+#       define NI_DIG_BE_EN_CNTL_ENABLE               (1 << 0)
+#       define NI_DIG_BE_EN_CNTL_SYMBCLK_ON           (1 << 8)
+#       define NI_DIG_BE_DPSST 0
 
 /* Display Port block */
+#define EVERGREEN_DP0_REGISTER_OFFSET                 (0x730C  - 0x730C)
+#define EVERGREEN_DP1_REGISTER_OFFSET                 (0x7F0C  - 0x730C)
+#define EVERGREEN_DP2_REGISTER_OFFSET                 (0x10B0C - 0x730C)
+#define EVERGREEN_DP3_REGISTER_OFFSET                 (0x1170C - 0x730C)
+#define EVERGREEN_DP4_REGISTER_OFFSET                 (0x1230C - 0x730C)
+#define EVERGREEN_DP5_REGISTER_OFFSET                 (0x12F0C - 0x730C)
+
+
+#define EVERGREEN_DP_VID_STREAM_CNTL                    0x730C
+#       define EVERGREEN_DP_VID_STREAM_CNTL_ENABLE     (1 << 0)
+#       define EVERGREEN_DP_VID_STREAM_STATUS          (1 <<16)
+#define EVERGREEN_DP_STEER_FIFO                         0x7310
+#       define EVERGREEN_DP_STEER_FIFO_RESET           (1 << 0)
 #define EVERGREEN_DP_SEC_CNTL                           0x7280
 #       define EVERGREEN_DP_SEC_STREAM_ENABLE           (1 << 0)
 #       define EVERGREEN_DP_SEC_ASP_ENABLE              (1 << 4)
@@ -266,4 +301,15 @@
 #       define EVERGREEN_DP_SEC_N_BASE_MULTIPLE(x)      (((x) & 0xf) << 24)
 #       define EVERGREEN_DP_SEC_SS_EN                   (1 << 28)
 
+/*DCIO_UNIPHY block*/
+#define NI_DCIO_UNIPHY0_UNIPHY_TX_CONTROL1            (0x6600  -0x6600)
+#define NI_DCIO_UNIPHY1_UNIPHY_TX_CONTROL1            (0x6640  -0x6600)
+#define NI_DCIO_UNIPHY2_UNIPHY_TX_CONTROL1            (0x6680 - 0x6600)
+#define NI_DCIO_UNIPHY3_UNIPHY_TX_CONTROL1            (0x66C0 - 0x6600)
+#define NI_DCIO_UNIPHY4_UNIPHY_TX_CONTROL1            (0x6700 - 0x6600)
+#define NI_DCIO_UNIPHY5_UNIPHY_TX_CONTROL1            (0x6740 - 0x6600)
+
+#define NI_DCIO_UNIPHY0_PLL_CONTROL1                   0x6618
+#       define NI_DCIO_UNIPHY0_PLL_CONTROL1_ENABLE     (1 << 0)
+
 #endif

From 7973c7c36e96d9c2afda1df9e4f4c2518cbe6588 Mon Sep 17 00:00:00 2001
From: Huacai Chen <chenhc@lemote.com>
Date: Tue, 19 Apr 2016 19:19:11 +0800
Subject: [PATCH 072/424] drm: Loongson-3 doesn't fully support wc memory

commit 221004c66a58949a0f25c937a6789c0839feb530 upstream.

Signed-off-by: Huacai Chen <chenhc@lemote.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/drm/drm_cache.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/drm/drm_cache.h b/include/drm/drm_cache.h
index 461a0558bca4..cebecff536a3 100644
--- a/include/drm/drm_cache.h
+++ b/include/drm/drm_cache.h
@@ -39,6 +39,8 @@ static inline bool drm_arch_can_wc_memory(void)
 {
 #if defined(CONFIG_PPC) && !defined(CONFIG_NOT_COHERENT_CACHE)
 	return false;
+#elif defined(CONFIG_MIPS) && defined(CONFIG_CPU_LOONGSON3)
+	return false;
 #else
 	return true;
 #endif

From 20a32ec7ae6c46768d91c61d99228f13c2a7912b Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Fri, 22 Apr 2016 10:05:21 +1000
Subject: [PATCH 073/424] drm/nouveau/gr/gf100: select a stream master to fixup
 tfb offset queries

commit 28dca90533750c7e31e8641c3df426bad9c12941 upstream.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
index 9f5dfc85147a..36655a74c538 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
@@ -1717,6 +1717,8 @@ gf100_gr_init(struct gf100_gr *gr)
 
 	gf100_gr_mmio(gr, gr->func->mmio);
 
+	nvkm_mask(device, TPC_UNIT(0, 0, 0x05c), 0x00000001, 0x00000001);
+
 	memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
 	for (i = 0, gpc = -1; i < gr->tpc_total; i++) {
 		do {

From e51d7655d3dcac3ea2185fec178872b11b9f03be Mon Sep 17 00:00:00 2001
From: "cpaul@redhat.com" <cpaul@redhat.com>
Date: Mon, 4 Apr 2016 19:58:47 -0400
Subject: [PATCH 074/424] drm/dp/mst: Validate port in
 drm_dp_payload_send_msg()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit deba0a2af9592b2022a0bce7b085a318b53ce1db upstream.

With the joys of things running concurrently, there's always a chance
that the port we get passed in drm_dp_payload_send_msg() isn't actually
valid anymore. Because of this, we need to make sure we validate the
reference to the port before we use it otherwise we risk running into
various race conditions. For instance, on the Dell MST monitor I have
here for testing, hotplugging it enough times causes us to kernel panic:

[drm:intel_mst_enable_dp] 1
[drm:drm_dp_update_payload_part2] payload 0 1
[drm:intel_get_hpd_pins] hotplug event received, stat 0x00200000, dig 0x10101011, pins 0x00000020
[drm:intel_hpd_irq_handler] digital hpd port B - short
[drm:intel_dp_hpd_pulse] got hpd irq on port B - short
[drm:intel_dp_check_mst_status] got esi 00 10 00
[drm:drm_dp_update_payload_part2] payload 1 1
general protection fault: 0000 [#1] SMP
…
Call Trace:
 [<ffffffffa012b632>] drm_dp_update_payload_part2+0xc2/0x130 [drm_kms_helper]
 [<ffffffffa032ef08>] intel_mst_enable_dp+0xf8/0x180 [i915]
 [<ffffffffa0310dbd>] haswell_crtc_enable+0x3ed/0x8c0 [i915]
 [<ffffffffa030c84d>] intel_atomic_commit+0x5ad/0x1590 [i915]
 [<ffffffffa01db877>] ? drm_atomic_set_crtc_for_connector+0x57/0xe0 [drm]
 [<ffffffffa01dc4e7>] drm_atomic_commit+0x37/0x60 [drm]
 [<ffffffffa0130a3a>] drm_atomic_helper_set_config+0x7a/0xb0 [drm_kms_helper]
 [<ffffffffa01cc482>] drm_mode_set_config_internal+0x62/0x100 [drm]
 [<ffffffffa01d02ad>] drm_mode_setcrtc+0x3cd/0x4e0 [drm]
 [<ffffffffa01c18e3>] drm_ioctl+0x143/0x510 [drm]
 [<ffffffffa01cfee0>] ? drm_mode_setplane+0x1b0/0x1b0 [drm]
 [<ffffffff810f79a7>] ? hrtimer_start_range_ns+0x1b7/0x3a0
 [<ffffffff81212962>] do_vfs_ioctl+0x92/0x570
 [<ffffffff81590852>] ? __sys_recvmsg+0x42/0x80
 [<ffffffff81212eb9>] SyS_ioctl+0x79/0x90
 [<ffffffff816b4e32>] entry_SYSCALL_64_fastpath+0x1a/0xa4
RIP  [<ffffffffa012b026>] drm_dp_payload_send_msg+0x146/0x1f0 [drm_kms_helper]

Which occurs because of the hotplug event shown in the log, which ends
up causing DRM's dp helpers to drop the port we're updating the payload
on and panic.

Signed-off-by: Lyude <cpaul@redhat.com>
Reviewed-by: David Airlie <airlied@linux.ie>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/drm_dp_mst_topology.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c
index 39d7e2e15c11..a4a3de372b69 100644
--- a/drivers/gpu/drm/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/drm_dp_mst_topology.c
@@ -1665,13 +1665,19 @@ static int drm_dp_payload_send_msg(struct drm_dp_mst_topology_mgr *mgr,
 	struct drm_dp_mst_branch *mstb;
 	int len, ret, port_num;
 
+	port = drm_dp_get_validated_port_ref(mgr, port);
+	if (!port)
+		return -EINVAL;
+
 	port_num = port->port_num;
 	mstb = drm_dp_get_validated_mstb_ref(mgr, port->parent);
 	if (!mstb) {
 		mstb = drm_dp_get_last_connected_port_and_mstb(mgr, port->parent, &port_num);
 
-		if (!mstb)
+		if (!mstb) {
+			drm_dp_put_port(port);
 			return -EINVAL;
+		}
 	}
 
 	txmsg = kzalloc(sizeof(*txmsg), GFP_KERNEL);
@@ -1697,6 +1703,7 @@ static int drm_dp_payload_send_msg(struct drm_dp_mst_topology_mgr *mgr,
 	kfree(txmsg);
 fail_put:
 	drm_dp_put_mst_branch_device(mstb);
+	drm_dp_put_port(port);
 	return ret;
 }
 

From 3ae01ae65df95a372451e476725ce278bec8787c Mon Sep 17 00:00:00 2001
From: Lyude <cpaul@redhat.com>
Date: Wed, 13 Apr 2016 16:50:18 -0400
Subject: [PATCH 075/424] drm/dp/mst: Restore primary hub guid on resume

commit 9dc0487d96a0396367a1451b31873482080b527f upstream.

Some hubs are forgetful, and end up forgetting whatever GUID we set
previously after we do a suspend/resume cycle. This can lead to
hotplugging breaking (along with probably other things) since the hub
will start sending connection notifications with the wrong GUID. As
such, we need to check on resume whether or not the GUID the hub is
giving us is valid.

Signed-off-by: Lyude <cpaul@redhat.com>
Reviewed-by: Harry Wentland <harry.wentland@amd.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/1460580618-7421-1-git-send-email-cpaul@redhat.com
Signed-off-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/drm_dp_mst_topology.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c
index a4a3de372b69..04350dfa4959 100644
--- a/drivers/gpu/drm/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/drm_dp_mst_topology.c
@@ -2116,6 +2116,8 @@ int drm_dp_mst_topology_mgr_resume(struct drm_dp_mst_topology_mgr *mgr)
 
 	if (mgr->mst_primary) {
 		int sret;
+		u8 guid[16];
+
 		sret = drm_dp_dpcd_read(mgr->aux, DP_DPCD_REV, mgr->dpcd, DP_RECEIVER_CAP_SIZE);
 		if (sret != DP_RECEIVER_CAP_SIZE) {
 			DRM_DEBUG_KMS("dpcd read failed - undocked during suspend?\n");
@@ -2130,6 +2132,16 @@ int drm_dp_mst_topology_mgr_resume(struct drm_dp_mst_topology_mgr *mgr)
 			ret = -1;
 			goto out_unlock;
 		}
+
+		/* Some hubs forget their guids after they resume */
+		sret = drm_dp_dpcd_read(mgr->aux, DP_GUID, guid, 16);
+		if (sret != 16) {
+			DRM_DEBUG_KMS("dpcd read failed - undocked during suspend?\n");
+			ret = -1;
+			goto out_unlock;
+		}
+		drm_dp_check_mstb_guid(mgr->mst_primary, guid);
+
 		ret = 0;
 	} else
 		ret = -1;

From 385af1d58254412e42d06b19e3cbe60b55cf34a6 Mon Sep 17 00:00:00 2001
From: "cpaul@redhat.com" <cpaul@redhat.com>
Date: Fri, 22 Apr 2016 16:08:46 -0400
Subject: [PATCH 076/424] drm/dp/mst: Get validated port ref in
 drm_dp_update_payload_part1()

commit 263efde31f97c498e1ebad30e4d2906609d7ad6b upstream.

We can thank KASAN for finding this, otherwise I probably would have spent
hours on it. This fixes a somewhat harder to trigger kernel panic, occuring
while enabling MST where the port we were currently updating the payload on
would have all of it's refs dropped before we finished what we were doing:

==================================================================
BUG: KASAN: use-after-free in drm_dp_update_payload_part1+0xb3f/0xdb0 [drm_kms_helper] at addr ffff8800d29de018
Read of size 4 by task Xorg/973
=============================================================================
BUG kmalloc-2048 (Tainted: G    B   W      ): kasan: bad access detected
-----------------------------------------------------------------------------

INFO: Allocated in drm_dp_add_port+0x1aa/0x1ed0 [drm_kms_helper] age=16477 cpu=0 pid=2175
	___slab_alloc+0x472/0x490
	__slab_alloc+0x20/0x40
	kmem_cache_alloc_trace+0x151/0x190
	drm_dp_add_port+0x1aa/0x1ed0 [drm_kms_helper]
	drm_dp_send_link_address+0x526/0x960 [drm_kms_helper]
	drm_dp_check_and_send_link_address+0x1ac/0x210 [drm_kms_helper]
	drm_dp_mst_link_probe_work+0x77/0xd0 [drm_kms_helper]
	process_one_work+0x562/0x1350
	worker_thread+0xd9/0x1390
	kthread+0x1c5/0x260
	ret_from_fork+0x22/0x40
INFO: Freed in drm_dp_free_mst_port+0x50/0x60 [drm_kms_helper] age=7521 cpu=0 pid=2175
	__slab_free+0x17f/0x2d0
	kfree+0x169/0x180
	drm_dp_free_mst_port+0x50/0x60 [drm_kms_helper]
	drm_dp_destroy_connector_work+0x2b8/0x490 [drm_kms_helper]
	process_one_work+0x562/0x1350
	worker_thread+0xd9/0x1390
	kthread+0x1c5/0x260
	ret_from_fork+0x22/0x40

which on this T460s, would eventually lead to kernel panics in somewhat
random places later in intel_mst_enable_dp() if we got lucky enough.

Signed-off-by: Lyude <cpaul@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/drm_dp_mst_topology.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c
index 04350dfa4959..d268bf18a662 100644
--- a/drivers/gpu/drm/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/drm_dp_mst_topology.c
@@ -1786,6 +1786,11 @@ int drm_dp_update_payload_part1(struct drm_dp_mst_topology_mgr *mgr)
 		req_payload.start_slot = cur_slots;
 		if (mgr->proposed_vcpis[i]) {
 			port = container_of(mgr->proposed_vcpis[i], struct drm_dp_mst_port, vcpi);
+			port = drm_dp_get_validated_port_ref(mgr, port);
+			if (!port) {
+				mutex_unlock(&mgr->payload_lock);
+				return -EINVAL;
+			}
 			req_payload.num_slots = mgr->proposed_vcpis[i]->num_slots;
 		} else {
 			port = NULL;
@@ -1811,6 +1816,9 @@ int drm_dp_update_payload_part1(struct drm_dp_mst_topology_mgr *mgr)
 			mgr->payloads[i].payload_state = req_payload.payload_state;
 		}
 		cur_slots += req_payload.num_slots;
+
+		if (port)
+			drm_dp_put_port(port);
 	}
 
 	for (i = 0; i < mgr->max_payloads; i++) {

From 194de738b69315721adc4e6dbafe81c790b318c8 Mon Sep 17 00:00:00 2001
From: Vladimir Zapolskiy <vz@mleia.com>
Date: Sun, 6 Mar 2016 03:21:46 +0200
Subject: [PATCH 077/424] pwm: brcmstb: Fix check of devm_ioremap_resource()
 return code

commit c5857e3f94ab2719dfac649a146cb5dd6f21fcf3 upstream.

The change fixes potential oops while accessing iomem on invalid address
if devm_ioremap_resource() fails due to some reason.

The devm_ioremap_resource() function returns ERR_PTR() and never returns
NULL, which makes useless a following check for NULL.

Signed-off-by: Vladimir Zapolskiy <vz@mleia.com>
Fixes: 3a9f5957020f ("pwm: Add Broadcom BCM7038 PWM controller support")
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/pwm/pwm-brcmstb.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/pwm/pwm-brcmstb.c b/drivers/pwm/pwm-brcmstb.c
index 423ce087cd9c..5d5adee16886 100644
--- a/drivers/pwm/pwm-brcmstb.c
+++ b/drivers/pwm/pwm-brcmstb.c
@@ -274,8 +274,8 @@ static int brcmstb_pwm_probe(struct platform_device *pdev)
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	p->base = devm_ioremap_resource(&pdev->dev, res);
-	if (!p->base) {
-		ret = -ENOMEM;
+	if (IS_ERR(p->base)) {
+		ret = PTR_ERR(p->base);
 		goto out_clk;
 	}
 

From 39fa719753bcef274084502c1ec8cfefc556209f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Mon, 11 Jan 2016 20:48:32 +0200
Subject: [PATCH 078/424] drm/i915: Cleanup phys status page too
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 7d3fdfff23852fe458a0d0979a3555fe60f1e563 upstream.

Restore the lost phys status page cleanup.

Fixes the following splat with DMA_API_DEBUG=y:

WARNING: CPU: 0 PID: 21615 at ../lib/dma-debug.c:974 dma_debug_device_change+0x190/0x1f0()
pci 0000:00:02.0: DMA-API: device driver has pending DMA allocations while released from device [count=1]
               One of leaked entries details: [device address=0x0000000023163000] [size=4096 bytes] [mapped with DMA_BIDIRECTIONAL] [mapped as coherent]
Modules linked in: i915(-) i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm sha256_generic hmac drbg ctr ccm sch_fq_codel binfmt_misc joydev mousedev arc4 ath5k iTCO_wdt mac80211 smsc_ircc2 ath snd_intel8x0m snd_intel8x0 snd_ac97_codec ac97_bus psmouse snd_pcm input_leds i2c_i801 pcspkr snd_timer cfg80211 snd soundcore i2c_core ehci_pci firewire_ohci ehci_hcd firewire_core lpc_ich 8139too rfkill crc_itu_t mfd_core mii usbcore rng_core intel_agp intel_gtt usb_common agpgart irda crc_ccitt fujitsu_laptop led_class parport_pc video parport evdev backlight
CPU: 0 PID: 21615 Comm: rmmod Tainted: G     U          4.4.0-rc4-mgm-ovl+ #4
Hardware name: FUJITSU SIEMENS LIFEBOOK S6120/FJNB16C, BIOS Version 1.26  05/10/2004
 e31a3de0 e31a3de0 e31a3d9c c128d4bd e31a3dd0 c1045a0c c15e00c4 e31a3dfc
 0000546f c15dfad2 000003ce c12b3740 000003ce c12b3740 00000000 00000001
 f61fb8a0 e31a3de8 c1045a83 00000009 e31a3de0 c15e00c4 e31a3dfc e31a3e4c
Call Trace:
 [<c128d4bd>] dump_stack+0x16/0x19
 [<c1045a0c>] warn_slowpath_common+0x8c/0xd0
 [<c12b3740>] ? dma_debug_device_change+0x190/0x1f0
 [<c12b3740>] ? dma_debug_device_change+0x190/0x1f0
 [<c1045a83>] warn_slowpath_fmt+0x33/0x40
 [<c12b3740>] dma_debug_device_change+0x190/0x1f0
 [<c1065499>] notifier_call_chain+0x59/0x70
 [<c10655af>] __blocking_notifier_call_chain+0x3f/0x80
 [<c106560f>] blocking_notifier_call_chain+0x1f/0x30
 [<c134cfb3>] __device_release_driver+0xc3/0xf0
 [<c134d0d7>] driver_detach+0x97/0xa0
 [<c134c440>] bus_remove_driver+0x40/0x90
 [<c134db18>] driver_unregister+0x28/0x60
 [<c1079e8c>] ? trace_hardirqs_on_caller+0x12c/0x1d0
 [<c12c0618>] pci_unregister_driver+0x18/0x80
 [<f83e96e7>] drm_pci_exit+0x87/0xb0 [drm]
 [<f8b3be2d>] i915_exit+0x1b/0x1ee [i915]
 [<c10b999c>] SyS_delete_module+0x14c/0x210
 [<c1079e8c>] ? trace_hardirqs_on_caller+0x12c/0x1d0
 [<c115a9bd>] ? ____fput+0xd/0x10
 [<c1002014>] do_fast_syscall_32+0xa4/0x450
 [<c149f6fa>] sysenter_past_esp+0x3b/0x5d
---[ end trace c2ecbc77760f10a0 ]---
Mapped at:
 [<c12b3183>] debug_dma_alloc_coherent+0x33/0x90
 [<f83e989c>] drm_pci_alloc+0x18c/0x1e0 [drm]
 [<f8acd59f>] intel_init_ring_buffer+0x2af/0x490 [i915]
 [<f8acd8b0>] intel_init_render_ring_buffer+0x130/0x750 [i915]
 [<f8aaea4e>] i915_gem_init_rings+0x1e/0x110 [i915]

v2: s/BUG_ON/WARN_ON/ since dim doens't like the former anymore

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Fixes: 5c6c600 ("drm/i915: Remove DRI1 ring accessors and API")
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v1)
Link: http://patchwork.freedesktop.org/patch/msgid/1452538112-5331-1-git-send-email-ville.syrjala@linux.intel.com
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index f6b2a814e629..0b1015de8536 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1922,6 +1922,17 @@ i915_dispatch_execbuffer(struct drm_i915_gem_request *req,
 	return 0;
 }
 
+static void cleanup_phys_status_page(struct intel_engine_cs *ring)
+{
+	struct drm_i915_private *dev_priv = to_i915(ring->dev);
+
+	if (!dev_priv->status_page_dmah)
+		return;
+
+	drm_pci_free(ring->dev, dev_priv->status_page_dmah);
+	ring->status_page.page_addr = NULL;
+}
+
 static void cleanup_status_page(struct intel_engine_cs *ring)
 {
 	struct drm_i915_gem_object *obj;
@@ -1938,9 +1949,9 @@ static void cleanup_status_page(struct intel_engine_cs *ring)
 
 static int init_status_page(struct intel_engine_cs *ring)
 {
-	struct drm_i915_gem_object *obj;
+	struct drm_i915_gem_object *obj = ring->status_page.obj;
 
-	if ((obj = ring->status_page.obj) == NULL) {
+	if (obj == NULL) {
 		unsigned flags;
 		int ret;
 
@@ -2134,7 +2145,7 @@ static int intel_init_ring_buffer(struct drm_device *dev,
 		if (ret)
 			goto error;
 	} else {
-		BUG_ON(ring->id != RCS);
+		WARN_ON(ring->id != RCS);
 		ret = init_phys_status_page(ring);
 		if (ret)
 			goto error;
@@ -2179,7 +2190,12 @@ void intel_cleanup_ring_buffer(struct intel_engine_cs *ring)
 	if (ring->cleanup)
 		ring->cleanup(ring);
 
-	cleanup_status_page(ring);
+	if (I915_NEED_GFX_HWS(ring->dev)) {
+		cleanup_status_page(ring);
+	} else {
+		WARN_ON(ring->id != RCS);
+		cleanup_phys_status_page(ring);
+	}
 
 	i915_cmd_parser_fini_ring(ring);
 	i915_gem_batch_pool_fini(&ring->batch_pool);

From 80220c4827ddde2b1c49ababa6c1ab0ad0691112 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Thu, 15 Oct 2015 17:01:58 +0300
Subject: [PATCH 079/424] drm/i915: skl_update_scaler() wants a rotation
 bitmask instead of bit number
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit fa5a7970d372c9c9beb3a0ce79ee1d0c23387d0a upstream.

Pass BIT(DRM_ROTATE_0) instead of DRM_ROTATE_0 to skl_update_scaler().
The former is a mask, the latter just the bit number.

Fortunately the only thing skl_update_scaler() does with the rotation
is check if it's 90/270 degrees or not, and so in this case it would
still do the right thing.

Cc: Chandra Konduru <chandra.konduru@intel.com>
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1444917718-28495-1-git-send-email-ville.syrjala@linux.intel.com
Fixes: 6156a45602f9 ("drm/i915: skylake primary plane scaling using shared scalers")
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/i915/intel_display.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index f859a5b87ed4..afa81691163d 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -4447,7 +4447,7 @@ int skl_update_scaler_crtc(struct intel_crtc_state *state)
 		      intel_crtc->base.base.id, intel_crtc->pipe, SKL_CRTC_INDEX);
 
 	return skl_update_scaler(state, !state->base.active, SKL_CRTC_INDEX,
-		&state->scaler_state.scaler_id, DRM_ROTATE_0,
+		&state->scaler_state.scaler_id, BIT(DRM_ROTATE_0),
 		state->pipe_src_w, state->pipe_src_h,
 		adjusted_mode->crtc_hdisplay, adjusted_mode->crtc_vdisplay);
 }

From 20d948d8b63538e5a1af20b275c4562a5a6bc470 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 11 Mar 2016 10:51:51 +0300
Subject: [PATCH 080/424] drm/amdkfd: uninitialized variable in
 dbgdev_wave_control_set_registers()

commit 93fce954427effee89e44a976299b15dd75b4bbc upstream.

At the end of the function we expect "status" to be zero, but it's
either -EINVAL or uninitialized.

Fixes: 788bf83db301 ('drm/amdkfd: Add wave control operation to debugger')
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
index c34c393e9aea..d5e19b5fbbfb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
@@ -513,7 +513,7 @@ static int dbgdev_wave_control_set_registers(
 				union SQ_CMD_BITS *in_reg_sq_cmd,
 				union GRBM_GFX_INDEX_BITS *in_reg_gfx_index)
 {
-	int status;
+	int status = 0;
 	union SQ_CMD_BITS reg_sq_cmd;
 	union GRBM_GFX_INDEX_BITS reg_gfx_index;
 	struct HsaDbgWaveMsgAMDGen2 *pMsg;

From e0a2d244dcb6068887ef7d3c3af302fe9152298f Mon Sep 17 00:00:00 2001
From: Akash Goel <akash.goel@intel.com>
Date: Fri, 11 Mar 2016 14:56:42 +0530
Subject: [PATCH 081/424] drm/i915: Fixup the free space logic in ring_prepare

commit d43f3ebf12f59c57782ec652da65ef61c2662b40 upstream.

Currently for the case where there is enough space at the end of Ring
buffer for accommodating only the base request, the wrapround is done
immediately and as a result the base request gets added at the start
of Ring buffer. But there may not be enough free space at the beginning
to accommodate the base request, as before the wraparound, the wait was
effectively done for the reserved_size free space from the start of
Ring buffer. In such a case there is a potential of Ring buffer overflow,
the instructions at the head of Ring (ACTHD) can get overwritten.

Since the base request can fit in the remaining space, there is no need
to wraparound immediately. The wraparound will anyway happen later when
the reserved part starts getting used.

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Akash Goel <akash.goel@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: http://patchwork.freedesktop.org/patch/msgid/1457688402-10411-1-git-send-email-akash.goel@intel.com
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
(cherry picked from commit 782f6bc0aba037436d6a04d19b23f8b61020a576)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/i915/intel_lrc.c        | 6 +++---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index d69547a65dbb..7058f75c7b42 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -776,11 +776,11 @@ static int logical_ring_prepare(struct drm_i915_gem_request *req, int bytes)
 		if (unlikely(total_bytes > remain_usable)) {
 			/*
 			 * The base request will fit but the reserved space
-			 * falls off the end. So only need to to wait for the
-			 * reserved size after flushing out the remainder.
+			 * falls off the end. So don't need an immediate wrap
+			 * and only need to effectively wait for the reserved
+			 * size space from the start of ringbuffer.
 			 */
 			wait_bytes = remain_actual + ringbuf->reserved_size;
-			need_wrap = true;
 		} else if (total_bytes > ringbuf->space) {
 			/* No wrapping required, just waiting. */
 			wait_bytes = total_bytes;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 0b1015de8536..9d48443bca2e 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2357,11 +2357,11 @@ static int __intel_ring_prepare(struct intel_engine_cs *ring, int bytes)
 		if (unlikely(total_bytes > remain_usable)) {
 			/*
 			 * The base request will fit but the reserved space
-			 * falls off the end. So only need to to wait for the
-			 * reserved size after flushing out the remainder.
+			 * falls off the end. So don't need an immediate wrap
+			 * and only need to effectively wait for the reserved
+			 * size space from the start of ringbuffer.
 			 */
 			wait_bytes = remain_actual + ringbuf->reserved_size;
-			need_wrap = true;
 		} else if (total_bytes > ringbuf->space) {
 			/* No wrapping required, just waiting. */
 			wait_bytes = total_bytes;

From f4276d5753538996fa93a34646554bfd92f6e071 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Thu, 14 Apr 2016 14:39:02 +0300
Subject: [PATCH 082/424] drm/i915: Use fw_domains_put_with_fifo() on HSW
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 31318a922395ec9e78d6e2ddf70779355afc7594 upstream.

HSW still has the wake FIFO, so let's check it.

Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Deepak S <deepak.s@linux.intel.com>
Fixes: 05a2fb157e44 ("drm/i915: Consolidate forcewake code")
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1460633942-24013-1-git-send-email-ville.syrjala@linux.intel.com
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
(cherry picked from commit 3d7d0c85e41afb5a05e98b3a8a72c38357f02594)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/i915/intel_uncore.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index 43cba129a0c0..cc91ae832ffb 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -1132,7 +1132,11 @@ static void intel_uncore_fw_domains_init(struct drm_device *dev)
 	} else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) {
 		dev_priv->uncore.funcs.force_wake_get =
 			fw_domains_get_with_thread_status;
-		dev_priv->uncore.funcs.force_wake_put = fw_domains_put;
+		if (IS_HASWELL(dev))
+			dev_priv->uncore.funcs.force_wake_put =
+				fw_domains_put_with_fifo;
+		else
+			dev_priv->uncore.funcs.force_wake_put = fw_domains_put;
 		fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER,
 			       FORCEWAKE_MT, FORCEWAKE_ACK_HSW);
 	} else if (IS_IVYBRIDGE(dev)) {

From 0ea82073cb5e7039299350aba5bc135994c8cbda Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Mon, 18 Apr 2016 13:57:48 +0300
Subject: [PATCH 083/424] perf intel-pt: Fix segfault tracing transactions

commit 1342e0b7a6c1a060c593037fbac9f4b717f1cb3b upstream.

Tracing a workload that uses transactions gave a seg fault as follows:

  perf record -e intel_pt// workload
  perf report
  Program received signal SIGSEGV, Segmentation fault.
  0x000000000054b58c in intel_pt_reset_last_branch_rb (ptq=0x1a36110)
  	at util/intel-pt.c:929
  929 ptq->last_branch_rb->nr = 0;
  (gdb) p ptq->last_branch_rb
  $1 = (struct branch_stack *) 0x0
  (gdb) up
  1148 intel_pt_reset_last_branch_rb(ptq);
  (gdb) l
  1143 if (ret)
  1144 pr_err("Intel Processor Trace: failed to deliver transaction event
  1145 ret);
  1146
  1147 if (pt->synth_opts.callchain)
  1148 intel_pt_reset_last_branch_rb(ptq);
  1149
  1150 return ret;
  1151 }
  1152
  (gdb) p pt->synth_opts.callchain
  $2 = true
  (gdb)
  (gdb) bt
   #0 0x000000000054b58c in intel_pt_reset_last_branch_rb (ptq=0x1a36110)
   #1 0x000000000054c1e0 in intel_pt_synth_transaction_sample (ptq=0x1a36110)
   #2 0x000000000054c5b2 in intel_pt_sample (ptq=0x1a36110)

Caused by checking the 'callchain' flag when it should have been the
'last_branch' flag.  Fix that.

Reported-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Fixes: f14445ee72c5 ("perf intel-pt: Support generating branch stack")
Link: http://lkml.kernel.org/r/1460977068-11566-1-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/perf/util/intel-pt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 97f963a3dcb9..9227c2f076c3 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -1127,7 +1127,7 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
 		pr_err("Intel Processor Trace: failed to deliver transaction event, error %d\n",
 		       ret);
 
-	if (pt->synth_opts.callchain)
+	if (pt->synth_opts.last_branch)
 		intel_pt_reset_last_branch_rb(ptq);
 
 	return ret;

From 46b9a1550e0ecf73b83c02c8435eedc01dde2055 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Wed, 13 Apr 2016 13:59:14 +1000
Subject: [PATCH 084/424] i2c: cpm: Fix build break due to incompatible pointer
 types

commit 609d5a1b2b35bb62b4b3750396e55453160c2a17 upstream.

Since commit ea8daa7b9784 ("kbuild: Add option to turn incompatible
pointer check into error"), assignments from an incompatible pointer
types have become a hard error, eg:

  drivers/i2c/busses/i2c-cpm.c:545:91: error: passing argument 3 of
  'dma_alloc_coherent' from incompatible pointer type

Fix the build break by converting txdma & rxdma to dma_addr_t.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Fixes: ea8daa7b9784
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/i2c/busses/i2c-cpm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/i2c/busses/i2c-cpm.c b/drivers/i2c/busses/i2c-cpm.c
index 714bdc837769..b167ab25310a 100644
--- a/drivers/i2c/busses/i2c-cpm.c
+++ b/drivers/i2c/busses/i2c-cpm.c
@@ -116,8 +116,8 @@ struct cpm_i2c {
 	cbd_t __iomem *rbase;
 	u_char *txbuf[CPM_MAXBD];
 	u_char *rxbuf[CPM_MAXBD];
-	u32 txdma[CPM_MAXBD];
-	u32 rxdma[CPM_MAXBD];
+	dma_addr_t txdma[CPM_MAXBD];
+	dma_addr_t rxdma[CPM_MAXBD];
 };
 
 static irqreturn_t cpm_i2c_interrupt(int irq, void *dev_id)

From 3b566a5c38b7311a545ac536a3b43944153918d2 Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javier@osg.samsung.com>
Date: Sat, 16 Apr 2016 21:14:52 -0400
Subject: [PATCH 085/424] i2c: exynos5: Fix possible ABBA deadlock by keeping
 I2C clock prepared

commit 10ff4c5239a137abfc896ec73ef3d15a0f86a16a upstream.

The exynos5 I2C controller driver always prepares and enables a clock
before using it and then disables unprepares it when the clock is not
used anymore.

But this can cause a possible ABBA deadlock in some scenarios since a
driver that uses regmap to access its I2C registers, will first grab
the regmap lock and then the I2C xfer function will grab the prepare
lock when preparing the I2C clock. But since the clock driver also
uses regmap for I2C accesses, preparing a clock will first grab the
prepare lock and then the regmap lock when using the regmap API.

An example of this happens on the Exynos5422 Odroid XU4 board where a
s2mps11 PMIC is used and both the s2mps11 regulators and clk drivers
share the same I2C regmap.

The possible deadlock is reported by the kernel lockdep:

  Possible unsafe locking scenario:

        CPU0                    CPU1
        ----                    ----
   lock(sec_core:428:(regmap)->lock);
                                lock(prepare_lock);
                                lock(sec_core:428:(regmap)->lock);
   lock(prepare_lock);

  *** DEADLOCK ***

Fix it by leaving the code prepared on probe and use {en,dis}able in
the I2C transfer function.

This patch is similar to commit 34e81ad5f0b6 ("i2c: s3c2410: fix ABBA
deadlock by keeping clock prepared") that fixes the same bug in other
driver for an I2C controller found in Samsung SoCs.

Reported-by: Anand Moon <linux.amoon@gmail.com>
Signed-off-by: Javier Martinez Canillas <javier@osg.samsung.com>
Reviewed-by: Anand Moon <linux.amoon@gmail.com>
Reviewed-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/i2c/busses/i2c-exynos5.c | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/drivers/i2c/busses/i2c-exynos5.c b/drivers/i2c/busses/i2c-exynos5.c
index b29c7500461a..f54ece8fce78 100644
--- a/drivers/i2c/busses/i2c-exynos5.c
+++ b/drivers/i2c/busses/i2c-exynos5.c
@@ -671,7 +671,9 @@ static int exynos5_i2c_xfer(struct i2c_adapter *adap,
 		return -EIO;
 	}
 
-	clk_prepare_enable(i2c->clk);
+	ret = clk_enable(i2c->clk);
+	if (ret)
+		return ret;
 
 	for (i = 0; i < num; i++, msgs++) {
 		stop = (i == num - 1);
@@ -695,7 +697,7 @@ static int exynos5_i2c_xfer(struct i2c_adapter *adap,
 	}
 
  out:
-	clk_disable_unprepare(i2c->clk);
+	clk_disable(i2c->clk);
 	return ret;
 }
 
@@ -747,7 +749,9 @@ static int exynos5_i2c_probe(struct platform_device *pdev)
 		return -ENOENT;
 	}
 
-	clk_prepare_enable(i2c->clk);
+	ret = clk_prepare_enable(i2c->clk);
+	if (ret)
+		return ret;
 
 	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	i2c->regs = devm_ioremap_resource(&pdev->dev, mem);
@@ -799,6 +803,10 @@ static int exynos5_i2c_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, i2c);
 
+	clk_disable(i2c->clk);
+
+	return 0;
+
  err_clk:
 	clk_disable_unprepare(i2c->clk);
 	return ret;
@@ -810,6 +818,8 @@ static int exynos5_i2c_remove(struct platform_device *pdev)
 
 	i2c_del_adapter(&i2c->adap);
 
+	clk_unprepare(i2c->clk);
+
 	return 0;
 }
 
@@ -821,6 +831,8 @@ static int exynos5_i2c_suspend_noirq(struct device *dev)
 
 	i2c->suspended = 1;
 
+	clk_unprepare(i2c->clk);
+
 	return 0;
 }
 
@@ -830,7 +842,9 @@ static int exynos5_i2c_resume_noirq(struct device *dev)
 	struct exynos5_i2c *i2c = platform_get_drvdata(pdev);
 	int ret = 0;
 
-	clk_prepare_enable(i2c->clk);
+	ret = clk_prepare_enable(i2c->clk);
+	if (ret)
+		return ret;
 
 	ret = exynos5_hsi2c_clock_setup(i2c);
 	if (ret) {
@@ -839,7 +853,7 @@ static int exynos5_i2c_resume_noirq(struct device *dev)
 	}
 
 	exynos5_i2c_init(i2c);
-	clk_disable_unprepare(i2c->clk);
+	clk_disable(i2c->clk);
 	i2c->suspended = 0;
 
 	return 0;

From 7f8150d728eef82de079ce4fc9e8b4c47aca101e Mon Sep 17 00:00:00 2001
From: Azael Avalos <coproscefalo@gmail.com>
Date: Fri, 22 Apr 2016 09:29:36 -0600
Subject: [PATCH 086/424] toshiba_acpi: Fix regression caused by hotkey
 enabling value

commit a30b8f81d9d6fe24eab8a023794548b048f08e3c upstream.

Commit 52cbae0127ad ("toshiba_acpi: Change default Hotkey enabling value")
changed the hotkeys enabling value, as it was the same value Windows uses,
however, it turns out that the value tells the EC that the driver will now
take care of the hardware events like the physical RFKill switch or the
pointing device toggle button.

This patch reverts such commit by changing the default hotkey enabling
value to 0x09, which enables hotkey events only, making the hardware
buttons working again.

Fixes bugs 113331 and 114941.

Signed-off-by: Azael Avalos <coproscefalo@gmail.com>
Signed-off-by: Darren Hart <dvhart@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/platform/x86/toshiba_acpi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c
index b0f62141ea4d..f774cb576ffa 100644
--- a/drivers/platform/x86/toshiba_acpi.c
+++ b/drivers/platform/x86/toshiba_acpi.c
@@ -131,7 +131,7 @@ MODULE_LICENSE("GPL");
 /* Field definitions */
 #define HCI_ACCEL_MASK			0x7fff
 #define HCI_HOTKEY_DISABLE		0x0b
-#define HCI_HOTKEY_ENABLE		0x01
+#define HCI_HOTKEY_ENABLE		0x09
 #define HCI_HOTKEY_SPECIAL_FUNCTIONS	0x10
 #define HCI_LCD_BRIGHTNESS_BITS		3
 #define HCI_LCD_BRIGHTNESS_SHIFT	(16-HCI_LCD_BRIGHTNESS_BITS)

From 4d32650fcd8c9097fa0f69d39f0aae80a4b7fd79 Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Fri, 29 Apr 2016 15:42:25 +0200
Subject: [PATCH 087/424] EDAC: i7core, sb_edac: Don't return NOTIFY_BAD from
 mce_decoder callback

commit c4fc1956fa31003bfbe4f597e359d751568e2954 upstream.

Both of these drivers can return NOTIFY_BAD, but this terminates
processing other callbacks that were registered later on the chain.
Since the driver did nothing to log the error it seems wrong to prevent
other interested parties from seeing it. E.g. neither of them had even
bothered to check the type of the error to see if it was a memory error
before the return NOTIFY_BAD.

Signed-off-by: Tony Luck <tony.luck@intel.com>
Acked-by: Aristeu Rozanski <aris@redhat.com>
Acked-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Link: http://lkml.kernel.org/r/72937355dd92318d2630979666063f8a2853495b.1461864507.git.tony.luck@intel.com
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/edac/i7core_edac.c | 2 +-
 drivers/edac/sb_edac.c     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c
index 01087a38da22..792bdae2b91d 100644
--- a/drivers/edac/i7core_edac.c
+++ b/drivers/edac/i7core_edac.c
@@ -1866,7 +1866,7 @@ static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val,
 
 	i7_dev = get_i7core_dev(mce->socketid);
 	if (!i7_dev)
-		return NOTIFY_BAD;
+		return NOTIFY_DONE;
 
 	mci = i7_dev->mci;
 	pvt = mci->pvt_info;
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index 90c3fe99c786..37649221f81c 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -2254,7 +2254,7 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val,
 
 	mci = get_mci_for_node_id(mce->socketid);
 	if (!mci)
-		return NOTIFY_BAD;
+		return NOTIFY_DONE;
 	pvt = mci->pvt_info;
 
 	/*

From 9d3e910464dbeaae0746ef29c0192caa3e0418c3 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 25 Jan 2016 18:07:33 +0100
Subject: [PATCH 088/424] ASoC: s3c24xx: use const snd_soc_component_driver
 pointer

commit ba4bc32eaa39ba7687f0958ae90eec94da613b46 upstream.

An older patch to convert the API in the s3c i2s driver
ended up passing a const pointer into a function that takes
a non-const pointer, so we now get a warning:

sound/soc/samsung/s3c2412-i2s.c: In function 's3c2412_iis_dev_probe':
sound/soc/samsung/s3c2412-i2s.c:172:9: error: passing argument 3 of 's3c_i2sv2_register_component' discards 'const' qualifier from pointer target type [-Werror=discarded-qualifiers]

However, the s3c_i2sv2_register_component() function again
passes the pointer into another function taking a const, so
we just need to change its prototype.

Fixes: eca3b01d0885 ("ASoC: switch over to use snd_soc_register_component() on s3c i2s")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/soc/samsung/s3c-i2s-v2.c | 2 +-
 sound/soc/samsung/s3c-i2s-v2.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/sound/soc/samsung/s3c-i2s-v2.c b/sound/soc/samsung/s3c-i2s-v2.c
index df65c5b494b1..b6ab3fc5789e 100644
--- a/sound/soc/samsung/s3c-i2s-v2.c
+++ b/sound/soc/samsung/s3c-i2s-v2.c
@@ -709,7 +709,7 @@ static int s3c2412_i2s_resume(struct snd_soc_dai *dai)
 #endif
 
 int s3c_i2sv2_register_component(struct device *dev, int id,
-			   struct snd_soc_component_driver *cmp_drv,
+			   const struct snd_soc_component_driver *cmp_drv,
 			   struct snd_soc_dai_driver *dai_drv)
 {
 	struct snd_soc_dai_ops *ops = (struct snd_soc_dai_ops *)dai_drv->ops;
diff --git a/sound/soc/samsung/s3c-i2s-v2.h b/sound/soc/samsung/s3c-i2s-v2.h
index 90abab364b49..d0684145ed1f 100644
--- a/sound/soc/samsung/s3c-i2s-v2.h
+++ b/sound/soc/samsung/s3c-i2s-v2.h
@@ -101,7 +101,7 @@ extern int s3c_i2sv2_probe(struct snd_soc_dai *dai,
  * soc core.
  */
 extern int s3c_i2sv2_register_component(struct device *dev, int id,
-					struct snd_soc_component_driver *cmp_drv,
+					const struct snd_soc_component_driver *cmp_drv,
 					struct snd_soc_dai_driver *dai_drv);
 
 #endif /* __SND_SOC_S3C24XX_S3C_I2SV2_I2S_H */

From c276b2c81f2a10f6d74e5cb1cb7d6b6c7ff85e74 Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Wed, 27 Jan 2016 14:26:18 +0100
Subject: [PATCH 089/424] ASoC: ssm4567: Reset device before regcache_sync()

commit 712a8038cc24dba668afe82f0413714ca87184e0 upstream.

When the ssm4567 is powered up the driver calles regcache_sync() to restore
the register map content. regcache_sync() assumes that the device is in its
power-on reset state. Make sure that this is the case by explicitly
resetting the ssm4567 register map before calling regcache_sync() otherwise
we might end up with a incorrect register map which leads to undefined
behaviour.

One such undefined behaviour was observed when returning from system
suspend while a playback stream is active, in that case the ssm4567 was
kept muted after resume.

Fixes: 1ee44ce03011 ("ASoC: ssm4567: Add driver for Analog Devices SSM4567 amplifier")
Reported-by: Harsha Priya <harshapriya.n@intel.com>
Tested-by: Fang, Yang A <yang.a.fang@intel.com>
Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/soc/codecs/ssm4567.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/sound/soc/codecs/ssm4567.c b/sound/soc/codecs/ssm4567.c
index e619d5651b09..080c78e88e10 100644
--- a/sound/soc/codecs/ssm4567.c
+++ b/sound/soc/codecs/ssm4567.c
@@ -352,6 +352,11 @@ static int ssm4567_set_power(struct ssm4567 *ssm4567, bool enable)
 	regcache_cache_only(ssm4567->regmap, !enable);
 
 	if (enable) {
+		ret = regmap_write(ssm4567->regmap, SSM4567_REG_SOFT_RESET,
+			0x00);
+		if (ret)
+			return ret;
+
 		ret = regmap_update_bits(ssm4567->regmap,
 			SSM4567_REG_POWER_CTRL,
 			SSM4567_POWER_SPWDN, 0x00);

From 99070b6b5154f69e1f85a6547e8113b03986de7f Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Fri, 18 Mar 2016 12:04:23 +0000
Subject: [PATCH 090/424] ASoC: dapm: Make sure we have a card when displaying
 component widgets

commit 47325078f2a3e543150e7df967e45756b2fff7ec upstream.

The dummy component is reused for all cards so we special case and don't
bind it to any of them.  This means that code like that displaying the
component widgets that tries to look at the card will crash.  In the
future we will fix this by ensuring that the dummy component looks like
other components but that is invasive and so not suitable for a fix.
Instead add a special case check here.

Reported-by: Harry Pan <harry.pan@intel.com>
Suggested-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/soc/soc-dapm.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index 416514fe9e63..afb70a5d4fd3 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -2188,6 +2188,13 @@ static ssize_t dapm_widget_show_component(struct snd_soc_component *cmpnt,
 	int count = 0;
 	char *state = "not set";
 
+	/* card won't be set for the dummy component, as a spot fix
+	 * we're checking for that case specifically here but in future
+	 * we will ensure that the dummy component looks like others.
+	 */
+	if (!cmpnt->card)
+		return 0;
+
 	list_for_each_entry(w, &cmpnt->card->widgets, list) {
 		if (w->dapm != dapm)
 			continue;

From d74252fd2010e660b0f4b2b7bca0feccaf0214c9 Mon Sep 17 00:00:00 2001
From: Sugar Zhang <sugar.zhang@rock-chips.com>
Date: Fri, 18 Mar 2016 14:54:22 +0800
Subject: [PATCH 091/424] ASoC: rt5640: Correct the digital interface data
 select

commit 653aa4645244042826f105aab1be3d01b3d493ca upstream.

this patch corrects the interface adc/dac control register definition
according to datasheet.

Signed-off-by: Sugar Zhang <sugar.zhang@rock-chips.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/soc/codecs/rt5640.c |  2 +-
 sound/soc/codecs/rt5640.h | 36 ++++++++++++++++++------------------
 2 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/sound/soc/codecs/rt5640.c b/sound/soc/codecs/rt5640.c
index f2beb1aa5763..b1c8bb39cdf1 100644
--- a/sound/soc/codecs/rt5640.c
+++ b/sound/soc/codecs/rt5640.c
@@ -359,7 +359,7 @@ static const DECLARE_TLV_DB_RANGE(bst_tlv,
 
 /* Interface data select */
 static const char * const rt5640_data_select[] = {
-	"Normal", "left copy to right", "right copy to left", "Swap"};
+	"Normal", "Swap", "left copy to right", "right copy to left"};
 
 static SOC_ENUM_SINGLE_DECL(rt5640_if1_dac_enum, RT5640_DIG_INF_DATA,
 			    RT5640_IF1_DAC_SEL_SFT, rt5640_data_select);
diff --git a/sound/soc/codecs/rt5640.h b/sound/soc/codecs/rt5640.h
index 3deb8babeabb..243f42633989 100644
--- a/sound/soc/codecs/rt5640.h
+++ b/sound/soc/codecs/rt5640.h
@@ -442,39 +442,39 @@
 #define RT5640_IF1_DAC_SEL_MASK			(0x3 << 14)
 #define RT5640_IF1_DAC_SEL_SFT			14
 #define RT5640_IF1_DAC_SEL_NOR			(0x0 << 14)
-#define RT5640_IF1_DAC_SEL_L2R			(0x1 << 14)
-#define RT5640_IF1_DAC_SEL_R2L			(0x2 << 14)
-#define RT5640_IF1_DAC_SEL_SWAP			(0x3 << 14)
+#define RT5640_IF1_DAC_SEL_SWAP			(0x1 << 14)
+#define RT5640_IF1_DAC_SEL_L2R			(0x2 << 14)
+#define RT5640_IF1_DAC_SEL_R2L			(0x3 << 14)
 #define RT5640_IF1_ADC_SEL_MASK			(0x3 << 12)
 #define RT5640_IF1_ADC_SEL_SFT			12
 #define RT5640_IF1_ADC_SEL_NOR			(0x0 << 12)
-#define RT5640_IF1_ADC_SEL_L2R			(0x1 << 12)
-#define RT5640_IF1_ADC_SEL_R2L			(0x2 << 12)
-#define RT5640_IF1_ADC_SEL_SWAP			(0x3 << 12)
+#define RT5640_IF1_ADC_SEL_SWAP			(0x1 << 12)
+#define RT5640_IF1_ADC_SEL_L2R			(0x2 << 12)
+#define RT5640_IF1_ADC_SEL_R2L			(0x3 << 12)
 #define RT5640_IF2_DAC_SEL_MASK			(0x3 << 10)
 #define RT5640_IF2_DAC_SEL_SFT			10
 #define RT5640_IF2_DAC_SEL_NOR			(0x0 << 10)
-#define RT5640_IF2_DAC_SEL_L2R			(0x1 << 10)
-#define RT5640_IF2_DAC_SEL_R2L			(0x2 << 10)
-#define RT5640_IF2_DAC_SEL_SWAP			(0x3 << 10)
+#define RT5640_IF2_DAC_SEL_SWAP			(0x1 << 10)
+#define RT5640_IF2_DAC_SEL_L2R			(0x2 << 10)
+#define RT5640_IF2_DAC_SEL_R2L			(0x3 << 10)
 #define RT5640_IF2_ADC_SEL_MASK			(0x3 << 8)
 #define RT5640_IF2_ADC_SEL_SFT			8
 #define RT5640_IF2_ADC_SEL_NOR			(0x0 << 8)
-#define RT5640_IF2_ADC_SEL_L2R			(0x1 << 8)
-#define RT5640_IF2_ADC_SEL_R2L			(0x2 << 8)
-#define RT5640_IF2_ADC_SEL_SWAP			(0x3 << 8)
+#define RT5640_IF2_ADC_SEL_SWAP			(0x1 << 8)
+#define RT5640_IF2_ADC_SEL_L2R			(0x2 << 8)
+#define RT5640_IF2_ADC_SEL_R2L			(0x3 << 8)
 #define RT5640_IF3_DAC_SEL_MASK			(0x3 << 6)
 #define RT5640_IF3_DAC_SEL_SFT			6
 #define RT5640_IF3_DAC_SEL_NOR			(0x0 << 6)
-#define RT5640_IF3_DAC_SEL_L2R			(0x1 << 6)
-#define RT5640_IF3_DAC_SEL_R2L			(0x2 << 6)
-#define RT5640_IF3_DAC_SEL_SWAP			(0x3 << 6)
+#define RT5640_IF3_DAC_SEL_SWAP			(0x1 << 6)
+#define RT5640_IF3_DAC_SEL_L2R			(0x2 << 6)
+#define RT5640_IF3_DAC_SEL_R2L			(0x3 << 6)
 #define RT5640_IF3_ADC_SEL_MASK			(0x3 << 4)
 #define RT5640_IF3_ADC_SEL_SFT			4
 #define RT5640_IF3_ADC_SEL_NOR			(0x0 << 4)
-#define RT5640_IF3_ADC_SEL_L2R			(0x1 << 4)
-#define RT5640_IF3_ADC_SEL_R2L			(0x2 << 4)
-#define RT5640_IF3_ADC_SEL_SWAP			(0x3 << 4)
+#define RT5640_IF3_ADC_SEL_SWAP			(0x1 << 4)
+#define RT5640_IF3_ADC_SEL_L2R			(0x2 << 4)
+#define RT5640_IF3_ADC_SEL_R2L			(0x3 << 4)
 
 /* REC Left Mixer Control 1 (0x3b) */
 #define RT5640_G_HP_L_RM_L_MASK			(0x7 << 13)

From b4ea6cf4883569a7c9c0297305033e9e678a03e4 Mon Sep 17 00:00:00 2001
From: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
Date: Thu, 3 Mar 2016 16:12:48 -0300
Subject: [PATCH 092/424] vb2-memops: Fix over allocation of frame vectors

commit 89a095668304e8a02502ffd35edacffdbf49aa8c upstream.

On page unaligned frames, create_framevec forces get_vaddr_frames to
allocate an extra page at the end of the buffer. Under some
circumstances, this leads to -EINVAL on VIDIOC_QBUF.

E.g:
We have vm_a that vm_area that goes from 0x1000 to 0x3000. And a
frame that goes from 0x1800 to 0x2800, i.e. 2 pages.

frame_vector_create will be called with the following params:

get_vaddr_frames(0x1800, 2, write, 1, vec);

get_vaddr will allocate the first page after checking that the memory
0x1800-0x27ff is valid, but it will not allocate the second page because
the range 0x2800-0x37ff is out of the vm_a range. This results in
create_framevec returning -EFAULT

Error Trace:
[ 9083.793015] video0: VIDIOC_QBUF: 00:00:00.00000000 index=1,
type=vid-cap, flags=0x00002002, field=any, sequence=0,
memory=userptr, bytesused=0, offset/userptr=0x7ff2b023ca80, length=5765760
[ 9083.793028] timecode=00:00:00 type=0, flags=0x00000000,
frames=0, userbits=0x00000000
[ 9083.793117] video0: VIDIOC_QBUF: error -22: 00:00:00.00000000
index=2, type=vid-cap, flags=0x00000000, field=any, sequence=0,
memory=userptr, bytesused=0, offset/userptr=0x7ff2b07bc500, length=5765760

Also use true instead of 1 since that argument is a bool in the
get_vaddr_frames() prototype.

Fixes: 21fb0cb7ec65 ("[media] vb2: Provide helpers for mapping virtual addresses")

Reported-by: Albert Antony <albert@newtec.dk>
Signed-off-by: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
[hans.verkuil@cisco.com: merged the 'bool' change into this patch]
Acked-by: Marek Szyprowski <m.szyprowski@samsung.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 drivers/media/v4l2-core/videobuf2-memops.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/media/v4l2-core/videobuf2-memops.c b/drivers/media/v4l2-core/videobuf2-memops.c
index dbec5923fcf0..3c3b517f1d1c 100644
--- a/drivers/media/v4l2-core/videobuf2-memops.c
+++ b/drivers/media/v4l2-core/videobuf2-memops.c
@@ -49,7 +49,7 @@ struct frame_vector *vb2_create_framevec(unsigned long start,
 	vec = frame_vector_create(nr);
 	if (!vec)
 		return ERR_PTR(-ENOMEM);
-	ret = get_vaddr_frames(start, nr, write, 1, vec);
+	ret = get_vaddr_frames(start & PAGE_MASK, nr, write, true, vec);
 	if (ret < 0)
 		goto out_destroy;
 	/* We accept only complete set of PFNs */

From a9da0b3dc72e074a2f84fad5f176750968a76bdb Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Fri, 22 Apr 2016 04:00:50 -0300
Subject: [PATCH 093/424] v4l2-dv-timings.h: fix polarity for 4k formats

commit 3020ca711871fdaf0c15c8bab677a6bc302e28fe upstream.

The VSync polarity was negative instead of positive for the 4k CEA formats.
I probably copy-and-pasted these from the DMT 4k format, which does have a
negative VSync polarity.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Reported-by: Martin Bugge <marbugge@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/uapi/linux/v4l2-dv-timings.h | 30 ++++++++++++++++++----------
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/include/uapi/linux/v4l2-dv-timings.h b/include/uapi/linux/v4l2-dv-timings.h
index c039f1d68a09..086168e18ca8 100644
--- a/include/uapi/linux/v4l2-dv-timings.h
+++ b/include/uapi/linux/v4l2-dv-timings.h
@@ -183,7 +183,8 @@
 
 #define V4L2_DV_BT_CEA_3840X2160P24 { \
 	.type = V4L2_DV_BT_656_1120, \
-	V4L2_INIT_BT_TIMINGS(3840, 2160, 0, V4L2_DV_HSYNC_POS_POL, \
+	V4L2_INIT_BT_TIMINGS(3840, 2160, 0, \
+		V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \
 		297000000, 1276, 88, 296, 8, 10, 72, 0, 0, 0, \
 		V4L2_DV_BT_STD_CEA861, \
 		V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \
@@ -191,14 +192,16 @@
 
 #define V4L2_DV_BT_CEA_3840X2160P25 { \
 	.type = V4L2_DV_BT_656_1120, \
-	V4L2_INIT_BT_TIMINGS(3840, 2160, 0, V4L2_DV_HSYNC_POS_POL, \
+	V4L2_INIT_BT_TIMINGS(3840, 2160, 0, \
+		V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \
 		297000000, 1056, 88, 296, 8, 10, 72, 0, 0, 0, \
 		V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_IS_CE_VIDEO) \
 }
 
 #define V4L2_DV_BT_CEA_3840X2160P30 { \
 	.type = V4L2_DV_BT_656_1120, \
-	V4L2_INIT_BT_TIMINGS(3840, 2160, 0, V4L2_DV_HSYNC_POS_POL, \
+	V4L2_INIT_BT_TIMINGS(3840, 2160, 0, \
+		V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \
 		297000000, 176, 88, 296, 8, 10, 72, 0, 0, 0, \
 		V4L2_DV_BT_STD_CEA861, \
 		V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \
@@ -206,14 +209,16 @@
 
 #define V4L2_DV_BT_CEA_3840X2160P50 { \
 	.type = V4L2_DV_BT_656_1120, \
-	V4L2_INIT_BT_TIMINGS(3840, 2160, 0, V4L2_DV_HSYNC_POS_POL, \
+	V4L2_INIT_BT_TIMINGS(3840, 2160, 0, \
+		V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \
 		594000000, 1056, 88, 296, 8, 10, 72, 0, 0, 0, \
 		V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_IS_CE_VIDEO) \
 }
 
 #define V4L2_DV_BT_CEA_3840X2160P60 { \
 	.type = V4L2_DV_BT_656_1120, \
-	V4L2_INIT_BT_TIMINGS(3840, 2160, 0, V4L2_DV_HSYNC_POS_POL, \
+	V4L2_INIT_BT_TIMINGS(3840, 2160, 0, \
+		V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \
 		594000000, 176, 88, 296, 8, 10, 72, 0, 0, 0, \
 		V4L2_DV_BT_STD_CEA861, \
 		V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \
@@ -221,7 +226,8 @@
 
 #define V4L2_DV_BT_CEA_4096X2160P24 { \
 	.type = V4L2_DV_BT_656_1120, \
-	V4L2_INIT_BT_TIMINGS(4096, 2160, 0, V4L2_DV_HSYNC_POS_POL, \
+	V4L2_INIT_BT_TIMINGS(4096, 2160, 0, \
+		V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \
 		297000000, 1020, 88, 296, 8, 10, 72, 0, 0, 0, \
 		V4L2_DV_BT_STD_CEA861, \
 		V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \
@@ -229,14 +235,16 @@
 
 #define V4L2_DV_BT_CEA_4096X2160P25 { \
 	.type = V4L2_DV_BT_656_1120, \
-	V4L2_INIT_BT_TIMINGS(4096, 2160, 0, V4L2_DV_HSYNC_POS_POL, \
+	V4L2_INIT_BT_TIMINGS(4096, 2160, 0, \
+		V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \
 		297000000, 968, 88, 128, 8, 10, 72, 0, 0, 0, \
 		V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_IS_CE_VIDEO) \
 }
 
 #define V4L2_DV_BT_CEA_4096X2160P30 { \
 	.type = V4L2_DV_BT_656_1120, \
-	V4L2_INIT_BT_TIMINGS(4096, 2160, 0, V4L2_DV_HSYNC_POS_POL, \
+	V4L2_INIT_BT_TIMINGS(4096, 2160, 0, \
+		V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \
 		297000000, 88, 88, 128, 8, 10, 72, 0, 0, 0, \
 		V4L2_DV_BT_STD_CEA861, \
 		V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \
@@ -244,14 +252,16 @@
 
 #define V4L2_DV_BT_CEA_4096X2160P50 { \
 	.type = V4L2_DV_BT_656_1120, \
-	V4L2_INIT_BT_TIMINGS(4096, 2160, 0, V4L2_DV_HSYNC_POS_POL, \
+	V4L2_INIT_BT_TIMINGS(4096, 2160, 0, \
+		V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \
 		594000000, 968, 88, 128, 8, 10, 72, 0, 0, 0, \
 		V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_IS_CE_VIDEO) \
 }
 
 #define V4L2_DV_BT_CEA_4096X2160P60 { \
 	.type = V4L2_DV_BT_656_1120, \
-	V4L2_INIT_BT_TIMINGS(4096, 2160, 0, V4L2_DV_HSYNC_POS_POL, \
+	V4L2_INIT_BT_TIMINGS(4096, 2160, 0, \
+		V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \
 		594000000, 88, 88, 128, 8, 10, 72, 0, 0, 0, \
 		V4L2_DV_BT_STD_CEA861, \
 		V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \

From b184522f688a31765a24081ed231e480e76edae6 Mon Sep 17 00:00:00 2001
From: Michael Neuling <mikey@neuling.org>
Date: Fri, 22 Apr 2016 14:57:48 +1000
Subject: [PATCH 094/424] cxl: Keep IRQ mappings on context teardown

commit d6776bba44d9752f6cdf640046070e71ee4bba7b upstream.

Keep IRQ mappings on context teardown.  This won't leak IRQs as if we
allocate the mapping again, the generic code will give the same
mapping used last time.

Doing this works around a race in the generic code. Masking the
interrupt introduces a race which can crash the kernel or result in
IRQ that is never EOIed. The lost of EOI results in all subsequent
mappings to the same HW IRQ never receiving an interrupt.

We've seen this race with cxl test cases which are doing heavy context
startup and teardown at the same time as heavy interrupt load.

A fix to the generic code is being investigated also.

Signed-off-by: Michael Neuling <mikey@neuling.org>
Tested-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
Acked-by: Ian Munsie <imunsie@au1.ibm.com>
Tested-by: Vaibhav Jain <vaibhav@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/cxl/irq.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/misc/cxl/irq.c b/drivers/misc/cxl/irq.c
index 09a406058c46..efbb6945eb18 100644
--- a/drivers/misc/cxl/irq.c
+++ b/drivers/misc/cxl/irq.c
@@ -288,7 +288,6 @@ unsigned int cxl_map_irq(struct cxl *adapter, irq_hw_number_t hwirq,
 void cxl_unmap_irq(unsigned int virq, void *cookie)
 {
 	free_irq(virq, cookie);
-	irq_dispose_mapping(virq);
 }
 
 static int cxl_register_one_irq(struct cxl *adapter,

From 29ebbba744cf8951202b5f4ea62b4a297f4662c1 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Thu, 31 Mar 2016 19:03:25 +0300
Subject: [PATCH 095/424] IB/mlx5: Expose correct max_sge_rd limit

commit 986ef95ecdd3eb6fa29433e68faa94c7624083be upstream.

mlx5 devices (Connect-IB, ConnectX-4, ConnectX-4-LX) has a limitation
where rdma read work queue entries cannot exceed 512 bytes.
A rdma_read wqe needs to fit in 512 bytes:
- wqe control segment (16 bytes)
- rdma segment (16 bytes)
- scatter elements (16 bytes each)

So max_sge_rd should be: (512 - 16 - 16) / 16 = 30.

Reported-by: Christoph Hellwig <hch@lst.de>
Tested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sagi Grimberg <sagig@grimberg.me>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/infiniband/hw/mlx5/main.c |  2 +-
 include/linux/mlx5/device.h       | 11 +++++++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index c4e091528390..721d63f5b461 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -273,7 +273,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
 		     sizeof(struct mlx5_wqe_ctrl_seg)) /
 		     sizeof(struct mlx5_wqe_data_seg);
 	props->max_sge = min(max_rq_sg, max_sq_sg);
-	props->max_sge_rd = props->max_sge;
+	props->max_sge_rd	   = MLX5_MAX_SGE_RD;
 	props->max_cq		   = 1 << MLX5_CAP_GEN(mdev, log_max_cq);
 	props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1;
 	props->max_mr		   = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 0b473cbfa7ef..a91b67b18a73 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -334,6 +334,17 @@ enum {
 	MLX5_CAP_OFF_CMDIF_CSUM		= 46,
 };
 
+enum {
+	/*
+	 * Max wqe size for rdma read is 512 bytes, so this
+	 * limits our max_sge_rd as the wqe needs to fit:
+	 * - ctrl segment (16 bytes)
+	 * - rdma segment (16 bytes)
+	 * - scatter elements (16 bytes each)
+	 */
+	MLX5_MAX_SGE_RD	= (512 - 16 - 16) / 16
+};
+
 struct mlx5_inbox_hdr {
 	__be16		opcode;
 	u8		rsvd[4];

From c92003c18feb8159cbf64bc0afa7b048869fe3c6 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Date: Sun, 10 Apr 2016 19:13:13 -0600
Subject: [PATCH 096/424] IB/security: Restrict use of the write() interface

commit e6bd18f57aad1a2d1ef40e646d03ed0f2515c9e3 upstream.

The drivers/infiniband stack uses write() as a replacement for
bi-directional ioctl().  This is not safe. There are ways to
trigger write calls that result in the return structure that
is normally written to user space being shunted off to user
specified kernel memory instead.

For the immediate repair, detect and deny suspicious accesses to
the write API.

For long term, update the user space libraries and the kernel API
to something that doesn't present the same security vulnerabilities
(likely a structured ioctl() interface).

The impacted uAPI interfaces are generally only available if
hardware from drivers/infiniband is installed in the system.

Reported-by: Jann Horn <jann@thejh.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
[ Expanded check to all known write() entry points ]
Signed-off-by: Doug Ledford <dledford@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/infiniband/core/ucm.c            |  4 ++++
 drivers/infiniband/core/ucma.c           |  3 +++
 drivers/infiniband/core/uverbs_main.c    |  5 +++++
 drivers/infiniband/hw/qib/qib_file_ops.c |  5 +++++
 drivers/staging/rdma/hfi1/TODO           |  2 +-
 drivers/staging/rdma/hfi1/file_ops.c     |  6 ++++++
 include/rdma/ib.h                        | 16 ++++++++++++++++
 7 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index 6b4e8a008bc0..564adf3116e8 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -48,6 +48,7 @@
 
 #include <asm/uaccess.h>
 
+#include <rdma/ib.h>
 #include <rdma/ib_cm.h>
 #include <rdma/ib_user_cm.h>
 #include <rdma/ib_marshall.h>
@@ -1103,6 +1104,9 @@ static ssize_t ib_ucm_write(struct file *filp, const char __user *buf,
 	struct ib_ucm_cmd_hdr hdr;
 	ssize_t result;
 
+	if (WARN_ON_ONCE(!ib_safe_file_access(filp)))
+		return -EACCES;
+
 	if (len < sizeof(hdr))
 		return -EINVAL;
 
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 8b5a934e1133..886f61ea6cc7 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -1574,6 +1574,9 @@ static ssize_t ucma_write(struct file *filp, const char __user *buf,
 	struct rdma_ucm_cmd_hdr hdr;
 	ssize_t ret;
 
+	if (WARN_ON_ONCE(!ib_safe_file_access(filp)))
+		return -EACCES;
+
 	if (len < sizeof(hdr))
 		return -EINVAL;
 
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index e3ef28861be6..24f3ca2c4ad7 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -48,6 +48,8 @@
 
 #include <asm/uaccess.h>
 
+#include <rdma/ib.h>
+
 #include "uverbs.h"
 
 MODULE_AUTHOR("Roland Dreier");
@@ -682,6 +684,9 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
 	int srcu_key;
 	ssize_t ret;
 
+	if (WARN_ON_ONCE(!ib_safe_file_access(filp)))
+		return -EACCES;
+
 	if (count < sizeof hdr)
 		return -EINVAL;
 
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index e449e394963f..24f4a782e0f4 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -45,6 +45,8 @@
 #include <linux/export.h>
 #include <linux/uio.h>
 
+#include <rdma/ib.h>
+
 #include "qib.h"
 #include "qib_common.h"
 #include "qib_user_sdma.h"
@@ -2067,6 +2069,9 @@ static ssize_t qib_write(struct file *fp, const char __user *data,
 	ssize_t ret = 0;
 	void *dest;
 
+	if (WARN_ON_ONCE(!ib_safe_file_access(fp)))
+		return -EACCES;
+
 	if (count < sizeof(cmd.type)) {
 		ret = -EINVAL;
 		goto bail;
diff --git a/drivers/staging/rdma/hfi1/TODO b/drivers/staging/rdma/hfi1/TODO
index 05de0dad8762..4c6f1d7d2eaf 100644
--- a/drivers/staging/rdma/hfi1/TODO
+++ b/drivers/staging/rdma/hfi1/TODO
@@ -3,4 +3,4 @@ July, 2015
 - Remove unneeded file entries in sysfs
 - Remove software processing of IB protocol and place in library for use
   by qib, ipath (if still present), hfi1, and eventually soft-roce
-
+- Replace incorrect uAPI
diff --git a/drivers/staging/rdma/hfi1/file_ops.c b/drivers/staging/rdma/hfi1/file_ops.c
index aae9826ec62b..c851e51b1dc3 100644
--- a/drivers/staging/rdma/hfi1/file_ops.c
+++ b/drivers/staging/rdma/hfi1/file_ops.c
@@ -62,6 +62,8 @@
 #include <linux/cred.h>
 #include <linux/uio.h>
 
+#include <rdma/ib.h>
+
 #include "hfi.h"
 #include "pio.h"
 #include "device.h"
@@ -214,6 +216,10 @@ static ssize_t hfi1_file_write(struct file *fp, const char __user *data,
 	int uctxt_required = 1;
 	int must_be_root = 0;
 
+	/* FIXME: This interface cannot continue out of staging */
+	if (WARN_ON_ONCE(!ib_safe_file_access(fp)))
+		return -EACCES;
+
 	if (count < sizeof(cmd)) {
 		ret = -EINVAL;
 		goto bail;
diff --git a/include/rdma/ib.h b/include/rdma/ib.h
index cf8f9e700e48..a6b93706b0fc 100644
--- a/include/rdma/ib.h
+++ b/include/rdma/ib.h
@@ -34,6 +34,7 @@
 #define _RDMA_IB_H
 
 #include <linux/types.h>
+#include <linux/sched.h>
 
 struct ib_addr {
 	union {
@@ -86,4 +87,19 @@ struct sockaddr_ib {
 	__u64			sib_scope_id;
 };
 
+/*
+ * The IB interfaces that use write() as bi-directional ioctl() are
+ * fundamentally unsafe, since there are lots of ways to trigger "write()"
+ * calls from various contexts with elevated privileges. That includes the
+ * traditional suid executable error message writes, but also various kernel
+ * interfaces that can write to file descriptors.
+ *
+ * This function provides protection for the legacy API by restricting the
+ * calling context.
+ */
+static inline bool ib_safe_file_access(struct file *filp)
+{
+	return filp->f_cred == current_cred() && segment_eq(get_fs(), USER_DS);
+}
+
 #endif /* _RDMA_IB_H */

From 513f5c33b5208dbd090f56c843aead053cb3d7a3 Mon Sep 17 00:00:00 2001
From: Laszlo Ersek <lersek@redhat.com>
Date: Thu, 21 Apr 2016 18:21:11 +0200
Subject: [PATCH 097/424] efi: Fix out-of-bounds read in variable_matches()

commit 630ba0cc7a6dbafbdee43795617c872b35cde1b4 upstream.

The variable_matches() function can currently read "var_name[len]", for
example when:

 - var_name[0] == 'a',
 - len == 1
 - match_name points to the NUL-terminated string "ab".

This function is supposed to accept "var_name" inputs that are not
NUL-terminated (hence the "len" parameter"). Document the function, and
access "var_name[*match]" only if "*match" is smaller than "len".

Reported-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Laszlo Ersek <lersek@redhat.com>
Cc: Peter Jones <pjones@redhat.com>
Cc: Matthew Garrett <mjg59@coreos.com>
Cc: Jason Andryuk <jandryuk@gmail.com>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Link: http://thread.gmane.org/gmane.comp.freedesktop.xorg.drivers.intel/86906
Signed-off-by: Matt Fleming <matt@codeblueprint.co.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/firmware/efi/vars.c | 37 ++++++++++++++++++++++++++-----------
 1 file changed, 26 insertions(+), 11 deletions(-)

diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c
index 7f2ea21c730d..6f182fd91a6d 100644
--- a/drivers/firmware/efi/vars.c
+++ b/drivers/firmware/efi/vars.c
@@ -202,29 +202,44 @@ static const struct variable_validate variable_validate[] = {
 	{ NULL_GUID, "", NULL },
 };
 
+/*
+ * Check if @var_name matches the pattern given in @match_name.
+ *
+ * @var_name: an array of @len non-NUL characters.
+ * @match_name: a NUL-terminated pattern string, optionally ending in "*". A
+ *              final "*" character matches any trailing characters @var_name,
+ *              including the case when there are none left in @var_name.
+ * @match: on output, the number of non-wildcard characters in @match_name
+ *         that @var_name matches, regardless of the return value.
+ * @return: whether @var_name fully matches @match_name.
+ */
 static bool
 variable_matches(const char *var_name, size_t len, const char *match_name,
 		 int *match)
 {
 	for (*match = 0; ; (*match)++) {
 		char c = match_name[*match];
-		char u = var_name[*match];
 
-		/* Wildcard in the matching name means we've matched */
-		if (c == '*')
+		switch (c) {
+		case '*':
+			/* Wildcard in @match_name means we've matched. */
 			return true;
 
-		/* Case sensitive match */
-		if (!c && *match == len)
-			return true;
+		case '\0':
+			/* @match_name has ended. Has @var_name too? */
+			return (*match == len);
 
-		if (c != u)
+		default:
+			/*
+			 * We've reached a non-wildcard char in @match_name.
+			 * Continue only if there's an identical character in
+			 * @var_name.
+			 */
+			if (*match < len && c == var_name[*match])
+				continue;
 			return false;
-
-		if (!c)
-			return true;
+		}
 	}
-	return true;
 }
 
 bool

From b8f80ba7e09ca1945946d4a6d7391c0795ff99f7 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 1 Feb 2016 22:06:55 +0000
Subject: [PATCH 098/424] efi: Expose non-blocking set_variable() wrapper to
 efivars

commit 9c6672ac9c91f7eb1ec436be1442b8c26d098e55 upstream.

Commit 6d80dba1c9fe ("efi: Provide a non-blocking SetVariable()
operation") implemented a non-blocking alternative for the UEFI
SetVariable() invocation performed by efivars, since it may
occur in atomic context. However, this version of the function
was never exposed via the efivars struct, so the non-blocking
versions was not actually callable. Fix that.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-efi@vger.kernel.org
Fixes: 6d80dba1c9fe ("efi: Provide a non-blocking SetVariable() operation")
Link: http://lkml.kernel.org/r/1454364428-494-2-git-send-email-matt@codeblueprint.co.uk
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/firmware/efi/efi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 027ca212179f..3b52677f459a 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -180,6 +180,7 @@ static int generic_ops_register(void)
 {
 	generic_ops.get_variable = efi.get_variable;
 	generic_ops.set_variable = efi.set_variable;
+	generic_ops.set_variable_nonblocking = efi.set_variable_nonblocking;
 	generic_ops.get_next_variable = efi.get_next_variable;
 	generic_ops.query_variable_store = efi_query_variable_store;
 

From 01d5ccd341290e771ac6b94b08c220df6f81a630 Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Wed, 27 Apr 2016 14:22:32 -0600
Subject: [PATCH 099/424] x86/apic: Handle zero vector gracefully in
 clear_vector_irq()

commit 1bdb8970392a68489b469c3a330a1adb5ef61beb upstream.

If x86_vector_alloc_irq() fails x86_vector_free_irqs() is invoked to cleanup
the already allocated vectors. This subsequently calls clear_vector_irq().

The failed irq has no vector assigned, which triggers the BUG_ON(!vector) in
clear_vector_irq().

We cannot suppress the call to x86_vector_free_irqs() for the failed
interrupt, because the other data related to this irq must be cleaned up as
well. So calling clear_vector_irq() with vector == 0 is legitimate.

Remove the BUG_ON and return if vector is zero,

[ tglx: Massaged changelog ]

Fixes: b5dc8e6c21e7 "x86/irq: Use hierarchical irqdomain to manage CPU interrupt vectors"
Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/apic/vector.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 7af2505f20c2..df6b4eeac0bd 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -254,7 +254,8 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data)
 	struct irq_desc *desc;
 	int cpu, vector;
 
-	BUG_ON(!data->cfg.vector);
+	if (!data->cfg.vector)
+		return;
 
 	vector = data->cfg.vector;
 	for_each_cpu_and(cpu, data->domain, cpu_online_mask)

From 2da9606aea5a8fd1b710f8c8dd5295da4825e9cd Mon Sep 17 00:00:00 2001
From: Roman Pen <roman.penyaev@profitbricks.com>
Date: Tue, 26 Apr 2016 13:15:35 +0200
Subject: [PATCH 100/424] workqueue: fix ghost PENDING flag while doing MQ IO

commit 346c09f80459a3ad97df1816d6d606169a51001a upstream.

The bug in a workqueue leads to a stalled IO request in MQ ctx->rq_list
with the following backtrace:

[  601.347452] INFO: task kworker/u129:5:1636 blocked for more than 120 seconds.
[  601.347574]       Tainted: G           O    4.4.5-1-storage+ #6
[  601.347651] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[  601.348142] kworker/u129:5  D ffff880803077988     0  1636      2 0x00000000
[  601.348519] Workqueue: ibnbd_server_fileio_wq ibnbd_dev_file_submit_io_worker [ibnbd_server]
[  601.348999]  ffff880803077988 ffff88080466b900 ffff8808033f9c80 ffff880803078000
[  601.349662]  ffff880807c95000 7fffffffffffffff ffffffff815b0920 ffff880803077ad0
[  601.350333]  ffff8808030779a0 ffffffff815b01d5 0000000000000000 ffff880803077a38
[  601.350965] Call Trace:
[  601.351203]  [<ffffffff815b0920>] ? bit_wait+0x60/0x60
[  601.351444]  [<ffffffff815b01d5>] schedule+0x35/0x80
[  601.351709]  [<ffffffff815b2dd2>] schedule_timeout+0x192/0x230
[  601.351958]  [<ffffffff812d43f7>] ? blk_flush_plug_list+0xc7/0x220
[  601.352208]  [<ffffffff810bd737>] ? ktime_get+0x37/0xa0
[  601.352446]  [<ffffffff815b0920>] ? bit_wait+0x60/0x60
[  601.352688]  [<ffffffff815af784>] io_schedule_timeout+0xa4/0x110
[  601.352951]  [<ffffffff815b3a4e>] ? _raw_spin_unlock_irqrestore+0xe/0x10
[  601.353196]  [<ffffffff815b093b>] bit_wait_io+0x1b/0x70
[  601.353440]  [<ffffffff815b056d>] __wait_on_bit+0x5d/0x90
[  601.353689]  [<ffffffff81127bd0>] wait_on_page_bit+0xc0/0xd0
[  601.353958]  [<ffffffff81096db0>] ? autoremove_wake_function+0x40/0x40
[  601.354200]  [<ffffffff81127cc4>] __filemap_fdatawait_range+0xe4/0x140
[  601.354441]  [<ffffffff81127d34>] filemap_fdatawait_range+0x14/0x30
[  601.354688]  [<ffffffff81129a9f>] filemap_write_and_wait_range+0x3f/0x70
[  601.354932]  [<ffffffff811ced3b>] blkdev_fsync+0x1b/0x50
[  601.355193]  [<ffffffff811c82d9>] vfs_fsync_range+0x49/0xa0
[  601.355432]  [<ffffffff811cf45a>] blkdev_write_iter+0xca/0x100
[  601.355679]  [<ffffffff81197b1a>] __vfs_write+0xaa/0xe0
[  601.355925]  [<ffffffff81198379>] vfs_write+0xa9/0x1a0
[  601.356164]  [<ffffffff811c59d8>] kernel_write+0x38/0x50

The underlying device is a null_blk, with default parameters:

  queue_mode    = MQ
  submit_queues = 1

Verification that nullb0 has something inflight:

root@pserver8:~# cat /sys/block/nullb0/inflight
       0        1
root@pserver8:~# find /sys/block/nullb0/mq/0/cpu* -name rq_list -print -exec cat {} \;
...
/sys/block/nullb0/mq/0/cpu2/rq_list
CTX pending:
        ffff8838038e2400
...

During debug it became clear that stalled request is always inserted in
the rq_list from the following path:

   save_stack_trace_tsk + 34
   blk_mq_insert_requests + 231
   blk_mq_flush_plug_list + 281
   blk_flush_plug_list + 199
   wait_on_page_bit + 192
   __filemap_fdatawait_range + 228
   filemap_fdatawait_range + 20
   filemap_write_and_wait_range + 63
   blkdev_fsync + 27
   vfs_fsync_range + 73
   blkdev_write_iter + 202
   __vfs_write + 170
   vfs_write + 169
   kernel_write + 56

So blk_flush_plug_list() was called with from_schedule == true.

If from_schedule is true, that means that finally blk_mq_insert_requests()
offloads execution of __blk_mq_run_hw_queue() and uses kblockd workqueue,
i.e. it calls kblockd_schedule_delayed_work_on().

That means, that we race with another CPU, which is about to execute
__blk_mq_run_hw_queue() work.

Further debugging shows the following traces from different CPUs:

  CPU#0                                  CPU#1
  ----------------------------------     -------------------------------
  reqeust A inserted
  STORE hctx->ctx_map[0] bit marked
  kblockd_schedule...() returns 1
  <schedule to kblockd workqueue>
                                         request B inserted
                                         STORE hctx->ctx_map[1] bit marked
                                         kblockd_schedule...() returns 0
  *** WORK PENDING bit is cleared ***
  flush_busy_ctxs() is executed, but
  bit 1, set by CPU#1, is not observed

As a result request B pended forever.

This behaviour can be explained by speculative LOAD of hctx->ctx_map on
CPU#0, which is reordered with clear of PENDING bit and executed _before_
actual STORE of bit 1 on CPU#1.

The proper fix is an explicit full barrier <mfence>, which guarantees
that clear of PENDING bit is to be executed before all possible
speculative LOADS or STORES inside actual work function.

Signed-off-by: Roman Pen <roman.penyaev@profitbricks.com>
Cc: Gioh Kim <gi-oh.kim@profitbricks.com>
Cc: Michael Wang <yun.wang@profitbricks.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: linux-block@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/workqueue.c | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 450c21fd0e6e..0ec05948a97b 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -649,6 +649,35 @@ static void set_work_pool_and_clear_pending(struct work_struct *work,
 	 */
 	smp_wmb();
 	set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT, 0);
+	/*
+	 * The following mb guarantees that previous clear of a PENDING bit
+	 * will not be reordered with any speculative LOADS or STORES from
+	 * work->current_func, which is executed afterwards.  This possible
+	 * reordering can lead to a missed execution on attempt to qeueue
+	 * the same @work.  E.g. consider this case:
+	 *
+	 *   CPU#0                         CPU#1
+	 *   ----------------------------  --------------------------------
+	 *
+	 * 1  STORE event_indicated
+	 * 2  queue_work_on() {
+	 * 3    test_and_set_bit(PENDING)
+	 * 4 }                             set_..._and_clear_pending() {
+	 * 5                                 set_work_data() # clear bit
+	 * 6                                 smp_mb()
+	 * 7                               work->current_func() {
+	 * 8				      LOAD event_indicated
+	 *				   }
+	 *
+	 * Without an explicit full barrier speculative LOAD on line 8 can
+	 * be executed before CPU#0 does STORE on line 1.  If that happens,
+	 * CPU#0 observes the PENDING bit is still set and new execution of
+	 * a @work is not queued in a hope, that CPU#1 will eventually
+	 * finish the queued @work.  Meanwhile CPU#1 does not see
+	 * event_indicated is set, because speculative LOAD was executed
+	 * before actual STORE.
+	 */
+	smp_mb();
 }
 
 static void clear_work_data(struct work_struct *work)

From a4e25ff31103e7c9084904418cb95596e3e9d9cf Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Tue, 15 Mar 2016 14:53:32 -0700
Subject: [PATCH 101/424] slub: clean up code for kmem cgroup support to
 kmem_cache_free_bulk

commit 376bf125ac781d32e202760ed7deb1ae4ed35d31 upstream.

This change is primarily an attempt to make it easier to realize the
optimizations the compiler performs in-case CONFIG_MEMCG_KMEM is not
enabled.

Performance wise, even when CONFIG_MEMCG_KMEM is compiled in, the
overhead is zero.  This is because, as long as no process have enabled
kmem cgroups accounting, the assignment is replaced by asm-NOP
operations.  This is possible because memcg_kmem_enabled() uses a
static_key_false() construct.

It also helps readability as it avoid accessing the p[] array like:
p[size - 1] which "expose" that the array is processed backwards inside
helper function build_detached_freelist().

Lastly this also makes the code more robust, in error case like passing
NULL pointers in the array.  Which were previously handled before commit
033745189b1b ("slub: add missing kmem cgroup support to
kmem_cache_free_bulk").

Fixes: 033745189b1b ("slub: add missing kmem cgroup support to kmem_cache_free_bulk")
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Vladimir Davydov <vdavydov@virtuozzo.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/slub.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index 46997517406e..65d5f92d51d2 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2819,6 +2819,7 @@ struct detached_freelist {
 	void *tail;
 	void *freelist;
 	int cnt;
+	struct kmem_cache *s;
 };
 
 /*
@@ -2833,8 +2834,9 @@ struct detached_freelist {
  * synchronization primitive.  Look ahead in the array is limited due
  * to performance reasons.
  */
-static int build_detached_freelist(struct kmem_cache *s, size_t size,
-				   void **p, struct detached_freelist *df)
+static inline
+int build_detached_freelist(struct kmem_cache *s, size_t size,
+			    void **p, struct detached_freelist *df)
 {
 	size_t first_skipped_index = 0;
 	int lookahead = 3;
@@ -2850,8 +2852,11 @@ static int build_detached_freelist(struct kmem_cache *s, size_t size,
 	if (!object)
 		return 0;
 
+	/* Support for memcg, compiler can optimize this out */
+	df->s = cache_from_obj(s, object);
+
 	/* Start new detached freelist */
-	set_freepointer(s, object, NULL);
+	set_freepointer(df->s, object, NULL);
 	df->page = virt_to_head_page(object);
 	df->tail = object;
 	df->freelist = object;
@@ -2866,7 +2871,7 @@ static int build_detached_freelist(struct kmem_cache *s, size_t size,
 		/* df->page is always set at this point */
 		if (df->page == virt_to_head_page(object)) {
 			/* Opportunity build freelist */
-			set_freepointer(s, object, df->freelist);
+			set_freepointer(df->s, object, df->freelist);
 			df->freelist = object;
 			df->cnt++;
 			p[size] = NULL; /* mark object processed */
@@ -2885,25 +2890,20 @@ static int build_detached_freelist(struct kmem_cache *s, size_t size,
 	return first_skipped_index;
 }
 
-
 /* Note that interrupts must be enabled when calling this function. */
-void kmem_cache_free_bulk(struct kmem_cache *orig_s, size_t size, void **p)
+void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
 {
 	if (WARN_ON(!size))
 		return;
 
 	do {
 		struct detached_freelist df;
-		struct kmem_cache *s;
-
-		/* Support for memcg */
-		s = cache_from_obj(orig_s, p[size - 1]);
 
 		size = build_detached_freelist(s, size, p, &df);
 		if (unlikely(!df.page))
 			continue;
 
-		slab_free(s, df.page, df.freelist, df.tail, df.cnt, _RET_IP_);
+		slab_free(df.s, df.page, df.freelist, df.tail, df.cnt,_RET_IP_);
 	} while (likely(size));
 }
 EXPORT_SYMBOL(kmem_cache_free_bulk);

From d52097476caeb14f4d7e3417dda08220d2813cc4 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 21 Apr 2016 19:06:48 -0400
Subject: [PATCH 102/424] cgroup, cpuset: replace cpuset_post_attach_flush()
 with cgroup_subsys->post_attach callback

commit 5cf1cacb49aee39c3e02ae87068fc3c6430659b0 upstream.

Since e93ad19d0564 ("cpuset: make mm migration asynchronous"), cpuset
kicks off asynchronous NUMA node migration if necessary during task
migration and flushes it from cpuset_post_attach_flush() which is
called at the end of __cgroup_procs_write().  This is to avoid
performing migration with cgroup_threadgroup_rwsem write-locked which
can lead to deadlock through dependency on kworker creation.

memcg has a similar issue with charge moving, so let's convert it to
an official callback rather than the current one-off cpuset specific
function.  This patch adds cgroup_subsys->post_attach callback and
makes cpuset register cpuset_post_attach_flush() as its ->post_attach.

The conversion is mostly one-to-one except that the new callback is
called under cgroup_mutex.  This is to guarantee that no other
migration operations are started before ->post_attach callbacks are
finished.  cgroup_mutex is one of the outermost mutex in the system
and has never been and shouldn't be a problem.  We can add specialized
synchronization around __cgroup_procs_write() but I don't think
there's any noticeable benefit.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/cgroup-defs.h | 1 +
 include/linux/cpuset.h      | 6 ------
 kernel/cgroup.c             | 7 +++++--
 kernel/cpuset.c             | 4 ++--
 4 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index a7c7f74808a4..8da263299754 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -434,6 +434,7 @@ struct cgroup_subsys {
 	int (*can_attach)(struct cgroup_taskset *tset);
 	void (*cancel_attach)(struct cgroup_taskset *tset);
 	void (*attach)(struct cgroup_taskset *tset);
+	void (*post_attach)(void);
 	int (*can_fork)(struct task_struct *task, void **priv_p);
 	void (*cancel_fork)(struct task_struct *task, void *priv);
 	void (*fork)(struct task_struct *task, void *priv);
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index fea160ee5803..85a868ccb493 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -137,8 +137,6 @@ static inline void set_mems_allowed(nodemask_t nodemask)
 	task_unlock(current);
 }
 
-extern void cpuset_post_attach_flush(void);
-
 #else /* !CONFIG_CPUSETS */
 
 static inline bool cpusets_enabled(void) { return false; }
@@ -245,10 +243,6 @@ static inline bool read_mems_allowed_retry(unsigned int seq)
 	return false;
 }
 
-static inline void cpuset_post_attach_flush(void)
-{
-}
-
 #endif /* !CONFIG_CPUSETS */
 
 #endif /* _LINUX_CPUSET_H */
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index dc94f8beb097..b0ea3aebc05a 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2721,9 +2721,10 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
 				    size_t nbytes, loff_t off, bool threadgroup)
 {
 	struct task_struct *tsk;
+	struct cgroup_subsys *ss;
 	struct cgroup *cgrp;
 	pid_t pid;
-	int ret;
+	int ssid, ret;
 
 	if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
 		return -EINVAL;
@@ -2771,8 +2772,10 @@ out_unlock_rcu:
 	rcu_read_unlock();
 out_unlock_threadgroup:
 	percpu_up_write(&cgroup_threadgroup_rwsem);
+	for_each_subsys(ss, ssid)
+		if (ss->post_attach)
+			ss->post_attach();
 	cgroup_kn_unlock(of->kn);
-	cpuset_post_attach_flush();
 	return ret ?: nbytes;
 }
 
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 2ade632197d5..11eaf14b52c2 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -57,7 +57,6 @@
 #include <asm/uaccess.h>
 #include <linux/atomic.h>
 #include <linux/mutex.h>
-#include <linux/workqueue.h>
 #include <linux/cgroup.h>
 #include <linux/wait.h>
 
@@ -1015,7 +1014,7 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
 	}
 }
 
-void cpuset_post_attach_flush(void)
+static void cpuset_post_attach(void)
 {
 	flush_workqueue(cpuset_migrate_mm_wq);
 }
@@ -2083,6 +2082,7 @@ struct cgroup_subsys cpuset_cgrp_subsys = {
 	.can_attach	= cpuset_can_attach,
 	.cancel_attach	= cpuset_cancel_attach,
 	.attach		= cpuset_attach,
+	.post_attach	= cpuset_post_attach,
 	.bind		= cpuset_bind,
 	.legacy_cftypes	= files,
 	.early_init	= 1,

From 52526076a5a686906a0acc22d27530ecb9364d84 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 21 Apr 2016 19:09:02 -0400
Subject: [PATCH 103/424] memcg: relocate charge moving from ->attach to
 ->post_attach

commit 264a0ae164bc0e9144bebcd25ff030d067b1a878 upstream.

Hello,

So, this ended up a lot simpler than I originally expected.  I tested
it lightly and it seems to work fine.  Petr, can you please test these
two patches w/o the lru drain drop patch and see whether the problem
is gone?

Thanks.
------ 8< ------
If charge moving is used, memcg performs relabeling of the affected
pages from its ->attach callback which is called under both
cgroup_threadgroup_rwsem and thus can't create new kthreads.  This is
fragile as various operations may depend on workqueues making forward
progress which relies on the ability to create new kthreads.

There's no reason to perform charge moving from ->attach which is deep
in the task migration path.  Move it to ->post_attach which is called
after the actual migration is finished and cgroup_threadgroup_rwsem is
dropped.

* move_charge_struct->mm is added and ->can_attach is now responsible
  for pinning and recording the target mm.  mem_cgroup_clear_mc() is
  updated accordingly.  This also simplifies mem_cgroup_move_task().

* mem_cgroup_move_task() is now called from ->post_attach instead of
  ->attach.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@kernel.org>
Debugged-and-tested-by: Petr Mladek <pmladek@suse.com>
Reported-by: Cyril Hrubis <chrubis@suse.cz>
Reported-by: Johannes Weiner <hannes@cmpxchg.org>
Fixes: 1ed1328792ff ("sched, cgroup: replace signal_struct->group_rwsem with a global percpu_rwsem")
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/memcontrol.c | 37 +++++++++++++++++++------------------
 1 file changed, 19 insertions(+), 18 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index fc0bcc41d57f..6ba4dd988e2e 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -196,6 +196,7 @@ static void mem_cgroup_oom_notify(struct mem_cgroup *memcg);
 /* "mc" and its members are protected by cgroup_mutex */
 static struct move_charge_struct {
 	spinlock_t	  lock; /* for from, to */
+	struct mm_struct  *mm;
 	struct mem_cgroup *from;
 	struct mem_cgroup *to;
 	unsigned long flags;
@@ -4800,6 +4801,8 @@ static void __mem_cgroup_clear_mc(void)
 
 static void mem_cgroup_clear_mc(void)
 {
+	struct mm_struct *mm = mc.mm;
+
 	/*
 	 * we must clear moving_task before waking up waiters at the end of
 	 * task migration.
@@ -4809,7 +4812,10 @@ static void mem_cgroup_clear_mc(void)
 	spin_lock(&mc.lock);
 	mc.from = NULL;
 	mc.to = NULL;
+	mc.mm = NULL;
 	spin_unlock(&mc.lock);
+
+	mmput(mm);
 }
 
 static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
@@ -4866,6 +4872,7 @@ static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
 		VM_BUG_ON(mc.moved_swap);
 
 		spin_lock(&mc.lock);
+		mc.mm = mm;
 		mc.from = from;
 		mc.to = memcg;
 		mc.flags = move_flags;
@@ -4875,8 +4882,9 @@ static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
 		ret = mem_cgroup_precharge_mc(mm);
 		if (ret)
 			mem_cgroup_clear_mc();
+	} else {
+		mmput(mm);
 	}
-	mmput(mm);
 	return ret;
 }
 
@@ -4985,11 +4993,11 @@ put:			/* get_mctgt_type() gets the page */
 	return ret;
 }
 
-static void mem_cgroup_move_charge(struct mm_struct *mm)
+static void mem_cgroup_move_charge(void)
 {
 	struct mm_walk mem_cgroup_move_charge_walk = {
 		.pmd_entry = mem_cgroup_move_charge_pte_range,
-		.mm = mm,
+		.mm = mc.mm,
 	};
 
 	lru_add_drain_all();
@@ -5001,7 +5009,7 @@ static void mem_cgroup_move_charge(struct mm_struct *mm)
 	atomic_inc(&mc.from->moving_account);
 	synchronize_rcu();
 retry:
-	if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
+	if (unlikely(!down_read_trylock(&mc.mm->mmap_sem))) {
 		/*
 		 * Someone who are holding the mmap_sem might be waiting in
 		 * waitq. So we cancel all extra charges, wake up all waiters,
@@ -5018,23 +5026,16 @@ retry:
 	 * additional charge, the page walk just aborts.
 	 */
 	walk_page_range(0, ~0UL, &mem_cgroup_move_charge_walk);
-	up_read(&mm->mmap_sem);
+	up_read(&mc.mm->mmap_sem);
 	atomic_dec(&mc.from->moving_account);
 }
 
-static void mem_cgroup_move_task(struct cgroup_taskset *tset)
+static void mem_cgroup_move_task(void)
 {
-	struct cgroup_subsys_state *css;
-	struct task_struct *p = cgroup_taskset_first(tset, &css);
-	struct mm_struct *mm = get_task_mm(p);
-
-	if (mm) {
-		if (mc.to)
-			mem_cgroup_move_charge(mm);
-		mmput(mm);
-	}
-	if (mc.to)
+	if (mc.to) {
+		mem_cgroup_move_charge();
 		mem_cgroup_clear_mc();
+	}
 }
 #else	/* !CONFIG_MMU */
 static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
@@ -5044,7 +5045,7 @@ static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
 static void mem_cgroup_cancel_attach(struct cgroup_taskset *tset)
 {
 }
-static void mem_cgroup_move_task(struct cgroup_taskset *tset)
+static void mem_cgroup_move_task(void)
 {
 }
 #endif
@@ -5258,7 +5259,7 @@ struct cgroup_subsys memory_cgrp_subsys = {
 	.css_reset = mem_cgroup_css_reset,
 	.can_attach = mem_cgroup_can_attach,
 	.cancel_attach = mem_cgroup_cancel_attach,
-	.attach = mem_cgroup_move_task,
+	.post_attach = mem_cgroup_move_task,
 	.bind = mem_cgroup_bind,
 	.dfl_cftypes = memory_files,
 	.legacy_cftypes = mem_cgroup_legacy_files,

From be591a683e3b4cc58466e08cd6b5e4a71c02b19a Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <koct9i@gmail.com>
Date: Thu, 28 Apr 2016 16:18:32 -0700
Subject: [PATCH 104/424] mm/huge_memory: replace VM_NO_THP VM_BUG_ON with
 actual VMA check

commit 3486b85a29c1741db99d0c522211c82d2b7a56d0 upstream.

Khugepaged detects own VMAs by checking vm_file and vm_ops but this way
it cannot distinguish private /dev/zero mappings from other special
mappings like /dev/hpet which has no vm_ops and popultes PTEs in mmap.

This fixes false-positive VM_BUG_ON and prevents installing THP where
they are not expected.

Link: http://lkml.kernel.org/r/CACT4Y+ZmuZMV5CjSFOeXviwQdABAgT7T+StKfTqan9YDtgEi5g@mail.gmail.com
Fixes: 78f11a255749 ("mm: thp: fix /dev/zero MAP_PRIVATE and vm_flags cleanups")
Signed-off-by: Konstantin Khlebnikov <koct9i@gmail.com>
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/huge_memory.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 62fe06bb7d04..530e6427f823 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2134,10 +2134,9 @@ int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
 		 * page fault if needed.
 		 */
 		return 0;
-	if (vma->vm_ops)
+	if (vma->vm_ops || (vm_flags & VM_NO_THP))
 		/* khugepaged not yet working on file or special mappings */
 		return 0;
-	VM_BUG_ON_VMA(vm_flags & VM_NO_THP, vma);
 	hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
 	hend = vma->vm_end & HPAGE_PMD_MASK;
 	if (hstart < hend)
@@ -2498,8 +2497,7 @@ static bool hugepage_vma_check(struct vm_area_struct *vma)
 		return false;
 	if (is_vma_temporary_stack(vma))
 		return false;
-	VM_BUG_ON_VMA(vma->vm_flags & VM_NO_THP, vma);
-	return true;
+	return !(vma->vm_flags & VM_NO_THP);
 }
 
 static void collapse_huge_page(struct mm_struct *mm,

From e513b90a9aef91e6399decb8e9592f2d75f7ebad Mon Sep 17 00:00:00 2001
From: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Date: Thu, 28 Apr 2016 16:18:35 -0700
Subject: [PATCH 105/424] numa: fix /proc/<pid>/numa_maps for THP

commit 28093f9f34cedeaea0f481c58446d9dac6dd620f upstream.

In gather_pte_stats() a THP pmd is cast into a pte, which is wrong
because the layouts may differ depending on the architecture.  On s390
this will lead to inaccurate numa_maps accounting in /proc because of
misguided pte_present() and pte_dirty() checks on the fake pte.

On other architectures pte_present() and pte_dirty() may work by chance,
but there may be an issue with direct-access (dax) mappings w/o
underlying struct pages when HAVE_PTE_SPECIAL is set and THP is
available.  In vm_normal_page() the fake pte will be checked with
pte_special() and because there is no "special" bit in a pmd, this will
always return false and the VM_PFNMAP | VM_MIXEDMAP checking will be
skipped.  On dax mappings w/o struct pages, an invalid struct page
pointer would then be returned that can crash the kernel.

This patch fixes the numa_maps THP handling by introducing new "_pmd"
variants of the can_gather_numa_stats() and vm_normal_page() functions.

Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: "Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Konstantin Khlebnikov <koct9i@gmail.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Jerome Marchand <jmarchan@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/proc/task_mmu.c | 33 ++++++++++++++++++++++++++++++---
 include/linux/mm.h |  2 ++
 mm/memory.c        | 40 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 72 insertions(+), 3 deletions(-)

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 09cd3edde08a..f6478301db00 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1435,6 +1435,32 @@ static struct page *can_gather_numa_stats(pte_t pte, struct vm_area_struct *vma,
 	return page;
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static struct page *can_gather_numa_stats_pmd(pmd_t pmd,
+					      struct vm_area_struct *vma,
+					      unsigned long addr)
+{
+	struct page *page;
+	int nid;
+
+	if (!pmd_present(pmd))
+		return NULL;
+
+	page = vm_normal_page_pmd(vma, addr, pmd);
+	if (!page)
+		return NULL;
+
+	if (PageReserved(page))
+		return NULL;
+
+	nid = page_to_nid(page);
+	if (!node_isset(nid, node_states[N_MEMORY]))
+		return NULL;
+
+	return page;
+}
+#endif
+
 static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
 		unsigned long end, struct mm_walk *walk)
 {
@@ -1444,13 +1470,13 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
 	pte_t *orig_pte;
 	pte_t *pte;
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
-		pte_t huge_pte = *(pte_t *)pmd;
 		struct page *page;
 
-		page = can_gather_numa_stats(huge_pte, vma, addr);
+		page = can_gather_numa_stats_pmd(*pmd, vma, addr);
 		if (page)
-			gather_stats(page, md, pte_dirty(huge_pte),
+			gather_stats(page, md, pmd_dirty(*pmd),
 				     HPAGE_PMD_SIZE/PAGE_SIZE);
 		spin_unlock(ptl);
 		return 0;
@@ -1458,6 +1484,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
 
 	if (pmd_trans_unstable(pmd))
 		return 0;
+#endif
 	orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
 	do {
 		struct page *page = can_gather_numa_stats(*pte, vma, addr);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 00bad7793788..fb8b20e5d021 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1084,6 +1084,8 @@ struct zap_details {
 
 struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
 		pte_t pte);
+struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
+				pmd_t pmd);
 
 int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
 		unsigned long size);
diff --git a/mm/memory.c b/mm/memory.c
index b80bf4746b67..76dcee317714 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -797,6 +797,46 @@ out:
 	return pfn_to_page(pfn);
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
+				pmd_t pmd)
+{
+	unsigned long pfn = pmd_pfn(pmd);
+
+	/*
+	 * There is no pmd_special() but there may be special pmds, e.g.
+	 * in a direct-access (dax) mapping, so let's just replicate the
+	 * !HAVE_PTE_SPECIAL case from vm_normal_page() here.
+	 */
+	if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) {
+		if (vma->vm_flags & VM_MIXEDMAP) {
+			if (!pfn_valid(pfn))
+				return NULL;
+			goto out;
+		} else {
+			unsigned long off;
+			off = (addr - vma->vm_start) >> PAGE_SHIFT;
+			if (pfn == vma->vm_pgoff + off)
+				return NULL;
+			if (!is_cow_mapping(vma->vm_flags))
+				return NULL;
+		}
+	}
+
+	if (is_zero_pfn(pfn))
+		return NULL;
+	if (unlikely(pfn > highest_memmap_pfn))
+		return NULL;
+
+	/*
+	 * NOTE! We still have PageReserved() pages in the page tables.
+	 * eg. VDSO mappings can cause them to exist.
+	 */
+out:
+	return pfn_to_page(pfn);
+}
+#endif
+
 /*
  * copy one vm_area from one task to the other. Assumes the page tables
  * already present in the new task to be cleared in the whole range

From 87c855f150be9317b9b6ad82c1611ed8d577d986 Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@kernel.org>
Date: Thu, 28 Apr 2016 16:18:38 -0700
Subject: [PATCH 106/424] mm: vmscan: reclaim highmem zone if buffer_heads is
 over limit

commit 7bf52fb891b64b8d61caf0b82060adb9db761aec upstream.

We have been reclaimed highmem zone if buffer_heads is over limit but
commit 6b4f7799c6a5 ("mm: vmscan: invoke slab shrinkers from
shrink_zone()") changed the behavior so it doesn't reclaim highmem zone
although buffer_heads is over the limit.  This patch restores the logic.

Fixes: 6b4f7799c6a5 ("mm: vmscan: invoke slab shrinkers from shrink_zone()")
Signed-off-by: Minchan Kim <minchan@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/vmscan.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 2aec4241b42a..0c114e2b01d3 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2534,7 +2534,7 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
 		sc->gfp_mask |= __GFP_HIGHMEM;
 
 	for_each_zone_zonelist_nodemask(zone, z, zonelist,
-					requested_highidx, sc->nodemask) {
+					gfp_zone(sc->gfp_mask), sc->nodemask) {
 		enum zone_type classzone_idx;
 
 		if (!populated_zone(zone))

From 36abe7272a248a7e47a4cec8d8ec9c76ef387bac Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@kernel.org>
Date: Thu, 28 Apr 2016 16:18:44 -0700
Subject: [PATCH 107/424] mm/hwpoison: fix wrong num_poisoned_pages accounting

commit d7e69488bd04de165667f6bc741c1c0ec6042ab9 upstream.

Currently, migration code increses num_poisoned_pages on *failed*
migration page as well as successfully migrated one at the trial of
memory-failure.  It will make the stat wrong.  As well, it marks the
page as PG_HWPoison even if the migration trial failed.  It would mean
we cannot recover the corrupted page using memory-failure facility.

This patches fixes it.

Signed-off-by: Minchan Kim <minchan@kernel.org>
Reported-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/migrate.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/mm/migrate.c b/mm/migrate.c
index 6d17e0ab42d4..bbeb0b71fcf4 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -963,7 +963,13 @@ out:
 		dec_zone_page_state(page, NR_ISOLATED_ANON +
 				page_is_file_cache(page));
 		/* Soft-offlined page shouldn't go through lru cache list */
-		if (reason == MR_MEMORY_FAILURE) {
+		if (reason == MR_MEMORY_FAILURE && rc == MIGRATEPAGE_SUCCESS) {
+			/*
+			 * With this release, we free successfully migrated
+			 * page and set PG_HWPoison on just freed page
+			 * intentionally. Although it's rather weird, it's how
+			 * HWPoison flag works at the moment.
+			 */
 			put_page(page);
 			if (!test_set_page_hwpoison(page))
 				num_poisoned_pages_inc();

From 3c6266d57c4c4fa02588070347acf21b610bbd96 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 21 Jan 2016 15:32:15 -0500
Subject: [PATCH 108/424] cgroup: make sure a parent css isn't freed before its
 children

commit 8bb5ef79bc0f4016ecf79e8dce6096a3c63603e4 upstream.

There are three subsystem callbacks in css shutdown path -
css_offline(), css_released() and css_free().  Except for
css_released(), cgroup core didn't guarantee the order of invocation.
css_offline() or css_free() could be called on a parent css before its
children.  This behavior is unexpected and led to bugs in cpu and
memory controller.

The previous patch updated ordering for css_offline() which fixes the
cpu controller issue.  While there currently isn't a known bug caused
by misordering of css_free() invocations, let's fix it too for
consistency.

css_free() ordering can be trivially fixed by moving putting of the
parent css below css_free() invocation.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/cgroup.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index b0ea3aebc05a..1c9d701f7a72 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -4692,14 +4692,15 @@ static void css_free_work_fn(struct work_struct *work)
 
 	if (ss) {
 		/* css free path */
+		struct cgroup_subsys_state *parent = css->parent;
 		int id = css->id;
 
-		if (css->parent)
-			css_put(css->parent);
-
 		ss->css_free(css);
 		cgroup_idr_remove(&ss->css_idr, id);
 		cgroup_put(cgrp);
+
+		if (parent)
+			css_put(parent);
 	} else {
 		/* cgroup free path */
 		atomic_dec(&cgrp->root->nr_cgrps);

From 4a1bb501e4b65908b102f0b371b0621ff18ad5c3 Mon Sep 17 00:00:00 2001
From: Ignat Korchagin <ignat.korchagin@gmail.com>
Date: Thu, 17 Mar 2016 18:00:29 +0000
Subject: [PATCH 109/424] USB: usbip: fix potential out-of-bounds write

commit b348d7dddb6c4fbfc810b7a0626e8ec9e29f7cbb upstream.

Fix potential out-of-bounds write to urb->transfer_buffer
usbip handles network communication directly in the kernel. When receiving a
packet from its peer, usbip code parses headers according to protocol. As
part of this parsing urb->actual_length is filled. Since the input for
urb->actual_length comes from the network, it should be treated as untrusted.
Any entity controlling the network may put any value in the input and the
preallocated urb->transfer_buffer may not be large enough to hold the data.
Thus, the malicious entity is able to write arbitrary data to kernel memory.

Signed-off-by: Ignat Korchagin <ignat.korchagin@gmail.com>
Cc: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/usbip/usbip_common.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/usb/usbip/usbip_common.c b/drivers/usb/usbip/usbip_common.c
index facaaf003f19..e40da7759a0e 100644
--- a/drivers/usb/usbip/usbip_common.c
+++ b/drivers/usb/usbip/usbip_common.c
@@ -741,6 +741,17 @@ int usbip_recv_xbuff(struct usbip_device *ud, struct urb *urb)
 	if (!(size > 0))
 		return 0;
 
+	if (size > urb->transfer_buffer_length) {
+		/* should not happen, probably malicious packet */
+		if (ud->side == USBIP_STUB) {
+			usbip_event_add(ud, SDEV_EVENT_ERROR_TCP);
+			return 0;
+		} else {
+			usbip_event_add(ud, VDEV_EVENT_ERROR_TCP);
+			return -EPIPE;
+		}
+	}
+
 	ret = usbip_recv(ud->tcp_socket, urb->transfer_buffer, size);
 	if (ret != size) {
 		dev_err(&urb->dev->dev, "recv xbuf, %d\n", ret);

From 3a4b3d187dba0255cbbb749f64c3b71f8105f44f Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Sun, 3 Apr 2016 16:15:00 -0300
Subject: [PATCH 110/424] videobuf2-core: Check user space planes array in
 dqbuf

commit e7e0c3e26587749b62d17b9dd0532874186c77f7 upstream.

The number of planes in videobuf2 is specific to a buffer. In order to
verify that the planes array provided by the user is long enough, a new
vb2_buf_op is required.

Call __verify_planes_array() when the dequeued buffer is known. Return an
error to the caller if there was one, otherwise remove the buffer from the
done list.

Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Acked-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/v4l2-core/videobuf2-core.c | 10 +++++-----
 include/media/videobuf2-core.h           |  1 +
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/media/v4l2-core/videobuf2-core.c b/drivers/media/v4l2-core/videobuf2-core.c
index 33bdd81065e8..11f39791ec33 100644
--- a/drivers/media/v4l2-core/videobuf2-core.c
+++ b/drivers/media/v4l2-core/videobuf2-core.c
@@ -1502,7 +1502,7 @@ static int __vb2_wait_for_done_vb(struct vb2_queue *q, int nonblocking)
  * Will sleep if required for nonblocking == false.
  */
 static int __vb2_get_done_vb(struct vb2_queue *q, struct vb2_buffer **vb,
-				int nonblocking)
+			     void *pb, int nonblocking)
 {
 	unsigned long flags;
 	int ret;
@@ -1523,10 +1523,10 @@ static int __vb2_get_done_vb(struct vb2_queue *q, struct vb2_buffer **vb,
 	/*
 	 * Only remove the buffer from done_list if v4l2_buffer can handle all
 	 * the planes.
-	 * Verifying planes is NOT necessary since it already has been checked
-	 * before the buffer is queued/prepared. So it can never fail.
 	 */
-	list_del(&(*vb)->done_entry);
+	ret = call_bufop(q, verify_planes_array, *vb, pb);
+	if (!ret)
+		list_del(&(*vb)->done_entry);
 	spin_unlock_irqrestore(&q->done_lock, flags);
 
 	return ret;
@@ -1604,7 +1604,7 @@ int vb2_core_dqbuf(struct vb2_queue *q, void *pb, bool nonblocking)
 	struct vb2_buffer *vb = NULL;
 	int ret;
 
-	ret = __vb2_get_done_vb(q, &vb, nonblocking);
+	ret = __vb2_get_done_vb(q, &vb, pb, nonblocking);
 	if (ret < 0)
 		return ret;
 
diff --git a/include/media/videobuf2-core.h b/include/media/videobuf2-core.h
index 647ebfe5174f..d4227a8a2a23 100644
--- a/include/media/videobuf2-core.h
+++ b/include/media/videobuf2-core.h
@@ -363,6 +363,7 @@ struct vb2_ops {
 };
 
 struct vb2_buf_ops {
+	int (*verify_planes_array)(struct vb2_buffer *vb, const void *pb);
 	int (*fill_user_buffer)(struct vb2_buffer *vb, void *pb);
 	int (*fill_vb2_buffer)(struct vb2_buffer *vb, const void *pb,
 				struct vb2_plane *planes);

From 19a4e46b4513bab7d6b368175be2e24ad4665e5a Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Sun, 3 Apr 2016 16:31:03 -0300
Subject: [PATCH 111/424] videobuf2-v4l2: Verify planes array in buffer
 dequeueing

commit 2c1f6951a8a82e6de0d82b1158b5e493fc6c54ab upstream.

When a buffer is being dequeued using VIDIOC_DQBUF IOCTL, the exact buffer
which will be dequeued is not known until the buffer has been removed from
the queue. The number of planes is specific to a buffer, not to the queue.

This does lead to the situation where multi-plane buffers may be requested
and queued with n planes, but VIDIOC_DQBUF IOCTL may be passed an argument
struct with fewer planes.

__fill_v4l2_buffer() however uses the number of planes from the dequeued
videobuf2 buffer, overwriting kernel memory (the m.planes array allocated
in video_usercopy() in v4l2-ioctl.c)  if the user provided fewer
planes than the dequeued buffer had. Oops!

Fixes: b0e0e1f83de3 ("[media] media: videobuf2: Prepare to divide videobuf2")

Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Acked-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/v4l2-core/videobuf2-v4l2.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/media/v4l2-core/videobuf2-v4l2.c b/drivers/media/v4l2-core/videobuf2-v4l2.c
index 502984c724ff..6c441be8f893 100644
--- a/drivers/media/v4l2-core/videobuf2-v4l2.c
+++ b/drivers/media/v4l2-core/videobuf2-v4l2.c
@@ -67,6 +67,11 @@ static int __verify_planes_array(struct vb2_buffer *vb, const struct v4l2_buffer
 	return 0;
 }
 
+static int __verify_planes_array_core(struct vb2_buffer *vb, const void *pb)
+{
+	return __verify_planes_array(vb, pb);
+}
+
 /**
  * __verify_length() - Verify that the bytesused value for each plane fits in
  * the plane length and that the data offset doesn't exceed the bytesused value.
@@ -432,6 +437,7 @@ static int __fill_vb2_buffer(struct vb2_buffer *vb,
 }
 
 static const struct vb2_buf_ops v4l2_buf_ops = {
+	.verify_planes_array	= __verify_planes_array_core,
 	.fill_user_buffer	= __fill_v4l2_buffer,
 	.fill_vb2_buffer	= __fill_vb2_buffer,
 	.set_timestamp		= __set_timestamp,

From 34af67eb941ae5371110c9adbd5392c7a3aa841e Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 2 May 2016 11:14:34 -0700
Subject: [PATCH 112/424] Revert "regulator: core: Fix nested locking of
 supplies"

This reverts commit b1999fa6e8145305a6c8bda30ea20783717708e6 which was
commit 70a7fb80e85ae7f78f8e90cec3fbd862ea6a4d4b upstream.

It causes run-time breakage in the 4.4-stable tree and more patches are
needed to be applied first before this one in order to resolve the
issue.

Reported-by: Guenter Roeck <linux@roeck-us.net>
Cc: Mark Brown <broonie@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Thierry Reding <treding@nvidia.com>
Cc: Mark Brown <broonie@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/regulator/core.c | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index 7b94b8ee087c..c70017d5f74b 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -132,14 +132,6 @@ static bool have_full_constraints(void)
 	return has_full_constraints || of_have_populated_dt();
 }
 
-static inline struct regulator_dev *rdev_get_supply(struct regulator_dev *rdev)
-{
-	if (rdev && rdev->supply)
-		return rdev->supply->rdev;
-
-	return NULL;
-}
-
 /**
  * regulator_lock_supply - lock a regulator and its supplies
  * @rdev:         regulator source
@@ -148,7 +140,7 @@ static void regulator_lock_supply(struct regulator_dev *rdev)
 {
 	int i;
 
-	for (i = 0; rdev->supply; rdev = rdev_get_supply(rdev), i++)
+	for (i = 0; rdev->supply; rdev = rdev->supply->rdev, i++)
 		mutex_lock_nested(&rdev->mutex, i);
 }
 

From f500da32a1663c1bb2587ff04be08d5220b3afca Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 27 Nov 2015 14:46:41 +0100
Subject: [PATCH 113/424] regulator: core: fix regulator_lock_supply regression

commit bb41897e38c53458a88b271f2fbcd905ee1f9584 upstream.

As noticed by Geert Uytterhoeven, my patch to avoid a harmless build warning
in regulator_lock_supply() was total crap and introduced a real bug:

> [ BUG: bad unlock balance detected! ]
> kworker/u4:0/6 is trying to release lock (&rdev->mutex) at:
> [<c0247b84>] regulator_set_voltage+0x38/0x50

we still lock the regulator supplies, but not the actual regulators,
so we are missing a lock, and the unlock is unbalanced.

This rectifies it by first locking the regulator device itself before
using the same loop as before to lock its supplies.

Reported-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: 716fec9d1965 ("[SUBMITTED] regulator: core: avoid unused variable warning")
Signed-off-by: Mark Brown <broonie@kernel.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/regulator/core.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index c70017d5f74b..daffff83ced2 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -140,7 +140,8 @@ static void regulator_lock_supply(struct regulator_dev *rdev)
 {
 	int i;
 
-	for (i = 0; rdev->supply; rdev = rdev->supply->rdev, i++)
+	mutex_lock(&rdev->mutex);
+	for (i = 1; rdev->supply; rdev = rdev->supply->rdev, i++)
 		mutex_lock_nested(&rdev->mutex, i);
 }
 

From 29c9f634cb132107df3e4f07c8e48b35d04b527b Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Tue, 1 Dec 2015 15:51:52 +0000
Subject: [PATCH 114/424] regulator: core: Ensure we lock all regulators

commit 49a6bb7a1c0963f260e4b0dcc2c0e56ec65a28b2 upstream.

The latest workaround for the lockdep interface's not using the second
argument of mutex_lock_nested() changed the loop missed locking the last
regulator due to a thinko with the loop termination condition exiting
one regulator too soon.

Reported-by: Tyler Baker <tyler.baker@linaro.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/regulator/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index daffff83ced2..f71db02fcb71 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -141,7 +141,7 @@ static void regulator_lock_supply(struct regulator_dev *rdev)
 	int i;
 
 	mutex_lock(&rdev->mutex);
-	for (i = 1; rdev->supply; rdev = rdev->supply->rdev, i++)
+	for (i = 1; rdev; rdev = rdev->supply->rdev, i++)
 		mutex_lock_nested(&rdev->mutex, i);
 }
 

From 5a58f809d731c23c0b898d2021903db8dee4466f Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Wed, 2 Dec 2015 16:54:50 +0100
Subject: [PATCH 115/424] regulator: core: Fix nested locking of supplies

commit 70a7fb80e85ae7f78f8e90cec3fbd862ea6a4d4b upstream.

Commit fa731ac7ea04 ("regulator: core: avoid unused variable warning")
introduced a subtle change in how supplies are locked. Where previously
code was always locking the regulator of the current iteration, the new
implementation only locks the regulator if it has a supply. For any
given power tree that means that the root will never get locked.

On the other hand the regulator_unlock_supply() will still release all
the locks, which in turn causes the lock debugging code to warn about a
mutex being unlocked which wasn't locked.

Cc: Mark Brown <broonie@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Fixes: fa731ac7ea04 ("regulator: core: avoid unused variable warning")
Signed-off-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/regulator/core.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index f71db02fcb71..732ac71b82cd 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -132,6 +132,14 @@ static bool have_full_constraints(void)
 	return has_full_constraints || of_have_populated_dt();
 }
 
+static inline struct regulator_dev *rdev_get_supply(struct regulator_dev *rdev)
+{
+	if (rdev && rdev->supply)
+		return rdev->supply->rdev;
+
+	return NULL;
+}
+
 /**
  * regulator_lock_supply - lock a regulator and its supplies
  * @rdev:         regulator source
@@ -140,8 +148,7 @@ static void regulator_lock_supply(struct regulator_dev *rdev)
 {
 	int i;
 
-	mutex_lock(&rdev->mutex);
-	for (i = 1; rdev; rdev = rdev->supply->rdev, i++)
+	for (i = 0; rdev; rdev = rdev_get_supply(rdev), i++)
 		mutex_lock_nested(&rdev->mutex, i);
 }
 

From 23a67ddd4636584816e2dc2c6393511d55944974 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 1 Feb 2016 15:11:28 +0100
Subject: [PATCH 116/424] locking/mcs: Fix mcs_spin_lock() ordering

commit 920c720aa5aa3900a7f1689228fdfc2580a91e7e upstream.

Similar to commit b4b29f94856a ("locking/osq: Fix ordering of node
initialisation in osq_lock") the use of xchg_acquire() is
fundamentally broken with MCS like constructs.

Furthermore, it turns out we rely on the global transitivity of this
operation because the unlock path observes the pointer with a
READ_ONCE(), not an smp_load_acquire().

This is non-critical because the MCS code isn't actually used and
mostly serves as documentation, a stepping stone to the more complex
things we've build on top of the idea.

Reported-by: Andrea Parri <parri.andrea@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Fixes: 3552a07a9c4a ("locking/mcs: Use acquire/release semantics")
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/locking/mcs_spinlock.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h
index 5b9102a47ea5..c835270f0c2f 100644
--- a/kernel/locking/mcs_spinlock.h
+++ b/kernel/locking/mcs_spinlock.h
@@ -67,7 +67,13 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
 	node->locked = 0;
 	node->next   = NULL;
 
-	prev = xchg_acquire(lock, node);
+	/*
+	 * We rely on the full barrier with global transitivity implied by the
+	 * below xchg() to order the initialization stores above against any
+	 * observation of @node. And to provide the ACQUIRE ordering associated
+	 * with a LOCK primitive.
+	 */
+	prev = xchg(lock, node);
 	if (likely(prev == NULL)) {
 		/*
 		 * Lock acquired, don't need to set node->locked to 1. Threads

From 791e8462e48c6259375f59acf905b05884d648c3 Mon Sep 17 00:00:00 2001
From: Huibin Hong <huibin.hong@rock-chips.com>
Date: Wed, 24 Feb 2016 18:00:04 +0800
Subject: [PATCH 117/424] spi/rockchip: Make sure spi clk is on in
 rockchip_spi_set_cs

commit b920cc3191d7612f26f36ee494e05b5ffd9044c0 upstream.

Rockchip_spi_set_cs could be called by spi_setup, but
spi_setup may be called by device driver after runtime suspend.
Then the spi clock is closed, rockchip_spi_set_cs may access the
spi registers, which causes cpu block in some socs.

Fixes: 64e36824b32 ("spi/rockchip: add driver for Rockchip RK3xxx")
Signed-off-by: Huibin Hong <huibin.hong@rock-chips.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/spi/spi-rockchip.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c
index 79a8bc4f6cec..035767c02072 100644
--- a/drivers/spi/spi-rockchip.c
+++ b/drivers/spi/spi-rockchip.c
@@ -265,7 +265,10 @@ static inline u32 rx_max(struct rockchip_spi *rs)
 static void rockchip_spi_set_cs(struct spi_device *spi, bool enable)
 {
 	u32 ser;
-	struct rockchip_spi *rs = spi_master_get_devdata(spi->master);
+	struct spi_master *master = spi->master;
+	struct rockchip_spi *rs = spi_master_get_devdata(master);
+
+	pm_runtime_get_sync(rs->dev);
 
 	ser = readl_relaxed(rs->regs + ROCKCHIP_SPI_SER) & SER_MASK;
 
@@ -290,6 +293,8 @@ static void rockchip_spi_set_cs(struct spi_device *spi, bool enable)
 		ser &= ~(1 << spi->chip_select);
 
 	writel_relaxed(ser, rs->regs + ROCKCHIP_SPI_SER);
+
+	pm_runtime_put_sync(rs->dev);
 }
 
 static int rockchip_spi_prepare_message(struct spi_master *master,

From fd66dc5d5123672069acba5cb5856e7b0aa9e6d4 Mon Sep 17 00:00:00 2001
From: Vladimir Zapolskiy <vz@mleia.com>
Date: Wed, 9 Mar 2016 03:21:29 +0200
Subject: [PATCH 118/424] irqchip/sunxi-nmi: Fix error check of
 of_io_request_and_map()

commit cfe199afefe6201e998ddc07102fc1fdb55f196c upstream.

The of_io_request_and_map() returns a valid pointer in iomem region or
ERR_PTR(), check for NULL always fails and may cause a NULL pointer
dereference on error path.

Fixes: 0e841b04c829 ("irqchip/sunxi-nmi: Switch to of_io_request_and_map() from of_iomap()")
Signed-off-by: Vladimir Zapolskiy <vz@mleia.com>
Cc: Jason Cooper <jason@lakedaemon.net>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Chen-Yu Tsai <wens@csie.org>
Cc: Maxime Ripard <maxime.ripard@free-electrons.com>
Cc: linux-arm-kernel@lists.infradead.org
Link: http://lkml.kernel.org/r/1457486489-10189-1-git-send-email-vz@mleia.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/irqchip/irq-sunxi-nmi.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/irqchip/irq-sunxi-nmi.c b/drivers/irqchip/irq-sunxi-nmi.c
index 4ef178078e5b..1254e98f6b57 100644
--- a/drivers/irqchip/irq-sunxi-nmi.c
+++ b/drivers/irqchip/irq-sunxi-nmi.c
@@ -154,9 +154,9 @@ static int __init sunxi_sc_nmi_irq_init(struct device_node *node,
 
 	gc = irq_get_domain_generic_chip(domain, 0);
 	gc->reg_base = of_io_request_and_map(node, 0, of_node_full_name(node));
-	if (!gc->reg_base) {
+	if (IS_ERR(gc->reg_base)) {
 		pr_err("unable to map resource\n");
-		ret = -ENOMEM;
+		ret = PTR_ERR(gc->reg_base);
 		goto fail_irqd_remove;
 	}
 

From e60711a18bccf26d0159b263dfb05b374532caf5 Mon Sep 17 00:00:00 2001
From: Vladimir Zapolskiy <vz@mleia.com>
Date: Wed, 9 Mar 2016 03:21:40 +0200
Subject: [PATCH 119/424] irqchip/mxs: Fix error check of
 of_io_request_and_map()

commit edf8fcdc6b254236be005851af35ea5e826e7e09 upstream.

The of_io_request_and_map() returns a valid pointer in iomem region or
ERR_PTR(), check for NULL always fails and may cause a NULL pointer
dereference on error path.

Fixes: 25e34b44313b ("irqchip/mxs: Prepare driver for hardware with different offsets")
Signed-off-by: Vladimir Zapolskiy <vz@mleia.com>
Cc: Jason Cooper <jason@lakedaemon.net>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Oleksij Rempel <linux@rempel-privat.de>
Cc: Sascha Hauer <kernel@pengutronix.de>
Cc: Shawn Guo <shawnguo@kernel.org>
Cc: linux-arm-kernel@lists.infradead.org
Link: http://lkml.kernel.org/r/1457486500-10237-1-git-send-email-vz@mleia.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/irqchip/irq-mxs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/irqchip/irq-mxs.c b/drivers/irqchip/irq-mxs.c
index efe50845939d..17304705f2cf 100644
--- a/drivers/irqchip/irq-mxs.c
+++ b/drivers/irqchip/irq-mxs.c
@@ -183,7 +183,7 @@ static void __iomem * __init icoll_init_iobase(struct device_node *np)
 	void __iomem *icoll_base;
 
 	icoll_base = of_io_request_and_map(np, 0, np->name);
-	if (!icoll_base)
+	if (IS_ERR(icoll_base))
 		panic("%s: unable to map resource", np->full_name);
 	return icoll_base;
 }

From 72291d619e2928556db1d446f0b4afff330276a7 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 16 Feb 2016 15:53:11 +0100
Subject: [PATCH 120/424] regulator: s5m8767: fix get_register() error handling

commit e07ff9434167981c993a26d2edbbcb8e13801dbb upstream.

The s5m8767_pmic_probe() function calls s5m8767_get_register() to
read data without checking the return code, which produces a compile-time
warning when that data is accessed:

drivers/regulator/s5m8767.c: In function 's5m8767_pmic_probe':
drivers/regulator/s5m8767.c:924:7: error: 'enable_reg' may be used uninitialized in this function [-Werror=maybe-uninitialized]
drivers/regulator/s5m8767.c:944:30: error: 'enable_val' may be used uninitialized in this function [-Werror=maybe-uninitialized]

This changes the s5m8767_get_register() function to return a -EINVAL
not just for an invalid register number but also for an invalid
regulator number, as both would result in returning uninitialized
data. The s5m8767_pmic_probe() function is then changed accordingly
to fail on a read error, as all the other callers of s5m8767_get_register()
already do.

In practice this probably cannot happen, as we don't call
s5m8767_get_register() with invalid arguments, but the gcc
warning seems valid in principle, in terms writing safe
error checking.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: 9c4c60554acf ("regulator: s5m8767: Convert to use regulator_[enable|disable|is_enabled]_regmap")
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/regulator/s5m8767.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/drivers/regulator/s5m8767.c b/drivers/regulator/s5m8767.c
index 58f5d3b8e981..27343e1c43ef 100644
--- a/drivers/regulator/s5m8767.c
+++ b/drivers/regulator/s5m8767.c
@@ -202,9 +202,10 @@ static int s5m8767_get_register(struct s5m8767_info *s5m8767, int reg_id,
 		}
 	}
 
-	if (i < s5m8767->num_regulators)
-		*enable_ctrl =
-		s5m8767_opmode_reg[reg_id][mode] << S5M8767_ENCTRL_SHIFT;
+	if (i >= s5m8767->num_regulators)
+		return -EINVAL;
+
+	*enable_ctrl = s5m8767_opmode_reg[reg_id][mode] << S5M8767_ENCTRL_SHIFT;
 
 	return 0;
 }
@@ -937,8 +938,12 @@ static int s5m8767_pmic_probe(struct platform_device *pdev)
 			else
 				regulators[id].vsel_mask = 0xff;
 
-			s5m8767_get_register(s5m8767, id, &enable_reg,
+			ret = s5m8767_get_register(s5m8767, id, &enable_reg,
 					     &enable_val);
+			if (ret) {
+				dev_err(s5m8767->dev, "error reading registers\n");
+				return ret;
+			}
 			regulators[id].enable_reg = enable_reg;
 			regulators[id].enable_mask = S5M8767_ENCTRL_MASK;
 			regulators[id].enable_val = enable_val;

From aea6995abbe4e13299d8606679cfe1b92fa45932 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 15 Mar 2016 14:53:29 -0700
Subject: [PATCH 121/424] paride: make 'verbose' parameter an 'int' again

commit dec63a4dec2d6d01346fd5d96062e67c0636852b upstream.

gcc-6.0 found an ancient bug in the paride driver, which had a
"module_param(verbose, bool, 0);" since before 2.6.12, but actually uses
it to accept '0', '1' or '2' as arguments:

  drivers/block/paride/pd.c: In function 'pd_init_dev_parms':
  drivers/block/paride/pd.c:298:29: warning: comparison of constant '1' with boolean expression is always false [-Wbool-compare]
   #define DBMSG(msg) ((verbose>1)?(msg):NULL)

In 2012, Rusty did a cleanup patch that also changed the type of the
variable to 'bool', which introduced what is now a gcc warning.

This changes the type back to 'int' and adapts the module_param() line
instead, so it should work as documented in case anyone ever cares about
running the ancient driver with debugging.

Fixes: 90ab5ee94171 ("module_param: make bool parameters really bool (drivers & misc)")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Rusty Russell <rusty@rustcorp.com.au>
Cc: Tim Waugh <tim@cyberelk.net>
Cc: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Cc: Jens Axboe <axboe@fb.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/block/paride/pd.c | 4 ++--
 drivers/block/paride/pt.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index 562b5a4ca7b7..78a39f736c64 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -126,7 +126,7 @@
 */
 #include <linux/types.h>
 
-static bool verbose = 0;
+static int verbose = 0;
 static int major = PD_MAJOR;
 static char *name = PD_NAME;
 static int cluster = 64;
@@ -161,7 +161,7 @@ enum {D_PRT, D_PRO, D_UNI, D_MOD, D_GEO, D_SBY, D_DLY, D_SLV};
 static DEFINE_MUTEX(pd_mutex);
 static DEFINE_SPINLOCK(pd_lock);
 
-module_param(verbose, bool, 0);
+module_param(verbose, int, 0);
 module_param(major, int, 0);
 module_param(name, charp, 0);
 module_param(cluster, int, 0);
diff --git a/drivers/block/paride/pt.c b/drivers/block/paride/pt.c
index 1740d75e8a32..216a94fed5b4 100644
--- a/drivers/block/paride/pt.c
+++ b/drivers/block/paride/pt.c
@@ -117,7 +117,7 @@
 
 */
 
-static bool verbose = 0;
+static int verbose = 0;
 static int major = PT_MAJOR;
 static char *name = PT_NAME;
 static int disable = 0;
@@ -152,7 +152,7 @@ static int (*drives[4])[6] = {&drive0, &drive1, &drive2, &drive3};
 
 #include <asm/uaccess.h>
 
-module_param(verbose, bool, 0);
+module_param(verbose, int, 0);
 module_param(major, int, 0);
 module_param(name, charp, 0);
 module_param_array(drive0, int, NULL, 0);

From 9d9fefc8283a2bda6c7daeaab3b310965cb35f85 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 27 Jan 2016 16:57:23 +0100
Subject: [PATCH 122/424] scsi_dh: force modular build if SCSI is a module

commit 0c994c03c926d26ce48e6bbabbbe60366044fcae upstream.

When the scsi_dh core was moved into the scsi core module,
CONFIG_SCSI_DH became a 'bool' option, and now anything depending on it
can be built-in even when CONFIG_SCSI=m. This of course cannot link
successfully:

drivers/scsi/built-in.o: In function `rdac_init':
scsi_dh_alua.c:(.init.text+0x14): undefined reference to `scsi_register_device_handler'
scsi_dh_alua.c:(.init.text+0x64): undefined reference to `scsi_unregister_device_handler'
drivers/scsi/built-in.o: In function `alua_init':
scsi_dh_alua.c:(.init.text+0xb0): undefined reference to `scsi_register_device_handler'

As a workaround, this adds an extra dependency on CONFIG_SCSI, so
Kconfig can figure out whether built-in is allowed or not.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: 086b91d052eb ("scsi_dh: integrate into the core SCSI code")
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/device_handler/Kconfig | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/scsi/device_handler/Kconfig b/drivers/scsi/device_handler/Kconfig
index e5647d59224f..0b331c9c0a8f 100644
--- a/drivers/scsi/device_handler/Kconfig
+++ b/drivers/scsi/device_handler/Kconfig
@@ -13,13 +13,13 @@ menuconfig SCSI_DH
 
 config SCSI_DH_RDAC
 	tristate "LSI RDAC Device Handler"
-	depends on SCSI_DH
+	depends on SCSI_DH && SCSI
 	help
 	If you have a LSI RDAC select y. Otherwise, say N.
 
 config SCSI_DH_HP_SW
 	tristate "HP/COMPAQ MSA Device Handler"
-	depends on SCSI_DH
+	depends on SCSI_DH && SCSI
 	help
 	If you have a HP/COMPAQ MSA device that requires START_STOP to
 	be sent to start it and cannot upgrade the firmware then select y.
@@ -27,13 +27,13 @@ config SCSI_DH_HP_SW
 
 config SCSI_DH_EMC
 	tristate "EMC CLARiiON Device Handler"
-	depends on SCSI_DH
+	depends on SCSI_DH && SCSI
 	help
 	If you have a EMC CLARiiON select y. Otherwise, say N.
 
 config SCSI_DH_ALUA
 	tristate "SPC-3 ALUA Device Handler"
-	depends on SCSI_DH
+	depends on SCSI_DH && SCSI
 	help
 	  SCSI Device handler for generic SPC-3 Asymmetric Logical Unit
 	  Access (ALUA).

From 0658e8c5e8cc02f12f9ae3df1f3b87ee7283bb24 Mon Sep 17 00:00:00 2001
From: Sushaanth Srirangapathi <sushaanth.s@ti.com>
Date: Mon, 29 Feb 2016 18:42:19 +0530
Subject: [PATCH 123/424] fbdev: da8xx-fb: fix videomodes of lcd panels

commit 713fced8d10fa1c759c8fb6bf9aaa681bae68cad upstream.

Commit 028cd86b794f4a ("video: da8xx-fb: fix the polarities of the
hsync/vsync pulse") fixes polarities of HSYNC/VSYNC pulse but
forgot to update known_lcd_panels[] which had sync values
according to old logic. This breaks LCD at least on DA850 EVM.

This patch fixes this issue and I have tested this for panel
"Sharp_LK043T1DG01" using DA850 EVM board.

Fixes: 028cd86b794f4a ("video: da8xx-fb: fix the polarities of the hsync/vsync pulse")
Signed-off-by: Sushaanth Srirangapathi <sushaanth.s@ti.com>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/video/fbdev/da8xx-fb.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/video/fbdev/da8xx-fb.c b/drivers/video/fbdev/da8xx-fb.c
index 0081725c6b5b..d00510029c93 100644
--- a/drivers/video/fbdev/da8xx-fb.c
+++ b/drivers/video/fbdev/da8xx-fb.c
@@ -209,8 +209,7 @@ static struct fb_videomode known_lcd_panels[] = {
 		.lower_margin   = 2,
 		.hsync_len      = 0,
 		.vsync_len      = 0,
-		.sync           = FB_SYNC_CLK_INVERT |
-			FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT,
+		.sync           = FB_SYNC_CLK_INVERT,
 	},
 	/* Sharp LK043T1DG01 */
 	[1] = {
@@ -224,7 +223,7 @@ static struct fb_videomode known_lcd_panels[] = {
 		.lower_margin   = 2,
 		.hsync_len      = 41,
 		.vsync_len      = 10,
-		.sync           = FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT,
+		.sync           = 0,
 		.flag           = 0,
 	},
 	[2] = {
@@ -239,7 +238,7 @@ static struct fb_videomode known_lcd_panels[] = {
 		.lower_margin   = 10,
 		.hsync_len      = 10,
 		.vsync_len      = 10,
-		.sync           = FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT,
+		.sync           = 0,
 		.flag           = 0,
 	},
 	[3] = {

From 81b3a56ed84b0f2c1e2ff75ee2e05d5d4cd2462b Mon Sep 17 00:00:00 2001
From: Michal Marek <mmarek@suse.com>
Date: Wed, 17 Feb 2016 14:46:59 +0100
Subject: [PATCH 124/424] lib/mpi: Endianness fix

commit 3ee0cb5fb5eea2110db1b5cb7f67029b7be8a376 upstream.

The limbs are integers in the host endianness, so we can't simply
iterate over the individual bytes. The current code happens to work on
little-endian, because the order of the limbs in the MPI array is the
same as the order of the bytes in each limb, but it breaks on
big-endian.

Fixes: 0f74fbf77d45 ("MPI: Fix mpi_read_buffer")
Signed-off-by: Michal Marek <mmarek@suse.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 lib/mpi/mpicoder.c | 39 +++++++++++++++++++++------------------
 1 file changed, 21 insertions(+), 18 deletions(-)

diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c
index 3db76b8c1115..e00ff00e861c 100644
--- a/lib/mpi/mpicoder.c
+++ b/lib/mpi/mpicoder.c
@@ -128,6 +128,23 @@ leave:
 }
 EXPORT_SYMBOL_GPL(mpi_read_from_buffer);
 
+static int count_lzeros(MPI a)
+{
+	mpi_limb_t alimb;
+	int i, lzeros = 0;
+
+	for (i = a->nlimbs - 1; i >= 0; i--) {
+		alimb = a->d[i];
+		if (alimb == 0) {
+			lzeros += sizeof(mpi_limb_t);
+		} else {
+			lzeros += count_leading_zeros(alimb) / 8;
+			break;
+		}
+	}
+	return lzeros;
+}
+
 /**
  * mpi_read_buffer() - read MPI to a bufer provided by user (msb first)
  *
@@ -146,7 +163,7 @@ int mpi_read_buffer(MPI a, uint8_t *buf, unsigned buf_len, unsigned *nbytes,
 	uint8_t *p;
 	mpi_limb_t alimb;
 	unsigned int n = mpi_get_size(a);
-	int i, lzeros = 0;
+	int i, lzeros;
 
 	if (buf_len < n || !buf || !nbytes)
 		return -EINVAL;
@@ -154,14 +171,7 @@ int mpi_read_buffer(MPI a, uint8_t *buf, unsigned buf_len, unsigned *nbytes,
 	if (sign)
 		*sign = a->sign;
 
-	p = (void *)&a->d[a->nlimbs] - 1;
-
-	for (i = a->nlimbs * sizeof(alimb) - 1; i >= 0; i--, p--) {
-		if (!*p)
-			lzeros++;
-		else
-			break;
-	}
+	lzeros = count_lzeros(a);
 
 	p = buf;
 	*nbytes = n - lzeros;
@@ -343,7 +353,7 @@ int mpi_write_to_sgl(MPI a, struct scatterlist *sgl, unsigned *nbytes,
 	u8 *p, *p2;
 	mpi_limb_t alimb, alimb2;
 	unsigned int n = mpi_get_size(a);
-	int i, x, y = 0, lzeros = 0, buf_len;
+	int i, x, y = 0, lzeros, buf_len;
 
 	if (!nbytes || *nbytes < n)
 		return -EINVAL;
@@ -351,14 +361,7 @@ int mpi_write_to_sgl(MPI a, struct scatterlist *sgl, unsigned *nbytes,
 	if (sign)
 		*sign = a->sign;
 
-	p = (void *)&a->d[a->nlimbs] - 1;
-
-	for (i = a->nlimbs * sizeof(alimb) - 1; i >= 0; i--, p--) {
-		if (!*p)
-			lzeros++;
-		else
-			break;
-	}
+	lzeros = count_lzeros(a);
 
 	*nbytes = n - lzeros;
 	buf_len = sgl->length;

From 01c8261c5ec46183e14dec7df335ee88bb037e30 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Mon, 14 Dec 2015 14:29:23 +0000
Subject: [PATCH 125/424] misc/bmp085: Enable building as a module

commit 50e6315dba721cbc24ccd6d7b299f1782f210a98 upstream.

Commit 985087dbcb02 'misc: add support for bmp18x chips to the bmp085
driver' changed the BMP085 config symbol to a boolean.  I see no
reason why the shared code cannot be built as a module, so change it
back to tristate.

Fixes: 985087dbcb02 ("misc: add support for bmp18x chips to the bmp085 driver")
Cc: Eric Andersson <eric.andersson@unixphere.com>
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 22892c701c63..4bf7d50b1bc7 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -439,7 +439,7 @@ config ARM_CHARLCD
 	  still useful.
 
 config BMP085
-	bool
+	tristate
 	depends on SYSFS
 
 config BMP085_I2C

From 4f8e29e7547be52fa24b0cdc7cf69baac9d82328 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Mon, 19 Oct 2015 14:19:01 +0300
Subject: [PATCH 126/424] misc: mic/scif: fix wrap around tests

commit 7b64dbf849abdd7e769820e25120758f956a7f13 upstream.

Signed integer overflow is undefined.  Also I added a check for
"(offset < 0)" in scif_unregister() because that makes it match the
other conditions and because I didn't want to subtract a negative.

Fixes: ba612aa8b487 ('misc: mic: SCIF memory registration and unregistration')
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/mic/scif/scif_rma.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/misc/mic/scif/scif_rma.c b/drivers/misc/mic/scif/scif_rma.c
index 8310b4dbff06..6a451bd65bf3 100644
--- a/drivers/misc/mic/scif/scif_rma.c
+++ b/drivers/misc/mic/scif/scif_rma.c
@@ -1511,7 +1511,7 @@ off_t scif_register_pinned_pages(scif_epd_t epd,
 	if ((map_flags & SCIF_MAP_FIXED) &&
 	    ((ALIGN(offset, PAGE_SIZE) != offset) ||
 	    (offset < 0) ||
-	    (offset + (off_t)len < offset)))
+	    (len > LONG_MAX - offset)))
 		return -EINVAL;
 
 	might_sleep();
@@ -1614,7 +1614,7 @@ off_t scif_register(scif_epd_t epd, void *addr, size_t len, off_t offset,
 	if ((map_flags & SCIF_MAP_FIXED) &&
 	    ((ALIGN(offset, PAGE_SIZE) != offset) ||
 	    (offset < 0) ||
-	    (offset + (off_t)len < offset)))
+	    (len > LONG_MAX - offset)))
 		return -EINVAL;
 
 	/* Unsupported protection requested */
@@ -1732,7 +1732,8 @@ scif_unregister(scif_epd_t epd, off_t offset, size_t len)
 
 	/* Offset is not page aligned or offset+len wraps around */
 	if ((ALIGN(offset, PAGE_SIZE) != offset) ||
-	    (offset + (off_t)len < offset))
+	    (offset < 0) ||
+	    (len > LONG_MAX - offset))
 		return -EINVAL;
 
 	err = scif_verify_epd(ep);

From dabe14168a929839b4757f496ad6886489078997 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Mon, 15 Feb 2016 10:21:53 +0530
Subject: [PATCH 127/424] PM / OPP: Initialize u_volt_min/max to a valid value

commit c88c395f4a6485f23f81e385c79945d68bcd5c5d upstream.

We kept u_volt_min/max initialized to 0, when only the target voltage is
present in DT, instead of the target/min/max triplet.

This didn't go well with the regulator framework, as on few calls the
min voltage was set to target and max was set to 0 and so resulted in a
kernel crash like below:

kernel BUG at ../drivers/regulator/core.c:216!

[<c0684af4>] (regulator_check_voltage) from [<c06857ac>] (regulator_set_voltage_unlocked+0x58/0x230)
[<c06857ac>] (regulator_set_voltage_unlocked) from [<c06859ac>] (regulator_set_voltage+0x28/0x54)
[<c06859ac>] (regulator_set_voltage) from [<c0775b28>] (_set_opp_voltage+0x30/0x98)
[<c0775b28>] (_set_opp_voltage) from [<c0776630>] (dev_pm_opp_set_rate+0xf0/0x28c)
[<c0776630>] (dev_pm_opp_set_rate) from [<c096f784>] (__cpufreq_driver_target+0x184/0x2b4)
[<c096f784>] (__cpufreq_driver_target) from [<c0973760>] (dbs_check_cpu+0x1b0/0x1f4)
[<c0973760>] (dbs_check_cpu) from [<c0973f30>] (cpufreq_governor_dbs+0x324/0x5c4)
[<c0973f30>] (cpufreq_governor_dbs) from [<c0970958>] (__cpufreq_governor+0xe4/0x1ec)
[<c0970958>] (__cpufreq_governor) from [<c09711e0>] (cpufreq_init_policy+0x64/0x8c)
[<c09711e0>] (cpufreq_init_policy) from [<c09718cc>] (cpufreq_online+0x2fc/0x708)
[<c09718cc>] (cpufreq_online) from [<c0765ff0>] (subsys_interface_register+0x94/0xd8)
[<c0765ff0>] (subsys_interface_register) from [<c0970530>] (cpufreq_register_driver+0x14c/0x19c)
[<c0970530>] (cpufreq_register_driver) from [<c09746dc>] (dt_cpufreq_probe+0x70/0xec)
[<c09746dc>] (dt_cpufreq_probe) from [<c076907c>] (platform_drv_probe+0x4c/0xb0)
[<c076907c>] (platform_drv_probe) from [<c07678e0>] (driver_probe_device+0x214/0x2c0)
[<c07678e0>] (driver_probe_device) from [<c0767a18>] (__driver_attach+0x8c/0x90)
[<c0767a18>] (__driver_attach) from [<c0765c2c>] (bus_for_each_dev+0x68/0x9c)
[<c0765c2c>] (bus_for_each_dev) from [<c0766d78>] (bus_add_driver+0x1a0/0x218)
[<c0766d78>] (bus_add_driver) from [<c076810c>] (driver_register+0x78/0xf8)
[<c076810c>] (driver_register) from [<c0301d74>] (do_one_initcall+0x90/0x1d8)
[<c0301d74>] (do_one_initcall) from [<c1100e14>] (kernel_init_freeable+0x15c/0x1fc)
[<c1100e14>] (kernel_init_freeable) from [<c0b27a0c>] (kernel_init+0x8/0xf0)
[<c0b27a0c>] (kernel_init) from [<c0307d78>] (ret_from_fork+0x14/0x3c)
Code: e1550004 baffffeb e3a00000 e8bd8070 (e7f001f2)

Fix that by initializing u_volt_min/max to the target voltage in such cases.

Reported-and-tested-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Fixes: 274659029c9d (PM / OPP: Add support to parse "operating-points-v2" bindings)
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/power/opp/core.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c
index b8e76f75073b..f8580900c273 100644
--- a/drivers/base/power/opp/core.c
+++ b/drivers/base/power/opp/core.c
@@ -809,8 +809,14 @@ static int opp_parse_supplies(struct dev_pm_opp *opp, struct device *dev)
 	}
 
 	opp->u_volt = microvolt[0];
-	opp->u_volt_min = microvolt[1];
-	opp->u_volt_max = microvolt[2];
+
+	if (count == 1) {
+		opp->u_volt_min = opp->u_volt;
+		opp->u_volt_max = opp->u_volt;
+	} else {
+		opp->u_volt_min = microvolt[1];
+		opp->u_volt_max = microvolt[2];
+	}
 
 	if (!of_property_read_u32(opp->np, "opp-microamp", &val))
 		opp->u_amp = val;

From cab4c949ade11ac67f69bc05124c9d6ffef31917 Mon Sep 17 00:00:00 2001
From: Jon Hunter <jonathanh@nvidia.com>
Date: Fri, 4 Mar 2016 10:55:14 +0000
Subject: [PATCH 128/424] PM / Domains: Fix removal of a subdomain

commit beda5fc1ff9b527059290a97b672d2ee0eb7b92f upstream.

Commit 30e7a65b3fdb (PM / Domains: Ensure subdomain is not in use
before removing) added a test to ensure that a subdomain is not a
master to another subdomain or if any devices are using the subdomain
before removing. This change incorrectly used the "slave_links" list to
determine if the subdomain is a master to another subdomain, where it
should have been using the "master_links" list instead. The
"slave_links" list will never be empty for a subdomain and so a
subdomain can never be removed. Fix this by testing if the
"master_links" list is empty instead.

Fixes: 30e7a65b3fdb (PM / Domains: Ensure subdomain is not in use before removing)
Signed-off-by: Jon Hunter <jonathanh@nvidia.com>
Reviewed-by: Thierry Reding <treding@nvidia.com>
Acked-by: Ulf Hansson <ulf.hansson@linaro.org>
Acked-by: Kevin Hilman <khilman@baylibre.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/power/domain.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 65f50eccd49b..a48824deabc5 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -1381,7 +1381,7 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd,
 
 	mutex_lock(&genpd->lock);
 
-	if (!list_empty(&subdomain->slave_links) || subdomain->device_count) {
+	if (!list_empty(&subdomain->master_links) || subdomain->device_count) {
 		pr_warn("%s: unable to remove subdomain %s\n", genpd->name,
 			subdomain->name);
 		ret = -EBUSY;

From 1392ec2a303a53512ad16dcf1ab31e77b08c52d9 Mon Sep 17 00:00:00 2001
From: Alexander Kochetkov <al.kochet@gmail.com>
Date: Sun, 6 Mar 2016 12:43:57 +0300
Subject: [PATCH 129/424] rtc: hym8563: fix invalid year calculation

commit d5861262210067fc01b2fb4f7af2fd85a3453f15 upstream.

Year field must be in BCD format, according to
hym8563 datasheet.

Due to the bug year 2016 became 2010.

Fixes: dcaf03849352 ("rtc: add hym8563 rtc-driver")
Signed-off-by: Alexander Kochetkov <al.kochet@gmail.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/rtc/rtc-hym8563.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/rtc/rtc-hym8563.c b/drivers/rtc/rtc-hym8563.c
index 097325d96db5..b1b4746a0eab 100644
--- a/drivers/rtc/rtc-hym8563.c
+++ b/drivers/rtc/rtc-hym8563.c
@@ -144,7 +144,7 @@ static int hym8563_rtc_set_time(struct device *dev, struct rtc_time *tm)
 	 * it does not seem to carry it over a subsequent write/read.
 	 * So we'll limit ourself to 100 years, starting at 2000 for now.
 	 */
-	buf[6] = tm->tm_year - 100;
+	buf[6] = bin2bcd(tm->tm_year - 100);
 
 	/*
 	 * CTL1 only contains TEST-mode bits apart from stop,

From 041f2ca3ff039ebe10a48a775b29bf3fa7c993fa Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Tue, 1 Mar 2016 09:50:01 +0100
Subject: [PATCH 130/424] rtc: vr41xx: Wire up alarm_irq_enable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit a25f4a95ec3cded34c1250364eba704c5e4fdac4 upstream.

drivers/rtc/rtc-vr41xx.c:229: warning: ‘vr41xx_rtc_alarm_irq_enable’ defined but not used

Apparently the conversion to alarm_irq_enable forgot to wire up the
callback.

Fixes: 16380c153a69c378 ("RTC: Convert rtc drivers to use the alarm_irq_enable method")
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/rtc/rtc-vr41xx.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/rtc/rtc-vr41xx.c b/drivers/rtc/rtc-vr41xx.c
index f64c282275b3..e1b86bb01062 100644
--- a/drivers/rtc/rtc-vr41xx.c
+++ b/drivers/rtc/rtc-vr41xx.c
@@ -272,12 +272,13 @@ static irqreturn_t rtclong1_interrupt(int irq, void *dev_id)
 }
 
 static const struct rtc_class_ops vr41xx_rtc_ops = {
-	.release	= vr41xx_rtc_release,
-	.ioctl		= vr41xx_rtc_ioctl,
-	.read_time	= vr41xx_rtc_read_time,
-	.set_time	= vr41xx_rtc_set_time,
-	.read_alarm	= vr41xx_rtc_read_alarm,
-	.set_alarm	= vr41xx_rtc_set_alarm,
+	.release		= vr41xx_rtc_release,
+	.ioctl			= vr41xx_rtc_ioctl,
+	.read_time		= vr41xx_rtc_read_time,
+	.set_time		= vr41xx_rtc_set_time,
+	.read_alarm		= vr41xx_rtc_read_alarm,
+	.set_alarm		= vr41xx_rtc_set_alarm,
+	.alarm_irq_enable	= vr41xx_rtc_alarm_irq_enable,
 };
 
 static int rtc_probe(struct platform_device *pdev)

From 83fe55baa881f1d7fed118b4f4ec3bf325d7285a Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 2 Mar 2016 13:07:45 +0300
Subject: [PATCH 131/424] rtc: ds1685: passing bogus values to irq_restore

commit 8c09b9fdecab1f4a289f07b46e2ad174b6641928 upstream.

We call spin_lock_irqrestore with "flags" set to zero instead of to the
value from spin_lock_irqsave().

Fixes: aaaf5fbf56f1 ('rtc: add driver for DS1685 family of real time clocks')
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/rtc/rtc-ds1685.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/rtc/rtc-ds1685.c b/drivers/rtc/rtc-ds1685.c
index 05a51ef52703..d5c1b057a739 100644
--- a/drivers/rtc/rtc-ds1685.c
+++ b/drivers/rtc/rtc-ds1685.c
@@ -187,9 +187,9 @@ ds1685_rtc_end_data_access(struct ds1685_priv *rtc)
  * Only use this where you are certain another lock will not be held.
  */
 static inline void
-ds1685_rtc_begin_ctrl_access(struct ds1685_priv *rtc, unsigned long flags)
+ds1685_rtc_begin_ctrl_access(struct ds1685_priv *rtc, unsigned long *flags)
 {
-	spin_lock_irqsave(&rtc->lock, flags);
+	spin_lock_irqsave(&rtc->lock, *flags);
 	ds1685_rtc_switch_to_bank1(rtc);
 }
 
@@ -1304,7 +1304,7 @@ ds1685_rtc_sysfs_ctrl_regs_store(struct device *dev,
 {
 	struct ds1685_priv *rtc = dev_get_drvdata(dev);
 	u8 reg = 0, bit = 0, tmp;
-	unsigned long flags = 0;
+	unsigned long flags;
 	long int val = 0;
 	const struct ds1685_rtc_ctrl_regs *reg_info =
 		ds1685_rtc_sysfs_ctrl_regs_lookup(attr->attr.name);
@@ -1325,7 +1325,7 @@ ds1685_rtc_sysfs_ctrl_regs_store(struct device *dev,
 	bit = reg_info->bit;
 
 	/* Safe to spinlock during a write. */
-	ds1685_rtc_begin_ctrl_access(rtc, flags);
+	ds1685_rtc_begin_ctrl_access(rtc, &flags);
 	tmp = rtc->read(rtc, reg);
 	rtc->write(rtc, reg, (val ? (tmp | bit) : (tmp & ~(bit))));
 	ds1685_rtc_end_ctrl_access(rtc, flags);

From 11dd7f9a1ed13794cc77bd144b3aa9dd17c4030f Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Thu, 21 Jan 2016 13:24:21 +0100
Subject: [PATCH 132/424] rtc: rx8025: remove rv8803 id

commit aaa3cee5deffa28415a6e1852c5afae0f5d210e2 upstream.

The rv8803 has its own driver that should be used. Remove its id from
the rx8025 driver.

Fixes: b1f9d790b59dc04f8813a49a92ddd8651770ffee
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/rtc/rtc-rx8025.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/rtc/rtc-rx8025.c b/drivers/rtc/rtc-rx8025.c
index bd911bafb809..17341feadad1 100644
--- a/drivers/rtc/rtc-rx8025.c
+++ b/drivers/rtc/rtc-rx8025.c
@@ -65,7 +65,6 @@
 
 static const struct i2c_device_id rx8025_id[] = {
 	{ "rx8025", 0 },
-	{ "rv8803", 1 },
 	{ }
 };
 MODULE_DEVICE_TABLE(i2c, rx8025_id);

From f55131145b8d16d942ddb363c3e1b72cf4775384 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Date: Thu, 4 Feb 2016 09:26:35 +0900
Subject: [PATCH 133/424] rtc: max77686: Properly handle regmap_irq_get_virq()
 error code

commit fb166ba1d7f0a662f7332f4ff660a0d6f4d76915 upstream.

The regmap_irq_get_virq() can return 0 or -EINVAL in error conditions
but driver checked only for value of 0.

This could lead to a cast of -EINVAL to an unsigned int used as a
interrupt number for devm_request_threaded_irq(). Although this is not
yet fatal (devm_request_threaded_irq() will just fail with -EINVAL) but
might be a misleading when diagnosing errors.

Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Fixes: 6f1c1e71d933 ("mfd: max77686: Convert to use regmap_irq")
Reviewed-by: Javier Martinez Canillas <javier@osg.samsung.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/rtc/rtc-max77686.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/rtc/rtc-max77686.c b/drivers/rtc/rtc-max77686.c
index 7184a0eda793..725dccae24e7 100644
--- a/drivers/rtc/rtc-max77686.c
+++ b/drivers/rtc/rtc-max77686.c
@@ -465,7 +465,7 @@ static int max77686_rtc_probe(struct platform_device *pdev)
 
 	info->virq = regmap_irq_get_virq(max77686->rtc_irq_data,
 					 MAX77686_RTCIRQ_RTCA1);
-	if (!info->virq) {
+	if (info->virq <= 0) {
 		ret = -ENXIO;
 		goto err_rtc;
 	}

From ab2c82dcd6cdb4d4871b151123f1523f2285a27e Mon Sep 17 00:00:00 2001
From: Michael Hennerich <michael.hennerich@analog.com>
Date: Mon, 22 Feb 2016 10:20:24 +0100
Subject: [PATCH 134/424] drivers/misc/ad525x_dpot: AD5274 fix RDAC read back
 errors

commit f3df53e4d70b5736368a8fe8aa1bb70c1cb1f577 upstream.

Fix RDAC read back errors caused by a typo. Value must shift by 2.

Fixes: a4bd394956f2 ("drivers/misc/ad525x_dpot.c: new features")
Signed-off-by: Michael Hennerich <michael.hennerich@analog.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/ad525x_dpot.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/misc/ad525x_dpot.c b/drivers/misc/ad525x_dpot.c
index 15e88078ba1e..f1a0b99f5a9a 100644
--- a/drivers/misc/ad525x_dpot.c
+++ b/drivers/misc/ad525x_dpot.c
@@ -216,7 +216,7 @@ static s32 dpot_read_i2c(struct dpot_data *dpot, u8 reg)
 			 */
 			value = swab16(value);
 
-			if (dpot->uid == DPOT_UID(AD5271_ID))
+			if (dpot->uid == DPOT_UID(AD5274_ID))
 				value = value >> 2;
 		return value;
 	default:

From 36828721fbbe4b53a53d62847b476f314123c819 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 17 Feb 2016 10:57:19 -0300
Subject: [PATCH 135/424] perf evlist: Reference count the cpu and thread maps
 at set_maps()

commit a55e5663761366fb883f6f25375dd68bc958b9db upstream.

We were dropping the reference we possibly held but not obtaining one
for the new maps, which we will drop at perf_evlist__delete(), fix it.

This was caught by Steven Noonan in some of the machines which would
produce this output when caught by glibc debug mechanisms:

  $ sudo perf test 21
  21: Test object code reading                                 :***
  Error in `perf': corrupted double-linked list: 0x00000000023ffcd0 ***
  ======= Backtrace: =========
  /usr/lib/libc.so.6(+0x72055)[0x7f25be0f3055]
  /usr/lib/libc.so.6(+0x779b6)[0x7f25be0f89b6]
  /usr/lib/libc.so.6(+0x7a0ed)[0x7f25be0fb0ed]
  /usr/lib/libc.so.6(__libc_calloc+0xba)[0x7f25be0fceda]
  perf(parse_events_lex_init_extra+0x38)[0x4cfff8]
  perf(parse_events+0x55)[0x4a0615]
  perf(perf_evlist__config+0xcf)[0x4eeb2f]
  perf[0x479f82]
  perf(test__code_reading+0x1e)[0x47ad4e]
  perf(cmd_test+0x5dd)[0x46452d]
  perf[0x47f4e3]
  perf(main+0x603)[0x42c723]
  /usr/lib/libc.so.6(__libc_start_main+0xf0)[0x7f25be0a1610]
  perf(_start+0x29)[0x42c859]

Further investigation using valgrind led to the reference count imbalance fixed
in this patch.

Reported-and-Tested-by: Steven Noonan <steven@uplinklabs.net>
Report-Link: http://lkml.kernel.org/r/CAKbGBLjC2Dx5vshxyGmQkcD+VwiAQLbHoXA9i7kvRB2-2opHZQ@mail.gmail.com
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Fixes: f30a79b012e5 ("perf tools: Add reference counting for cpu_map object")
Link: http://lkml.kernel.org/n/tip-j0u1bdhr47sa511sgg76kb8h@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/perf/util/evlist.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index d1392194a9a9..b4b96120fc3b 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1211,12 +1211,12 @@ void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus,
 	 */
 	if (cpus != evlist->cpus) {
 		cpu_map__put(evlist->cpus);
-		evlist->cpus = cpus;
+		evlist->cpus = cpu_map__get(cpus);
 	}
 
 	if (threads != evlist->threads) {
 		thread_map__put(evlist->threads);
-		evlist->threads = threads;
+		evlist->threads = thread_map__get(threads);
 	}
 
 	perf_evlist__propagate_maps(evlist);

From 8481fdf6dc13e3a2b3f7e75e414b5eab3771329d Mon Sep 17 00:00:00 2001
From: Karol Herbst <nouveau@karolherbst.de>
Date: Thu, 3 Mar 2016 02:03:11 +0100
Subject: [PATCH 136/424] x86/mm/kmmio: Fix mmiotrace for hugepages

commit cfa52c0cfa4d727aa3e457bf29aeff296c528a08 upstream.

Because Linux might use bigger pages than the 4K pages to handle those mmio
ioremaps, the kmmio code shouldn't rely on the pade id as it currently does.

Using the memory address instead of the page id lets us look up how big the
page is and what its base address is, so that we won't get a page fault
within the same page twice anymore.

Tested-by: Pierre Moreau <pierre.morrow@free.fr>
Signed-off-by: Karol Herbst <nouveau@karolherbst.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Luis R. Rodriguez <mcgrof@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Toshi Kani <toshi.kani@hp.com>
Cc: linux-mm@kvack.org
Cc: linux-x86_64@vger.kernel.org
Cc: nouveau@lists.freedesktop.org
Cc: pq@iki.fi
Cc: rostedt@goodmis.org
Link: http://lkml.kernel.org/r/1456966991-6861-1-git-send-email-nouveau@karolherbst.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/mm/kmmio.c | 88 ++++++++++++++++++++++++++++++---------------
 1 file changed, 59 insertions(+), 29 deletions(-)

diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 637ab34ed632..ddb2244b06a1 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -33,7 +33,7 @@
 struct kmmio_fault_page {
 	struct list_head list;
 	struct kmmio_fault_page *release_next;
-	unsigned long page; /* location of the fault page */
+	unsigned long addr; /* the requested address */
 	pteval_t old_presence; /* page presence prior to arming */
 	bool armed;
 
@@ -70,9 +70,16 @@ unsigned int kmmio_count;
 static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE];
 static LIST_HEAD(kmmio_probes);
 
-static struct list_head *kmmio_page_list(unsigned long page)
+static struct list_head *kmmio_page_list(unsigned long addr)
 {
-	return &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)];
+	unsigned int l;
+	pte_t *pte = lookup_address(addr, &l);
+
+	if (!pte)
+		return NULL;
+	addr &= page_level_mask(l);
+
+	return &kmmio_page_table[hash_long(addr, KMMIO_PAGE_HASH_BITS)];
 }
 
 /* Accessed per-cpu */
@@ -98,15 +105,19 @@ static struct kmmio_probe *get_kmmio_probe(unsigned long addr)
 }
 
 /* You must be holding RCU read lock. */
-static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page)
+static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long addr)
 {
 	struct list_head *head;
 	struct kmmio_fault_page *f;
+	unsigned int l;
+	pte_t *pte = lookup_address(addr, &l);
 
-	page &= PAGE_MASK;
-	head = kmmio_page_list(page);
+	if (!pte)
+		return NULL;
+	addr &= page_level_mask(l);
+	head = kmmio_page_list(addr);
 	list_for_each_entry_rcu(f, head, list) {
-		if (f->page == page)
+		if (f->addr == addr)
 			return f;
 	}
 	return NULL;
@@ -137,10 +148,10 @@ static void clear_pte_presence(pte_t *pte, bool clear, pteval_t *old)
 static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
 {
 	unsigned int level;
-	pte_t *pte = lookup_address(f->page, &level);
+	pte_t *pte = lookup_address(f->addr, &level);
 
 	if (!pte) {
-		pr_err("no pte for page 0x%08lx\n", f->page);
+		pr_err("no pte for addr 0x%08lx\n", f->addr);
 		return -1;
 	}
 
@@ -156,7 +167,7 @@ static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
 		return -1;
 	}
 
-	__flush_tlb_one(f->page);
+	__flush_tlb_one(f->addr);
 	return 0;
 }
 
@@ -176,12 +187,12 @@ static int arm_kmmio_fault_page(struct kmmio_fault_page *f)
 	int ret;
 	WARN_ONCE(f->armed, KERN_ERR pr_fmt("kmmio page already armed.\n"));
 	if (f->armed) {
-		pr_warning("double-arm: page 0x%08lx, ref %d, old %d\n",
-			   f->page, f->count, !!f->old_presence);
+		pr_warning("double-arm: addr 0x%08lx, ref %d, old %d\n",
+			   f->addr, f->count, !!f->old_presence);
 	}
 	ret = clear_page_presence(f, true);
-	WARN_ONCE(ret < 0, KERN_ERR pr_fmt("arming 0x%08lx failed.\n"),
-		  f->page);
+	WARN_ONCE(ret < 0, KERN_ERR pr_fmt("arming at 0x%08lx failed.\n"),
+		  f->addr);
 	f->armed = true;
 	return ret;
 }
@@ -191,7 +202,7 @@ static void disarm_kmmio_fault_page(struct kmmio_fault_page *f)
 {
 	int ret = clear_page_presence(f, false);
 	WARN_ONCE(ret < 0,
-			KERN_ERR "kmmio disarming 0x%08lx failed.\n", f->page);
+			KERN_ERR "kmmio disarming at 0x%08lx failed.\n", f->addr);
 	f->armed = false;
 }
 
@@ -215,6 +226,12 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
 	struct kmmio_context *ctx;
 	struct kmmio_fault_page *faultpage;
 	int ret = 0; /* default to fault not handled */
+	unsigned long page_base = addr;
+	unsigned int l;
+	pte_t *pte = lookup_address(addr, &l);
+	if (!pte)
+		return -EINVAL;
+	page_base &= page_level_mask(l);
 
 	/*
 	 * Preemption is now disabled to prevent process switch during
@@ -227,7 +244,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
 	preempt_disable();
 	rcu_read_lock();
 
-	faultpage = get_kmmio_fault_page(addr);
+	faultpage = get_kmmio_fault_page(page_base);
 	if (!faultpage) {
 		/*
 		 * Either this page fault is not caused by kmmio, or
@@ -239,7 +256,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
 
 	ctx = &get_cpu_var(kmmio_ctx);
 	if (ctx->active) {
-		if (addr == ctx->addr) {
+		if (page_base == ctx->addr) {
 			/*
 			 * A second fault on the same page means some other
 			 * condition needs handling by do_page_fault(), the
@@ -267,9 +284,9 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
 	ctx->active++;
 
 	ctx->fpage = faultpage;
-	ctx->probe = get_kmmio_probe(addr);
+	ctx->probe = get_kmmio_probe(page_base);
 	ctx->saved_flags = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF));
-	ctx->addr = addr;
+	ctx->addr = page_base;
 
 	if (ctx->probe && ctx->probe->pre_handler)
 		ctx->probe->pre_handler(ctx->probe, regs, addr);
@@ -354,12 +371,11 @@ out:
 }
 
 /* You must be holding kmmio_lock. */
-static int add_kmmio_fault_page(unsigned long page)
+static int add_kmmio_fault_page(unsigned long addr)
 {
 	struct kmmio_fault_page *f;
 
-	page &= PAGE_MASK;
-	f = get_kmmio_fault_page(page);
+	f = get_kmmio_fault_page(addr);
 	if (f) {
 		if (!f->count)
 			arm_kmmio_fault_page(f);
@@ -372,26 +388,25 @@ static int add_kmmio_fault_page(unsigned long page)
 		return -1;
 
 	f->count = 1;
-	f->page = page;
+	f->addr = addr;
 
 	if (arm_kmmio_fault_page(f)) {
 		kfree(f);
 		return -1;
 	}
 
-	list_add_rcu(&f->list, kmmio_page_list(f->page));
+	list_add_rcu(&f->list, kmmio_page_list(f->addr));
 
 	return 0;
 }
 
 /* You must be holding kmmio_lock. */
-static void release_kmmio_fault_page(unsigned long page,
+static void release_kmmio_fault_page(unsigned long addr,
 				struct kmmio_fault_page **release_list)
 {
 	struct kmmio_fault_page *f;
 
-	page &= PAGE_MASK;
-	f = get_kmmio_fault_page(page);
+	f = get_kmmio_fault_page(addr);
 	if (!f)
 		return;
 
@@ -420,18 +435,27 @@ int register_kmmio_probe(struct kmmio_probe *p)
 	int ret = 0;
 	unsigned long size = 0;
 	const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
+	unsigned int l;
+	pte_t *pte;
 
 	spin_lock_irqsave(&kmmio_lock, flags);
 	if (get_kmmio_probe(p->addr)) {
 		ret = -EEXIST;
 		goto out;
 	}
+
+	pte = lookup_address(p->addr, &l);
+	if (!pte) {
+		ret = -EINVAL;
+		goto out;
+	}
+
 	kmmio_count++;
 	list_add_rcu(&p->list, &kmmio_probes);
 	while (size < size_lim) {
 		if (add_kmmio_fault_page(p->addr + size))
 			pr_err("Unable to set page fault.\n");
-		size += PAGE_SIZE;
+		size += page_level_size(l);
 	}
 out:
 	spin_unlock_irqrestore(&kmmio_lock, flags);
@@ -506,11 +530,17 @@ void unregister_kmmio_probe(struct kmmio_probe *p)
 	const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
 	struct kmmio_fault_page *release_list = NULL;
 	struct kmmio_delayed_release *drelease;
+	unsigned int l;
+	pte_t *pte;
+
+	pte = lookup_address(p->addr, &l);
+	if (!pte)
+		return;
 
 	spin_lock_irqsave(&kmmio_lock, flags);
 	while (size < size_lim) {
 		release_kmmio_fault_page(p->addr + size, &release_list);
-		size += PAGE_SIZE;
+		size += page_level_size(l);
 	}
 	list_del_rcu(&p->list);
 	kmmio_count--;

From c745297ba18668f8a760493d7d769563c818616e Mon Sep 17 00:00:00 2001
From: Eryu Guan <guaneryu@gmail.com>
Date: Sat, 12 Mar 2016 21:40:32 -0500
Subject: [PATCH 137/424] ext4: fix NULL pointer dereference in
 ext4_mark_inode_dirty()

commit 5e1021f2b6dff1a86a468a1424d59faae2bc63c1 upstream.

ext4_reserve_inode_write() in ext4_mark_inode_dirty() could fail on
error (e.g. EIO) and iloc.bh can be NULL in this case. But the error is
ignored in the following "if" condition and ext4_expand_extra_isize()
might be called with NULL iloc.bh set, which triggers NULL pointer
dereference.

This is uncovered by commit 8b4953e13f4c ("ext4: reserve code points for
the project quota feature"), which enlarges the ext4_inode size, and
run the following script on new kernel but with old mke2fs:

  #/bin/bash
  mnt=/mnt/ext4
  devname=ext4-error
  dev=/dev/mapper/$devname
  fsimg=/home/fs.img

  trap cleanup 0 1 2 3 9 15

  cleanup()
  {
          umount $mnt >/dev/null 2>&1
          dmsetup remove $devname
          losetup -d $backend_dev
          rm -f $fsimg
          exit 0
  }

  rm -f $fsimg
  fallocate -l 1g $fsimg
  backend_dev=`losetup -f --show $fsimg`
  devsize=`blockdev --getsz $backend_dev`

  good_tab="0 $devsize linear $backend_dev 0"
  error_tab="0 $devsize error $backend_dev 0"

  dmsetup create $devname --table "$good_tab"

  mkfs -t ext4 $dev
  mount -t ext4 -o errors=continue,strictatime $dev $mnt

  dmsetup load $devname --table "$error_tab" && dmsetup resume $devname
  echo 3 > /proc/sys/vm/drop_caches
  ls -l $mnt
  exit 0

[ Patch changed to simplify the function a tiny bit. -- Ted ]

Signed-off-by: Eryu Guan <guaneryu@gmail.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/inode.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 06bda0361e7c..547600556bb9 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5109,6 +5109,8 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
 	might_sleep();
 	trace_ext4_mark_inode_dirty(inode, _RET_IP_);
 	err = ext4_reserve_inode_write(handle, inode, &iloc);
+	if (err)
+		return err;
 	if (ext4_handle_valid(handle) &&
 	    EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&
 	    !ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) {
@@ -5139,9 +5141,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
 			}
 		}
 	}
-	if (!err)
-		err = ext4_mark_iloc_dirty(handle, inode, &iloc);
-	return err;
+	return ext4_mark_iloc_dirty(handle, inode, &iloc);
 }
 
 /*

From 447ea0a34b78213dd668bf7d0a2b7add1c5675e6 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Tue, 5 Jan 2016 19:36:37 +0100
Subject: [PATCH 138/424] serial: sh-sci: Remove cpufreq notifier to fix
 crash/deadlock

commit ff1cab374ad98f4b9f408525ca9c08992b4ed784 upstream.

The BSP team noticed that there is spin/mutex lock issue on sh-sci when
CPUFREQ is used.  The issue is that the notifier function may call
mutex_lock() while the spinlock is held, which can lead to a BUG().
This may happen if CPUFREQ is changed while another CPU calls
clk_get_rate().

Taking the spinlock was added to the notifier function in commit
e552de2413edad1a ("sh-sci: add platform device private data"), to
protect the list of serial ports against modification during traversal.
At that time the Common Clock Framework didn't exist yet, and
clk_get_rate() just returned clk->rate without taking a mutex.
Note that since commit d535a2305facf9b4 ("serial: sh-sci: Require a
device per port mapping."), there's no longer a list of serial ports to
traverse, and taking the spinlock became superfluous.

To fix the issue, just remove the cpufreq notifier:
  1. The notifier doesn't work correctly: all it does is update stored
     clock rates; it does not update the divider in the hardware.
     The divider will only be updated when calling sci_set_termios().
     I believe this was broken back in 2004, when the old
     drivers/char/sh-sci.c driver (where the notifier did update the
     divider) was replaced by drivers/serial/sh-sci.c (where the
     notifier just updated port->uartclk).
     Cfr. full-history-linux commits 6f8deaef2e9675d9 ("[PATCH] sh: port
     sh-sci driver to the new API") and 3f73fe878dc9210a ("[PATCH]
     Remove old sh-sci driver").
  2. On modern SoCs, the sh-sci parent clock rate is no longer related
     to the CPU clock rate anyway, so using a cpufreq notifier is
     futile.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/sh-sci.c | 39 -------------------------------------
 1 file changed, 39 deletions(-)

diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c
index 51c7507b0444..63a06ab6ba03 100644
--- a/drivers/tty/serial/sh-sci.c
+++ b/drivers/tty/serial/sh-sci.c
@@ -38,7 +38,6 @@
 #include <linux/major.h>
 #include <linux/module.h>
 #include <linux/mm.h>
-#include <linux/notifier.h>
 #include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
@@ -116,8 +115,6 @@ struct sci_port {
 	struct timer_list		rx_timer;
 	unsigned int			rx_timeout;
 #endif
-
-	struct notifier_block		freq_transition;
 };
 
 #define SCI_NPORTS CONFIG_SERIAL_SH_SCI_NR_UARTS
@@ -1606,29 +1603,6 @@ static irqreturn_t sci_mpxed_interrupt(int irq, void *ptr)
 	return ret;
 }
 
-/*
- * Here we define a transition notifier so that we can update all of our
- * ports' baud rate when the peripheral clock changes.
- */
-static int sci_notifier(struct notifier_block *self,
-			unsigned long phase, void *p)
-{
-	struct sci_port *sci_port;
-	unsigned long flags;
-
-	sci_port = container_of(self, struct sci_port, freq_transition);
-
-	if (phase == CPUFREQ_POSTCHANGE) {
-		struct uart_port *port = &sci_port->port;
-
-		spin_lock_irqsave(&port->lock, flags);
-		port->uartclk = clk_get_rate(sci_port->iclk);
-		spin_unlock_irqrestore(&port->lock, flags);
-	}
-
-	return NOTIFY_OK;
-}
-
 static const struct sci_irq_desc {
 	const char	*desc;
 	irq_handler_t	handler;
@@ -2559,9 +2533,6 @@ static int sci_remove(struct platform_device *dev)
 {
 	struct sci_port *port = platform_get_drvdata(dev);
 
-	cpufreq_unregister_notifier(&port->freq_transition,
-				    CPUFREQ_TRANSITION_NOTIFIER);
-
 	uart_remove_one_port(&sci_uart_driver, &port->port);
 
 	sci_cleanup_single(port);
@@ -2714,16 +2685,6 @@ static int sci_probe(struct platform_device *dev)
 	if (ret)
 		return ret;
 
-	sp->freq_transition.notifier_call = sci_notifier;
-
-	ret = cpufreq_register_notifier(&sp->freq_transition,
-					CPUFREQ_TRANSITION_NOTIFIER);
-	if (unlikely(ret < 0)) {
-		uart_remove_one_port(&sci_uart_driver, &sp->port);
-		sci_cleanup_single(sp);
-		return ret;
-	}
-
 #ifdef CONFIG_SH_STANDARD_BIOS
 	sh_bios_gdb_detach();
 #endif

From 87261de30fd8e5ebd441cd2f05df73ddf04c2af2 Mon Sep 17 00:00:00 2001
From: Cyrille Pitchen <cyrille.pitchen@atmel.com>
Date: Wed, 3 Feb 2016 14:26:46 +0100
Subject: [PATCH 139/424] mtd: spi-nor: remove micron_quad_enable()

commit 3b5394a3ccffbfa1d1d448d48742853a862822c4 upstream.

This patch remove the micron_quad_enable() function which force the Quad
SPI mode. However, once this mode is enabled, the Micron memory expect ALL
commands to use the SPI 4-4-4 protocol. Hence a failure does occur when
calling spi_nor_wait_till_ready() right after the update of the Enhanced
Volatile Configuration Register (EVCR) in the micron_quad_enable() as
the SPI controller driver is not aware about the protocol change.

Since there is almost no performance increase using Fast Read 4-4-4
commands instead of Fast Read 1-1-4 commands, we rather keep on using the
Extended SPI mode than enabling the Quad SPI mode.

Let's take the example of the pretty standard use of 8 dummy cycles during
Fast Read operations on 64KB erase sectors:

Fast Read 1-1-4 requires 8 cycles for the command, then 24 cycles for the
3byte address followed by 8 dummy clock cycles and finally 65536*2 cycles
for the read data; so 131112 clock cycles.

On the other hand the Fast Read 4-4-4 would require 2 cycles for the
command, then 6 cycles for the 3byte address followed by 8 dummy clock
cycles and finally 65536*2 cycles for the read data. So 131088 clock
cycles. The theorical bandwidth increase is 0.0%.

Now using Fast Read operations on 512byte pages:
Fast Read 1-1-4 needs 8+24+8+(512*2) = 1064 clock cycles whereas Fast
Read 4-4-4 would requires 2+6+8+(512*2) = 1040 clock cycles. Hence the
theorical bandwidth increase is 2.3%.
Consecutive reads for non sequential pages is not a relevant use case so
The Quad SPI mode is not worth it.

mtd_speedtest seems to confirm these figures.

Signed-off-by: Cyrille Pitchen <cyrille.pitchen@atmel.com>
Fixes: 548cd3ab54da ("mtd: spi-nor: Add quad I/O support for Micron SPI NOR")
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mtd/spi-nor/spi-nor.c | 46 +----------------------------------
 1 file changed, 1 insertion(+), 45 deletions(-)

diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index 32477c4eb421..37e4135ab213 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -1067,45 +1067,6 @@ static int spansion_quad_enable(struct spi_nor *nor)
 	return 0;
 }
 
-static int micron_quad_enable(struct spi_nor *nor)
-{
-	int ret;
-	u8 val;
-
-	ret = nor->read_reg(nor, SPINOR_OP_RD_EVCR, &val, 1);
-	if (ret < 0) {
-		dev_err(nor->dev, "error %d reading EVCR\n", ret);
-		return ret;
-	}
-
-	write_enable(nor);
-
-	/* set EVCR, enable quad I/O */
-	nor->cmd_buf[0] = val & ~EVCR_QUAD_EN_MICRON;
-	ret = nor->write_reg(nor, SPINOR_OP_WD_EVCR, nor->cmd_buf, 1);
-	if (ret < 0) {
-		dev_err(nor->dev, "error while writing EVCR register\n");
-		return ret;
-	}
-
-	ret = spi_nor_wait_till_ready(nor);
-	if (ret)
-		return ret;
-
-	/* read EVCR and check it */
-	ret = nor->read_reg(nor, SPINOR_OP_RD_EVCR, &val, 1);
-	if (ret < 0) {
-		dev_err(nor->dev, "error %d reading EVCR\n", ret);
-		return ret;
-	}
-	if (val & EVCR_QUAD_EN_MICRON) {
-		dev_err(nor->dev, "Micron EVCR Quad bit not clear\n");
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
 static int set_quad_mode(struct spi_nor *nor, const struct flash_info *info)
 {
 	int status;
@@ -1119,12 +1080,7 @@ static int set_quad_mode(struct spi_nor *nor, const struct flash_info *info)
 		}
 		return status;
 	case SNOR_MFR_MICRON:
-		status = micron_quad_enable(nor);
-		if (status) {
-			dev_err(nor->dev, "Micron quad-read not enabled\n");
-			return -EINVAL;
-		}
-		return status;
+		return 0;
 	default:
 		status = spansion_quad_enable(nor);
 		if (status) {

From 67891850e58b0190005441cf4f54da957fed8e01 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Wed, 24 Feb 2016 16:07:23 -0800
Subject: [PATCH 140/424] mtd: brcmnand: Fix v7.1 register offsets

commit d267aefc54a28efc5bda7f009598dc83b5f98734 upstream.

The BRCMNAND controller revision 7.1 is almost 100% compatible with the
previous v6.0 register offset layout, except for the Correctable Error
Reporting Threshold registers. Fix this by adding another table with the
correct offsets for CORR_THRESHOLD and CORR_THRESHOLD_EXT.

Fixes: 27c5b17cd1b1 ("mtd: nand: add NAND driver "library" for Broadcom STB NAND controller")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mtd/nand/brcmnand/brcmnand.c | 34 +++++++++++++++++++++++++++-
 1 file changed, 33 insertions(+), 1 deletion(-)

diff --git a/drivers/mtd/nand/brcmnand/brcmnand.c b/drivers/mtd/nand/brcmnand/brcmnand.c
index 12c6190c6e33..4a07ba1195b5 100644
--- a/drivers/mtd/nand/brcmnand/brcmnand.c
+++ b/drivers/mtd/nand/brcmnand/brcmnand.c
@@ -309,6 +309,36 @@ static const u16 brcmnand_regs_v60[] = {
 	[BRCMNAND_FC_BASE]		= 0x400,
 };
 
+/* BRCMNAND v7.1 */
+static const u16 brcmnand_regs_v71[] = {
+	[BRCMNAND_CMD_START]		=  0x04,
+	[BRCMNAND_CMD_EXT_ADDRESS]	=  0x08,
+	[BRCMNAND_CMD_ADDRESS]		=  0x0c,
+	[BRCMNAND_INTFC_STATUS]		=  0x14,
+	[BRCMNAND_CS_SELECT]		=  0x18,
+	[BRCMNAND_CS_XOR]		=  0x1c,
+	[BRCMNAND_LL_OP]		=  0x20,
+	[BRCMNAND_CS0_BASE]		=  0x50,
+	[BRCMNAND_CS1_BASE]		=     0,
+	[BRCMNAND_CORR_THRESHOLD]	=  0xdc,
+	[BRCMNAND_CORR_THRESHOLD_EXT]	=  0xe0,
+	[BRCMNAND_UNCORR_COUNT]		=  0xfc,
+	[BRCMNAND_CORR_COUNT]		= 0x100,
+	[BRCMNAND_CORR_EXT_ADDR]	= 0x10c,
+	[BRCMNAND_CORR_ADDR]		= 0x110,
+	[BRCMNAND_UNCORR_EXT_ADDR]	= 0x114,
+	[BRCMNAND_UNCORR_ADDR]		= 0x118,
+	[BRCMNAND_SEMAPHORE]		= 0x150,
+	[BRCMNAND_ID]			= 0x194,
+	[BRCMNAND_ID_EXT]		= 0x198,
+	[BRCMNAND_LL_RDATA]		= 0x19c,
+	[BRCMNAND_OOB_READ_BASE]	= 0x200,
+	[BRCMNAND_OOB_READ_10_BASE]	=     0,
+	[BRCMNAND_OOB_WRITE_BASE]	= 0x280,
+	[BRCMNAND_OOB_WRITE_10_BASE]	=     0,
+	[BRCMNAND_FC_BASE]		= 0x400,
+};
+
 enum brcmnand_cs_reg {
 	BRCMNAND_CS_CFG_EXT = 0,
 	BRCMNAND_CS_CFG,
@@ -404,7 +434,9 @@ static int brcmnand_revision_init(struct brcmnand_controller *ctrl)
 	}
 
 	/* Register offsets */
-	if (ctrl->nand_version >= 0x0600)
+	if (ctrl->nand_version >= 0x0701)
+		ctrl->reg_offsets = brcmnand_regs_v71;
+	else if (ctrl->nand_version >= 0x0600)
 		ctrl->reg_offsets = brcmnand_regs_v60;
 	else if (ctrl->nand_version >= 0x0500)
 		ctrl->reg_offsets = brcmnand_regs_v50;

From 35bfb7949b7f23cdbfda12d83a8038f640b49141 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ezequiel=20Garc=C3=ADa?= <ezequiel@vanguardiasur.com.ar>
Date: Fri, 1 Apr 2016 18:29:23 -0300
Subject: [PATCH 141/424] mtd: nand: Drop mtd.owner requirement in nand_scan

commit 20c07a5bf094198ff2382aa5e7c930b3c9807792 upstream.

Since commit 807f16d4db95 ("mtd: core: set some defaults
when dev.parent is set"), it's now legal for drivers
to call nand_scan and nand_scan_ident without setting
mtd.owner.

Drop the check and while at it remove the BUG() abuse.

Fixes: 807f16d4db95 ("mtd: core: set some defaults when dev.parent is set")
Signed-off-by: Ezequiel Garcia <ezequiel@vanguardiasur.com.ar>
Acked-by: Boris Brezillon <boris.brezillon@free-electrons.com>
[Brian: editorial note - while commit 807f16d4db95 wasn't explicitly
    broken, some follow-up commits in the v4.4 release broke a few
    drivers, since they would hit this BUG() if they used nand_scan()
    and were built as modules]
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mtd/nand/nand_base.c | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 3ff583f165cd..ce7b2cab5762 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -3979,7 +3979,6 @@ static int nand_dt_init(struct mtd_info *mtd, struct nand_chip *chip,
  * This is the first phase of the normal nand_scan() function. It reads the
  * flash ID and sets up MTD fields accordingly.
  *
- * The mtd->owner field must be set to the module of the caller.
  */
 int nand_scan_ident(struct mtd_info *mtd, int maxchips,
 		    struct nand_flash_dev *table)
@@ -4403,19 +4402,12 @@ EXPORT_SYMBOL(nand_scan_tail);
  *
  * This fills out all the uninitialized function pointers with the defaults.
  * The flash ID is read and the mtd/chip structures are filled with the
- * appropriate values. The mtd->owner field must be set to the module of the
- * caller.
+ * appropriate values.
  */
 int nand_scan(struct mtd_info *mtd, int maxchips)
 {
 	int ret;
 
-	/* Many callers got this wrong, so check for it for a while... */
-	if (!mtd->owner && caller_is_module()) {
-		pr_crit("%s called with NULL mtd->owner!\n", __func__);
-		BUG();
-	}
-
 	ret = nand_scan_ident(mtd, maxchips, NULL);
 	if (!ret)
 		ret = nand_scan_tail(mtd);

From c3173539ec17901391863321e1eaf335b0029a09 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Thu, 21 Jan 2016 19:50:09 -0300
Subject: [PATCH 142/424] perf hists browser: Only offer symbol scripting when
 a symbol is under the cursor

commit c221acb0f970d3b80d72c812cda19c121acf5d52 upstream.

When this feature was introduced a check was made if there was a
resolved symbol under the cursor, it got lost in commit ea7cd5923309
("perf hists browser: Split popup menu actions - part 2"), reinstate it.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>,
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Wang Nan <wangnan0@huawei.com>
Fixes: ea7cd5923309 ("perf hists browser: Split popup menu actions - part 2")
Link: http://lkml.kernel.org/r/1452960197-5323-9-git-send-email-namhyung@kernel.org
[ Carved out from a  larger patch ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/perf/ui/browsers/hists.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 81def6c3f24b..3900386a3629 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -2059,10 +2059,12 @@ skip_annotation:
 			 *
 			 * See hist_browser__show_entry.
 			 */
-			nr_options += add_script_opt(browser,
-						     &actions[nr_options],
-						     &options[nr_options],
-						     NULL, browser->selection->sym);
+			if (sort__has_sym && browser->selection->sym) {
+				nr_options += add_script_opt(browser,
+							     &actions[nr_options],
+							     &options[nr_options],
+							     NULL, browser->selection->sym);
+			}
 		}
 		nr_options += add_script_opt(browser, &actions[nr_options],
 					     &options[nr_options], NULL, NULL);

From dcfdb38c41385bc3cb7be295eb5b9d4b00ea1177 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marcin=20=C5=9Alusarz?= <marcin.slusarz@gmail.com>
Date: Tue, 19 Jan 2016 20:03:03 +0100
Subject: [PATCH 143/424] perf tools: handle spaces in file names obtained from
 /proc/pid/maps
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 89fee59b504f86925894fcc9ba79d5c933842f93 upstream.

Steam frequently puts game binaries in folders with spaces.

Note: "(deleted)" markers are now treated as part of the file name.

Signed-off-by: Marcin Ślusarz <marcin.slusarz@gmail.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Fixes: 6064803313ba ("perf tools: Use sscanf for parsing /proc/pid/maps")
Link: http://lkml.kernel.org/r/20160119190303.GA17579@marcin-Inspiron-7720
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/perf/util/event.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 8b10621b415c..956187bf1a85 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -274,7 +274,7 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
 		strcpy(execname, "");
 
 		/* 00400000-0040c000 r-xp 00000000 fd:01 41038  /bin/cat */
-		n = sscanf(bf, "%"PRIx64"-%"PRIx64" %s %"PRIx64" %x:%x %u %s\n",
+		n = sscanf(bf, "%"PRIx64"-%"PRIx64" %s %"PRIx64" %x:%x %u %[^\n]\n",
 		       &event->mmap2.start, &event->mmap2.len, prot,
 		       &event->mmap2.pgoff, &event->mmap2.maj,
 		       &event->mmap2.min,

From d7b60bafb195dd349821e422b1dbc8b897eeb368 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Mon, 7 Mar 2016 16:44:44 -0300
Subject: [PATCH 144/424] perf stat: Document --detailed option

commit f594bae08183fb6b57db55387794ece3e1edf6f6 upstream.

I'm surprised this remained undocumented since at least 2011. And it is
actually a very useful switch, as Steve and I came to realize recently.

Add the text from

  2cba3ffb9a9d ("perf stat: Add -d -d and -d -d -d options to show more CPU events")

which added the incrementing aspect to -d.

Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Davidlohr Bueso <dbueso@suse.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mel Gorman <mgorman@suse.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 2cba3ffb9a9d ("perf stat: Add -d -d and -d -d -d options to show more CPU events")
Link: http://lkml.kernel.org/r/1457347294-32546-1-git-send-email-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/perf/Documentation/perf-stat.txt | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 4e074a660826..90c3558c2c12 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -62,6 +62,14 @@ OPTIONS
 --scale::
 	scale/normalize counter values
 
+-d::
+--detailed::
+	print more detailed statistics, can be specified up to 3 times
+
+	   -d:          detailed events, L1 and LLC data cache
+        -d -d:     more detailed events, dTLB and iTLB events
+     -d -d -d:     very detailed events, adding prefetch events
+
 -r::
 --repeat=<n>::
 	repeat command and print average + stddev (max: 100). 0 means forever.

From 0b680de452570274716c2c9990903acea525f0d0 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.com>
Date: Mon, 7 Dec 2015 14:28:03 -0500
Subject: [PATCH 145/424] ext4: fix races between page faults and hole punching

commit ea3d7209ca01da209cda6f0dea8be9cc4b7a933b upstream.

Currently, page faults and hole punching are completely unsynchronized.
This can result in page fault faulting in a page into a range that we
are punching after truncate_pagecache_range() has been called and thus
we can end up with a page mapped to disk blocks that will be shortly
freed. Filesystem corruption will shortly follow. Note that the same
race is avoided for truncate by checking page fault offset against
i_size but there isn't similar mechanism available for punching holes.

Fix the problem by creating new rw semaphore i_mmap_sem in inode and
grab it for writing over truncate, hole punching, and other functions
removing blocks from extent tree and for read over page faults. We
cannot easily use i_data_sem for this since that ranks below transaction
start and we need something ranking above it so that it can be held over
the whole truncate / hole punching operation. Also remove various
workarounds we had in the code to reduce race window when page fault
could have created pages with stale mapping information.

Signed-off-by: Jan Kara <jack@suse.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/ext4.h     | 10 +++++++
 fs/ext4/extents.c  | 54 ++++++++++++++++++++-----------------
 fs/ext4/file.c     | 66 +++++++++++++++++++++++++++++++++++++++-------
 fs/ext4/inode.c    | 36 ++++++++++++++++++-------
 fs/ext4/super.c    |  1 +
 fs/ext4/truncate.h |  2 ++
 6 files changed, 127 insertions(+), 42 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index d4156e1c128d..89df9f55595b 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -933,6 +933,15 @@ struct ext4_inode_info {
 	 * by other means, so we have i_data_sem.
 	 */
 	struct rw_semaphore i_data_sem;
+	/*
+	 * i_mmap_sem is for serializing page faults with truncate / punch hole
+	 * operations. We have to make sure that new page cannot be faulted in
+	 * a section of the inode that is being punched. We cannot easily use
+	 * i_data_sem for this since we need protection for the whole punch
+	 * operation and i_data_sem ranks below transaction start so we have
+	 * to occasionally drop it.
+	 */
+	struct rw_semaphore i_mmap_sem;
 	struct inode vfs_inode;
 	struct jbd2_inode *jinode;
 
@@ -2507,6 +2516,7 @@ extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
 extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
 			     loff_t lstart, loff_t lend);
 extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
+extern int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
 extern qsize_t *ext4_get_reserved_space(struct inode *inode);
 extern void ext4_da_update_reserve_space(struct inode *inode,
 					int used, int quota_claim);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 551353b1b17a..5be9ca5a8a7a 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4770,7 +4770,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 	int partial_begin, partial_end;
 	loff_t start, end;
 	ext4_lblk_t lblk;
-	struct address_space *mapping = inode->i_mapping;
 	unsigned int blkbits = inode->i_blkbits;
 
 	trace_ext4_zero_range(inode, offset, len, mode);
@@ -4785,17 +4784,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 			return ret;
 	}
 
-	/*
-	 * Write out all dirty pages to avoid race conditions
-	 * Then release them.
-	 */
-	if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
-		ret = filemap_write_and_wait_range(mapping, offset,
-						   offset + len - 1);
-		if (ret)
-			return ret;
-	}
-
 	/*
 	 * Round up offset. This is not fallocate, we neet to zero out
 	 * blocks, so convert interior block aligned part of the range to
@@ -4856,16 +4844,22 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 		flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN |
 			  EXT4_EX_NOCACHE);
 
-		/* Now release the pages and zero block aligned part of pages*/
-		truncate_pagecache_range(inode, start, end - 1);
-		inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
-
 		/* Wait all existing dio workers, newcomers will block on i_mutex */
 		ext4_inode_block_unlocked_dio(inode);
 		inode_dio_wait(inode);
 
+		/*
+		 * Prevent page faults from reinstantiating pages we have
+		 * released from page cache.
+		 */
+		down_write(&EXT4_I(inode)->i_mmap_sem);
+		/* Now release the pages and zero block aligned part of pages */
+		truncate_pagecache_range(inode, start, end - 1);
+		inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
+
 		ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
 					     flags, mode);
+		up_write(&EXT4_I(inode)->i_mmap_sem);
 		if (ret)
 			goto out_dio;
 	}
@@ -5524,17 +5518,22 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
 		goto out_mutex;
 	}
 
-	truncate_pagecache(inode, ioffset);
-
 	/* Wait for existing dio to complete */
 	ext4_inode_block_unlocked_dio(inode);
 	inode_dio_wait(inode);
 
+	/*
+	 * Prevent page faults from reinstantiating pages we have released from
+	 * page cache.
+	 */
+	down_write(&EXT4_I(inode)->i_mmap_sem);
+	truncate_pagecache(inode, ioffset);
+
 	credits = ext4_writepage_trans_blocks(inode);
 	handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
 	if (IS_ERR(handle)) {
 		ret = PTR_ERR(handle);
-		goto out_dio;
+		goto out_mmap;
 	}
 
 	down_write(&EXT4_I(inode)->i_data_sem);
@@ -5573,7 +5572,8 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
 
 out_stop:
 	ext4_journal_stop(handle);
-out_dio:
+out_mmap:
+	up_write(&EXT4_I(inode)->i_mmap_sem);
 	ext4_inode_resume_unlocked_dio(inode);
 out_mutex:
 	mutex_unlock(&inode->i_mutex);
@@ -5660,17 +5660,22 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
 		goto out_mutex;
 	}
 
-	truncate_pagecache(inode, ioffset);
-
 	/* Wait for existing dio to complete */
 	ext4_inode_block_unlocked_dio(inode);
 	inode_dio_wait(inode);
 
+	/*
+	 * Prevent page faults from reinstantiating pages we have released from
+	 * page cache.
+	 */
+	down_write(&EXT4_I(inode)->i_mmap_sem);
+	truncate_pagecache(inode, ioffset);
+
 	credits = ext4_writepage_trans_blocks(inode);
 	handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
 	if (IS_ERR(handle)) {
 		ret = PTR_ERR(handle);
-		goto out_dio;
+		goto out_mmap;
 	}
 
 	/* Expand file to avoid data loss if there is error while shifting */
@@ -5741,7 +5746,8 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
 
 out_stop:
 	ext4_journal_stop(handle);
-out_dio:
+out_mmap:
+	up_write(&EXT4_I(inode)->i_mmap_sem);
 	ext4_inode_resume_unlocked_dio(inode);
 out_mutex:
 	mutex_unlock(&inode->i_mutex);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 113837e7ba98..0d24ebcd7c9e 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -209,15 +209,18 @@ static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	int result;
 	handle_t *handle = NULL;
-	struct super_block *sb = file_inode(vma->vm_file)->i_sb;
+	struct inode *inode = file_inode(vma->vm_file);
+	struct super_block *sb = inode->i_sb;
 	bool write = vmf->flags & FAULT_FLAG_WRITE;
 
 	if (write) {
 		sb_start_pagefault(sb);
 		file_update_time(vma->vm_file);
+		down_read(&EXT4_I(inode)->i_mmap_sem);
 		handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
 						EXT4_DATA_TRANS_BLOCKS(sb));
-	}
+	} else
+		down_read(&EXT4_I(inode)->i_mmap_sem);
 
 	if (IS_ERR(handle))
 		result = VM_FAULT_SIGBUS;
@@ -228,8 +231,10 @@ static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	if (write) {
 		if (!IS_ERR(handle))
 			ext4_journal_stop(handle);
+		up_read(&EXT4_I(inode)->i_mmap_sem);
 		sb_end_pagefault(sb);
-	}
+	} else
+		up_read(&EXT4_I(inode)->i_mmap_sem);
 
 	return result;
 }
@@ -246,10 +251,12 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
 	if (write) {
 		sb_start_pagefault(sb);
 		file_update_time(vma->vm_file);
+		down_read(&EXT4_I(inode)->i_mmap_sem);
 		handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
 				ext4_chunk_trans_blocks(inode,
 							PMD_SIZE / PAGE_SIZE));
-	}
+	} else
+		down_read(&EXT4_I(inode)->i_mmap_sem);
 
 	if (IS_ERR(handle))
 		result = VM_FAULT_SIGBUS;
@@ -260,30 +267,71 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
 	if (write) {
 		if (!IS_ERR(handle))
 			ext4_journal_stop(handle);
+		up_read(&EXT4_I(inode)->i_mmap_sem);
 		sb_end_pagefault(sb);
-	}
+	} else
+		up_read(&EXT4_I(inode)->i_mmap_sem);
 
 	return result;
 }
 
 static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
-	return dax_mkwrite(vma, vmf, ext4_get_block_dax,
-				ext4_end_io_unwritten);
+	int err;
+	struct inode *inode = file_inode(vma->vm_file);
+
+	sb_start_pagefault(inode->i_sb);
+	file_update_time(vma->vm_file);
+	down_read(&EXT4_I(inode)->i_mmap_sem);
+	err = __dax_mkwrite(vma, vmf, ext4_get_block_dax,
+			    ext4_end_io_unwritten);
+	up_read(&EXT4_I(inode)->i_mmap_sem);
+	sb_end_pagefault(inode->i_sb);
+
+	return err;
+}
+
+/*
+ * Handle write fault for VM_MIXEDMAP mappings. Similarly to ext4_dax_mkwrite()
+ * handler we check for races agaist truncate. Note that since we cycle through
+ * i_mmap_sem, we are sure that also any hole punching that began before we
+ * were called is finished by now and so if it included part of the file we
+ * are working on, our pte will get unmapped and the check for pte_same() in
+ * wp_pfn_shared() fails. Thus fault gets retried and things work out as
+ * desired.
+ */
+static int ext4_dax_pfn_mkwrite(struct vm_area_struct *vma,
+				struct vm_fault *vmf)
+{
+	struct inode *inode = file_inode(vma->vm_file);
+	struct super_block *sb = inode->i_sb;
+	int ret = VM_FAULT_NOPAGE;
+	loff_t size;
+
+	sb_start_pagefault(sb);
+	file_update_time(vma->vm_file);
+	down_read(&EXT4_I(inode)->i_mmap_sem);
+	size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	if (vmf->pgoff >= size)
+		ret = VM_FAULT_SIGBUS;
+	up_read(&EXT4_I(inode)->i_mmap_sem);
+	sb_end_pagefault(sb);
+
+	return ret;
 }
 
 static const struct vm_operations_struct ext4_dax_vm_ops = {
 	.fault		= ext4_dax_fault,
 	.pmd_fault	= ext4_dax_pmd_fault,
 	.page_mkwrite	= ext4_dax_mkwrite,
-	.pfn_mkwrite	= dax_pfn_mkwrite,
+	.pfn_mkwrite	= ext4_dax_pfn_mkwrite,
 };
 #else
 #define ext4_dax_vm_ops	ext4_file_vm_ops
 #endif
 
 static const struct vm_operations_struct ext4_file_vm_ops = {
-	.fault		= filemap_fault,
+	.fault		= ext4_filemap_fault,
 	.map_pages	= filemap_map_pages,
 	.page_mkwrite   = ext4_page_mkwrite,
 };
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 547600556bb9..214e30a3ef9e 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3651,6 +3651,15 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
 
 	}
 
+	/* Wait all existing dio workers, newcomers will block on i_mutex */
+	ext4_inode_block_unlocked_dio(inode);
+	inode_dio_wait(inode);
+
+	/*
+	 * Prevent page faults from reinstantiating pages we have released from
+	 * page cache.
+	 */
+	down_write(&EXT4_I(inode)->i_mmap_sem);
 	first_block_offset = round_up(offset, sb->s_blocksize);
 	last_block_offset = round_down((offset + length), sb->s_blocksize) - 1;
 
@@ -3659,10 +3668,6 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
 		truncate_pagecache_range(inode, first_block_offset,
 					 last_block_offset);
 
-	/* Wait all existing dio workers, newcomers will block on i_mutex */
-	ext4_inode_block_unlocked_dio(inode);
-	inode_dio_wait(inode);
-
 	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
 		credits = ext4_writepage_trans_blocks(inode);
 	else
@@ -3708,16 +3713,12 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
 	if (IS_SYNC(inode))
 		ext4_handle_sync(handle);
 
-	/* Now release the pages again to reduce race window */
-	if (last_block_offset > first_block_offset)
-		truncate_pagecache_range(inode, first_block_offset,
-					 last_block_offset);
-
 	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
 	ext4_mark_inode_dirty(handle, inode);
 out_stop:
 	ext4_journal_stop(handle);
 out_dio:
+	up_write(&EXT4_I(inode)->i_mmap_sem);
 	ext4_inode_resume_unlocked_dio(inode);
 out_mutex:
 	mutex_unlock(&inode->i_mutex);
@@ -4851,6 +4852,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 			} else
 				ext4_wait_for_tail_page_commit(inode);
 		}
+		down_write(&EXT4_I(inode)->i_mmap_sem);
 		/*
 		 * Truncate pagecache after we've waited for commit
 		 * in data=journal mode to make pages freeable.
@@ -4858,6 +4860,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 		truncate_pagecache(inode, inode->i_size);
 		if (shrink)
 			ext4_truncate(inode);
+		up_write(&EXT4_I(inode)->i_mmap_sem);
 	}
 
 	if (!rc) {
@@ -5306,6 +5309,8 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 
 	sb_start_pagefault(inode->i_sb);
 	file_update_time(vma->vm_file);
+
+	down_read(&EXT4_I(inode)->i_mmap_sem);
 	/* Delalloc case is easy... */
 	if (test_opt(inode->i_sb, DELALLOC) &&
 	    !ext4_should_journal_data(inode) &&
@@ -5375,6 +5380,19 @@ retry_alloc:
 out_ret:
 	ret = block_page_mkwrite_return(ret);
 out:
+	up_read(&EXT4_I(inode)->i_mmap_sem);
 	sb_end_pagefault(inode->i_sb);
 	return ret;
 }
+
+int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct inode *inode = file_inode(vma->vm_file);
+	int err;
+
+	down_read(&EXT4_I(inode)->i_mmap_sem);
+	err = filemap_fault(vma, vmf);
+	up_read(&EXT4_I(inode)->i_mmap_sem);
+
+	return err;
+}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index ba1cf0bf2f81..852c26806af2 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -958,6 +958,7 @@ static void init_once(void *foo)
 	INIT_LIST_HEAD(&ei->i_orphan);
 	init_rwsem(&ei->xattr_sem);
 	init_rwsem(&ei->i_data_sem);
+	init_rwsem(&ei->i_mmap_sem);
 	inode_init_once(&ei->vfs_inode);
 }
 
diff --git a/fs/ext4/truncate.h b/fs/ext4/truncate.h
index 011ba6670d99..c70d06a383e2 100644
--- a/fs/ext4/truncate.h
+++ b/fs/ext4/truncate.h
@@ -10,8 +10,10 @@
  */
 static inline void ext4_truncate_failed_write(struct inode *inode)
 {
+	down_write(&EXT4_I(inode)->i_mmap_sem);
 	truncate_inode_pages(inode->i_mapping, inode->i_size);
 	ext4_truncate(inode);
+	up_write(&EXT4_I(inode)->i_mmap_sem);
 }
 
 /*

From e096ade68c13011ba6548a542c1fc00e14555f5c Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.com>
Date: Mon, 7 Dec 2015 14:29:17 -0500
Subject: [PATCH 146/424] ext4: move unlocked dio protection from
 ext4_alloc_file_blocks()

commit 17048e8a083fec7ad841d88ef0812707fbc7e39f upstream.

Currently ext4_alloc_file_blocks() was handling protection against
unlocked DIO. However we now need to sometimes call it under i_mmap_sem
and sometimes not and DIO protection ranks above it (although strictly
speaking this cannot currently create any deadlocks). Also
ext4_zero_range() was actually getting & releasing unlocked DIO
protection twice in some cases. Luckily it didn't introduce any real bug
but it was a land mine waiting to be stepped on.  So move DIO protection
out from ext4_alloc_file_blocks() into the two callsites.

Signed-off-by: Jan Kara <jack@suse.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/extents.c | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 5be9ca5a8a7a..65b5ada2833f 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4685,10 +4685,6 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
 	if (len <= EXT_UNWRITTEN_MAX_LEN)
 		flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
 
-	/* Wait all existing dio workers, newcomers will block on i_mutex */
-	ext4_inode_block_unlocked_dio(inode);
-	inode_dio_wait(inode);
-
 	/*
 	 * credits to insert 1 extent into extent tree
 	 */
@@ -4752,8 +4748,6 @@ retry:
 		goto retry;
 	}
 
-	ext4_inode_resume_unlocked_dio(inode);
-
 	return ret > 0 ? ret2 : ret;
 }
 
@@ -4827,6 +4821,10 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 	if (mode & FALLOC_FL_KEEP_SIZE)
 		flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
 
+	/* Wait all existing dio workers, newcomers will block on i_mutex */
+	ext4_inode_block_unlocked_dio(inode);
+	inode_dio_wait(inode);
+
 	/* Preallocate the range including the unaligned edges */
 	if (partial_begin || partial_end) {
 		ret = ext4_alloc_file_blocks(file,
@@ -4835,7 +4833,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 				 round_down(offset, 1 << blkbits)) >> blkbits,
 				new_size, flags, mode);
 		if (ret)
-			goto out_mutex;
+			goto out_dio;
 
 	}
 
@@ -4844,10 +4842,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 		flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN |
 			  EXT4_EX_NOCACHE);
 
-		/* Wait all existing dio workers, newcomers will block on i_mutex */
-		ext4_inode_block_unlocked_dio(inode);
-		inode_dio_wait(inode);
-
 		/*
 		 * Prevent page faults from reinstantiating pages we have
 		 * released from page cache.
@@ -4992,8 +4986,13 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 			goto out;
 	}
 
+	/* Wait all existing dio workers, newcomers will block on i_mutex */
+	ext4_inode_block_unlocked_dio(inode);
+	inode_dio_wait(inode);
+
 	ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
 				     flags, mode);
+	ext4_inode_resume_unlocked_dio(inode);
 	if (ret)
 		goto out;
 

From 1f7b7e9a4ba3d60af27c78a149743d269e6fb848 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.com>
Date: Mon, 7 Dec 2015 14:31:11 -0500
Subject: [PATCH 147/424] ext4: fix races between buffered IO and collapse /
 insert range

commit 32ebffd3bbb4162da5ff88f9a35dd32d0a28ea70 upstream.

Current code implementing FALLOC_FL_COLLAPSE_RANGE and
FALLOC_FL_INSERT_RANGE is prone to races with buffered writes and page
faults. If buffered write or write via mmap manages to squeeze between
filemap_write_and_wait_range() and truncate_pagecache() in the fallocate
implementations, the written data is simply discarded by
truncate_pagecache() although it should have been shifted.

Fix the problem by moving filemap_write_and_wait_range() call inside
i_mutex and i_mmap_sem. That way we are protected against races with
both buffered writes and page faults.

Signed-off-by: Jan Kara <jack@suse.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/extents.c | 59 +++++++++++++++++++++++++----------------------
 1 file changed, 31 insertions(+), 28 deletions(-)

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 65b5ada2833f..4b105c96df08 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -5487,21 +5487,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
 			return ret;
 	}
 
-	/*
-	 * Need to round down offset to be aligned with page size boundary
-	 * for page size > block size.
-	 */
-	ioffset = round_down(offset, PAGE_SIZE);
-
-	/* Write out all dirty pages */
-	ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
-					   LLONG_MAX);
-	if (ret)
-		return ret;
-
-	/* Take mutex lock */
 	mutex_lock(&inode->i_mutex);
-
 	/*
 	 * There is no need to overlap collapse range with EOF, in which case
 	 * it is effectively a truncate operation
@@ -5526,6 +5512,27 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
 	 * page cache.
 	 */
 	down_write(&EXT4_I(inode)->i_mmap_sem);
+	/*
+	 * Need to round down offset to be aligned with page size boundary
+	 * for page size > block size.
+	 */
+	ioffset = round_down(offset, PAGE_SIZE);
+	/*
+	 * Write tail of the last page before removed range since it will get
+	 * removed from the page cache below.
+	 */
+	ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, offset);
+	if (ret)
+		goto out_mmap;
+	/*
+	 * Write data that will be shifted to preserve them when discarding
+	 * page cache below. We are also protected from pages becoming dirty
+	 * by i_mmap_sem.
+	 */
+	ret = filemap_write_and_wait_range(inode->i_mapping, offset + len,
+					   LLONG_MAX);
+	if (ret)
+		goto out_mmap;
 	truncate_pagecache(inode, ioffset);
 
 	credits = ext4_writepage_trans_blocks(inode);
@@ -5626,21 +5633,7 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
 			return ret;
 	}
 
-	/*
-	 * Need to round down to align start offset to page size boundary
-	 * for page size > block size.
-	 */
-	ioffset = round_down(offset, PAGE_SIZE);
-
-	/* Write out all dirty pages */
-	ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
-			LLONG_MAX);
-	if (ret)
-		return ret;
-
-	/* Take mutex lock */
 	mutex_lock(&inode->i_mutex);
-
 	/* Currently just for extent based files */
 	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
 		ret = -EOPNOTSUPP;
@@ -5668,6 +5661,16 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
 	 * page cache.
 	 */
 	down_write(&EXT4_I(inode)->i_mmap_sem);
+	/*
+	 * Need to round down to align start offset to page size boundary
+	 * for page size > block size.
+	 */
+	ioffset = round_down(offset, PAGE_SIZE);
+	/* Write out all dirty pages */
+	ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
+			LLONG_MAX);
+	if (ret)
+		goto out_mmap;
 	truncate_pagecache(inode, ioffset);
 
 	credits = ext4_writepage_trans_blocks(inode);

From 21228341bf17496062b0e6a1b37265f6bcf5c8f3 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.com>
Date: Mon, 7 Dec 2015 14:34:49 -0500
Subject: [PATCH 148/424] ext4: fix races of writeback with punch hole and zero
 range

commit 011278485ecc3cd2a3954b5d4c73101d919bf1fa upstream.

When doing delayed allocation, update of on-disk inode size is postponed
until IO submission time. However hole punch or zero range fallocate
calls can end up discarding the tail page cache page and thus on-disk
inode size would never be properly updated.

Make sure the on-disk inode size is updated before truncating page
cache.

Signed-off-by: Jan Kara <jack@suse.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/ext4.h    |  3 +++
 fs/ext4/extents.c |  5 +++++
 fs/ext4/inode.c   | 35 ++++++++++++++++++++++++++++++++++-
 3 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 89df9f55595b..b7e921d207fb 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2881,6 +2881,9 @@ static inline int ext4_update_inode_size(struct inode *inode, loff_t newsize)
 	return changed;
 }
 
+int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
+				      loff_t len);
+
 struct ext4_group_info {
 	unsigned long   bb_state;
 	struct rb_root  bb_free_root;
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 4b105c96df08..3578b25fccfd 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4847,6 +4847,11 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 		 * released from page cache.
 		 */
 		down_write(&EXT4_I(inode)->i_mmap_sem);
+		ret = ext4_update_disksize_before_punch(inode, offset, len);
+		if (ret) {
+			up_write(&EXT4_I(inode)->i_mmap_sem);
+			goto out_dio;
+		}
 		/* Now release the pages and zero block aligned part of pages */
 		truncate_pagecache_range(inode, start, end - 1);
 		inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 214e30a3ef9e..e31d762eedce 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3586,6 +3586,35 @@ int ext4_can_truncate(struct inode *inode)
 	return 0;
 }
 
+/*
+ * We have to make sure i_disksize gets properly updated before we truncate
+ * page cache due to hole punching or zero range. Otherwise i_disksize update
+ * can get lost as it may have been postponed to submission of writeback but
+ * that will never happen after we truncate page cache.
+ */
+int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
+				      loff_t len)
+{
+	handle_t *handle;
+	loff_t size = i_size_read(inode);
+
+	WARN_ON(!mutex_is_locked(&inode->i_mutex));
+	if (offset > size || offset + len < size)
+		return 0;
+
+	if (EXT4_I(inode)->i_disksize >= size)
+		return 0;
+
+	handle = ext4_journal_start(inode, EXT4_HT_MISC, 1);
+	if (IS_ERR(handle))
+		return PTR_ERR(handle);
+	ext4_update_i_disksize(inode, size);
+	ext4_mark_inode_dirty(handle, inode);
+	ext4_journal_stop(handle);
+
+	return 0;
+}
+
 /*
  * ext4_punch_hole: punches a hole in a file by releaseing the blocks
  * associated with the given offset and length
@@ -3664,9 +3693,13 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
 	last_block_offset = round_down((offset + length), sb->s_blocksize) - 1;
 
 	/* Now release the pages and zero block aligned part of pages*/
-	if (last_block_offset > first_block_offset)
+	if (last_block_offset > first_block_offset) {
+		ret = ext4_update_disksize_before_punch(inode, offset, length);
+		if (ret)
+			goto out_dio;
 		truncate_pagecache_range(inode, first_block_offset,
 					 last_block_offset);
+	}
 
 	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
 		credits = ext4_writepage_trans_blocks(inode);

From 40cab474b47b2fc87911812687e83f8cd21aea1b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali.rohar@gmail.com>
Date: Fri, 19 Feb 2016 10:35:39 -0800
Subject: [PATCH 149/424] ARM: OMAP3: Add cpuidle parameters table for omap3430
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 98f42221501353067251fbf11e732707dbb68ce3 upstream.

Based on CPU type choose generic omap3 or omap3430 specific cpuidle
parameters. Parameters for omap3430 were measured on Nokia N900 device and
added by commit 5a1b1d3a9efa ("OMAP3: RX-51: Pass cpu idle parameters")
which were later removed by commit 231900afba52 ("ARM: OMAP3: cpuidle -
remove rx51 cpuidle parameters table") due to huge code complexity.

This patch brings cpuidle parameters for omap3430 devices again, but uses
simple condition based on CPU type.

Fixes: 231900afba52 ("ARM: OMAP3: cpuidle - remove rx51 cpuidle
parameters table")
Signed-off-by: Pali Rohár <pali.rohar@gmail.com>
Acked-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mach-omap2/cpuidle34xx.c | 69 ++++++++++++++++++++++++++++++-
 1 file changed, 68 insertions(+), 1 deletion(-)

diff --git a/arch/arm/mach-omap2/cpuidle34xx.c b/arch/arm/mach-omap2/cpuidle34xx.c
index aa7b379e2661..2a3db0bd9e15 100644
--- a/arch/arm/mach-omap2/cpuidle34xx.c
+++ b/arch/arm/mach-omap2/cpuidle34xx.c
@@ -34,6 +34,7 @@
 #include "pm.h"
 #include "control.h"
 #include "common.h"
+#include "soc.h"
 
 /* Mach specific information to be recorded in the C-state driver_data */
 struct omap3_idle_statedata {
@@ -315,6 +316,69 @@ static struct cpuidle_driver omap3_idle_driver = {
 	.safe_state_index = 0,
 };
 
+/*
+ * Numbers based on measurements made in October 2009 for PM optimized kernel
+ * with CPU freq enabled on device Nokia N900. Assumes OPP2 (main idle OPP,
+ * and worst case latencies).
+ */
+static struct cpuidle_driver omap3430_idle_driver = {
+	.name             = "omap3430_idle",
+	.owner            = THIS_MODULE,
+	.states = {
+		{
+			.enter		  = omap3_enter_idle_bm,
+			.exit_latency	  = 110 + 162,
+			.target_residency = 5,
+			.name		  = "C1",
+			.desc		  = "MPU ON + CORE ON",
+		},
+		{
+			.enter		  = omap3_enter_idle_bm,
+			.exit_latency	  = 106 + 180,
+			.target_residency = 309,
+			.name		  = "C2",
+			.desc		  = "MPU ON + CORE ON",
+		},
+		{
+			.enter		  = omap3_enter_idle_bm,
+			.exit_latency	  = 107 + 410,
+			.target_residency = 46057,
+			.name		  = "C3",
+			.desc		  = "MPU RET + CORE ON",
+		},
+		{
+			.enter		  = omap3_enter_idle_bm,
+			.exit_latency	  = 121 + 3374,
+			.target_residency = 46057,
+			.name		  = "C4",
+			.desc		  = "MPU OFF + CORE ON",
+		},
+		{
+			.enter		  = omap3_enter_idle_bm,
+			.exit_latency	  = 855 + 1146,
+			.target_residency = 46057,
+			.name		  = "C5",
+			.desc		  = "MPU RET + CORE RET",
+		},
+		{
+			.enter		  = omap3_enter_idle_bm,
+			.exit_latency	  = 7580 + 4134,
+			.target_residency = 484329,
+			.name		  = "C6",
+			.desc		  = "MPU OFF + CORE RET",
+		},
+		{
+			.enter		  = omap3_enter_idle_bm,
+			.exit_latency	  = 7505 + 15274,
+			.target_residency = 484329,
+			.name		  = "C7",
+			.desc		  = "MPU OFF + CORE OFF",
+		},
+	},
+	.state_count = ARRAY_SIZE(omap3_idle_data),
+	.safe_state_index = 0,
+};
+
 /* Public functions */
 
 /**
@@ -333,5 +397,8 @@ int __init omap3_idle_init(void)
 	if (!mpu_pd || !core_pd || !per_pd || !cam_pd)
 		return -ENODEV;
 
-	return cpuidle_register(&omap3_idle_driver, NULL);
+	if (cpu_is_omap3430())
+		return cpuidle_register(&omap3430_idle_driver, NULL);
+	else
+		return cpuidle_register(&omap3_idle_driver, NULL);
 }

From 159c52e15f95712dd22aa5d64b17a79a7fd8f939 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Sat, 28 Nov 2015 23:56:47 +0100
Subject: [PATCH 150/424] ARM: prima2: always enable reset controller

commit ef2b1d777d643af227a22309d8b79898b90b123c upstream.

The atlas7 clock controller driver registers a reset controller
for itself, which causes a link error when the subsystem is
disabled:

drivers/built-in.o: In function `atlas7_clk_init':
drivers/clk/sirf/clk-atlas7.c:1681: undefined reference to `reset_controller_register'

As the clk driver does not have a Kconfig symbol for itself
but it always built-in when the platform is enabled, we have
to ensure that the reset controller subsystem is also built-in
in this case.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Philipp Zabel <p.zabel@pengutronix.de>
Fixes: 301c5d29402e ("clk: sirf: add CSR atlas7 clk and reset support")
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mach-prima2/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm/mach-prima2/Kconfig b/arch/arm/mach-prima2/Kconfig
index 9ab8932403e5..56e55fd37d13 100644
--- a/arch/arm/mach-prima2/Kconfig
+++ b/arch/arm/mach-prima2/Kconfig
@@ -1,6 +1,7 @@
 menuconfig ARCH_SIRF
 	bool "CSR SiRF" if ARCH_MULTI_V7
 	select ARCH_HAS_RESET_CONTROLLER
+	select RESET_CONTROLLER
 	select ARCH_REQUIRE_GPIOLIB
 	select GENERIC_IRQ_CHIP
 	select NO_IOPORT_MAP

From abc48d066b7b5063db56f4a81e367c84b9582882 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 29 Jan 2016 15:50:38 +0100
Subject: [PATCH 151/424] ARM: EXYNOS: select THERMAL_OF

commit dc7eb9d589e595954792cc192bcbb92932e5c2ff upstream.

We cannot select a symbol that has disabled dependencies, so
we get a warning if we ever enable EXYNOS_THERMAL without
also turning on THERMAL_OF:

warning: (ARCH_EXYNOS) selects EXYNOS_THERMAL which has unmet direct dependencies (THERMAL && (ARCH_EXYNOS || COMPILE_TEST) && THERMAL_OF)

This adds another 'select' in the platform code to avoid that
case. Alternatively, we could decide to not select EXYNOS_THERMAL
here and instead make it a user option.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: f87e6bd3f740 ("thermal: exynos: Add the dependency of CONFIG_THERMAL_OF instead of CONFIG_OF")
Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mach-exynos/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm/mach-exynos/Kconfig b/arch/arm/mach-exynos/Kconfig
index 3a10f1a8317a..bfd8bb371477 100644
--- a/arch/arm/mach-exynos/Kconfig
+++ b/arch/arm/mach-exynos/Kconfig
@@ -26,6 +26,7 @@ menuconfig ARCH_EXYNOS
 	select S5P_DEV_MFC
 	select SRAM
 	select THERMAL
+	select THERMAL_OF
 	select MFD_SYSCON
 	help
 	  Support for SAMSUNG EXYNOS SoCs (EXYNOS4/5)

From ea075ae7f00c6416b12d68abf29b6a57a15b3916 Mon Sep 17 00:00:00 2001
From: Lior Amsalem <alior@marvell.com>
Date: Wed, 10 Feb 2016 17:29:15 +0100
Subject: [PATCH 152/424] ARM: dts: armada-375: use armada-370-sata for SATA

commit b3a7f31eb7375633cd6a742f19488fc5a4208b36 upstream.

The Armada 375 has the same SATA IP as Armada 370 and Armada XP, which
requires the PHY speed to be set in the LP_PHY_CTL register for SATA
hotplug to work.

Therefore, this commit updates the compatible string used to describe
the SATA IP in Armada 375 from marvell,orion-sata to
marvell,armada-370-sata.

Fixes: 4de59085091f753d08c8429d756b46756ab94665 ("ARM: mvebu: add Device Tree description of the Armada 375 SoC")
Signed-off-by: Lior Amsalem <alior@marvell.com>
Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Gregory CLEMENT <gregory.clement@free-electrons.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/boot/dts/armada-375.dtsi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/armada-375.dtsi b/arch/arm/boot/dts/armada-375.dtsi
index 7ccce7529b0c..cc952cf8ec30 100644
--- a/arch/arm/boot/dts/armada-375.dtsi
+++ b/arch/arm/boot/dts/armada-375.dtsi
@@ -529,7 +529,7 @@
 			};
 
 			sata@a0000 {
-				compatible = "marvell,orion-sata";
+				compatible = "marvell,armada-370-sata";
 				reg = <0xa0000 0x5000>;
 				interrupts = <GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>;
 				clocks = <&gateclk 14>, <&gateclk 20>;

From eb7f1c5fb5c8e888ca8b728e17e71426ea809590 Mon Sep 17 00:00:00 2001
From: Robert Jarzmik <robert.jarzmik@free.fr>
Date: Sat, 13 Feb 2016 00:49:20 +0100
Subject: [PATCH 153/424] ARM: dts: pxa: fix dma engine node to pxa3xx-nand

commit 07c6b2d01d351f0512ed7145625265e435ab3240 upstream.

Since the switch from mmp_pdma to pxa_dma driver for pxa architectures,
the pxa_dma requires 2 arguments, namely the requestor line and the
requested priority.

Fix the only left device node which was still passing only one argument,
making the pxa3xx-nand driver misbehave in a device-tree configuration,
ie. failing all data transfers.

Fixes: c943646d1f49 ("ARM: dts: pxa: add dma engine node to pxa3xx-nand")
Signed-off-by: Robert Jarzmik <robert.jarzmik@free.fr>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/boot/dts/pxa3xx.dtsi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/pxa3xx.dtsi b/arch/arm/boot/dts/pxa3xx.dtsi
index cf6998a0804d..564341af7e97 100644
--- a/arch/arm/boot/dts/pxa3xx.dtsi
+++ b/arch/arm/boot/dts/pxa3xx.dtsi
@@ -30,7 +30,7 @@
 			reg = <0x43100000 90>;
 			interrupts = <45>;
 			clocks = <&clks CLK_NAND>;
-			dmas = <&pdma 97>;
+			dmas = <&pdma 97 3>;
 			dma-names = "data";
 			#address-cells = <1>;
 			#size-cells = <1>;	

From c565897ffe54ec0e36854db7fcfe88014e05ce41 Mon Sep 17 00:00:00 2001
From: Fabio Estevam <fabio.estevam@nxp.com>
Date: Mon, 22 Feb 2016 09:01:53 -0300
Subject: [PATCH 154/424] bus: imx-weim: Take the 'status' property value into
 account

commit 33b96d2c9579213cf3f36d7b29841b1e464750c4 upstream.

Currently we have an incorrect behaviour when multiple devices
are present under the weim node. For example:

&weim {
	...
	status = "okay";

	sram@0,0 {
		...
        	status = "okay";
	};

	mram@0,0 {
		...
        	status = "disabled";
    	};
};

In this case only the 'sram' device should be probed and not 'mram'.

However what happens currently is that the status variable is ignored,
causing the 'sram' device to be disabled and 'mram' to be enabled.

Change the weim_parse_dt() function to use
for_each_available_child_of_node()so that the devices marked with
'status = disabled' are not probed.

Suggested-by: Wolfgang Netbal <wolfgang.netbal@sigmatek.at>
Signed-off-by: Fabio Estevam <fabio.estevam@nxp.com>
Reviewed-by: Sascha Hauer <s.hauer@pengutronix.de>
Acked-by: Shawn Guo <shawnguo@kernel.org>
Signed-off-by: Olof Johansson <olof@lixom.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/bus/imx-weim.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/bus/imx-weim.c b/drivers/bus/imx-weim.c
index e98d15eaa799..1827fc4d15c1 100644
--- a/drivers/bus/imx-weim.c
+++ b/drivers/bus/imx-weim.c
@@ -150,7 +150,7 @@ static int __init weim_parse_dt(struct platform_device *pdev,
 			return ret;
 	}
 
-	for_each_child_of_node(pdev->dev.of_node, child) {
+	for_each_available_child_of_node(pdev->dev.of_node, child) {
 		if (!child->name)
 			continue;
 

From 1b06e9942d51804170631351ada984947e87f042 Mon Sep 17 00:00:00 2001
From: Guo-Fu Tseng <cooldavid@cooldavid.org>
Date: Sat, 5 Mar 2016 08:11:55 +0800
Subject: [PATCH 155/424] jme: Do not enable NIC WoL functions on S0

commit 0772a99b818079e628a1da122ac7ee023faed83e upstream.

Otherwise it might be back on resume right after going to suspend in
some hardware.

Reported-by: Diego Viola <diego.viola@gmail.com>
Signed-off-by: Guo-Fu Tseng <cooldavid@cooldavid.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/jme.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c
index 973dade2d07f..39da60007ade 100644
--- a/drivers/net/ethernet/jme.c
+++ b/drivers/net/ethernet/jme.c
@@ -270,11 +270,17 @@ jme_reset_mac_processor(struct jme_adapter *jme)
 }
 
 static inline void
-jme_clear_pm(struct jme_adapter *jme)
+jme_clear_pm_enable_wol(struct jme_adapter *jme)
 {
 	jwrite32(jme, JME_PMCS, PMCS_STMASK | jme->reg_pmcs);
 }
 
+static inline void
+jme_clear_pm_disable_wol(struct jme_adapter *jme)
+{
+	jwrite32(jme, JME_PMCS, PMCS_STMASK);
+}
+
 static int
 jme_reload_eeprom(struct jme_adapter *jme)
 {
@@ -1853,7 +1859,7 @@ jme_open(struct net_device *netdev)
 	struct jme_adapter *jme = netdev_priv(netdev);
 	int rc;
 
-	jme_clear_pm(jme);
+	jme_clear_pm_disable_wol(jme);
 	JME_NAPI_ENABLE(jme);
 
 	tasklet_init(&jme->linkch_task, jme_link_change_tasklet,
@@ -1929,7 +1935,7 @@ jme_powersave_phy(struct jme_adapter *jme)
 		jme_set_100m_half(jme);
 		if (jme->reg_pmcs & (PMCS_LFEN | PMCS_LREN))
 			jme_wait_link(jme);
-		jme_clear_pm(jme);
+		jme_clear_pm_enable_wol(jme);
 	} else {
 		jme_phy_off(jme);
 	}
@@ -2646,7 +2652,6 @@ jme_set_wol(struct net_device *netdev,
 	if (wol->wolopts & WAKE_MAGIC)
 		jme->reg_pmcs |= PMCS_MFEN;
 
-	jwrite32(jme, JME_PMCS, jme->reg_pmcs);
 	device_set_wakeup_enable(&jme->pdev->dev, !!(jme->reg_pmcs));
 
 	return 0;
@@ -3172,7 +3177,7 @@ jme_init_one(struct pci_dev *pdev,
 	jme->mii_if.mdio_read = jme_mdio_read;
 	jme->mii_if.mdio_write = jme_mdio_write;
 
-	jme_clear_pm(jme);
+	jme_clear_pm_disable_wol(jme);
 	device_set_wakeup_enable(&pdev->dev, true);
 
 	jme_set_phyfifo_5level(jme);
@@ -3304,7 +3309,7 @@ jme_resume(struct device *dev)
 	if (!netif_running(netdev))
 		return 0;
 
-	jme_clear_pm(jme);
+	jme_clear_pm_disable_wol(jme);
 	jme_phy_on(jme);
 	if (test_bit(JME_FLAG_SSET, &jme->flags))
 		jme_set_settings(netdev, &jme->old_ecmd);

From e91b1dbdc1f064872a6a2bb2375ae9202dd5e6e0 Mon Sep 17 00:00:00 2001
From: Guo-Fu Tseng <cooldavid@cooldavid.org>
Date: Sat, 5 Mar 2016 08:11:56 +0800
Subject: [PATCH 156/424] jme: Fix device PM wakeup API usage

commit 81422e672f8181d7ad1ee6c60c723aac649f538f upstream.

According to Documentation/power/devices.txt

The driver should not use device_set_wakeup_enable() which is the policy
for user to decide.

Using device_init_wakeup() to initialize dev->power.should_wakeup and
dev->power.can_wakeup on driver initialization.

And use device_may_wakeup() on suspend to decide if WoL function should
be enabled on NIC.

Reported-by: Diego Viola <diego.viola@gmail.com>
Signed-off-by: Guo-Fu Tseng <cooldavid@cooldavid.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/jme.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c
index 39da60007ade..1257b18e6b90 100644
--- a/drivers/net/ethernet/jme.c
+++ b/drivers/net/ethernet/jme.c
@@ -1931,7 +1931,7 @@ jme_wait_link(struct jme_adapter *jme)
 static void
 jme_powersave_phy(struct jme_adapter *jme)
 {
-	if (jme->reg_pmcs) {
+	if (jme->reg_pmcs && device_may_wakeup(&jme->pdev->dev)) {
 		jme_set_100m_half(jme);
 		if (jme->reg_pmcs & (PMCS_LFEN | PMCS_LREN))
 			jme_wait_link(jme);
@@ -2652,8 +2652,6 @@ jme_set_wol(struct net_device *netdev,
 	if (wol->wolopts & WAKE_MAGIC)
 		jme->reg_pmcs |= PMCS_MFEN;
 
-	device_set_wakeup_enable(&jme->pdev->dev, !!(jme->reg_pmcs));
-
 	return 0;
 }
 
@@ -3178,7 +3176,7 @@ jme_init_one(struct pci_dev *pdev,
 	jme->mii_if.mdio_write = jme_mdio_write;
 
 	jme_clear_pm_disable_wol(jme);
-	device_set_wakeup_enable(&pdev->dev, true);
+	device_init_wakeup(&pdev->dev, true);
 
 	jme_set_phyfifo_5level(jme);
 	jme->pcirev = pdev->revision;

From 22327f609cef2a3f9bf0781fb2e9dda07ec64c98 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ZenIV.linux.org.uk>
Date: Thu, 14 Jan 2016 18:13:49 +0000
Subject: [PATCH 157/424] unbreak allmodconfig KCONFIG_ALLCONFIG=...

commit 6b87b70c5339f30e3c5b32085e69625906513dc2 upstream.

	Prior to 3.13 make allmodconfig KCONFIG_ALLCONFIG=/dev/null used
to be equivalent to make allmodconfig; these days it hardwires MODULES to n.
In fact, any KCONFIG_ALLCONFIG that doesn't set MODULES explicitly is
treated as if it set it to n.

	Regression had been introduced by commit cfa98f ("kconfig: do not
override symbols already set"); what happens is that conf_read_simple()
does sym_calc_value(modules_sym) on exit, which leaves SYMBOL_VALID set and
has conf_set_all_new_symbols() skip modules_sym.

	It's pretty easy to fix - simply move that call of sym_calc_value()
into the callers, except for the ones in KCONFIG_ALLCONFIG handling.
Objections?

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Fixes: cfa98f2e0ae9 ("kconfig: do not override symbols already set")
Signed-off-by: Michal Marek <mmarek@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 scripts/kconfig/confdata.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c
index 0b7dc2fd7bac..dd243d2abd87 100644
--- a/scripts/kconfig/confdata.c
+++ b/scripts/kconfig/confdata.c
@@ -267,10 +267,8 @@ int conf_read_simple(const char *name, int def)
 		if (in)
 			goto load;
 		sym_add_change_count(1);
-		if (!sym_defconfig_list) {
-			sym_calc_value(modules_sym);
+		if (!sym_defconfig_list)
 			return 1;
-		}
 
 		for_all_defaults(sym_defconfig_list, prop) {
 			if (expr_calc_value(prop->visible.expr) == no ||
@@ -403,7 +401,6 @@ setsym:
 	}
 	free(line);
 	fclose(in);
-	sym_calc_value(modules_sym);
 	return 0;
 }
 
@@ -414,8 +411,12 @@ int conf_read(const char *name)
 
 	sym_set_change_count(0);
 
-	if (conf_read_simple(name, S_DEF_USER))
+	if (conf_read_simple(name, S_DEF_USER)) {
+		sym_calc_value(modules_sym);
 		return 1;
+	}
+
+	sym_calc_value(modules_sym);
 
 	for_all_symbols(i, sym) {
 		sym_calc_value(sym);
@@ -846,6 +847,7 @@ static int conf_split_config(void)
 
 	name = conf_get_autoconfig_name();
 	conf_read_simple(name, S_DEF_AUTO);
+	sym_calc_value(modules_sym);
 
 	if (chdir("include/config"))
 		return 1;

From 1f5c4e0cb83cde427f1b8b95aa9a2a42e249fd53 Mon Sep 17 00:00:00 2001
From: Caesar Wang <wxt@rock-chips.com>
Date: Mon, 15 Feb 2016 15:33:28 +0800
Subject: [PATCH 158/424] thermal: rockchip: fix a impossible condition caused
 by the warning

commit 43b4eb9fe719b107c8e5d49d1edbff0c135a42cb upstream.

As the Dan report the smatch check the thermal driver warning:
drivers/thermal/rockchip_thermal.c:551 rockchip_configure_from_dt()
warn: impossible condition '(thermal->tshut_temp > ((~0 >> 1))) =>
(s32min-s32max > s32max)'

Although The shut_temp read from DT is u32,the temperature is currently
represented as int not long in the thermal driver.
Let's change to make shut_temp instead of the thermal->tshut_temp for
the condition.

Fixes: commit 437df2172e8d
("thermal: rockchip: consistently use int for temperatures")

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Caesar Wang <wxt@rock-chips.com>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/thermal/rockchip_thermal.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/thermal/rockchip_thermal.c b/drivers/thermal/rockchip_thermal.c
index e845841ab036..7106288efae3 100644
--- a/drivers/thermal/rockchip_thermal.c
+++ b/drivers/thermal/rockchip_thermal.c
@@ -545,15 +545,14 @@ static int rockchip_configure_from_dt(struct device *dev,
 			 thermal->chip->tshut_temp);
 		thermal->tshut_temp = thermal->chip->tshut_temp;
 	} else {
+		if (shut_temp > INT_MAX) {
+			dev_err(dev, "Invalid tshut temperature specified: %d\n",
+				shut_temp);
+			return -ERANGE;
+		}
 		thermal->tshut_temp = shut_temp;
 	}
 
-	if (thermal->tshut_temp > INT_MAX) {
-		dev_err(dev, "Invalid tshut temperature specified: %d\n",
-			thermal->tshut_temp);
-		return -ERANGE;
-	}
-
 	if (of_property_read_u32(np, "rockchip,hw-tshut-mode", &tshut_mode)) {
 		dev_warn(dev,
 			 "Missing tshut mode property, using default (%s)\n",

From 03d86237007729b006808e8eab90e96a565deee4 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Fri, 4 Mar 2016 17:20:13 +1100
Subject: [PATCH 159/424] sunrpc/cache: drop reference when
 sunrpc_cache_pipe_upcall() detects a race

commit a6ab1e8126d205238defbb55d23661a3a5c6a0d8 upstream.

sunrpc_cache_pipe_upcall() can detect a race if CACHE_PENDING is no longer
set.  In this case it aborts the queuing of the upcall.
However it has already taken a new counted reference on "h" and
doesn't "put" it, even though it frees the data structure holding the reference.

So let's delay the "cache_get" until we know we need it.

Fixes: f9e1aedc6c79 ("sunrpc/cache: remove races with queuing an upcall.")
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/sunrpc/cache.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 21e20353178e..63fb5ee212cf 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -1182,14 +1182,14 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h)
 	}
 
 	crq->q.reader = 0;
-	crq->item = cache_get(h);
 	crq->buf = buf;
 	crq->len = 0;
 	crq->readers = 0;
 	spin_lock(&queue_lock);
-	if (test_bit(CACHE_PENDING, &h->flags))
+	if (test_bit(CACHE_PENDING, &h->flags)) {
+		crq->item = cache_get(h);
 		list_add_tail(&crq->q.list, &detail->queue);
-	else
+	} else
 		/* Lost a race, no longer PENDING, so don't enqueue */
 		ret = -EAGAIN;
 	spin_unlock(&queue_lock);

From 5b6e810f352b00c7bf5e7e32557a39b6d550458a Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 14 Mar 2016 15:29:45 +0100
Subject: [PATCH 160/424] megaraid_sas: add missing curly braces in ioctl
 handler

commit 3deb9438d34a09f6796639b652a01d110aca9f75 upstream.

gcc-6 found a dubious indentation in the megasas_mgmt_fw_ioctl
function:

drivers/scsi/megaraid/megaraid_sas_base.c: In function 'megasas_mgmt_fw_ioctl':
drivers/scsi/megaraid/megaraid_sas_base.c:6658:4: warning: statement is indented as if it were guarded by... [-Wmisleading-indentation]
    kbuff_arr[i] = NULL;
    ^~~~~~~~~
drivers/scsi/megaraid/megaraid_sas_base.c:6653:3: note: ...this 'if' clause, but it is not
   if (kbuff_arr[i])
   ^~

The code is actually correct, as there is no downside in clearing a NULL
pointer again.

This clarifies the code and avoids the warning by adding extra curly
braces.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: 90dc9d98f01b ("megaraid_sas : MFI MPT linked list corruption fix")
Reviewed-by: Hannes Reinecke <hare@suse.com>
Acked-by: Sumit Saxena <sumit.saxena@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/megaraid/megaraid_sas_base.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c
index 97a1c1c33b05..00ce3e269a43 100644
--- a/drivers/scsi/megaraid/megaraid_sas_base.c
+++ b/drivers/scsi/megaraid/megaraid_sas_base.c
@@ -6282,12 +6282,13 @@ out:
 	}
 
 	for (i = 0; i < ioc->sge_count; i++) {
-		if (kbuff_arr[i])
+		if (kbuff_arr[i]) {
 			dma_free_coherent(&instance->pdev->dev,
 					  le32_to_cpu(kern_sge32[i].length),
 					  kbuff_arr[i],
 					  le32_to_cpu(kern_sge32[i].phys_addr));
 			kbuff_arr[i] = NULL;
+		}
 	}
 
 	megasas_return_cmd(instance, cmd);

From f4b1d0a9a3f4291ba4ab48dd27efd01d3775d7f6 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 22 Dec 2015 17:25:17 +0200
Subject: [PATCH 161/424] stm class: Select CONFIG_SRCU

commit 042d4460b5b4379a12f375045ff9065cf6758735 upstream.

The newly added STM code uses SRCU, but does not ensure that
this code is part of the kernel:

drivers/built-in.o: In function `stm_source_link_show':
include/linux/srcu.h:221: undefined reference to `__srcu_read_lock'
include/linux/srcu.h:238: undefined reference to `__srcu_read_unlock'
drivers/built-in.o: In function `stm_source_link_drop':
include/linux/srcu.h:221: undefined reference to `__srcu_read_lock'
include/linux/srcu.h:238: undefined reference to `__srcu_read_unlock'

This adds a Kconfig 'select' statement like all the other SRCU using
drivers have.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: 7bd1d4093c2f ("stm class: Introduce an abstraction for System Trace Module devices")
Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hwtracing/stm/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/hwtracing/stm/Kconfig b/drivers/hwtracing/stm/Kconfig
index 83e9f591a54b..e7a348807f0c 100644
--- a/drivers/hwtracing/stm/Kconfig
+++ b/drivers/hwtracing/stm/Kconfig
@@ -1,6 +1,7 @@
 config STM
 	tristate "System Trace Module devices"
 	select CONFIGFS_FS
+	select SRCU
 	help
 	  A System Trace Module (STM) is a device exporting data in System
 	  Trace Protocol (STP) format as defined by MIPI STP standards.

From b393b9da446626170a39bcd79c52e8ebadb19c8c Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 4 Feb 2016 14:36:09 +0300
Subject: [PATCH 162/424] extcon: max77843: Use correct size for reading the
 interrupt register

commit c4924e92442d7218bd725e47fa3988c73aae84c9 upstream.

The info->status[] array has 3 elements.  We are using size
MAX77843_MUIC_IRQ_NUM (16) instead of MAX77843_MUIC_STATUS_NUM (3) as
intended.

Fixes: 135d9f7d135a ('extcon: max77843: Clear IRQ bits state before request IRQ')
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Jaewon Kim <jaewon02.kim@samsung.com>
Reviewed-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
[cw00.choi: Modify the patch title]
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/extcon/extcon-max77843.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/extcon/extcon-max77843.c b/drivers/extcon/extcon-max77843.c
index 9f9ea334399c..b6cb30d207be 100644
--- a/drivers/extcon/extcon-max77843.c
+++ b/drivers/extcon/extcon-max77843.c
@@ -803,7 +803,7 @@ static int max77843_muic_probe(struct platform_device *pdev)
 	/* Clear IRQ bits before request IRQs */
 	ret = regmap_bulk_read(max77843->regmap_muic,
 			MAX77843_MUIC_REG_INT1, info->status,
-			MAX77843_MUIC_IRQ_NUM);
+			MAX77843_MUIC_STATUS_NUM);
 	if (ret) {
 		dev_err(&pdev->dev, "Failed to Clear IRQ bits\n");
 		goto err_muic_irq;

From 1a1a512b983108015ced1e7a7c7775cfeec42d8c Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 4 May 2016 14:50:15 -0700
Subject: [PATCH 163/424] Linux 4.4.9

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 1928fcd539cc..0722cdf52152 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 4
-SUBLEVEL = 8
+SUBLEVEL = 9
 EXTRAVERSION =
 NAME = Blurry Fish Butt
 

From cc798dcca00a8b71e5c09a38d3921461f931c85f Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 23 Nov 2015 13:26:19 +0000
Subject: [PATCH 164/424] arm64: mm: detect bad __create_mapping uses

If a caller of __create_mapping provides a PA and VA which have
different sub-page offsets, it is not clear which offset they expect to
apply to the mapping, and is indicative of a bad caller.

In some cases, the region we wish to map may validly have a sub-page
offset in the physical and virtual addresses. For example, EFI runtime
regions have 4K granularity, yet may be mapped by a 64K page kernel. So
long as the physical and virtual offsets are the same, the region will
be mapped at the expected VAs.

Disallow calls with differing sub-page offsets, and WARN when they are
encountered, so that we can detect and fix such cases.

Cc: Laura Abbott <labbott@fedoraproject.org>
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Steve Capper <steve.capper@linaro.org>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit cc5d2b3b95cdbb3fed4e38e667d17b9ac7250f7a)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/mm/mmu.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 116ad654dd59..61a82d68330d 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -251,6 +251,13 @@ static void  __create_mapping(struct mm_struct *mm, pgd_t *pgd,
 {
 	unsigned long addr, length, end, next;
 
+	/*
+	 * If the virtual and physical address don't have the same offset
+	 * within a page, we cannot map the region as the caller expects.
+	 */
+	if (WARN_ON((phys ^ virt) & ~PAGE_MASK))
+		return;
+
 	addr = virt & PAGE_MASK;
 	length = PAGE_ALIGN(size + (virt & ~PAGE_MASK));
 

From 6329e5d3b74540578dbbf4550fdc53c52c706c94 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 23 Nov 2015 13:26:20 +0000
Subject: [PATCH 165/424] arm64: mm: allow sections for unaligned bases

Callees of __create_mapping may decide to create section mappings if
sufficient low bits of the physical and virtual addresses they were
passed are zero. While __create_mapping rounds the virtual base address
down, it does not similarly round the physical base address down, and
hence non-zero bits in the physical address can prevent use of a section
mapping, even where a whole next-level table would be used instead.

Round down the physical base address in __create_mapping to enable all
callees to always create section mappings when such a mapping is
possible.

Cc: Laura Abbott <labbott@fedoraproject.org>
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Steve Capper <steve.capper@linaro.org>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit 9c4e08a3022b6df90d31ef4007291faabfce5431)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/mm/mmu.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 61a82d68330d..d2a6194f4bec 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -258,6 +258,7 @@ static void  __create_mapping(struct mm_struct *mm, pgd_t *pgd,
 	if (WARN_ON((phys ^ virt) & ~PAGE_MASK))
 		return;
 
+	phys &= PAGE_MASK;
 	addr = virt & PAGE_MASK;
 	length = PAGE_ALIGN(size + (virt & ~PAGE_MASK));
 

From 6e8ef09edf4a9e61a04fad753ffeebaecc60b568 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 30 Oct 2015 18:56:19 +0000
Subject: [PATCH 166/424] arm64: pgtable: implement pte_accessible()

This patch implements the pte_accessible() macro, which can be used to
test whether or not a given pte is a candidate for allocation in the
TLB.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit 76c714be0e5e60c935a53b31be58939510ba1d0f)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/pgtable.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index c63868ae9a4a..cd5dfc97268e 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -168,6 +168,16 @@ extern struct page *empty_zero_page;
 #define pte_valid(pte)		(!!(pte_val(pte) & PTE_VALID))
 #define pte_valid_not_user(pte) \
 	((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID)
+#define pte_valid_young(pte) \
+	((pte_val(pte) & (PTE_VALID | PTE_AF)) == (PTE_VALID | PTE_AF))
+
+/*
+ * Could the pte be present in the TLB? We must check mm_tlb_flush_pending
+ * so that we don't erroneously return false for pages that have been
+ * remapped as PROT_NONE but are yet to be flushed from the TLB.
+ */
+#define pte_accessible(mm, pte)	\
+	(mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid_young(pte))
 
 static inline pte_t clear_pte_bit(pte_t pte, pgprot_t prot)
 {

From 1a9cc42c0a812241f6cd679a19aefba2900437a7 Mon Sep 17 00:00:00 2001
From: Jisheng Zhang <jszhang@marvell.com>
Date: Fri, 20 Nov 2015 17:59:10 +0800
Subject: [PATCH 167/424] arm64: add __init/__initdata section marker to some
 functions/variables

These functions/variables are not needed after booting, so mark them
as __init or __initdata.

Signed-off-by: Jisheng Zhang <jszhang@marvell.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit a7c61a3452d39078919f0e1f493ff966fb64f0db)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/kernel/armv8_deprecated.c | 6 +++---
 arch/arm64/kernel/cpufeature.c       | 9 +++++----
 arch/arm64/kernel/fpsimd.c           | 2 +-
 arch/arm64/mm/dma-mapping.c          | 4 ++--
 arch/arm64/mm/init.c                 | 6 +++---
 5 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
index 937f5e58a4d3..3e01207917b1 100644
--- a/arch/arm64/kernel/armv8_deprecated.c
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -62,7 +62,7 @@ struct insn_emulation {
 };
 
 static LIST_HEAD(insn_emulation);
-static int nr_insn_emulated;
+static int nr_insn_emulated __initdata;
 static DEFINE_RAW_SPINLOCK(insn_emulation_lock);
 
 static void register_emulation_hooks(struct insn_emulation_ops *ops)
@@ -173,7 +173,7 @@ static int update_insn_emulation_mode(struct insn_emulation *insn,
 	return ret;
 }
 
-static void register_insn_emulation(struct insn_emulation_ops *ops)
+static void __init register_insn_emulation(struct insn_emulation_ops *ops)
 {
 	unsigned long flags;
 	struct insn_emulation *insn;
@@ -237,7 +237,7 @@ static struct ctl_table ctl_abi[] = {
 	{ }
 };
 
-static void register_insn_emulation_sysctl(struct ctl_table *table)
+static void __init register_insn_emulation_sysctl(struct ctl_table *table)
 {
 	unsigned long flags;
 	int i = 0;
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 0669c63281ea..5c90aa490a2b 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -684,7 +684,7 @@ static const struct arm64_cpu_capabilities arm64_hwcaps[] = {
 	{},
 };
 
-static void cap_set_hwcap(const struct arm64_cpu_capabilities *cap)
+static void __init cap_set_hwcap(const struct arm64_cpu_capabilities *cap)
 {
 	switch (cap->hwcap_type) {
 	case CAP_HWCAP:
@@ -729,7 +729,7 @@ static bool __maybe_unused cpus_have_hwcap(const struct arm64_cpu_capabilities *
 	return rc;
 }
 
-static void setup_cpu_hwcaps(void)
+static void __init setup_cpu_hwcaps(void)
 {
 	int i;
 	const struct arm64_cpu_capabilities *hwcaps = arm64_hwcaps;
@@ -758,7 +758,8 @@ void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
  * Run through the enabled capabilities and enable() it on all active
  * CPUs
  */
-static void enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps)
+static void __init
+enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps)
 {
 	int i;
 
@@ -897,7 +898,7 @@ static inline void set_sys_caps_initialised(void)
 
 #endif	/* CONFIG_HOTPLUG_CPU */
 
-static void setup_feature_capabilities(void)
+static void __init setup_feature_capabilities(void)
 {
 	update_cpu_capabilities(arm64_features, "detected feature:");
 	enable_cpu_capabilities(arm64_features);
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 4c46c54a3ad7..acc1afd5c749 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -289,7 +289,7 @@ static struct notifier_block fpsimd_cpu_pm_notifier_block = {
 	.notifier_call = fpsimd_cpu_pm_notifier,
 };
 
-static void fpsimd_pm_init(void)
+static void __init fpsimd_pm_init(void)
 {
 	cpu_pm_register_notifier(&fpsimd_cpu_pm_notifier_block);
 }
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 354144e33218..a6e757cbab77 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -40,7 +40,7 @@ static pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot,
 static struct gen_pool *atomic_pool;
 
 #define DEFAULT_DMA_COHERENT_POOL_SIZE  SZ_256K
-static size_t atomic_pool_size = DEFAULT_DMA_COHERENT_POOL_SIZE;
+static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE;
 
 static int __init early_coherent_pool(char *p)
 {
@@ -896,7 +896,7 @@ static int __iommu_attach_notifier(struct notifier_block *nb,
 	return 0;
 }
 
-static int register_iommu_dma_ops_notifier(struct bus_type *bus)
+static int __init register_iommu_dma_ops_notifier(struct bus_type *bus)
 {
 	struct notifier_block *nb = kzalloc(sizeof(*nb), GFP_KERNEL);
 	int ret;
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 4cb98aa8c27b..10fab52eed95 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -71,7 +71,7 @@ early_param("initrd", early_initrd);
  * currently assumes that for memory starting above 4G, 32-bit devices will
  * use a DMA offset.
  */
-static phys_addr_t max_zone_dma_phys(void)
+static phys_addr_t __init max_zone_dma_phys(void)
 {
 	phys_addr_t offset = memblock_start_of_DRAM() & GENMASK_ULL(63, 32);
 	return min(offset + (1ULL << 32), memblock_end_of_DRAM());
@@ -126,11 +126,11 @@ EXPORT_SYMBOL(pfn_valid);
 #endif
 
 #ifndef CONFIG_SPARSEMEM
-static void arm64_memory_present(void)
+static void __init arm64_memory_present(void)
 {
 }
 #else
-static void arm64_memory_present(void)
+static void __init arm64_memory_present(void)
 {
 	struct memblock_region *reg;
 

From 2e310797997214e0cc606013ac308167b6b72dc0 Mon Sep 17 00:00:00 2001
From: Yury Norov <ynorov@caviumnetworks.com>
Date: Wed, 2 Dec 2015 14:00:10 +0000
Subject: [PATCH 168/424] arm64: fix COMPAT_SHMLBA definition for large pages

ARM glibc uses (4 * __getpagesize()) for SHMLBA, which is correct for
4KB pages and works fine for 64KB pages, but the kernel uses a hardcoded
16KB that is too small for 64KB page based kernels. This changes the
definition to what user space sees when using 64KB pages.

Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Yury Norov <ynorov@caviumnetworks.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit b9b7aebb42d1b1392f3111de61136bb6cf3aae3f)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/shmparam.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/shmparam.h b/arch/arm64/include/asm/shmparam.h
index 4df608a8459e..e368a55ebd22 100644
--- a/arch/arm64/include/asm/shmparam.h
+++ b/arch/arm64/include/asm/shmparam.h
@@ -21,7 +21,7 @@
  * alignment value. Since we don't have aliasing D-caches, the rest of
  * the time we can safely use PAGE_SIZE.
  */
-#define COMPAT_SHMLBA	0x4000
+#define COMPAT_SHMLBA	(4 * PAGE_SIZE)
 
 #include <asm-generic/shmparam.h>
 

From 4ea9dd702768f3d1c3ab346f30fdce8b8d1a8ef9 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Mon, 23 Nov 2015 15:12:59 +0000
Subject: [PATCH 169/424] arm64: enable HAVE_IRQ_TIME_ACCOUNTING

arm64 relies on the arm_arch_timer for sched_clock, so we can select
HAVE_IRQ_TIME_ACCOUNTING and have the core sched-clock code enable the
feature at runtime based on the rate.

Reported-by: Mario Smarduch <m.smarduch@samsung.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit 24da208db32ee1e4757ceaba898c47add8e5361e)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 Documentation/features/time/irq-time-acct/arch-support.txt | 2 +-
 arch/arm64/Kconfig                                         | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/Documentation/features/time/irq-time-acct/arch-support.txt b/Documentation/features/time/irq-time-acct/arch-support.txt
index e63316239938..4199ffecc0ff 100644
--- a/Documentation/features/time/irq-time-acct/arch-support.txt
+++ b/Documentation/features/time/irq-time-acct/arch-support.txt
@@ -9,7 +9,7 @@
     |       alpha: |  ..  |
     |         arc: | TODO |
     |         arm: |  ok  |
-    |       arm64: |  ..  |
+    |       arm64: |  ok  |
     |       avr32: | TODO |
     |    blackfin: | TODO |
     |         c6x: | TODO |
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 871f21783866..4876459c0838 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -70,6 +70,7 @@ config ARM64
 	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_GENERIC_DMA_COHERENT
 	select HAVE_HW_BREAKPOINT if PERF_EVENTS
+	select HAVE_IRQ_TIME_ACCOUNTING
 	select HAVE_MEMBLOCK
 	select HAVE_PATA_PLATFORM
 	select HAVE_PERF_EVENTS

From 0348dff2c49b25c7b5702ef887f03177f4b0c0fd Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Thu, 19 Nov 2015 17:48:31 +0000
Subject: [PATCH 170/424] arm64: spinlock: serialise spin_unlock_wait against
 concurrent lockers

Boqun Feng reported a rather nasty ordering issue with spin_unlock_wait
on architectures implementing spin_lock with LL/SC sequences and acquire
semantics:

 | CPU 1                   CPU 2                     CPU 3
 | ==================      ====================      ==============
 |                                                   spin_unlock(&lock);
 |                         spin_lock(&lock):
 |                           r1 = *lock; // r1 == 0;
 |                         o = READ_ONCE(object); // reordered here
 | object = NULL;
 | smp_mb();
 | spin_unlock_wait(&lock);
 |                           *lock = 1;
 | smp_mb();
 | o->dead = true;
 |                         if (o) // true
 |                           BUG_ON(o->dead); // true!!

The crux of the problem is that spin_unlock_wait(&lock) can return on
CPU 1 whilst CPU 2 is in the process of taking the lock. This can be
resolved by upgrading spin_unlock_wait to a LOCK operation, forcing it
to serialise against a concurrent locker and giving it acquire semantics
in the process (although it is not at all clear whether this is needed -
different callers seem to assume different things about the barrier
semantics and architectures are similarly disjoint in their
implementations of the macro).

This patch implements spin_unlock_wait using an LL/SC sequence with
acquire semantics on arm64. For v8.1 systems with the LSE atomics, the
exclusive writeback is omitted, since the spin_lock operation is
indivisible and no intermediate state can be observed.

Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit d86b8da04dfa4771a68bdbad6c424d40f22f0d14)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/spinlock.h | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h
index c85e96d174a5..fc9682bfe002 100644
--- a/arch/arm64/include/asm/spinlock.h
+++ b/arch/arm64/include/asm/spinlock.h
@@ -26,9 +26,28 @@
  * The memory barriers are implicit with the load-acquire and store-release
  * instructions.
  */
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
+{
+	unsigned int tmp;
+	arch_spinlock_t lockval;
 
-#define arch_spin_unlock_wait(lock) \
-	do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0)
+	asm volatile(
+"	sevl\n"
+"1:	wfe\n"
+"2:	ldaxr	%w0, %2\n"
+"	eor	%w1, %w0, %w0, ror #16\n"
+"	cbnz	%w1, 1b\n"
+	ARM64_LSE_ATOMIC_INSN(
+	/* LL/SC */
+"	stxr	%w1, %w0, %2\n"
+"	cbnz	%w1, 2b\n", /* Serialise against any concurrent lockers */
+	/* LSE atomics */
+"	nop\n"
+"	nop\n")
+	: "=&r" (lockval), "=&r" (tmp), "+Q" (*lock)
+	:
+	: "memory");
+}
 
 #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
 

From 2ef8b1f56c1989157c8ef929f4f89bd6a3ac7950 Mon Sep 17 00:00:00 2001
From: Li Bin <huawei.libin@huawei.com>
Date: Fri, 4 Dec 2015 11:38:39 +0800
Subject: [PATCH 171/424] arm64: ftrace: stop using kstop_machine to
 enable/disable tracing

For ftrace on arm64, kstop_machine which is hugely disruptive
to a running system is not needed to convert nops to ftrace calls
or back, because that to be modified instrucions, that NOP, B or BL,
are all safe instructions which called "concurrent modification
and execution of instructions", that can be executed by one
thread of execution as they are being modified by another thread
of execution without requiring explicit synchronization.

Signed-off-by: Li Bin <huawei.libin@huawei.com>
Reviewed-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit 81a6a146e88eca5d6726569779778d61489d85aa)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/kernel/ftrace.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index c851be795080..9669b331a23b 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -93,6 +93,11 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
 	return ftrace_modify_code(pc, old, new, true);
 }
 
+void arch_ftrace_update_code(int command)
+{
+	ftrace_modify_all_code(command);
+}
+
 int __init ftrace_dyn_arch_init(void)
 {
 	return 0;

From 42c1d121864549d11fb0f002df8dc8ef35219107 Mon Sep 17 00:00:00 2001
From: Li Bin <huawei.libin@huawei.com>
Date: Fri, 4 Dec 2015 11:38:40 +0800
Subject: [PATCH 172/424] arm64: ftrace: fix the comments for
 ftrace_modify_code

There is no need to worry about module and __init text disappearing
case, because that ftrace has a module notifier that is called when
a module is being unloaded and before the text goes away and this
code grabs the ftrace_lock mutex and removes the module functions
from the ftrace list, such that it will no longer do any
modifications to that module's text, the update to make functions
be traced or not is done under the ftrace_lock mutex as well.
And by now, __init section codes should not been modified
by ftrace, because it is black listed in recordmcount.c and
ignored by ftrace.

Suggested-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Li Bin <huawei.libin@huawei.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit 004ab584e028093996cf5b8e220b8bc50c5111cf)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/kernel/ftrace.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index 9669b331a23b..8f7005bc35bd 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -29,12 +29,11 @@ static int ftrace_modify_code(unsigned long pc, u32 old, u32 new,
 
 	/*
 	 * Note:
-	 * Due to modules and __init, code can disappear and change,
-	 * we need to protect against faulting as well as code changing.
-	 * We do this by aarch64_insn_*() which use the probe_kernel_*().
-	 *
-	 * No lock is held here because all the modifications are run
-	 * through stop_machine().
+	 * We are paranoid about modifying text, as if a bug were to happen, it
+	 * could cause us to read or write to someplace that could cause harm.
+	 * Carefully read and modify the code with aarch64_insn_*() which uses
+	 * probe_kernel_*(), and make sure what we read is what we expected it
+	 * to be before modifying it.
 	 */
 	if (validate) {
 		if (aarch64_insn_read((void *)pc, &replaced))

From a5b499e62f4070c7bfe2d322516a937e6f48d04f Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Fri, 4 Dec 2015 12:42:29 +0000
Subject: [PATCH 173/424] arm64: Add trace_hardirqs_off annotation in
 ret_to_user

When a kernel is built with CONFIG_TRACE_IRQFLAGS the following warning
is produced when entering userspace for the first time:

  WARNING: at /work/Linux/linux-2.6-aarch64/kernel/locking/lockdep.c:3519
  Modules linked in:
  CPU: 1 PID: 1 Comm: systemd Not tainted 4.4.0-rc3+ #639
  Hardware name: Juno (DT)
  task: ffffffc9768a0000 ti: ffffffc9768a8000 task.ti: ffffffc9768a8000
  PC is at check_flags.part.22+0x19c/0x1a8
  LR is at check_flags.part.22+0x19c/0x1a8
  pc : [<ffffffc0000fba6c>] lr : [<ffffffc0000fba6c>] pstate: 600001c5
  sp : ffffffc9768abe10
  x29: ffffffc9768abe10 x28: ffffffc9768a8000
  x27: 0000000000000000 x26: 0000000000000001
  x25: 00000000000000a6 x24: ffffffc00064be6c
  x23: ffffffc0009f249e x22: ffffffc9768a0000
  x21: ffffffc97fea5480 x20: 00000000000001c0
  x19: ffffffc00169a000 x18: 0000005558cc7b58
  x17: 0000007fb78e3180 x16: 0000005558d2e238
  x15: ffffffffffffffff x14: 0ffffffffffffffd
  x13: 0000000000000008 x12: 0101010101010101
  x11: 7f7f7f7f7f7f7f7f x10: fefefefefefeff63
  x9 : 7f7f7f7f7f7f7f7f x8 : 6e655f7371726964
  x7 : 0000000000000001 x6 : ffffffc0001079c4
  x5 : 0000000000000000 x4 : 0000000000000001
  x3 : ffffffc001698438 x2 : 0000000000000000
  x1 : ffffffc9768a0000 x0 : 000000000000002e
  Call trace:
  [<ffffffc0000fba6c>] check_flags.part.22+0x19c/0x1a8
  [<ffffffc0000fc440>] lock_is_held+0x80/0x98
  [<ffffffc00064bafc>] __schedule+0x404/0x730
  [<ffffffc00064be6c>] schedule+0x44/0xb8
  [<ffffffc000085bb0>] ret_to_user+0x0/0x24
  possible reason: unannotated irqs-off.
  irq event stamp: 502169
  hardirqs last  enabled at (502169): [<ffffffc000085a98>] el0_irq_naked+0x1c/0x24
  hardirqs last disabled at (502167): [<ffffffc0000bb3bc>] __do_softirq+0x17c/0x298
  softirqs last  enabled at (502168): [<ffffffc0000bb43c>] __do_softirq+0x1fc/0x298
  softirqs last disabled at (502143): [<ffffffc0000bb830>] irq_exit+0xa0/0xf0

This happens because we disable interrupts in ret_to_user before calling
schedule() in work_resched. This patch adds the necessary
trace_hardirqs_off annotation.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Reported-by: Mark Rutland <mark.rutland@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit db3899a6477a4dccd26cbfb7f408b6be2cc068e0)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/kernel/entry.S | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 7ed3d75f6304..e5b25389c48f 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -634,6 +634,9 @@ work_pending:
 	bl	do_notify_resume
 	b	ret_to_user
 work_resched:
+#ifdef CONFIG_TRACE_IRQFLAGS
+	bl	trace_hardirqs_off		// the IRQs are off here, inform the tracing code
+#endif
 	bl	schedule
 
 /*

From c0f49bdceea7d2f0cfe1c6f857e4e1cfa919382a Mon Sep 17 00:00:00 2001
From: Jungseok Lee <jungseoklee85@gmail.com>
Date: Fri, 4 Dec 2015 11:02:25 +0000
Subject: [PATCH 174/424] arm64: Store struct thread_info in sp_el0

There is need for figuring out how to manage struct thread_info data when
IRQ stack is introduced. struct thread_info information should be copied
to IRQ stack under the current thread_info calculation logic whenever
context switching is invoked. This is too expensive to keep supporting
the approach.

Instead, this patch pays attention to sp_el0 which is an unused scratch
register in EL1 context. sp_el0 utilization not only simplifies the
management, but also prevents text section size from being increased
largely due to static allocated IRQ stack as removing masking operation
using THREAD_SIZE in many places.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Jungseok Lee <jungseoklee85@gmail.com>
Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit 6cdf9c7ca687e01840d0215437620a20263012fc)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/thread_info.h | 10 ++++++++--
 arch/arm64/kernel/entry.S            | 15 ++++++++++++---
 arch/arm64/kernel/head.S             |  5 +++++
 arch/arm64/kernel/sleep.S            |  3 +++
 4 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 90c7ff233735..abd64bd1f6d9 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -73,10 +73,16 @@ register unsigned long current_stack_pointer asm ("sp");
  */
 static inline struct thread_info *current_thread_info(void) __attribute_const__;
 
+/*
+ * struct thread_info can be accessed directly via sp_el0.
+ */
 static inline struct thread_info *current_thread_info(void)
 {
-	return (struct thread_info *)
-		(current_stack_pointer & ~(THREAD_SIZE - 1));
+	unsigned long sp_el0;
+
+	asm ("mrs %0, sp_el0" : "=r" (sp_el0));
+
+	return (struct thread_info *)sp_el0;
 }
 
 #define thread_saved_pc(tsk)	\
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index e5b25389c48f..245fa6837880 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -88,7 +88,8 @@
 
 	.if	\el == 0
 	mrs	x21, sp_el0
-	get_thread_info tsk			// Ensure MDSCR_EL1.SS is clear,
+	mov	tsk, sp
+	and	tsk, tsk, #~(THREAD_SIZE - 1)	// Ensure MDSCR_EL1.SS is clear,
 	ldr	x19, [tsk, #TI_FLAGS]		// since we can unmask debug
 	disable_step_tsk x19, x20		// exceptions when scheduling.
 	.else
@@ -107,6 +108,13 @@
 	str	x21, [sp, #S_SYSCALLNO]
 	.endif
 
+	/*
+	 * Set sp_el0 to current thread_info.
+	 */
+	.if	\el == 0
+	msr	sp_el0, tsk
+	.endif
+
 	/*
 	 * Registers that may be useful after this macro is invoked:
 	 *
@@ -164,8 +172,7 @@ alternative_endif
 	.endm
 
 	.macro	get_thread_info, rd
-	mov	\rd, sp
-	and	\rd, \rd, #~(THREAD_SIZE - 1)	// top of stack
+	mrs	\rd, sp_el0
 	.endm
 
 /*
@@ -599,6 +606,8 @@ ENTRY(cpu_switch_to)
 	ldp	x29, x9, [x8], #16
 	ldr	lr, [x8]
 	mov	sp, x9
+	and	x9, x9, #~(THREAD_SIZE - 1)
+	msr	sp_el0, x9
 	ret
 ENDPROC(cpu_switch_to)
 
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index b685257926f0..17ce7285bb12 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -424,6 +424,9 @@ __mmap_switched:
 	b	1b
 2:
 	adr_l	sp, initial_sp, x4
+	mov	x4, sp
+	and	x4, x4, #~(THREAD_SIZE - 1)
+	msr	sp_el0, x4			// Save thread_info
 	str_l	x21, __fdt_pointer, x5		// Save FDT pointer
 	str_l	x24, memstart_addr, x6		// Save PHYS_OFFSET
 	mov	x29, #0
@@ -611,6 +614,8 @@ ENDPROC(secondary_startup)
 ENTRY(__secondary_switched)
 	ldr	x0, [x21]			// get secondary_data.stack
 	mov	sp, x0
+	and	x0, x0, #~(THREAD_SIZE - 1)
+	msr	sp_el0, x0			// save thread_info
 	mov	x29, #0
 	b	secondary_start_kernel
 ENDPROC(__secondary_switched)
diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
index f586f7c875e2..e33fe33876ab 100644
--- a/arch/arm64/kernel/sleep.S
+++ b/arch/arm64/kernel/sleep.S
@@ -173,6 +173,9 @@ ENTRY(cpu_resume)
 	/* load physical address of identity map page table in x1 */
 	adrp	x1, idmap_pg_dir
 	mov	sp, x2
+	/* save thread_info */
+	and	x2, x2, #~(THREAD_SIZE - 1)
+	msr	sp_el0, x2
 	/*
 	 * cpu_do_resume expects x0 to contain context physical address
 	 * pointer and x1 to contain physical address of 1:1 page tables

From 2f478c1abcc07611671c5fee4bd10974dde476ab Mon Sep 17 00:00:00 2001
From: AKASHI Takahiro <takahiro.akashi@linaro.org>
Date: Fri, 4 Dec 2015 11:02:26 +0000
Subject: [PATCH 175/424] arm64: Modify stack trace and dump for use with
 irq_stack

This patch allows unwind_frame() to traverse from interrupt stack to task
stack correctly. It requires data from a dummy stack frame, created
during irq_stack_entry(), added by a later patch.

A similar approach is taken to modify dump_backtrace(), which expects to
find struct pt_regs underneath any call to functions marked __exception.
When on an irq_stack, the struct pt_regs is stored on the old task stack,
the location of which is stored in the dummy stack frame.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
[james.morse: merged two patches, reworked for per_cpu irq_stacks, and
 no alignment guarantees, added irq_stack definitions]
Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>

(cherry picked from commit 132cd887b5c54758d04bf25c52fa48f45e843a30)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/irq.h   | 32 ++++++++++++++++++++++++++++++++
 arch/arm64/kernel/irq.c        |  3 +++
 arch/arm64/kernel/stacktrace.c | 29 +++++++++++++++++++++++++++--
 arch/arm64/kernel/traps.c      | 14 +++++++++++++-
 4 files changed, 75 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h
index 8e8d30684392..e2f3f135a3bc 100644
--- a/arch/arm64/include/asm/irq.h
+++ b/arch/arm64/include/asm/irq.h
@@ -1,10 +1,32 @@
 #ifndef __ASM_IRQ_H
 #define __ASM_IRQ_H
 
+#define IRQ_STACK_SIZE			THREAD_SIZE
+#define IRQ_STACK_START_SP		THREAD_START_SP
+
+#ifndef __ASSEMBLER__
+
+#include <linux/percpu.h>
+
 #include <asm-generic/irq.h>
+#include <asm/thread_info.h>
 
 struct pt_regs;
 
+DECLARE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack);
+
+/*
+ * The highest address on the stack, and the first to be used. Used to
+ * find the dummy-stack frame put down by el?_irq() in entry.S.
+ */
+#define IRQ_STACK_PTR(cpu) ((unsigned long)per_cpu(irq_stack, cpu) + IRQ_STACK_START_SP)
+
+/*
+ * The offset from irq_stack_ptr where entry.S will store the original
+ * stack pointer. Used by unwind_frame() and dump_backtrace().
+ */
+#define IRQ_STACK_TO_TASK_STACK(ptr) *((unsigned long *)(ptr - 0x10));
+
 extern void set_handle_irq(void (*handle_irq)(struct pt_regs *));
 
 static inline int nr_legacy_irqs(void)
@@ -12,4 +34,14 @@ static inline int nr_legacy_irqs(void)
 	return 0;
 }
 
+static inline bool on_irq_stack(unsigned long sp, int cpu)
+{
+	/* variable names the same as kernel/stacktrace.c */
+	unsigned long low = (unsigned long)per_cpu(irq_stack, cpu);
+	unsigned long high = low + IRQ_STACK_START_SP;
+
+	return (low <= sp && sp <= high);
+}
+
+#endif /* !__ASSEMBLER__ */
 #endif
diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
index 9f17ec071ee0..1e3cef578e21 100644
--- a/arch/arm64/kernel/irq.c
+++ b/arch/arm64/kernel/irq.c
@@ -30,6 +30,9 @@
 
 unsigned long irq_err_count;
 
+/* irq stack only needs to be 16 byte aligned - not IRQ_STACK_SIZE aligned */
+DEFINE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack) __aligned(16);
+
 int arch_show_interrupts(struct seq_file *p, int prec)
 {
 	show_ipi_list(p, prec);
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index ccb6078ed9f2..b947eeffa5b2 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -20,6 +20,7 @@
 #include <linux/sched.h>
 #include <linux/stacktrace.h>
 
+#include <asm/irq.h>
 #include <asm/stacktrace.h>
 
 /*
@@ -39,17 +40,41 @@ int notrace unwind_frame(struct stackframe *frame)
 {
 	unsigned long high, low;
 	unsigned long fp = frame->fp;
+	unsigned long irq_stack_ptr;
+
+	/*
+	 * Use raw_smp_processor_id() to avoid false-positives from
+	 * CONFIG_DEBUG_PREEMPT. get_wchan() calls unwind_frame() on sleeping
+	 * task stacks, we can be pre-empted in this case, so
+	 * {raw_,}smp_processor_id() may give us the wrong value. Sleeping
+	 * tasks can't ever be on an interrupt stack, so regardless of cpu,
+	 * the checks will always fail.
+	 */
+	irq_stack_ptr = IRQ_STACK_PTR(raw_smp_processor_id());
 
 	low  = frame->sp;
-	high = ALIGN(low, THREAD_SIZE);
+	/* irq stacks are not THREAD_SIZE aligned */
+	if (on_irq_stack(frame->sp, raw_smp_processor_id()))
+		high = irq_stack_ptr;
+	else
+		high = ALIGN(low, THREAD_SIZE) - 0x20;
 
-	if (fp < low || fp > high - 0x18 || fp & 0xf)
+	if (fp < low || fp > high || fp & 0xf)
 		return -EINVAL;
 
 	frame->sp = fp + 0x10;
 	frame->fp = *(unsigned long *)(fp);
 	frame->pc = *(unsigned long *)(fp + 8);
 
+	/*
+	 * Check whether we are going to walk through from interrupt stack
+	 * to task stack.
+	 * If we reach the end of the stack - and its an interrupt stack,
+	 * read the original task stack pointer from the dummy frame.
+	 */
+	if (frame->sp == irq_stack_ptr)
+		frame->sp = IRQ_STACK_TO_TASK_STACK(irq_stack_ptr);
+
 	return 0;
 }
 
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index e9b9b5364393..8a0084541f84 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -146,6 +146,7 @@ static void dump_instr(const char *lvl, struct pt_regs *regs)
 static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
 {
 	struct stackframe frame;
+	unsigned long irq_stack_ptr = IRQ_STACK_PTR(smp_processor_id());
 
 	pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk);
 
@@ -180,9 +181,20 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
 		if (ret < 0)
 			break;
 		stack = frame.sp;
-		if (in_exception_text(where))
+		if (in_exception_text(where)) {
+			/*
+			 * If we switched to the irq_stack before calling this
+			 * exception handler, then the pt_regs will be on the
+			 * task stack. The easiest way to tell is if the large
+			 * pt_regs would overlap with the end of the irq_stack.
+			 */
+			if (stack < irq_stack_ptr &&
+			    (stack + sizeof(struct pt_regs)) > irq_stack_ptr)
+				stack = IRQ_STACK_TO_TASK_STACK(irq_stack_ptr);
+
 			dump_mem("", "Exception stack", stack,
 				 stack + sizeof(struct pt_regs), false);
+		}
 	}
 }
 

From ea288f7a80b63d6956d23f50dd04fa70f8e7368f Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Fri, 4 Dec 2015 11:02:27 +0000
Subject: [PATCH 176/424] arm64: Add do_softirq_own_stack() and enable
 irq_stacks

entry.S is modified to switch to the per_cpu irq_stack during el{0,1}_irq.
irq_count is used to detect recursive interrupts on the irq_stack, it is
updated late by do_softirq_own_stack(), when called on the irq_stack, before
__do_softirq() re-enables interrupts to process softirqs.

do_softirq_own_stack() is added by this patch, but does not yet switch
stack.

This patch adds the dummy stack frame and data needed by the previous
stack tracing patches.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit 8e23dacd12a48e58125b84c817da50850b73280a)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/irq.h |  2 ++
 arch/arm64/kernel/entry.S    | 42 ++++++++++++++++++++++++++++++++++--
 arch/arm64/kernel/irq.c      | 38 +++++++++++++++++++++++++++++++-
 3 files changed, 79 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h
index e2f3f135a3bc..fa2a8d0e4792 100644
--- a/arch/arm64/include/asm/irq.h
+++ b/arch/arm64/include/asm/irq.h
@@ -11,6 +11,8 @@
 #include <asm-generic/irq.h>
 #include <asm/thread_info.h>
 
+#define __ARCH_HAS_DO_SOFTIRQ
+
 struct pt_regs;
 
 DECLARE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack);
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 245fa6837880..8f7e737949fe 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -27,6 +27,7 @@
 #include <asm/cpufeature.h>
 #include <asm/errno.h>
 #include <asm/esr.h>
+#include <asm/irq.h>
 #include <asm/thread_info.h>
 #include <asm/unistd.h>
 
@@ -175,6 +176,42 @@ alternative_endif
 	mrs	\rd, sp_el0
 	.endm
 
+	.macro	irq_stack_entry, dummy_lr
+	mov	x19, sp			// preserve the original sp
+
+	adr_l	x25, irq_stack
+	mrs	x26, tpidr_el1
+	add	x25, x25, x26
+
+	/*
+	 * Check the lowest address on irq_stack for the irq_count value,
+	 * incremented by do_softirq_own_stack if we have re-enabled irqs
+	 * while on the irq_stack.
+	 */
+	ldr	x26, [x25]
+	cbnz	x26, 9998f		// recursive use?
+
+	/* switch to the irq stack */
+	mov	x26, #IRQ_STACK_START_SP
+	add	x26, x25, x26
+	mov	sp, x26
+
+	/* Add a dummy stack frame */
+	stp     x29, \dummy_lr, [sp, #-16]!           // dummy stack frame
+	mov	x29, sp
+	stp     xzr, x19, [sp, #-16]!
+
+9998:
+	.endm
+
+	/*
+	 * x19 should be preserved between irq_stack_entry and
+	 * irq_stack_exit.
+	 */
+	.macro	irq_stack_exit
+	mov	sp, x19
+	.endm
+
 /*
  * These are the registers used in the syscall handler, and allow us to
  * have in theory up to 7 arguments to a function - x0 to x6.
@@ -190,10 +227,11 @@ tsk	.req	x28		// current thread_info
  * Interrupt handling.
  */
 	.macro	irq_handler
-	adrp	x1, handle_arch_irq
-	ldr	x1, [x1, #:lo12:handle_arch_irq]
+	ldr_l	x1, handle_arch_irq
 	mov	x0, sp
+	irq_stack_entry x22
 	blr	x1
+	irq_stack_exit
 	.endm
 
 	.text
diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
index 1e3cef578e21..ff7ebb710e51 100644
--- a/arch/arm64/kernel/irq.c
+++ b/arch/arm64/kernel/irq.c
@@ -25,14 +25,24 @@
 #include <linux/irq.h>
 #include <linux/smp.h>
 #include <linux/init.h>
+#include <linux/interrupt.h>
 #include <linux/irqchip.h>
 #include <linux/seq_file.h>
 
 unsigned long irq_err_count;
 
-/* irq stack only needs to be 16 byte aligned - not IRQ_STACK_SIZE aligned */
+/*
+ * irq stack only needs to be 16 byte aligned - not IRQ_STACK_SIZE aligned.
+ * irq_stack[0] is used as irq_count, a non-zero value indicates the stack
+ * is in use, and el?_irq() shouldn't switch to it. This is used to detect
+ * recursive use of the irq_stack, it is lazily updated by
+ * do_softirq_own_stack(), which is called on the irq_stack, before
+ * re-enabling interrupts to process softirqs.
+ */
 DEFINE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack) __aligned(16);
 
+#define IRQ_COUNT()	(*per_cpu(irq_stack, smp_processor_id()))
+
 int arch_show_interrupts(struct seq_file *p, int prec)
 {
 	show_ipi_list(p, prec);
@@ -56,3 +66,29 @@ void __init init_IRQ(void)
 	if (!handle_arch_irq)
 		panic("No interrupt controller found.");
 }
+
+/*
+ * do_softirq_own_stack() is called from irq_exit() before __do_softirq()
+ * re-enables interrupts, at which point we may re-enter el?_irq(). We
+ * increase irq_count here so that el1_irq() knows that it is already on the
+ * irq stack.
+ *
+ * Called with interrupts disabled, so we don't worry about moving cpu, or
+ * being interrupted while modifying irq_count.
+ *
+ * This function doesn't actually switch stack.
+ */
+void do_softirq_own_stack(void)
+{
+	int cpu = smp_processor_id();
+
+	WARN_ON_ONCE(!irqs_disabled());
+
+	if (on_irq_stack(current_stack_pointer, cpu)) {
+		IRQ_COUNT()++;
+		__do_softirq();
+		IRQ_COUNT()--;
+	} else {
+		__do_softirq();
+	}
+}

From 306fe6c320ec846544b25130a2fe61e3d394cb6f Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Wed, 9 Dec 2015 13:58:42 +0000
Subject: [PATCH 177/424] arm64: irq: fix walking from irq stack to task stack

Running with CONFIG_DEBUG_SPINLOCK=y can trigger a BUG with the new IRQ
stack code:

  BUG: spinlock lockup suspected on CPU#1

This is due to the IRQ_STACK_TO_TASK_STACK macro incorrectly retrieving
the task stack pointer stashed at the top of the IRQ stack.

Sayeth James:

| Yup, this is what is happening. Its an off-by-one due to broken
| thinking about how the stack works. My broken thinking was:
|
| >   top ------------
| >       | dummy_lr | <- irq_stack_ptr
| >       ------------
| >       |   x29    |
| >       ------------
| >       |   x19    | <- irq_stack_ptr - 0x10
| >       ------------
| >       |   xzr    |
| >       ------------
|
| But the stack-pointer is decreased before use. So it actually looks
| like this:
|
| >       ------------
| >       |          |  <- irq_stack_ptr
| >   top ------------
| >       | dummy_lr |
| >       ------------
| >       |   x29    | <- irq_stack_ptr - 0x10
| >       ------------
| >       |   x19    |
| >       ------------
| >       |   xzr    | <- irq_stack_ptr - 0x20
| >       ------------
|
| The value being used as the original stack is x29, which in all the
| tests is sp but without the current frames data, hence there are no
| missing frames in the output.
|
| Jungseok Lee picked it up with a 32bit user space because aarch32
| can't use x29, so it remains 0 forever. The fix he posted is correct.

This patch fixes the macro and adds some of this wisdom to a comment,
so that the layout of the IRQ stack is well understood.

Cc: James Morse <james.morse@arm.com>
Reported-by: Jungseok Lee <jungseoklee85@gmail.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit 7596abf2e5661d52c4f414f37addeed54e098880)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/irq.h | 20 ++++++++++++++++++--
 arch/arm64/kernel/entry.S    |  2 +-
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h
index fa2a8d0e4792..877c7e358384 100644
--- a/arch/arm64/include/asm/irq.h
+++ b/arch/arm64/include/asm/irq.h
@@ -19,7 +19,23 @@ DECLARE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack);
 
 /*
  * The highest address on the stack, and the first to be used. Used to
- * find the dummy-stack frame put down by el?_irq() in entry.S.
+ * find the dummy-stack frame put down by el?_irq() in entry.S, which
+ * is structured as follows:
+ *
+ *       ------------
+ *       |          |  <- irq_stack_ptr
+ *   top ------------
+ *       |  elr_el1 |
+ *       ------------
+ *       |   x29    | <- irq_stack_ptr - 0x10
+ *       ------------
+ *       |   xzr    |
+ *       ------------
+ *       |   x19    | <- irq_stack_ptr - 0x20
+ *       ------------
+ *
+ * where x19 holds a copy of the task stack pointer.
+ *
  */
 #define IRQ_STACK_PTR(cpu) ((unsigned long)per_cpu(irq_stack, cpu) + IRQ_STACK_START_SP)
 
@@ -27,7 +43,7 @@ DECLARE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack);
  * The offset from irq_stack_ptr where entry.S will store the original
  * stack pointer. Used by unwind_frame() and dump_backtrace().
  */
-#define IRQ_STACK_TO_TASK_STACK(ptr) *((unsigned long *)(ptr - 0x10));
+#define IRQ_STACK_TO_TASK_STACK(ptr) *((unsigned long *)(ptr - 0x20));
 
 extern void set_handle_irq(void (*handle_irq)(struct pt_regs *));
 
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 8f7e737949fe..be7ec544b540 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -199,7 +199,7 @@ alternative_endif
 	/* Add a dummy stack frame */
 	stp     x29, \dummy_lr, [sp, #-16]!           // dummy stack frame
 	mov	x29, sp
-	stp     xzr, x19, [sp, #-16]!
+	stp     x19, xzr, [sp, #-16]!
 
 9998:
 	.endm

From 95e1db8bd78d2b3f15f7d4e7896735a041c775f6 Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Thu, 10 Dec 2015 10:22:39 +0000
Subject: [PATCH 178/424] arm64: Add this_cpu_ptr() assembler macro for use in
 entry.S

irq_stack is a per_cpu variable, that needs to be access from entry.S.
Use an assembler macro instead of the unreadable details.

Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit aa4d5d3cbc258c355151a3903211b27359390ec5)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/assembler.h | 11 +++++++++++
 arch/arm64/kernel/entry.S          |  4 +---
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 12eff928ef8b..bb7b72734c24 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -193,6 +193,17 @@ lr	.req	x30		// link register
 	str	\src, [\tmp, :lo12:\sym]
 	.endm
 
+	/*
+	 * @sym: The name of the per-cpu variable
+	 * @reg: Result of per_cpu(sym, smp_processor_id())
+	 * @tmp: scratch register
+	 */
+	.macro this_cpu_ptr, sym, reg, tmp
+	adr_l	\reg, \sym
+	mrs	\tmp, tpidr_el1
+	add	\reg, \reg, \tmp
+	.endm
+
 /*
  * Annotate a function as position independent, i.e., safe to be called before
  * the kernel virtual mapping is activated.
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index be7ec544b540..e394f8c9595a 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -179,9 +179,7 @@ alternative_endif
 	.macro	irq_stack_entry, dummy_lr
 	mov	x19, sp			// preserve the original sp
 
-	adr_l	x25, irq_stack
-	mrs	x26, tpidr_el1
-	add	x25, x25, x26
+	this_cpu_ptr irq_stack, x25, x26
 
 	/*
 	 * Check the lowest address on irq_stack for the irq_count value,

From e330d15430acce6073bb2c8486fba7555be1e923 Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Thu, 10 Dec 2015 10:22:40 +0000
Subject: [PATCH 179/424] arm64: when walking onto the task stack, check sp &
 fp are in current->stack

When unwind_frame() reaches the bottom of the irq_stack, the last fp
points to the original task stack. unwind_frame() uses
IRQ_STACK_TO_TASK_STACK() to find the sp value. If either values is
wrong, we may end up walking a corrupt stack.

Check these values are sane by testing if they are both on the stack
pointed to by current->stack.

Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit 1ffe199b1c9b72a8e752a9ae2a7af10128ab2ca1)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/kernel/stacktrace.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index b947eeffa5b2..d916d5b6aef6 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -71,9 +71,17 @@ int notrace unwind_frame(struct stackframe *frame)
 	 * to task stack.
 	 * If we reach the end of the stack - and its an interrupt stack,
 	 * read the original task stack pointer from the dummy frame.
+	 *
+	 * Check the frame->fp we read from the bottom of the irq_stack,
+	 * and the original task stack pointer are both in current->stack.
 	 */
-	if (frame->sp == irq_stack_ptr)
-		frame->sp = IRQ_STACK_TO_TASK_STACK(irq_stack_ptr);
+	if (frame->sp == irq_stack_ptr) {
+		unsigned long orig_sp = IRQ_STACK_TO_TASK_STACK(irq_stack_ptr);
+
+		if(object_is_on_stack((void *)orig_sp) &&
+		   object_is_on_stack((void *)frame->fp))
+			frame->sp = orig_sp;
+	}
 
 	return 0;
 }

From 70dfc6968ad22e057520da92b7b4da86041d3ea7 Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Thu, 10 Dec 2015 10:22:41 +0000
Subject: [PATCH 180/424] arm64: don't call C code with el0's fp register

On entry from el0, we save all the registers on the kernel stack, and
restore them before returning. x29 remains unchanged when we call out
to C code, which will store x29 as the frame-pointer on the stack.

Instead, write 0 into x29 after entry from el0, to avoid any risk of
tracing into user space.

Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit 49003a8d6b35e128ef5e51433e60e783a46fbe5f)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/kernel/entry.S | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index e394f8c9595a..2284c296e3f7 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -93,6 +93,8 @@
 	and	tsk, tsk, #~(THREAD_SIZE - 1)	// Ensure MDSCR_EL1.SS is clear,
 	ldr	x19, [tsk, #TI_FLAGS]		// since we can unmask debug
 	disable_step_tsk x19, x20		// exceptions when scheduling.
+
+	mov	x29, xzr			// fp pointed to user-space
 	.else
 	add	x21, sp, #S_FRAME_SIZE
 	.endif

From 2949f7a8a1516b704750cd343aebb36de2428d38 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 9 Dec 2015 12:44:36 +0000
Subject: [PATCH 181/424] arm64: mm: remove pointless PAGE_MASKing

As pgd_offset{,_k} shift the input address by PGDIR_SHIFT, the sub-page
bits will always be shifted out. There is no need to apply PAGE_MASK
before this.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Jeremy Linton <jeremy.linton@arm.com>
Cc: Laura Abbott <labbott@fedoraproject.org>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit e2c30ee320eb96304896c7ab84499e5bc5e5fb6e)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/mm/mmu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index d2a6194f4bec..c5bd5bca8e3d 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -288,7 +288,7 @@ static void __init create_mapping(phys_addr_t phys, unsigned long virt,
 			&phys, virt);
 		return;
 	}
-	__create_mapping(&init_mm, pgd_offset_k(virt & PAGE_MASK), phys, virt,
+	__create_mapping(&init_mm, pgd_offset_k(virt), phys, virt,
 			 size, prot, early_alloc);
 }
 
@@ -309,7 +309,7 @@ static void create_mapping_late(phys_addr_t phys, unsigned long virt,
 		return;
 	}
 
-	return __create_mapping(&init_mm, pgd_offset_k(virt & PAGE_MASK),
+	return __create_mapping(&init_mm, pgd_offset_k(virt),
 				phys, virt, size, prot, late_alloc);
 }
 

From a2151a0e23afcaf1fbb8f2e63bf2335f61e12172 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 9 Dec 2015 12:44:37 +0000
Subject: [PATCH 182/424] arm64: Remove redundant padding from linker script

Currently we place an ALIGN_DEBUG_RO between text and data for the .text
and .init sections, and depending on configuration each of these may
result in up to SECTION_SIZE bytes worth of padding (for
DEBUG_RODATA_ALIGN).

We make no distinction between the text and data in each of these
sections at any point when creating the initial page tables in head.S.
We also make no distinction when modifying the tables; __map_memblock,
fixup_executable, mark_rodata_ro, and fixup_init only work at section
granularity. Thus this padding is unnecessary.

For the spit between init text and data we impose a minimum alignment of
16 bytes, but this is also unnecessary. The init data is output
immediately after the padding before any symbols are defined, so this is
not required to keep a symbol for linker a section array correctly
associated with the data. Any objects within the section will be given
at least their usual alignment regardless.

This patch removes the redundant padding.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Jeremy Linton <jeremy.linton@arm.com>
Cc: Laura Abbott <labbott@fedoraproject.org>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit 5b28cd9d084eca8ddc46270d2720305bfd40e348)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/kernel/vmlinux.lds.S | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 71426a78db12..cc2572db32a6 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -113,7 +113,6 @@ SECTIONS
 		*(.got)			/* Global offset table		*/
 	}
 
-	ALIGN_DEBUG_RO
 	RO_DATA(PAGE_SIZE)
 	EXCEPTION_TABLE(8)
 	NOTES
@@ -128,7 +127,6 @@ SECTIONS
 		ARM_EXIT_KEEP(EXIT_TEXT)
 	}
 
-	ALIGN_DEBUG_RO_MIN(16)
 	.init.data : {
 		INIT_DATA
 		INIT_SETUP(16)

From 9250d09be9e3f467145584fd476a6132e04ddaef Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 9 Dec 2015 12:44:38 +0000
Subject: [PATCH 183/424] arm64: mm: fold alternatives into .init

Currently we treat the alternatives separately from other data that's
only used during initialisation, using separate .altinstructions and
.altinstr_replacement linker sections. These are freed for general
allocation separately from .init*. This is problematic as:

* We do not remove execute permissions, as we do for .init, leaving the
  memory executable.

* We pad between them, making the kernel Image bianry up to PAGE_SIZE
  bytes larger than necessary.

This patch moves the two sections into the contiguous region used for
.init*. This saves some memory, ensures that we remove execute
permissions, and allows us to remove some code made redundant by this
reorganisation.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Andre Przywara <andre.przywara@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Jeremy Linton <jeremy.linton@arm.com>
Cc: Laura Abbott <labbott@fedoraproject.org>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit 9aa4ec1571da62366cfddc20f3b923609604fe63)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/alternative.h | 1 -
 arch/arm64/kernel/alternative.c      | 6 ------
 arch/arm64/kernel/vmlinux.lds.S      | 5 ++---
 arch/arm64/mm/init.c                 | 1 -
 4 files changed, 2 insertions(+), 11 deletions(-)

diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h
index d56ec0715157..e4962f04201e 100644
--- a/arch/arm64/include/asm/alternative.h
+++ b/arch/arm64/include/asm/alternative.h
@@ -19,7 +19,6 @@ struct alt_instr {
 
 void __init apply_alternatives_all(void);
 void apply_alternatives(void *start, size_t length);
-void free_alternatives_memory(void);
 
 #define ALTINSTR_ENTRY(feature)						      \
 	" .word 661b - .\n"				/* label           */ \
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index ab9db0e9818c..d2ee1b21a10d 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -158,9 +158,3 @@ void apply_alternatives(void *start, size_t length)
 
 	__apply_alternatives(&region);
 }
-
-void free_alternatives_memory(void)
-{
-	free_reserved_area(__alt_instructions, __alt_instructions_end,
-			   0, "alternatives");
-}
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index cc2572db32a6..e3928f578891 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -141,9 +141,6 @@ SECTIONS
 
 	PERCPU_SECTION(L1_CACHE_BYTES)
 
-	. = ALIGN(PAGE_SIZE);
-	__init_end = .;
-
 	. = ALIGN(4);
 	.altinstructions : {
 		__alt_instructions = .;
@@ -155,6 +152,8 @@ SECTIONS
 	}
 
 	. = ALIGN(PAGE_SIZE);
+	__init_end = .;
+
 	_data = .;
 	_sdata = .;
 	RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 10fab52eed95..dba32ceff17a 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -360,7 +360,6 @@ void free_initmem(void)
 {
 	fixup_init();
 	free_initmem_default(0);
-	free_alternatives_memory();
 }
 
 #ifdef CONFIG_BLK_DEV_INITRD

From c92251d1a82c7c071662ca90300ebf488ab3d6f1 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Thu, 10 Dec 2015 16:54:32 +0000
Subject: [PATCH 184/424] arm64: cmpxchg: Don't incldue linux/mmdebug.h

The arm64 asm/cmpxchg.h includes linux/mmdebug.h but doesn't so far as I
can tell actually use anything from it.  Removing the inclusion reduces
spurious header dependency rebuilds and also avoids issues with
recursive inclusions of headers causing build breaks due to attempts to
use things before they are defined if linux/mmdebug.h starts pulling in
more low level headers.

Such errors have happened in -next recently, for example:

In file included from include/linux/completion.h:11:0,
                 from include/linux/rcupdate.h:43,
                 from include/linux/tracepoint.h:19,
                 from include/linux/mmdebug.h:6,
                 from ./arch/arm64/include/asm/cmpxchg.h:22,
                 from ./arch/arm64/include/asm/atomic.h:41,
                 from include/linux/atomic.h:4,
                 from include/linux/spinlock.h:406,
                 from include/linux/seqlock.h:35,
                 from include/linux/time.h:5,
                 from include/uapi/linux/timex.h:56,
                 from include/linux/timex.h:56,
                 from include/linux/sched.h:19,
                 from arch/arm64/kernel/asm-offsets.c:21:
include/linux/wait.h: In function 'wait_on_atomic_t':
include/linux/wait.h:1218:2: error: implicit declaration of function 'atomic_read' [-Werror=implicit-function-declaration]
 if (atomic_read(val) == 0)

Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit 4a6ccf30263f4e265c0f171561bf4c40bed5f273)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/cmpxchg.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index 9ea611ea69df..510c7b404454 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -19,7 +19,6 @@
 #define __ASM_CMPXCHG_H
 
 #include <linux/bug.h>
-#include <linux/mmdebug.h>
 
 #include <asm/atomic.h>
 #include <asm/barrier.h>

From 9429ab599551a430b8e6d9d8bfccfc9f31288211 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Fri, 11 Dec 2015 11:04:31 +0000
Subject: [PATCH 185/424] arm64: mm: place __cpu_setup in .text

We drop __cpu_setup in .text.init, which ends up being part of .text.
The .text.init section was a legacy section name which has been unused
elsewhere for a long time.

The ".text.init" name is misleading if read as a synonym for
".init.text". Any CPU may execute __cpu_setup before turning the MMU on,
so it should simply live in .text.

Remove the pointless section assignment. This will leave __cpu_setup in
the .text section.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit f00083cae331e5d3eecade6b4fdc35d0825e73ef)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/mm/proc.S | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index b8f04b3f2786..c164d2cb35c0 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -140,8 +140,6 @@ ENTRY(cpu_do_switch_mm)
 	ret
 ENDPROC(cpu_do_switch_mm)
 
-	.section ".text.init", #alloc, #execinstr
-
 /*
  *	__cpu_setup
  *

From 7ae24aa87b5b0c58d378b6fc8fb0d8e72fbfd81d Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 17 Nov 2015 14:45:47 +0000
Subject: [PATCH 186/424] arm64: Documentation: add list of software
 workarounds for errata

It's not immediately obvious which hardware errata are worked around in
the Linux kernel for an arbitrary kernel tree, so add a file to keep
track of what we're working around.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit 9cb9c9e5ba8453537e8e645318edf231fe54eaf9)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 Documentation/arm64/silicon-errata.txt | 58 ++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)
 create mode 100644 Documentation/arm64/silicon-errata.txt

diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt
new file mode 100644
index 000000000000..58b71ddf9b60
--- /dev/null
+++ b/Documentation/arm64/silicon-errata.txt
@@ -0,0 +1,58 @@
+                Silicon Errata and Software Workarounds
+                =======================================
+
+Author: Will Deacon <will.deacon@arm.com>
+Date  : 27 November 2015
+
+It is an unfortunate fact of life that hardware is often produced with
+so-called "errata", which can cause it to deviate from the architecture
+under specific circumstances.  For hardware produced by ARM, these
+errata are broadly classified into the following categories:
+
+  Category A: A critical error without a viable workaround.
+  Category B: A significant or critical error with an acceptable
+              workaround.
+  Category C: A minor error that is not expected to occur under normal
+              operation.
+
+For more information, consult one of the "Software Developers Errata
+Notice" documents available on infocenter.arm.com (registration
+required).
+
+As far as Linux is concerned, Category B errata may require some special
+treatment in the operating system. For example, avoiding a particular
+sequence of code, or configuring the processor in a particular way. A
+less common situation may require similar actions in order to declassify
+a Category A erratum into a Category C erratum. These are collectively
+known as "software workarounds" and are only required in the minority of
+cases (e.g. those cases that both require a non-secure workaround *and*
+can be triggered by Linux).
+
+For software workarounds that may adversely impact systems unaffected by
+the erratum in question, a Kconfig entry is added under "Kernel
+Features" -> "ARM errata workarounds via the alternatives framework".
+These are enabled by default and patched in at runtime when an affected
+CPU is detected. For less-intrusive workarounds, a Kconfig option is not
+available and the code is structured (preferably with a comment) in such
+a way that the erratum will not be hit.
+
+This approach can make it slightly onerous to determine exactly which
+errata are worked around in an arbitrary kernel source tree, so this
+file acts as a registry of software workarounds in the Linux Kernel and
+will be updated when new workarounds are committed and backported to
+stable kernels.
+
+| Implementor    | Component       | Erratum ID      | Kconfig                 |
++----------------+-----------------+-----------------+-------------------------+
+| ARM            | Cortex-A53      | #826319         | ARM64_ERRATUM_826319    |
+| ARM            | Cortex-A53      | #827319         | ARM64_ERRATUM_827319    |
+| ARM            | Cortex-A53      | #824069         | ARM64_ERRATUM_824069    |
+| ARM            | Cortex-A53      | #819472         | ARM64_ERRATUM_819472    |
+| ARM            | Cortex-A53      | #845719         | ARM64_ERRATUM_845719    |
+| ARM            | Cortex-A53      | #843419         | ARM64_ERRATUM_843419    |
+| ARM            | Cortex-A57      | #832075         | ARM64_ERRATUM_832075    |
+| ARM            | Cortex-A57      | #852523         | N/A                     |
+| ARM            | Cortex-A57      | #834220         | ARM64_ERRATUM_834220    |
+|                |                 |                 |                         |
+| Cavium         | ThunderX ITS    | #22375, #24313  | CAVIUM_ERRATUM_22375    |
+| Cavium         | ThunderX GICv3  | #23154          | CAVIUM_ERRATUM_23154    |

From a79c216b06b18f1545d55e8f238dd9b49896f347 Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Tue, 15 Dec 2015 11:21:25 +0000
Subject: [PATCH 187/424] arm64: reduce stack use in irq_handler

The code for switching to irq_stack stores three pieces of information on
the stack, fp+lr, as a fake stack frame (that lets us walk back onto the
interrupted tasks stack frame), and the address of the struct pt_regs that
contains the register values from kernel entry. (which dump_backtrace()
will print in any stack trace).

To reduce this, we store fp, and the pointer to the struct pt_regs.
unwind_frame() can recognise this as the irq_stack dummy frame, (as it only
appears at the top of the irq_stack), and use the struct pt_regs values
to find the missing interrupted link-register.

Suggested-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit 971c67ce37cfeeaf560e792a2c3bc21d8b67163a)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/irq.h   | 11 ++++-------
 arch/arm64/kernel/entry.S      | 12 +++++++-----
 arch/arm64/kernel/stacktrace.c | 19 ++++++++++++++++---
 3 files changed, 27 insertions(+), 15 deletions(-)

diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h
index 877c7e358384..3bece4379bd9 100644
--- a/arch/arm64/include/asm/irq.h
+++ b/arch/arm64/include/asm/irq.h
@@ -25,16 +25,13 @@ DECLARE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack);
  *       ------------
  *       |          |  <- irq_stack_ptr
  *   top ------------
- *       |  elr_el1 |
+ *       |   x19    | <- irq_stack_ptr - 0x08
  *       ------------
  *       |   x29    | <- irq_stack_ptr - 0x10
  *       ------------
- *       |   xzr    |
- *       ------------
- *       |   x19    | <- irq_stack_ptr - 0x20
- *       ------------
  *
- * where x19 holds a copy of the task stack pointer.
+ * where x19 holds a copy of the task stack pointer where the struct pt_regs
+ * from kernel_entry can be found.
  *
  */
 #define IRQ_STACK_PTR(cpu) ((unsigned long)per_cpu(irq_stack, cpu) + IRQ_STACK_START_SP)
@@ -43,7 +40,7 @@ DECLARE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack);
  * The offset from irq_stack_ptr where entry.S will store the original
  * stack pointer. Used by unwind_frame() and dump_backtrace().
  */
-#define IRQ_STACK_TO_TASK_STACK(ptr) *((unsigned long *)(ptr - 0x20));
+#define IRQ_STACK_TO_TASK_STACK(ptr) (*((unsigned long *)((ptr) - 0x08)))
 
 extern void set_handle_irq(void (*handle_irq)(struct pt_regs *));
 
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 2284c296e3f7..0667fb7d8bb1 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -178,7 +178,7 @@ alternative_endif
 	mrs	\rd, sp_el0
 	.endm
 
-	.macro	irq_stack_entry, dummy_lr
+	.macro	irq_stack_entry
 	mov	x19, sp			// preserve the original sp
 
 	this_cpu_ptr irq_stack, x25, x26
@@ -196,10 +196,12 @@ alternative_endif
 	add	x26, x25, x26
 	mov	sp, x26
 
-	/* Add a dummy stack frame */
-	stp     x29, \dummy_lr, [sp, #-16]!           // dummy stack frame
+	/*
+	 * Add a dummy stack frame, this non-standard format is fixed up
+	 * by unwind_frame()
+	 */
+	stp     x29, x19, [sp, #-16]!
 	mov	x29, sp
-	stp     x19, xzr, [sp, #-16]!
 
 9998:
 	.endm
@@ -229,7 +231,7 @@ tsk	.req	x28		// current thread_info
 	.macro	irq_handler
 	ldr_l	x1, handle_arch_irq
 	mov	x0, sp
-	irq_stack_entry x22
+	irq_stack_entry
 	blr	x1
 	irq_stack_exit
 	.endm
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index d916d5b6aef6..b9fd3a8abfc1 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -70,17 +70,30 @@ int notrace unwind_frame(struct stackframe *frame)
 	 * Check whether we are going to walk through from interrupt stack
 	 * to task stack.
 	 * If we reach the end of the stack - and its an interrupt stack,
-	 * read the original task stack pointer from the dummy frame.
+	 * unpack the dummy frame to find the original elr.
 	 *
 	 * Check the frame->fp we read from the bottom of the irq_stack,
 	 * and the original task stack pointer are both in current->stack.
 	 */
 	if (frame->sp == irq_stack_ptr) {
+		struct pt_regs *irq_args;
 		unsigned long orig_sp = IRQ_STACK_TO_TASK_STACK(irq_stack_ptr);
 
-		if(object_is_on_stack((void *)orig_sp) &&
-		   object_is_on_stack((void *)frame->fp))
+		if (object_is_on_stack((void *)orig_sp) &&
+		   object_is_on_stack((void *)frame->fp)) {
 			frame->sp = orig_sp;
+
+			/* orig_sp is the saved pt_regs, find the elr */
+			irq_args = (struct pt_regs *)orig_sp;
+			frame->pc = irq_args->pc;
+		} else {
+			/*
+			 * This frame has a non-standard format, and we
+			 * didn't fix it, because the data looked wrong.
+			 * Refuse to output this frame.
+			 */
+			return -EINVAL;
+		}
 	}
 
 	return 0;

From ac7406c28c8bada863d36c46ca246bb7b76f3e9f Mon Sep 17 00:00:00 2001
From: Ashok Kumar <ashoks@broadcom.com>
Date: Thu, 17 Dec 2015 01:38:31 -0800
Subject: [PATCH 188/424] arm64: Defer dcache flush in __cpu_copy_user_page

Defer dcache flushing to __sync_icache_dcache by calling
flush_dcache_page which clears PG_dcache_clean flag.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Ashok Kumar <ashoks@broadcom.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit e6b1185f77351aa154e63bd54b05d07ff99d4ffa)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/mm/copypage.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c
index 13bbc3be6f5a..22e4cb4d6f53 100644
--- a/arch/arm64/mm/copypage.c
+++ b/arch/arm64/mm/copypage.c
@@ -24,8 +24,9 @@
 
 void __cpu_copy_user_page(void *kto, const void *kfrom, unsigned long vaddr)
 {
+	struct page *page = virt_to_page(kto);
 	copy_page(kto, kfrom);
-	__flush_dcache_area(kto, PAGE_SIZE);
+	flush_dcache_page(page);
 }
 EXPORT_SYMBOL_GPL(__cpu_copy_user_page);
 

From 358e3c80a223c4d79a786be2e71e51cab91c2e7e Mon Sep 17 00:00:00 2001
From: Ashok Kumar <ashoks@broadcom.com>
Date: Thu, 17 Dec 2015 01:38:32 -0800
Subject: [PATCH 189/424] arm64: Use PoU cache instr for I/D coherency

In systems with three levels of cache(PoU at L1 and PoC at L3),
PoC cache flush instructions flushes L2 and L3 caches which could affect
performance.
For cache flushes for I and D coherency, PoU should suffice.
So changing all I and D coherency related cache flushes to PoU.

Introduced a new __clean_dcache_area_pou API for dcache flush till PoU
and provided a common macro for __flush_dcache_area and
__clean_dcache_area_pou.

Also, now in __sync_icache_dcache, icache invalidation for non-aliasing
VIPT icache is done only for that particular page instead of the earlier
__flush_icache_all.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Ashok Kumar <ashoks@broadcom.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit 0a28714c53fd4f7aea709be7577dfbe0095c8c3e)
Signed-off-by: Alex Shi <alex.shi@linaro.org>

Conflicts:
	included reset_pmuserenr_el0 in arch/arm64/mm/proc-macros.S
---
 arch/arm64/include/asm/cacheflush.h |  1 +
 arch/arm64/mm/cache.S               | 28 ++++++++++++++----------
 arch/arm64/mm/flush.c               | 33 ++++++++++++++++-------------
 arch/arm64/mm/proc-macros.S         | 22 +++++++++++++++++++
 4 files changed, 58 insertions(+), 26 deletions(-)

diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 54efedaf331f..7fc294c3bc5b 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -68,6 +68,7 @@
 extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end);
 extern void flush_icache_range(unsigned long start, unsigned long end);
 extern void __flush_dcache_area(void *addr, size_t len);
+extern void __clean_dcache_area_pou(void *addr, size_t len);
 extern long __flush_cache_user_range(unsigned long start, unsigned long end);
 
 static inline void flush_cache_mm(struct mm_struct *mm)
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index cfa44a6adc0a..6df07069a025 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -81,25 +81,31 @@ ENDPROC(__flush_cache_user_range)
 /*
  *	__flush_dcache_area(kaddr, size)
  *
- *	Ensure that the data held in the page kaddr is written back to the
- *	page in question.
+ *	Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
+ *	are cleaned and invalidated to the PoC.
  *
  *	- kaddr   - kernel address
  *	- size    - size in question
  */
 ENTRY(__flush_dcache_area)
-	dcache_line_size x2, x3
-	add	x1, x0, x1
-	sub	x3, x2, #1
-	bic	x0, x0, x3
-1:	dc	civac, x0			// clean & invalidate D line / unified line
-	add	x0, x0, x2
-	cmp	x0, x1
-	b.lo	1b
-	dsb	sy
+	dcache_by_line_op civac, sy, x0, x1, x2, x3
 	ret
 ENDPIPROC(__flush_dcache_area)
 
+/*
+ *	__clean_dcache_area_pou(kaddr, size)
+ *
+ * 	Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
+ * 	are cleaned to the PoU.
+ *
+ *	- kaddr   - kernel address
+ *	- size    - size in question
+ */
+ENTRY(__clean_dcache_area_pou)
+	dcache_by_line_op cvau, ish, x0, x1, x2, x3
+	ret
+ENDPROC(__clean_dcache_area_pou)
+
 /*
  *	__inval_cache_range(start, end)
  *	- start   - start address of region
diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c
index c26b804015e8..46649d6e6c5a 100644
--- a/arch/arm64/mm/flush.c
+++ b/arch/arm64/mm/flush.c
@@ -34,19 +34,24 @@ void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
 		__flush_icache_all();
 }
 
+static void sync_icache_aliases(void *kaddr, unsigned long len)
+{
+	unsigned long addr = (unsigned long)kaddr;
+
+	if (icache_is_aliasing()) {
+		__clean_dcache_area_pou(kaddr, len);
+		__flush_icache_all();
+	} else {
+		flush_icache_range(addr, addr + len);
+	}
+}
+
 static void flush_ptrace_access(struct vm_area_struct *vma, struct page *page,
 				unsigned long uaddr, void *kaddr,
 				unsigned long len)
 {
-	if (vma->vm_flags & VM_EXEC) {
-		unsigned long addr = (unsigned long)kaddr;
-		if (icache_is_aliasing()) {
-			__flush_dcache_area(kaddr, len);
-			__flush_icache_all();
-		} else {
-			flush_icache_range(addr, addr + len);
-		}
-	}
+	if (vma->vm_flags & VM_EXEC)
+		sync_icache_aliases(kaddr, len);
 }
 
 /*
@@ -74,13 +79,11 @@ void __sync_icache_dcache(pte_t pte, unsigned long addr)
 	if (!page_mapping(page))
 		return;
 
-	if (!test_and_set_bit(PG_dcache_clean, &page->flags)) {
-		__flush_dcache_area(page_address(page),
-				PAGE_SIZE << compound_order(page));
+	if (!test_and_set_bit(PG_dcache_clean, &page->flags))
+		sync_icache_aliases(page_address(page),
+				    PAGE_SIZE << compound_order(page));
+	else if (icache_is_aivivt())
 		__flush_icache_all();
-	} else if (icache_is_aivivt()) {
-		__flush_icache_all();
-	}
 }
 
 /*
diff --git a/arch/arm64/mm/proc-macros.S b/arch/arm64/mm/proc-macros.S
index d69dffffaa89..984edcda1850 100644
--- a/arch/arm64/mm/proc-macros.S
+++ b/arch/arm64/mm/proc-macros.S
@@ -74,3 +74,25 @@
 	msr	pmuserenr_el0, xzr		// Disable PMU access from EL0
 9000:
 	.endm
+
+/*
+ * Macro to perform a data cache maintenance for the interval
+ * [kaddr, kaddr + size)
+ *
+ * 	op:		operation passed to dc instruction
+ * 	domain:		domain used in dsb instruciton
+ * 	kaddr:		starting virtual address of the region
+ * 	size:		size of the region
+ * 	Corrupts: 	kaddr, size, tmp1, tmp2
+ */
+	.macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2
+	dcache_line_size \tmp1, \tmp2
+	add	\size, \kaddr, \size
+	sub	\tmp2, \tmp1, #1
+	bic	\kaddr, \kaddr, \tmp2
+9998:	dc	\op, \kaddr
+	add	\kaddr, \kaddr, \tmp1
+	cmp	\kaddr, \size
+	b.lo	9998b
+	dsb	\domain
+	.endm

From 720089ef0ba8007c34dfa7d80d96e27d6d23088f Mon Sep 17 00:00:00 2001
From: David Woods <dwoods@ezchip.com>
Date: Thu, 17 Dec 2015 14:31:26 -0500
Subject: [PATCH 190/424] arm64: hugetlb: add support for PTE contiguous bit

The arm64 MMU supports a Contiguous bit which is a hint that the TTE
is one of a set of contiguous entries which can be cached in a single
TLB entry.  Supporting this bit adds new intermediate huge page sizes.

The set of huge page sizes available depends on the base page size.
Without using contiguous pages the huge page sizes are as follows.

 4KB:   2MB  1GB
64KB: 512MB

With a 4KB granule, the contiguous bit groups together sets of 16 pages
and with a 64KB granule it groups sets of 32 pages.  This enables two new
huge page sizes in each case, so that the full set of available sizes
is as follows.

 4KB:  64KB   2MB  32MB  1GB
64KB:   2MB 512MB  16GB

If a 16KB granule is used then the contiguous bit groups 128 pages
at the PTE level and 32 pages at the PMD level.

If the base page size is set to 64KB then 2MB pages are enabled by
default.  It is possible in the future to make 2MB the default huge
page size for both 4KB and 64KB granules.

Reviewed-by: Chris Metcalf <cmetcalf@ezchip.com>
Reviewed-by: Steve Capper <steve.capper@linaro.org>
Signed-off-by: David Woods <dwoods@ezchip.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit 66b3923a1a0f77a563b43f43f6ad091354abbfe9)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/Kconfig                     |   3 -
 arch/arm64/include/asm/hugetlb.h       |  44 ++--
 arch/arm64/include/asm/pgtable-hwdef.h |  18 +-
 arch/arm64/include/asm/pgtable.h       |  10 +-
 arch/arm64/mm/hugetlbpage.c            | 274 ++++++++++++++++++++++++-
 include/linux/hugetlb.h                |   2 -
 6 files changed, 313 insertions(+), 38 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 4876459c0838..ffa3c549a4ba 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -530,9 +530,6 @@ config HW_PERF_EVENTS
 config SYS_SUPPORTS_HUGETLBFS
 	def_bool y
 
-config ARCH_WANT_GENERAL_HUGETLB
-	def_bool y
-
 config ARCH_WANT_HUGE_PMD_SHARE
 	def_bool y if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36)
 
diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h
index bb4052e85dba..bbc1e35aa601 100644
--- a/arch/arm64/include/asm/hugetlb.h
+++ b/arch/arm64/include/asm/hugetlb.h
@@ -26,36 +26,7 @@ static inline pte_t huge_ptep_get(pte_t *ptep)
 	return *ptep;
 }
 
-static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
-				   pte_t *ptep, pte_t pte)
-{
-	set_pte_at(mm, addr, ptep, pte);
-}
 
-static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
-					 unsigned long addr, pte_t *ptep)
-{
-	ptep_clear_flush(vma, addr, ptep);
-}
-
-static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
-					   unsigned long addr, pte_t *ptep)
-{
-	ptep_set_wrprotect(mm, addr, ptep);
-}
-
-static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
-					    unsigned long addr, pte_t *ptep)
-{
-	return ptep_get_and_clear(mm, addr, ptep);
-}
-
-static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
-					     unsigned long addr, pte_t *ptep,
-					     pte_t pte, int dirty)
-{
-	return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
-}
 
 static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
 					  unsigned long addr, unsigned long end,
@@ -97,4 +68,19 @@ static inline void arch_clear_hugepage_flags(struct page *page)
 	clear_bit(PG_dcache_clean, &page->flags);
 }
 
+extern pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
+				struct page *page, int writable);
+#define arch_make_huge_pte arch_make_huge_pte
+extern void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+			    pte_t *ptep, pte_t pte);
+extern int huge_ptep_set_access_flags(struct vm_area_struct *vma,
+				      unsigned long addr, pte_t *ptep,
+				      pte_t pte, int dirty);
+extern pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+				     unsigned long addr, pte_t *ptep);
+extern void huge_ptep_set_wrprotect(struct mm_struct *mm,
+				    unsigned long addr, pte_t *ptep);
+extern void huge_ptep_clear_flush(struct vm_area_struct *vma,
+				  unsigned long addr, pte_t *ptep);
+
 #endif /* __ASM_HUGETLB_H */
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index d6739e836f7b..5c25b831273d 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -90,7 +90,23 @@
 /*
  * Contiguous page definitions.
  */
-#define CONT_PTES		(_AC(1, UL) << CONT_SHIFT)
+#ifdef CONFIG_ARM64_64K_PAGES
+#define CONT_PTE_SHIFT		5
+#define CONT_PMD_SHIFT		5
+#elif defined(CONFIG_ARM64_16K_PAGES)
+#define CONT_PTE_SHIFT		7
+#define CONT_PMD_SHIFT		5
+#else
+#define CONT_PTE_SHIFT		4
+#define CONT_PMD_SHIFT		4
+#endif
+
+#define CONT_PTES		(1 << CONT_PTE_SHIFT)
+#define CONT_PTE_SIZE		(CONT_PTES * PAGE_SIZE)
+#define CONT_PTE_MASK		(~(CONT_PTE_SIZE - 1))
+#define CONT_PMDS		(1 << CONT_PMD_SHIFT)
+#define CONT_PMD_SIZE		(CONT_PMDS * PMD_SIZE)
+#define CONT_PMD_MASK		(~(CONT_PMD_SIZE - 1))
 /* the the numerical offset of the PTE within a range of CONT_PTES */
 #define CONT_RANGE_OFFSET(addr) (((addr)>>PAGE_SHIFT)&(CONT_PTES-1))
 
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index cd5dfc97268e..fd3d7c177c5f 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -228,7 +228,8 @@ static inline pte_t pte_mkspecial(pte_t pte)
 
 static inline pte_t pte_mkcont(pte_t pte)
 {
-	return set_pte_bit(pte, __pgprot(PTE_CONT));
+	pte = set_pte_bit(pte, __pgprot(PTE_CONT));
+	return set_pte_bit(pte, __pgprot(PTE_TYPE_PAGE));
 }
 
 static inline pte_t pte_mknoncont(pte_t pte)
@@ -236,6 +237,11 @@ static inline pte_t pte_mknoncont(pte_t pte)
 	return clear_pte_bit(pte, __pgprot(PTE_CONT));
 }
 
+static inline pmd_t pmd_mkcont(pmd_t pmd)
+{
+	return __pmd(pmd_val(pmd) | PMD_SECT_CONT);
+}
+
 static inline void set_pte(pte_t *ptep, pte_t pte)
 {
 	*ptep = pte;
@@ -309,7 +315,7 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
 /*
  * Hugetlb definitions.
  */
-#define HUGE_MAX_HSTATE		2
+#define HUGE_MAX_HSTATE		4
 #define HPAGE_SHIFT		PMD_SHIFT
 #define HPAGE_SIZE		(_AC(1, UL) << HPAGE_SHIFT)
 #define HPAGE_MASK		(~(HPAGE_SIZE - 1))
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 383b03ff38f8..82d607c3614e 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -41,17 +41,289 @@ int pud_huge(pud_t pud)
 #endif
 }
 
+static int find_num_contig(struct mm_struct *mm, unsigned long addr,
+			   pte_t *ptep, pte_t pte, size_t *pgsize)
+{
+	pgd_t *pgd = pgd_offset(mm, addr);
+	pud_t *pud;
+	pmd_t *pmd;
+
+	*pgsize = PAGE_SIZE;
+	if (!pte_cont(pte))
+		return 1;
+	if (!pgd_present(*pgd)) {
+		VM_BUG_ON(!pgd_present(*pgd));
+		return 1;
+	}
+	pud = pud_offset(pgd, addr);
+	if (!pud_present(*pud)) {
+		VM_BUG_ON(!pud_present(*pud));
+		return 1;
+	}
+	pmd = pmd_offset(pud, addr);
+	if (!pmd_present(*pmd)) {
+		VM_BUG_ON(!pmd_present(*pmd));
+		return 1;
+	}
+	if ((pte_t *)pmd == ptep) {
+		*pgsize = PMD_SIZE;
+		return CONT_PMDS;
+	}
+	return CONT_PTES;
+}
+
+void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+			    pte_t *ptep, pte_t pte)
+{
+	size_t pgsize;
+	int i;
+	int ncontig = find_num_contig(mm, addr, ptep, pte, &pgsize);
+	unsigned long pfn;
+	pgprot_t hugeprot;
+
+	if (ncontig == 1) {
+		set_pte_at(mm, addr, ptep, pte);
+		return;
+	}
+
+	pfn = pte_pfn(pte);
+	hugeprot = __pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^ pte_val(pte));
+	for (i = 0; i < ncontig; i++) {
+		pr_debug("%s: set pte %p to 0x%llx\n", __func__, ptep,
+			 pte_val(pfn_pte(pfn, hugeprot)));
+		set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
+		ptep++;
+		pfn += pgsize >> PAGE_SHIFT;
+		addr += pgsize;
+	}
+}
+
+pte_t *huge_pte_alloc(struct mm_struct *mm,
+		      unsigned long addr, unsigned long sz)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pte_t *pte = NULL;
+
+	pr_debug("%s: addr:0x%lx sz:0x%lx\n", __func__, addr, sz);
+	pgd = pgd_offset(mm, addr);
+	pud = pud_alloc(mm, pgd, addr);
+	if (!pud)
+		return NULL;
+
+	if (sz == PUD_SIZE) {
+		pte = (pte_t *)pud;
+	} else if (sz == (PAGE_SIZE * CONT_PTES)) {
+		pmd_t *pmd = pmd_alloc(mm, pud, addr);
+
+		WARN_ON(addr & (sz - 1));
+		/*
+		 * Note that if this code were ever ported to the
+		 * 32-bit arm platform then it will cause trouble in
+		 * the case where CONFIG_HIGHPTE is set, since there
+		 * will be no pte_unmap() to correspond with this
+		 * pte_alloc_map().
+		 */
+		pte = pte_alloc_map(mm, NULL, pmd, addr);
+	} else if (sz == PMD_SIZE) {
+		if (IS_ENABLED(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) &&
+		    pud_none(*pud))
+			pte = huge_pmd_share(mm, addr, pud);
+		else
+			pte = (pte_t *)pmd_alloc(mm, pud, addr);
+	} else if (sz == (PMD_SIZE * CONT_PMDS)) {
+		pmd_t *pmd;
+
+		pmd = pmd_alloc(mm, pud, addr);
+		WARN_ON(addr & (sz - 1));
+		return (pte_t *)pmd;
+	}
+
+	pr_debug("%s: addr:0x%lx sz:0x%lx ret pte=%p/0x%llx\n", __func__, addr,
+	       sz, pte, pte_val(*pte));
+	return pte;
+}
+
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd = NULL;
+	pte_t *pte = NULL;
+
+	pgd = pgd_offset(mm, addr);
+	pr_debug("%s: addr:0x%lx pgd:%p\n", __func__, addr, pgd);
+	if (!pgd_present(*pgd))
+		return NULL;
+	pud = pud_offset(pgd, addr);
+	if (!pud_present(*pud))
+		return NULL;
+
+	if (pud_huge(*pud))
+		return (pte_t *)pud;
+	pmd = pmd_offset(pud, addr);
+	if (!pmd_present(*pmd))
+		return NULL;
+
+	if (pte_cont(pmd_pte(*pmd))) {
+		pmd = pmd_offset(
+			pud, (addr & CONT_PMD_MASK));
+		return (pte_t *)pmd;
+	}
+	if (pmd_huge(*pmd))
+		return (pte_t *)pmd;
+	pte = pte_offset_kernel(pmd, addr);
+	if (pte_present(*pte) && pte_cont(*pte)) {
+		pte = pte_offset_kernel(
+			pmd, (addr & CONT_PTE_MASK));
+		return pte;
+	}
+	return NULL;
+}
+
+pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
+			 struct page *page, int writable)
+{
+	size_t pagesize = huge_page_size(hstate_vma(vma));
+
+	if (pagesize == CONT_PTE_SIZE) {
+		entry = pte_mkcont(entry);
+	} else if (pagesize == CONT_PMD_SIZE) {
+		entry = pmd_pte(pmd_mkcont(pte_pmd(entry)));
+	} else if (pagesize != PUD_SIZE && pagesize != PMD_SIZE) {
+		pr_warn("%s: unrecognized huge page size 0x%lx\n",
+			__func__, pagesize);
+	}
+	return entry;
+}
+
+pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+			      unsigned long addr, pte_t *ptep)
+{
+	pte_t pte;
+
+	if (pte_cont(*ptep)) {
+		int ncontig, i;
+		size_t pgsize;
+		pte_t *cpte;
+		bool is_dirty = false;
+
+		cpte = huge_pte_offset(mm, addr);
+		ncontig = find_num_contig(mm, addr, cpte, *cpte, &pgsize);
+		/* save the 1st pte to return */
+		pte = ptep_get_and_clear(mm, addr, cpte);
+		for (i = 1; i < ncontig; ++i) {
+			/*
+			 * If HW_AFDBM is enabled, then the HW could
+			 * turn on the dirty bit for any of the page
+			 * in the set, so check them all.
+			 */
+			++cpte;
+			if (pte_dirty(ptep_get_and_clear(mm, addr, cpte)))
+				is_dirty = true;
+		}
+		if (is_dirty)
+			return pte_mkdirty(pte);
+		else
+			return pte;
+	} else {
+		return ptep_get_and_clear(mm, addr, ptep);
+	}
+}
+
+int huge_ptep_set_access_flags(struct vm_area_struct *vma,
+			       unsigned long addr, pte_t *ptep,
+			       pte_t pte, int dirty)
+{
+	pte_t *cpte;
+
+	if (pte_cont(pte)) {
+		int ncontig, i, changed = 0;
+		size_t pgsize = 0;
+		unsigned long pfn = pte_pfn(pte);
+		/* Select all bits except the pfn */
+		pgprot_t hugeprot =
+			__pgprot(pte_val(pfn_pte(pfn, __pgprot(0))) ^
+				 pte_val(pte));
+
+		cpte = huge_pte_offset(vma->vm_mm, addr);
+		pfn = pte_pfn(*cpte);
+		ncontig = find_num_contig(vma->vm_mm, addr, cpte,
+					  *cpte, &pgsize);
+		for (i = 0; i < ncontig; ++i, ++cpte) {
+			changed = ptep_set_access_flags(vma, addr, cpte,
+							pfn_pte(pfn,
+								hugeprot),
+							dirty);
+			pfn += pgsize >> PAGE_SHIFT;
+		}
+		return changed;
+	} else {
+		return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
+	}
+}
+
+void huge_ptep_set_wrprotect(struct mm_struct *mm,
+			     unsigned long addr, pte_t *ptep)
+{
+	if (pte_cont(*ptep)) {
+		int ncontig, i;
+		pte_t *cpte;
+		size_t pgsize = 0;
+
+		cpte = huge_pte_offset(mm, addr);
+		ncontig = find_num_contig(mm, addr, cpte, *cpte, &pgsize);
+		for (i = 0; i < ncontig; ++i, ++cpte)
+			ptep_set_wrprotect(mm, addr, cpte);
+	} else {
+		ptep_set_wrprotect(mm, addr, ptep);
+	}
+}
+
+void huge_ptep_clear_flush(struct vm_area_struct *vma,
+			   unsigned long addr, pte_t *ptep)
+{
+	if (pte_cont(*ptep)) {
+		int ncontig, i;
+		pte_t *cpte;
+		size_t pgsize = 0;
+
+		cpte = huge_pte_offset(vma->vm_mm, addr);
+		ncontig = find_num_contig(vma->vm_mm, addr, cpte,
+					  *cpte, &pgsize);
+		for (i = 0; i < ncontig; ++i, ++cpte)
+			ptep_clear_flush(vma, addr, cpte);
+	} else {
+		ptep_clear_flush(vma, addr, ptep);
+	}
+}
+
 static __init int setup_hugepagesz(char *opt)
 {
 	unsigned long ps = memparse(opt, &opt);
+
 	if (ps == PMD_SIZE) {
 		hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
 	} else if (ps == PUD_SIZE) {
 		hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
+	} else if (ps == (PAGE_SIZE * CONT_PTES)) {
+		hugetlb_add_hstate(CONT_PTE_SHIFT);
+	} else if (ps == (PMD_SIZE * CONT_PMDS)) {
+		hugetlb_add_hstate((PMD_SHIFT + CONT_PMD_SHIFT) - PAGE_SHIFT);
 	} else {
-		pr_err("hugepagesz: Unsupported page size %lu M\n", ps >> 20);
+		pr_err("hugepagesz: Unsupported page size %lu K\n", ps >> 10);
 		return 0;
 	}
 	return 1;
 }
 __setup("hugepagesz=", setup_hugepagesz);
+
+#ifdef CONFIG_ARM64_64K_PAGES
+static __init int add_default_hugepagesz(void)
+{
+	if (size_to_hstate(CONT_PTES * PAGE_SIZE) == NULL)
+		hugetlb_add_hstate(CONT_PMD_SHIFT);
+	return 0;
+}
+arch_initcall(add_default_hugepagesz);
+#endif
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 685c262e0be8..b0eb06423d5e 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -96,9 +96,7 @@ u32 hugetlb_fault_mutex_hash(struct hstate *h, struct mm_struct *mm,
 				struct address_space *mapping,
 				pgoff_t idx, unsigned long address);
 
-#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
 pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud);
-#endif
 
 extern int hugepages_treat_as_movable;
 extern int sysctl_hugetlb_shm_group;

From da604e8646f553a4a2c7613491112b0c9b3ec400 Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Fri, 18 Dec 2015 16:01:47 +0000
Subject: [PATCH 191/424] arm64: remove irq_count and do_softirq_own_stack()

sysrq_handle_reboot() re-enables interrupts while on the irq stack. The
irq_stack implementation wrongly assumed this would only ever happen
via the softirq path, allowing it to update irq_count late, in
do_softirq_own_stack().

This means if an irq occurs in sysrq_handle_reboot(), during
emergency_restart() the stack will be corrupted, as irq_count wasn't
updated.

Lose the optimisation, and instead of moving the adding/subtracting of
irq_count into irq_stack_entry/irq_stack_exit, remove it, and compare
sp_el0 (struct thread_info) with sp & ~(THREAD_SIZE - 1). This tells us
if we are on a task stack, if so, we can safely switch to the irq stack.
Finally, remove do_softirq_own_stack(), we don't need it anymore.

Reported-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: James Morse <james.morse@arm.com>
[will: use get_thread_info macro]
Signed-off-by: Will Deacon <will.deacon@arm.com>

(cherry picked from commit d224a69e3d80fe08f285d1f41d21b590bae4fa9f)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/irq.h |  2 --
 arch/arm64/kernel/entry.S    | 19 +++++++++---------
 arch/arm64/kernel/irq.c      | 38 +-----------------------------------
 3 files changed, 11 insertions(+), 48 deletions(-)

diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h
index 3bece4379bd9..b77197d941fc 100644
--- a/arch/arm64/include/asm/irq.h
+++ b/arch/arm64/include/asm/irq.h
@@ -11,8 +11,6 @@
 #include <asm-generic/irq.h>
 #include <asm/thread_info.h>
 
-#define __ARCH_HAS_DO_SOFTIRQ
-
 struct pt_regs;
 
 DECLARE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack);
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 0667fb7d8bb1..c0db321db7e1 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -181,19 +181,20 @@ alternative_endif
 	.macro	irq_stack_entry
 	mov	x19, sp			// preserve the original sp
 
-	this_cpu_ptr irq_stack, x25, x26
-
 	/*
-	 * Check the lowest address on irq_stack for the irq_count value,
-	 * incremented by do_softirq_own_stack if we have re-enabled irqs
-	 * while on the irq_stack.
+	 * Compare sp with the current thread_info, if the top
+	 * ~(THREAD_SIZE - 1) bits match, we are on a task stack, and
+	 * should switch to the irq stack.
 	 */
-	ldr	x26, [x25]
-	cbnz	x26, 9998f		// recursive use?
+	and	x25, x19, #~(THREAD_SIZE - 1)
+	cmp	x25, tsk
+	b.ne	9998f
 
-	/* switch to the irq stack */
+	this_cpu_ptr irq_stack, x25, x26
 	mov	x26, #IRQ_STACK_START_SP
 	add	x26, x25, x26
+
+	/* switch to the irq stack */
 	mov	sp, x26
 
 	/*
@@ -405,10 +406,10 @@ el1_irq:
 	bl	trace_hardirqs_off
 #endif
 
+	get_thread_info tsk
 	irq_handler
 
 #ifdef CONFIG_PREEMPT
-	get_thread_info tsk
 	ldr	w24, [tsk, #TI_PREEMPT]		// get preempt count
 	cbnz	w24, 1f				// preempt count != 0
 	ldr	x0, [tsk, #TI_FLAGS]		// get flags
diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
index ff7ebb710e51..2386b26c0712 100644
--- a/arch/arm64/kernel/irq.c
+++ b/arch/arm64/kernel/irq.c
@@ -25,24 +25,14 @@
 #include <linux/irq.h>
 #include <linux/smp.h>
 #include <linux/init.h>
-#include <linux/interrupt.h>
 #include <linux/irqchip.h>
 #include <linux/seq_file.h>
 
 unsigned long irq_err_count;
 
-/*
- * irq stack only needs to be 16 byte aligned - not IRQ_STACK_SIZE aligned.
- * irq_stack[0] is used as irq_count, a non-zero value indicates the stack
- * is in use, and el?_irq() shouldn't switch to it. This is used to detect
- * recursive use of the irq_stack, it is lazily updated by
- * do_softirq_own_stack(), which is called on the irq_stack, before
- * re-enabling interrupts to process softirqs.
- */
+/* irq stack only needs to be 16 byte aligned - not IRQ_STACK_SIZE aligned. */
 DEFINE_PER_CPU(unsigned long [IRQ_STACK_SIZE/sizeof(long)], irq_stack) __aligned(16);
 
-#define IRQ_COUNT()	(*per_cpu(irq_stack, smp_processor_id()))
-
 int arch_show_interrupts(struct seq_file *p, int prec)
 {
 	show_ipi_list(p, prec);
@@ -66,29 +56,3 @@ void __init init_IRQ(void)
 	if (!handle_arch_irq)
 		panic("No interrupt controller found.");
 }
-
-/*
- * do_softirq_own_stack() is called from irq_exit() before __do_softirq()
- * re-enables interrupts, at which point we may re-enter el?_irq(). We
- * increase irq_count here so that el1_irq() knows that it is already on the
- * irq stack.
- *
- * Called with interrupts disabled, so we don't worry about moving cpu, or
- * being interrupted while modifying irq_count.
- *
- * This function doesn't actually switch stack.
- */
-void do_softirq_own_stack(void)
-{
-	int cpu = smp_processor_id();
-
-	WARN_ON_ONCE(!irqs_disabled());
-
-	if (on_irq_stack(current_stack_pointer, cpu)) {
-		IRQ_COUNT()++;
-		__do_softirq();
-		IRQ_COUNT()--;
-	} else {
-		__do_softirq();
-	}
-}

From 76f2d0af233200abc487122d0002f3c14d796676 Mon Sep 17 00:00:00 2001
From: AKASHI Takahiro <takahiro.akashi@linaro.org>
Date: Tue, 15 Dec 2015 17:33:39 +0900
Subject: [PATCH 192/424] arm64: ftrace: modify a stack frame in a safe way

Function graph tracer modifies a return address (LR) in a stack frame by
calling ftrace_prepare_return() in a traced function's function prologue.
The current code does this modification before preserving an original
address at ftrace_push_return_trace() and there is always a small window
of inconsistency when an interrupt occurs.

This doesn't matter, as far as an interrupt stack is introduced, because
stack tracer won't be invoked in an interrupt context. But it would be
better to proactively minimize such a window by moving the LR modification
after ftrace_push_return_trace().

Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit 79fdee9b6355c9720f14717e1ad66af51bb331b5)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/kernel/ftrace.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index 8f7005bc35bd..ebecf9aa33d1 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -129,23 +129,20 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
 	 * on other archs. It's unlikely on AArch64.
 	 */
 	old = *parent;
-	*parent = return_hooker;
 
 	trace.func = self_addr;
 	trace.depth = current->curr_ret_stack + 1;
 
 	/* Only trace if the calling function expects to */
-	if (!ftrace_graph_entry(&trace)) {
-		*parent = old;
+	if (!ftrace_graph_entry(&trace))
 		return;
-	}
 
 	err = ftrace_push_return_trace(old, self_addr, &trace.depth,
 				       frame_pointer);
-	if (err == -EBUSY) {
-		*parent = old;
+	if (err == -EBUSY)
 		return;
-	}
+	else
+		*parent = return_hooker;
 }
 
 #ifdef CONFIG_DYNAMIC_FTRACE

From 30e9fa2678d1ab96894aa1ee670c5a804fe5a29b Mon Sep 17 00:00:00 2001
From: AKASHI Takahiro <takahiro.akashi@linaro.org>
Date: Tue, 15 Dec 2015 17:33:40 +0900
Subject: [PATCH 193/424] arm64: pass a task parameter to unwind_frame()

Function graph tracer modifies a return address (LR) in a stack frame
to hook a function's return. This will result in many useless entries
(return_to_handler) showing up in a call stack list.
We will fix this problem in a later patch ("arm64: ftrace: fix a stack
tracer's output under function graph tracer"). But since real return
addresses are saved in ret_stack[] array in struct task_struct,
unwind functions need to be notified of, in addition to a stack pointer
address, which task is being traced in order to find out real return
addresses.

This patch extends unwind functions' interfaces by adding an extra
argument of a pointer to task_struct.

Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit fe13f95b720075327a761fe6ddb45b0c90cab504)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/stacktrace.h | 6 ++++--
 arch/arm64/kernel/perf_callchain.c  | 2 +-
 arch/arm64/kernel/process.c         | 2 +-
 arch/arm64/kernel/return_address.c  | 2 +-
 arch/arm64/kernel/stacktrace.c      | 8 ++++----
 arch/arm64/kernel/time.c            | 2 +-
 arch/arm64/kernel/traps.c           | 2 +-
 7 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
index 7318f6d54aa9..6fb61c5090b4 100644
--- a/arch/arm64/include/asm/stacktrace.h
+++ b/arch/arm64/include/asm/stacktrace.h
@@ -16,14 +16,16 @@
 #ifndef __ASM_STACKTRACE_H
 #define __ASM_STACKTRACE_H
 
+struct task_struct;
+
 struct stackframe {
 	unsigned long fp;
 	unsigned long sp;
 	unsigned long pc;
 };
 
-extern int unwind_frame(struct stackframe *frame);
-extern void walk_stackframe(struct stackframe *frame,
+extern int unwind_frame(struct task_struct *tsk, struct stackframe *frame);
+extern void walk_stackframe(struct task_struct *tsk, struct stackframe *frame,
 			    int (*fn)(struct stackframe *, void *), void *data);
 
 #endif	/* __ASM_STACKTRACE_H */
diff --git a/arch/arm64/kernel/perf_callchain.c b/arch/arm64/kernel/perf_callchain.c
index 3aa74830cc69..797220da912b 100644
--- a/arch/arm64/kernel/perf_callchain.c
+++ b/arch/arm64/kernel/perf_callchain.c
@@ -165,7 +165,7 @@ void perf_callchain_kernel(struct perf_callchain_entry *entry,
 	frame.sp = regs->sp;
 	frame.pc = regs->pc;
 
-	walk_stackframe(&frame, callchain_trace, entry);
+	walk_stackframe(current, &frame, callchain_trace, entry);
 }
 
 unsigned long perf_instruction_pointer(struct pt_regs *regs)
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index f75b540bc3b4..98bf5461d4b6 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -348,7 +348,7 @@ unsigned long get_wchan(struct task_struct *p)
 	do {
 		if (frame.sp < stack_page ||
 		    frame.sp >= stack_page + THREAD_SIZE ||
-		    unwind_frame(&frame))
+		    unwind_frame(p, &frame))
 			return 0;
 		if (!in_sched_functions(frame.pc))
 			return frame.pc;
diff --git a/arch/arm64/kernel/return_address.c b/arch/arm64/kernel/return_address.c
index 6c4fd2810ecb..07b37ac05be4 100644
--- a/arch/arm64/kernel/return_address.c
+++ b/arch/arm64/kernel/return_address.c
@@ -44,7 +44,7 @@ void *return_address(unsigned int level)
 	frame.sp = current_stack_pointer;
 	frame.pc = (unsigned long)return_address; /* dummy */
 
-	walk_stackframe(&frame, save_return_addr, &data);
+	walk_stackframe(current, &frame, save_return_addr, &data);
 
 	if (!data.level)
 		return data.addr;
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index b9fd3a8abfc1..f7ee597ec883 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -36,7 +36,7 @@
  *	ldp	x29, x30, [sp]
  *	add	sp, sp, #0x10
  */
-int notrace unwind_frame(struct stackframe *frame)
+int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
 {
 	unsigned long high, low;
 	unsigned long fp = frame->fp;
@@ -99,7 +99,7 @@ int notrace unwind_frame(struct stackframe *frame)
 	return 0;
 }
 
-void notrace walk_stackframe(struct stackframe *frame,
+void notrace walk_stackframe(struct task_struct *tsk, struct stackframe *frame,
 		     int (*fn)(struct stackframe *, void *), void *data)
 {
 	while (1) {
@@ -107,7 +107,7 @@ void notrace walk_stackframe(struct stackframe *frame,
 
 		if (fn(frame, data))
 			break;
-		ret = unwind_frame(frame);
+		ret = unwind_frame(tsk, frame);
 		if (ret < 0)
 			break;
 	}
@@ -159,7 +159,7 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
 		frame.pc = (unsigned long)save_stack_trace_tsk;
 	}
 
-	walk_stackframe(&frame, save_trace, &data);
+	walk_stackframe(tsk, &frame, save_trace, &data);
 	if (trace->nr_entries < trace->max_entries)
 		trace->entries[trace->nr_entries++] = ULONG_MAX;
 }
diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c
index 13339b6ffc1a..6e5c521f123a 100644
--- a/arch/arm64/kernel/time.c
+++ b/arch/arm64/kernel/time.c
@@ -53,7 +53,7 @@ unsigned long profile_pc(struct pt_regs *regs)
 	frame.sp = regs->sp;
 	frame.pc = regs->pc;
 	do {
-		int ret = unwind_frame(&frame);
+		int ret = unwind_frame(NULL, &frame);
 		if (ret < 0)
 			return 0;
 	} while (in_lock_functions(frame.pc));
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 8a0084541f84..937008523fa5 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -177,7 +177,7 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
 		int ret;
 
 		dump_backtrace_entry(where);
-		ret = unwind_frame(&frame);
+		ret = unwind_frame(tsk, &frame);
 		if (ret < 0)
 			break;
 		stack = frame.sp;

From 1f18836b775f8f041dcad620c1b5d18c22c25c06 Mon Sep 17 00:00:00 2001
From: AKASHI Takahiro <takahiro.akashi@linaro.org>
Date: Tue, 15 Dec 2015 17:33:41 +0900
Subject: [PATCH 194/424] arm64: ftrace: fix a stack tracer's output under
 function graph tracer

Function graph tracer modifies a return address (LR) in a stack frame
to hook a function return. This will result in many useless entries
(return_to_handler) showing up in
 a) a stack tracer's output
 b) perf call graph (with perf record -g)
 c) dump_backtrace (at panic et al.)

For example, in case of a),
  $ echo function_graph > /sys/kernel/debug/tracing/current_tracer
  $ echo 1 > /proc/sys/kernel/stack_trace_enabled
  $ cat /sys/kernel/debug/tracing/stack_trace
        Depth    Size   Location    (54 entries)
        -----    ----   --------
  0)     4504      16   gic_raise_softirq+0x28/0x150
  1)     4488      80   smp_cross_call+0x38/0xb8
  2)     4408      48   return_to_handler+0x0/0x40
  3)     4360      32   return_to_handler+0x0/0x40
  ...

In case of b),
  $ echo function_graph > /sys/kernel/debug/tracing/current_tracer
  $ perf record -e mem:XXX:x -ag -- sleep 10
  $ perf report
                  ...
                  |          |          |--0.22%-- 0x550f8
                  |          |          |          0x10888
                  |          |          |          el0_svc_naked
                  |          |          |          sys_openat
                  |          |          |          return_to_handler
                  |          |          |          return_to_handler
                  ...

In case of c),
  $ echo function_graph > /sys/kernel/debug/tracing/current_tracer
  $ echo c > /proc/sysrq-trigger
  ...
  Call trace:
  [<ffffffc00044d3ac>] sysrq_handle_crash+0x24/0x30
  [<ffffffc000092250>] return_to_handler+0x0/0x40
  [<ffffffc000092250>] return_to_handler+0x0/0x40
  ...

This patch replaces such entries with real addresses preserved in
current->ret_stack[] at unwind_frame(). This way, we can cover all
the cases.

Reviewed-by: Jungseok Lee <jungseoklee85@gmail.com>
Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
[will: fixed minor context changes conflicting with irq stack bits]
Signed-off-by: Will Deacon <will.deacon@arm.com>

(cherry picked from commit 20380bb390a443b2c5c8800cec59743faf8151b4)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/ftrace.h     |  2 ++
 arch/arm64/include/asm/stacktrace.h |  3 +++
 arch/arm64/kernel/perf_callchain.c  |  3 +++
 arch/arm64/kernel/process.c         |  3 +++
 arch/arm64/kernel/return_address.c  |  3 +++
 arch/arm64/kernel/stacktrace.c      | 17 +++++++++++++++++
 arch/arm64/kernel/time.c            |  3 +++
 arch/arm64/kernel/traps.c           | 26 ++++++++++++++++++++------
 8 files changed, 54 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
index c5534facf941..3c60f37e48ab 100644
--- a/arch/arm64/include/asm/ftrace.h
+++ b/arch/arm64/include/asm/ftrace.h
@@ -28,6 +28,8 @@ struct dyn_arch_ftrace {
 
 extern unsigned long ftrace_graph_call;
 
+extern void return_to_handler(void);
+
 static inline unsigned long ftrace_call_adjust(unsigned long addr)
 {
 	/*
diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
index 6fb61c5090b4..801a16dbbdf6 100644
--- a/arch/arm64/include/asm/stacktrace.h
+++ b/arch/arm64/include/asm/stacktrace.h
@@ -22,6 +22,9 @@ struct stackframe {
 	unsigned long fp;
 	unsigned long sp;
 	unsigned long pc;
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	unsigned int graph;
+#endif
 };
 
 extern int unwind_frame(struct task_struct *tsk, struct stackframe *frame);
diff --git a/arch/arm64/kernel/perf_callchain.c b/arch/arm64/kernel/perf_callchain.c
index 797220da912b..ff4665462a02 100644
--- a/arch/arm64/kernel/perf_callchain.c
+++ b/arch/arm64/kernel/perf_callchain.c
@@ -164,6 +164,9 @@ void perf_callchain_kernel(struct perf_callchain_entry *entry,
 	frame.fp = regs->regs[29];
 	frame.sp = regs->sp;
 	frame.pc = regs->pc;
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	frame.graph = current->curr_ret_stack;
+#endif
 
 	walk_stackframe(current, &frame, callchain_trace, entry);
 }
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 98bf5461d4b6..88d742ba19d5 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -344,6 +344,9 @@ unsigned long get_wchan(struct task_struct *p)
 	frame.fp = thread_saved_fp(p);
 	frame.sp = thread_saved_sp(p);
 	frame.pc = thread_saved_pc(p);
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	frame.graph = p->curr_ret_stack;
+#endif
 	stack_page = (unsigned long)task_stack_page(p);
 	do {
 		if (frame.sp < stack_page ||
diff --git a/arch/arm64/kernel/return_address.c b/arch/arm64/kernel/return_address.c
index 07b37ac05be4..1718706fde83 100644
--- a/arch/arm64/kernel/return_address.c
+++ b/arch/arm64/kernel/return_address.c
@@ -43,6 +43,9 @@ void *return_address(unsigned int level)
 	frame.fp = (unsigned long)__builtin_frame_address(0);
 	frame.sp = current_stack_pointer;
 	frame.pc = (unsigned long)return_address; /* dummy */
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	frame.graph = current->curr_ret_stack;
+#endif
 
 	walk_stackframe(current, &frame, save_return_addr, &data);
 
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index f7ee597ec883..4fad9787ab46 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -17,6 +17,7 @@
  */
 #include <linux/kernel.h>
 #include <linux/export.h>
+#include <linux/ftrace.h>
 #include <linux/sched.h>
 #include <linux/stacktrace.h>
 
@@ -66,6 +67,19 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
 	frame->fp = *(unsigned long *)(fp);
 	frame->pc = *(unsigned long *)(fp + 8);
 
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	if (tsk && tsk->ret_stack &&
+			(frame->pc == (unsigned long)return_to_handler)) {
+		/*
+		 * This is a case where function graph tracer has
+		 * modified a return address (LR) in a stack frame
+		 * to hook a function return.
+		 * So replace it to an original value.
+		 */
+		frame->pc = tsk->ret_stack[frame->graph--].ret;
+	}
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
 	/*
 	 * Check whether we are going to walk through from interrupt stack
 	 * to task stack.
@@ -158,6 +172,9 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
 		frame.sp = current_stack_pointer;
 		frame.pc = (unsigned long)save_stack_trace_tsk;
 	}
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	frame.graph = tsk->curr_ret_stack;
+#endif
 
 	walk_stackframe(tsk, &frame, save_trace, &data);
 	if (trace->nr_entries < trace->max_entries)
diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c
index 6e5c521f123a..59779699a1a4 100644
--- a/arch/arm64/kernel/time.c
+++ b/arch/arm64/kernel/time.c
@@ -52,6 +52,9 @@ unsigned long profile_pc(struct pt_regs *regs)
 	frame.fp = regs->regs[29];
 	frame.sp = regs->sp;
 	frame.pc = regs->pc;
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	frame.graph = -1; /* no task info */
+#endif
 	do {
 		int ret = unwind_frame(NULL, &frame);
 		if (ret < 0)
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 937008523fa5..bdc293f6adc4 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -147,17 +147,14 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
 {
 	struct stackframe frame;
 	unsigned long irq_stack_ptr = IRQ_STACK_PTR(smp_processor_id());
+	int skip;
 
 	pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk);
 
 	if (!tsk)
 		tsk = current;
 
-	if (regs) {
-		frame.fp = regs->regs[29];
-		frame.sp = regs->sp;
-		frame.pc = regs->pc;
-	} else if (tsk == current) {
+	if (tsk == current) {
 		frame.fp = (unsigned long)__builtin_frame_address(0);
 		frame.sp = current_stack_pointer;
 		frame.pc = (unsigned long)dump_backtrace;
@@ -169,14 +166,31 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
 		frame.sp = thread_saved_sp(tsk);
 		frame.pc = thread_saved_pc(tsk);
 	}
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	frame.graph = tsk->curr_ret_stack;
+#endif
 
+	skip = !!regs;
 	pr_emerg("Call trace:\n");
 	while (1) {
 		unsigned long where = frame.pc;
 		unsigned long stack;
 		int ret;
 
-		dump_backtrace_entry(where);
+		/* skip until specified stack frame */
+		if (!skip) {
+			dump_backtrace_entry(where);
+		} else if (frame.fp == regs->regs[29]) {
+			skip = 0;
+			/*
+			 * Mostly, this is the case where this function is
+			 * called in panic/abort. As exception handler's
+			 * stack frame does not contain the corresponding pc
+			 * at which an exception has taken place, use regs->pc
+			 * instead.
+			 */
+			dump_backtrace_entry(regs->pc);
+		}
 		ret = unwind_frame(tsk, &frame);
 		if (ret < 0)
 			break;

From d2b08280e2c1f40fa209aaaaf86c9b730de28204 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Mon, 21 Dec 2015 16:44:27 +0000
Subject: [PATCH 195/424] arm64: traps: address fallout from printk -> pr_*
 conversion

Commit ac7b406c1a9d ("arm64: Use pr_* instead of printk") was a fairly
mindless s/printk/pr_*/ change driven by a complaint from checkpatch.

As is usual with such changes, this has led to some odd behaviour on
arm64:

  * syslog now picks up the "pr_emerg" line from dump_backtrace, but not
    the actual trace, which leads to a bunch of "kernel:Call trace:"
    lines in the log

  * __{pte,pmd,pgd}_error print at KERN_CRIT, as opposed to KERN_ERR
    which is used by other architectures.

This patch restores the original printk behaviour for dump_backtrace
and downgrade the pgtable error macros to KERN_ERR.

Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit c9cd0ed925c0b927283d4739bfe689eb9d1e9dfd)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/kernel/traps.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index bdc293f6adc4..cbedd724f48e 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -171,7 +171,7 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
 #endif
 
 	skip = !!regs;
-	pr_emerg("Call trace:\n");
+	printk("Call trace:\n");
 	while (1) {
 		unsigned long where = frame.pc;
 		unsigned long stack;
@@ -482,22 +482,22 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr)
 
 void __pte_error(const char *file, int line, unsigned long val)
 {
-	pr_crit("%s:%d: bad pte %016lx.\n", file, line, val);
+	pr_err("%s:%d: bad pte %016lx.\n", file, line, val);
 }
 
 void __pmd_error(const char *file, int line, unsigned long val)
 {
-	pr_crit("%s:%d: bad pmd %016lx.\n", file, line, val);
+	pr_err("%s:%d: bad pmd %016lx.\n", file, line, val);
 }
 
 void __pud_error(const char *file, int line, unsigned long val)
 {
-	pr_crit("%s:%d: bad pud %016lx.\n", file, line, val);
+	pr_err("%s:%d: bad pud %016lx.\n", file, line, val);
 }
 
 void __pgd_error(const char *file, int line, unsigned long val)
 {
-	pr_crit("%s:%d: bad pgd %016lx.\n", file, line, val);
+	pr_err("%s:%d: bad pgd %016lx.\n", file, line, val);
 }
 
 /* GENERIC_BUG traps */

From 12037cecc2cfc56e01851c0fbf605be9e05bbe95 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 5 Jan 2016 10:18:51 +0100
Subject: [PATCH 196/424] arm64: module: fix relocation of movz instruction
 with negative immediate

The test whether a movz instruction with a signed immediate should be
turned into a movn instruction (i.e., when the immediate is negative)
is flawed, since the value of imm is always positive. Also, the
subsequent bounds check is incorrect since the limit update never
executes, due to the fact that the imm_type comparison will always be
false for negative signed immediates.

Let's fix this by performing the sign test on sval directly, and
replacing the bounds check with a simple comparison against U16_MAX.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
[will: tidied up use of sval, renamed MOVK enum value to MOVKZ]
Signed-off-by: Will Deacon <will.deacon@arm.com>

(cherry picked from commit b24a557527f97ad88619d5bd4c8017c635056d69)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/kernel/module.c | 51 ++++++++++++++------------------------
 1 file changed, 18 insertions(+), 33 deletions(-)

diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index f4bc779e62e8..03464ab0fff2 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -30,9 +30,6 @@
 #include <asm/insn.h>
 #include <asm/sections.h>
 
-#define	AARCH64_INSN_IMM_MOVNZ		AARCH64_INSN_IMM_MAX
-#define	AARCH64_INSN_IMM_MOVK		AARCH64_INSN_IMM_16
-
 void *module_alloc(unsigned long size)
 {
 	void *p;
@@ -110,16 +107,20 @@ static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len)
 	return 0;
 }
 
+enum aarch64_insn_movw_imm_type {
+	AARCH64_INSN_IMM_MOVNZ,
+	AARCH64_INSN_IMM_MOVKZ,
+};
+
 static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val,
-			   int lsb, enum aarch64_insn_imm_type imm_type)
+			   int lsb, enum aarch64_insn_movw_imm_type imm_type)
 {
-	u64 imm, limit = 0;
+	u64 imm;
 	s64 sval;
 	u32 insn = le32_to_cpu(*(u32 *)place);
 
 	sval = do_reloc(op, place, val);
-	sval >>= lsb;
-	imm = sval & 0xffff;
+	imm = sval >> lsb;
 
 	if (imm_type == AARCH64_INSN_IMM_MOVNZ) {
 		/*
@@ -128,7 +129,7 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val,
 		 * immediate is less than zero.
 		 */
 		insn &= ~(3 << 29);
-		if ((s64)imm >= 0) {
+		if (sval >= 0) {
 			/* >=0: Set the instruction to MOVZ (opcode 10b). */
 			insn |= 2 << 29;
 		} else {
@@ -140,29 +141,13 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val,
 			 */
 			imm = ~imm;
 		}
-		imm_type = AARCH64_INSN_IMM_MOVK;
 	}
 
 	/* Update the instruction with the new encoding. */
-	insn = aarch64_insn_encode_immediate(imm_type, insn, imm);
+	insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_16, insn, imm);
 	*(u32 *)place = cpu_to_le32(insn);
 
-	/* Shift out the immediate field. */
-	sval >>= 16;
-
-	/*
-	 * For unsigned immediates, the overflow check is straightforward.
-	 * For signed immediates, the sign bit is actually the bit past the
-	 * most significant bit of the field.
-	 * The AARCH64_INSN_IMM_16 immediate type is unsigned.
-	 */
-	if (imm_type != AARCH64_INSN_IMM_16) {
-		sval++;
-		limit++;
-	}
-
-	/* Check the upper bits depending on the sign of the immediate. */
-	if ((u64)sval > limit)
+	if (imm > U16_MAX)
 		return -ERANGE;
 
 	return 0;
@@ -267,25 +252,25 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 			overflow_check = false;
 		case R_AARCH64_MOVW_UABS_G0:
 			ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0,
-					      AARCH64_INSN_IMM_16);
+					      AARCH64_INSN_IMM_MOVKZ);
 			break;
 		case R_AARCH64_MOVW_UABS_G1_NC:
 			overflow_check = false;
 		case R_AARCH64_MOVW_UABS_G1:
 			ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16,
-					      AARCH64_INSN_IMM_16);
+					      AARCH64_INSN_IMM_MOVKZ);
 			break;
 		case R_AARCH64_MOVW_UABS_G2_NC:
 			overflow_check = false;
 		case R_AARCH64_MOVW_UABS_G2:
 			ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32,
-					      AARCH64_INSN_IMM_16);
+					      AARCH64_INSN_IMM_MOVKZ);
 			break;
 		case R_AARCH64_MOVW_UABS_G3:
 			/* We're using the top bits so we can't overflow. */
 			overflow_check = false;
 			ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 48,
-					      AARCH64_INSN_IMM_16);
+					      AARCH64_INSN_IMM_MOVKZ);
 			break;
 		case R_AARCH64_MOVW_SABS_G0:
 			ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0,
@@ -302,7 +287,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 		case R_AARCH64_MOVW_PREL_G0_NC:
 			overflow_check = false;
 			ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0,
-					      AARCH64_INSN_IMM_MOVK);
+					      AARCH64_INSN_IMM_MOVKZ);
 			break;
 		case R_AARCH64_MOVW_PREL_G0:
 			ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0,
@@ -311,7 +296,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 		case R_AARCH64_MOVW_PREL_G1_NC:
 			overflow_check = false;
 			ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16,
-					      AARCH64_INSN_IMM_MOVK);
+					      AARCH64_INSN_IMM_MOVKZ);
 			break;
 		case R_AARCH64_MOVW_PREL_G1:
 			ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16,
@@ -320,7 +305,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 		case R_AARCH64_MOVW_PREL_G2_NC:
 			overflow_check = false;
 			ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32,
-					      AARCH64_INSN_IMM_MOVK);
+					      AARCH64_INSN_IMM_MOVKZ);
 			break;
 		case R_AARCH64_MOVW_PREL_G2:
 			ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32,

From 3fd9316702a82b498fcf7055f9781589ea6c1e1c Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 5 Jan 2016 10:18:52 +0100
Subject: [PATCH 197/424] arm64: module: avoid undefined shift behavior in
 reloc_data()

Compilers may engage the improbability drive when encountering shifts
by a distance that is a multiple of the size of the operand type. Since
the required bounds check is very simple here, we can get rid of all the
fuzzy masking, shifting and comparing, and use the documented bounds
directly.

Reported-by: David Binderman <dcb314@hotmail.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit f930896967fa3f9ab16a6f87267b92798308d48f)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/kernel/module.c | 20 ++++----------------
 1 file changed, 4 insertions(+), 16 deletions(-)

diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index 03464ab0fff2..93e970231ca9 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -72,15 +72,18 @@ static u64 do_reloc(enum aarch64_reloc_op reloc_op, void *place, u64 val)
 
 static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len)
 {
-	u64 imm_mask = (1 << len) - 1;
 	s64 sval = do_reloc(op, place, val);
 
 	switch (len) {
 	case 16:
 		*(s16 *)place = sval;
+		if (sval < S16_MIN || sval > U16_MAX)
+			return -ERANGE;
 		break;
 	case 32:
 		*(s32 *)place = sval;
+		if (sval < S32_MIN || sval > U32_MAX)
+			return -ERANGE;
 		break;
 	case 64:
 		*(s64 *)place = sval;
@@ -89,21 +92,6 @@ static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len)
 		pr_err("Invalid length (%d) for data relocation\n", len);
 		return 0;
 	}
-
-	/*
-	 * Extract the upper value bits (including the sign bit) and
-	 * shift them to bit 0.
-	 */
-	sval = (s64)(sval & ~(imm_mask >> 1)) >> (len - 1);
-
-	/*
-	 * Overflow has occurred if the value is not representable in
-	 * len bits (i.e the bottom len bits are not sign-extended and
-	 * the top bits are not all zero).
-	 */
-	if ((u64)(sval + 1) > 2)
-		return -ERANGE;
-
 	return 0;
 }
 

From 87e4c1f363cfd0ed3a673d47f229725a6b0946d7 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 5 Jan 2016 15:36:59 +0000
Subject: [PATCH 198/424] arm64: mm: move pgd_cache initialisation to
 pgtable_cache_init

Initialising the suppport for EFI runtime services requires us to
allocate a pgd off the back of an early_initcall. On systems where the
PGD_SIZE is smaller than PAGE_SIZE (e.g. 64k pages and 48-bit VA), the
pgd_cache isn't initialised at this stage, and we panic with a NULL
dereference during boot:

  Unable to handle kernel NULL pointer dereference at virtual address 00000000

  __create_mapping.isra.5+0x84/0x350
  create_pgd_mapping+0x20/0x28
  efi_create_mapping+0x5c/0x6c
  arm_enable_runtime_services+0x154/0x1e4
  do_one_initcall+0x8c/0x190
  kernel_init_freeable+0x84/0x1ec
  kernel_init+0x10/0xe0
  ret_from_fork+0x10/0x50

This patch fixes the problem by initialising the pgd_cache earlier, in
the pgtable_cache_init callback, which sounds suspiciously like what it
was intended for.

Reported-by: Dennis Chen <dennis.chen@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit 39b5be9b4233a9f212b98242bddf008f379b5122)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/pgtable.h |  3 ++-
 arch/arm64/mm/pgd.c              | 12 ++++++------
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index fd3d7c177c5f..76ff5d93c6c3 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -682,7 +682,8 @@ extern int kern_addr_valid(unsigned long addr);
 
 #include <asm-generic/pgtable.h>
 
-#define pgtable_cache_init() do { } while (0)
+void pgd_cache_init(void);
+#define pgtable_cache_init	pgd_cache_init
 
 /*
  * On AArch64, the cache coherency is handled via the set_pte_at() function.
diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c
index cb3ba1b812e7..ae11d4e03d0e 100644
--- a/arch/arm64/mm/pgd.c
+++ b/arch/arm64/mm/pgd.c
@@ -46,14 +46,14 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 		kmem_cache_free(pgd_cache, pgd);
 }
 
-static int __init pgd_cache_init(void)
+void __init pgd_cache_init(void)
 {
+	if (PGD_SIZE == PAGE_SIZE)
+		return;
+
 	/*
 	 * Naturally aligned pgds required by the architecture.
 	 */
-	if (PGD_SIZE != PAGE_SIZE)
-		pgd_cache = kmem_cache_create("pgd_cache", PGD_SIZE, PGD_SIZE,
-					      SLAB_PANIC, NULL);
-	return 0;
+	pgd_cache = kmem_cache_create("pgd_cache", PGD_SIZE, PGD_SIZE,
+				      SLAB_PANIC, NULL);
 }
-core_initcall(pgd_cache_init);

From ec567e8f53127f17d757c1ac30cfe09db6d1ea66 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 5 Jan 2016 17:33:34 +0000
Subject: [PATCH 199/424] arm64: entry: remove pointless SPSR mode check

In work_pending, we may skip work if the stacked SPSR value represents
anything other than an EL0 context. We then immediately invoke the
kernel_exit 0 macro as part of ret_to_user, assuming a return to EL0.
This is somewhat confusing.

We use work_pending as part of the ret_to_user/ret_fast_syscall state
machine. We only use ret_fast_syscall in the return from an SVC issued
from EL0. We use ret_to_user for return from EL0 exception handlers and
also for return from ret_from_fork in the case the task was not a kernel
thread (i.e. it is a user task).

Thus in all cases the stacked SPSR value must represent an EL0 context,
and the check is redundant. This patch removes it, along with the now
unused no_work_pending label.

Cc: Chris Metcalf <cmetcalf@ezchip.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit ee03353bc04f8e460cc4e3da80d9721d9ecb89f1)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/kernel/entry.S | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index c0db321db7e1..1f7f5a2b61bf 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -676,10 +676,7 @@ ret_fast_syscall_trace:
 work_pending:
 	tbnz	x1, #TIF_NEED_RESCHED, work_resched
 	/* TIF_SIGPENDING, TIF_NOTIFY_RESUME or TIF_FOREIGN_FPSTATE case */
-	ldr	x2, [sp, #S_PSTATE]
 	mov	x0, sp				// 'regs'
-	tst	x2, #PSR_MODE_MASK		// user mode regs?
-	b.ne	no_work_pending			// returning to kernel
 	enable_irq				// enable interrupts for do_notify_resume()
 	bl	do_notify_resume
 	b	ret_to_user
@@ -698,7 +695,6 @@ ret_to_user:
 	and	x2, x1, #_TIF_WORK_MASK
 	cbnz	x2, work_pending
 	enable_step_tsk x1, x2
-no_work_pending:
 	kernel_exit 0
 ENDPROC(ret_to_user)
 

From 91a6481661c6e9d8e503540decf6382a604c7893 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Wed, 23 Dec 2015 10:29:28 +0100
Subject: [PATCH 200/424] efi: stub: define DISABLE_BRANCH_PROFILING for all
 architectures

This moves the DISABLE_BRANCH_PROFILING define from the x86 specific
to the general CFLAGS definition for the stub. This fixes build errors
when building for arm64 with CONFIG_PROFILE_ALL_BRANCHES_ENABLED.

Reviewed-by: Matt Fleming <matt@codeblueprint.co.uk>
Reported-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit b523e185bba36164ca48a190f5468c140d815414)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 drivers/firmware/efi/libstub/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
index 3c0467d3688c..c0ddd1b8dca3 100644
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile
@@ -8,7 +8,7 @@ cflags-$(CONFIG_X86_32)		:= -march=i386
 cflags-$(CONFIG_X86_64)		:= -mcmodel=small
 cflags-$(CONFIG_X86)		+= -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2 \
 				   -fPIC -fno-strict-aliasing -mno-red-zone \
-				   -mno-mmx -mno-sse -DDISABLE_BRANCH_PROFILING
+				   -mno-mmx -mno-sse
 
 cflags-$(CONFIG_ARM64)		:= $(subst -pg,,$(KBUILD_CFLAGS))
 cflags-$(CONFIG_ARM)		:= $(subst -pg,,$(KBUILD_CFLAGS)) \
@@ -16,7 +16,7 @@ cflags-$(CONFIG_ARM)		:= $(subst -pg,,$(KBUILD_CFLAGS)) \
 
 cflags-$(CONFIG_EFI_ARMSTUB)	+= -I$(srctree)/scripts/dtc/libfdt
 
-KBUILD_CFLAGS			:= $(cflags-y) \
+KBUILD_CFLAGS			:= $(cflags-y) -DDISABLE_BRANCH_PROFILING \
 				   $(call cc-option,-ffreestanding) \
 				   $(call cc-option,-fno-stack-protector)
 

From 1ebc63c2d5ebd9578b625a049b6f83e9181caf8d Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 6 Jan 2016 11:05:27 +0000
Subject: [PATCH 201/424] arm64: head.S: use memset to clear BSS

Currently we use an open-coded memzero to clear the BSS. As it is a
trivial implementation, it is sub-optimal.

Our optimised memset doesn't use the stack, is position-independent, and
for the memzero case can use of DC ZVA to clear large blocks
efficiently. In __mmap_switched the MMU is on and there are no live
caller-saved registers, so we can safely call an uninstrumented memset.

This patch changes __mmap_switched to use memset when clearing the BSS.
We use the __pi_memset alias so as to avoid any instrumentation in all
kernel configurations.

Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit 2a803c4db615d85126c5c7afd5849a3cfde71422)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/kernel/head.S | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 17ce7285bb12..917d98108b3f 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -415,14 +415,13 @@ ENDPROC(__create_page_tables)
  */
 	.set	initial_sp, init_thread_union + THREAD_START_SP
 __mmap_switched:
-	adr_l	x6, __bss_start
-	adr_l	x7, __bss_stop
+	// Clear BSS
+	adr_l	x0, __bss_start
+	mov	x1, xzr
+	adr_l	x2, __bss_stop
+	sub	x2, x2, x0
+	bl	__pi_memset
 
-1:	cmp	x6, x7
-	b.hs	2f
-	str	xzr, [x6], #8			// Clear BSS
-	b	1b
-2:
 	adr_l	sp, initial_sp, x4
 	mov	x4, sp
 	and	x4, x4, #~(THREAD_SIZE - 1)

From a9bd748299179a8d8f8fcd937c74ab321981ab4d Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 6 May 2016 12:03:29 -0400
Subject: [PATCH 202/424] Revert: "powerpc/tm: Check for already reclaimed
 tasks"

This reverts commit e924c60db1b4891e45d15a33474ac5fab62cf029 which was
commit 7f821fc9c77a9b01fe7b1d6e72717b33d8d64142 upstream.

It shouldn't have been applied as the original was already in 4.4.

Reported-by: Jiri Slaby <jslaby@suse.cz>
Cc: Michael Neuling <mikey@neuling.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/kernel/process.c | 18 ------------------
 1 file changed, 18 deletions(-)

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index ef2ad2d682da..646bf4d222c1 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -551,24 +551,6 @@ static void tm_reclaim_thread(struct thread_struct *thr,
 		msr_diff &= MSR_FP | MSR_VEC | MSR_VSX | MSR_FE0 | MSR_FE1;
 	}
 
-	/*
-	 * Use the current MSR TM suspended bit to track if we have
-	 * checkpointed state outstanding.
-	 * On signal delivery, we'd normally reclaim the checkpointed
-	 * state to obtain stack pointer (see:get_tm_stackpointer()).
-	 * This will then directly return to userspace without going
-	 * through __switch_to(). However, if the stack frame is bad,
-	 * we need to exit this thread which calls __switch_to() which
-	 * will again attempt to reclaim the already saved tm state.
-	 * Hence we need to check that we've not already reclaimed
-	 * this state.
-	 * We do this using the current MSR, rather tracking it in
-	 * some specific thread_struct bit, as it has the additional
-	 * benifit of checking for a potential TM bad thing exception.
-	 */
-	if (!MSR_TM_SUSPENDED(mfmsr()))
-		return;
-
 	/*
 	 * Use the current MSR TM suspended bit to track if we have
 	 * checkpointed state outstanding.

From 2349384312b4192c5200d16f6089921fb4ffb7d8 Mon Sep 17 00:00:00 2001
From: Hariprasad S <hariprasad@chelsio.com>
Date: Tue, 5 Apr 2016 10:23:48 +0530
Subject: [PATCH 203/424] RDMA/iw_cxgb4: Fix bar2 virt addr calculation for T4
 chips

commit 32cc92c7b5e52357a0a24010bae9eb257fa75d3e upstream.

For T4, kernel mode qps don't use the user doorbell. User mode qps during
flow control db ringing are forced into kernel, where user doorbell is
treated as kernel doorbell and proper bar2 offset in bar2 virtual space is
calculated, which incase of T4 is a bogus address, causing a kernel panic
due to illegal write during doorbell ringing.
In case of T4, kernel mode qp bar2 virtual address should be 0. Added T4
check during bar2 virtual address calculation to return 0. Fixed Bar2
range checks based on bar2 physical address.

The below oops will be fixed

  <1>BUG: unable to handle kernel paging request at 000000000002aa08
  <1>IP: [<ffffffffa011d800>] c4iw_uld_control+0x4e0/0x880 [iw_cxgb4]
  <4>PGD 1416a8067 PUD 15bf35067 PMD 0
  <4>Oops: 0002 [#1] SMP
  <4>last sysfs file:
  /sys/devices/pci0000:00/0000:00:03.0/0000:02:00.4/infiniband/cxgb4_0/node_guid
  <4>CPU 5
  <4>Modules linked in: rdma_ucm rdma_cm ib_cm ib_sa ib_mad ib_uverbs
  ip6table_filter ip6_tables ebtable_nat ebtables ipt_MASQUERADE
  iptable_nat nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_state nf_conntrack
  ipt_REJECT xt_CHECKSUM iptable_mangle iptable_filter ip_tables bridge autofs4
  target_core_iblock target_core_file target_core_pscsi target_core_mod
  configfs bnx2fc cnic uio fcoe libfcoe libfc scsi_transport_fc scsi_tgt 8021q
  garp stp llc cpufreq_ondemand acpi_cpufreq freq_table mperf vhost_net macvtap
  macvlan tun kvm uinput microcode iTCO_wdt iTCO_vendor_support sg joydev
  serio_raw i2c_i801 i2c_core lpc_ich mfd_core e1000e ptp pps_core ioatdma dca
  i7core_edac edac_core shpchp ext3 jbd mbcache sd_mod crc_t10dif pata_acpi
  ata_generic ata_piix iw_cxgb4 iw_cm ib_core ib_addr cxgb4 ipv6 dm_mirror
  dm_region_hash dm_log dm_mod [last unloaded: scsi_wait_scan]
  <4>
  Supermicro X8ST3/X8ST3
  <4>RIP: 0010:[<ffffffffa011d800>]  [<ffffffffa011d800>]
  c4iw_uld_control+0x4e0/0x880 [iw_cxgb4]
  <4>RSP: 0000:ffff880155a03db0  EFLAGS: 00010006
  <4>RAX: 000000000000001d RBX: ffff88013ae5fc00 RCX: ffff880155adb180
  <4>RDX: 000000000002aa00 RSI: 0000000000000001 RDI: ffff88013ae5fdf8
  <4>RBP: ffff880155a03e10 R08: 0000000000000000 R09: 0000000000000001
  <4>R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
  <4>R13: 000000000000001d R14: ffff880156414ab0 R15: ffffe8ffffc05b88
  <4>FS:  0000000000000000(0000) GS:ffff8800282a0000(0000) knlGS:0000000000000000
  <4>CS:  0010 DS: 0018 ES: 0018 CR0: 000000008005003b
  <4>CR2: 000000000002aa08 CR3: 000000015bd0e000 CR4: 00000000000007e0
  <4>DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
  <4>DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
  <4>Process cxgb4 (pid: 394, threadinfo ffff880155a00000, task ffff880156414ab0)
  <4>Stack:
  <4> ffff880156415068 ffff880155adb180 ffff880155a03df0 ffffffffa00a344b
  <4><d> 00000000000003e8 ffff880155920000 0000000000000004 ffff880155920000
  <4><d> ffff88015592d438 ffffffffa00a3860 ffff880155a03fd8 ffffe8ffffc05b88
  <4>Call Trace:
  <4> [<ffffffffa00a344b>] ? enable_txq_db+0x2b/0x80 [cxgb4]
  <4> [<ffffffffa00a3860>] ? process_db_full+0x0/0xa0 [cxgb4]
  <4> [<ffffffffa00a38a6>] process_db_full+0x46/0xa0 [cxgb4]
  <4> [<ffffffff8109fda0>] worker_thread+0x170/0x2a0
  <4> [<ffffffff810a6aa0>] ? autoremove_wake_function+0x0/0x40
  <4> [<ffffffff8109fc30>] ? worker_thread+0x0/0x2a0
  <4> [<ffffffff810a660e>] kthread+0x9e/0xc0
  <4> [<ffffffff8100c28a>] child_rip+0xa/0x20
  <4> [<ffffffff810a6570>] ? kthread+0x0/0xc0
  <4> [<ffffffff8100c280>] ? child_rip+0x0/0x20
  <4>Code: e9 ba 00 00 00 66 0f 1f 44 00 00 44 8b 05 29 07 02 00 45 85 c0 0f 85
  71 02 00 00 8b 83 70 01 00 00 45 0f b7 ed c1 e0 0f 44 09 e8 <89> 42 08 0f ae f8
  66 c7 83 82 01 00 00 00 00 44 0f b7 ab dc 01
  <1>RIP  [<ffffffffa011d800>] c4iw_uld_control+0x4e0/0x880 [iw_cxgb4]
  <4> RSP <ffff880155a03db0>
  <4>CR2: 000000000002aa08`

Based on original work by Bharat Potnuri <bharat@chelsio.com>

Fixes: 74217d4c6a4fb0d8 ("iw_cxgb4: support for bar2 qid densities exceeding the page size")

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Hariprasad Shenai <hariprasad@chelsio.com>
Reviewed-by: Leon Romanovsky <leon@leon.nu>
Signed-off-by: Doug Ledford <dledford@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/infiniband/hw/cxgb4/cq.c | 2 +-
 drivers/infiniband/hw/cxgb4/qp.c | 6 +++++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index de9cd6901752..bc147582bed9 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -162,7 +162,7 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
 	cq->bar2_va = c4iw_bar2_addrs(rdev, cq->cqid, T4_BAR2_QTYPE_INGRESS,
 				      &cq->bar2_qid,
 				      user ? &cq->bar2_pa : NULL);
-	if (user && !cq->bar2_va) {
+	if (user && !cq->bar2_pa) {
 		pr_warn(MOD "%s: cqid %u not in BAR2 range.\n",
 			pci_name(rdev->lldi.pdev), cq->cqid);
 		ret = -EINVAL;
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index aa515afee724..53aa7515f542 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -185,6 +185,10 @@ void __iomem *c4iw_bar2_addrs(struct c4iw_rdev *rdev, unsigned int qid,
 
 	if (pbar2_pa)
 		*pbar2_pa = (rdev->bar2_pa + bar2_qoffset) & PAGE_MASK;
+
+	if (is_t4(rdev->lldi.adapter_type))
+		return NULL;
+
 	return rdev->bar2_kva + bar2_qoffset;
 }
 
@@ -270,7 +274,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
 	/*
 	 * User mode must have bar2 access.
 	 */
-	if (user && (!wq->sq.bar2_va || !wq->rq.bar2_va)) {
+	if (user && (!wq->sq.bar2_pa || !wq->rq.bar2_pa)) {
 		pr_warn(MOD "%s: sqid %u or rqid %u not in BAR2 range.\n",
 			pci_name(rdev->lldi.pdev), wq->sq.qid, wq->rq.qid);
 		goto free_dma;

From c6a012ba56536cac022045cab504d76d342d8c91 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 27 Jan 2016 14:52:02 +0100
Subject: [PATCH 204/424] ipvs: handle ip_vs_fill_iph_skb_off failure

commit 3f20efba41916ee17ce82f0fdd02581ada2872b2 upstream.

ip_vs_fill_iph_skb_off() may not find an IP header, and gcc has
determined that ip_vs_sip_fill_param() then incorrectly accesses
the protocol fields:

net/netfilter/ipvs/ip_vs_pe_sip.c: In function 'ip_vs_sip_fill_param':
net/netfilter/ipvs/ip_vs_pe_sip.c:76:5: error: 'iph.protocol' may be used uninitialized in this function [-Werror=maybe-uninitialized]
  if (iph.protocol != IPPROTO_UDP)
     ^
net/netfilter/ipvs/ip_vs_pe_sip.c:81:10: error: 'iph.len' may be used uninitialized in this function [-Werror=maybe-uninitialized]
  dataoff = iph.len + sizeof(struct udphdr);
          ^

This adds a check for the ip_vs_fill_iph_skb_off() return code
before looking at the ip header data returned from it.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: b0e010c527de ("ipvs: replace ip_vs_fill_ip4hdr with ip_vs_fill_iph_skb_off")
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/netfilter/ipvs/ip_vs_pe_sip.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c
index 1b8d594e493a..c4e9ca016a88 100644
--- a/net/netfilter/ipvs/ip_vs_pe_sip.c
+++ b/net/netfilter/ipvs/ip_vs_pe_sip.c
@@ -70,10 +70,10 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
 	const char *dptr;
 	int retc;
 
-	ip_vs_fill_iph_skb(p->af, skb, false, &iph);
+	retc = ip_vs_fill_iph_skb(p->af, skb, false, &iph);
 
 	/* Only useful with UDP */
-	if (iph.protocol != IPPROTO_UDP)
+	if (!retc || iph.protocol != IPPROTO_UDP)
 		return -EINVAL;
 	/* todo: IPv6 fragments:
 	 *       I think this only should be done for the first fragment. /HS

From ba5e7e673624b6099640111e7366be850cf5dbe7 Mon Sep 17 00:00:00 2001
From: Marco Angaroni <marcoangaroni@gmail.com>
Date: Sat, 5 Mar 2016 12:10:02 +0100
Subject: [PATCH 205/424] ipvs: correct initial offset of Call-ID header search
 in SIP persistence engine

commit 7617a24f83b5d67f4dab1844956be1cebc44aec8 upstream.

The IPVS SIP persistence engine is not able to parse the SIP header
"Call-ID" when such header is inserted in the first positions of
the SIP message.

When IPVS is configured with "--pe sip" option, like for example:
ipvsadm -A -u 1.2.3.4:5060 -s rr --pe sip -p 120 -o
some particular messages (see below for details) do not create entries
in the connection template table, which can be listed with:
ipvsadm -Lcn --persistent-conn

Problematic SIP messages are SIP responses having "Call-ID" header
positioned just after message first line:
SIP/2.0 200 OK
[Call-ID header here]
[rest of the headers]

When "Call-ID" header is positioned down (after a few other headers)
it is correctly recognized.

This is due to the data offset used in get_callid function call inside
ip_vs_pe_sip.c file: since dptr already points to the start of the
SIP message, the value of dataoff should be initially 0.
Otherwise the header is searched starting from some bytes after the
first character of the SIP message.

Fixes: 758ff0338722 ("IPVS: sip persistence engine")
Signed-off-by: Marco Angaroni <marcoangaroni@gmail.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/netfilter/ipvs/ip_vs_pe_sip.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c
index c4e9ca016a88..0a6eb5c0d9e9 100644
--- a/net/netfilter/ipvs/ip_vs_pe_sip.c
+++ b/net/netfilter/ipvs/ip_vs_pe_sip.c
@@ -88,7 +88,7 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
 	dptr = skb->data + dataoff;
 	datalen = skb->len - dataoff;
 
-	if (get_callid(dptr, dataoff, datalen, &matchoff, &matchlen))
+	if (get_callid(dptr, 0, datalen, &matchoff, &matchlen))
 		return -EINVAL;
 
 	/* N.B: pe_data is only set on success,

From f94ad404f8934e0cae299dd6520707dc02bbf2fc Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Sat, 5 Mar 2016 15:03:22 +0200
Subject: [PATCH 206/424] ipvs: drop first packet to redirect conntrack

commit f719e3754ee2f7275437e61a6afd520181fdd43b upstream.

Jiri Bohac is reporting for a problem where the attempt
to reschedule existing connection to another real server
needs proper redirect for the conntrack used by the IPVS
connection. For example, when IPVS connection is created
to NAT-ed real server we alter the reply direction of
conntrack. If we later decide to select different real
server we can not alter again the conntrack. And if we
expire the old connection, the new connection is left
without conntrack.

So, the only way to redirect both the IPVS connection and
the Netfilter's conntrack is to drop the SYN packet that
hits existing connection, to wait for the next jiffie
to expire the old connection and its conntrack and to rely
on client's retransmission to create new connection as
usually.

Jiri Bohac provided a fix that drops all SYNs on rescheduling,
I extended his patch to do such drops only for connections
that use conntrack. Here is the original report from Jiri Bohac:

Since commit dc7b3eb900aa ("ipvs: Fix reuse connection if real server
is dead"), new connections to dead servers are redistributed
immediately to new servers.  The old connection is expired using
ip_vs_conn_expire_now() which sets the connection timer to expire
immediately.

However, before the timer callback, ip_vs_conn_expire(), is run
to clean the connection's conntrack entry, the new redistributed
connection may already be established and its conntrack removed
instead.

Fix this by dropping the first packet of the new connection
instead, like we do when the destination server is not available.
The timer will have deleted the old conntrack entry long before
the first packet of the new connection is retransmitted.

Fixes: dc7b3eb900aa ("ipvs: Fix reuse connection if real server is dead")
Signed-off-by: Jiri Bohac <jbohac@suse.cz>
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/ip_vs.h             | 17 +++++++++++++++
 net/netfilter/ipvs/ip_vs_core.c | 37 +++++++++++++++++++++++++--------
 2 files changed, 45 insertions(+), 9 deletions(-)

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 0816c872b689..a6cc576fd467 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -1588,6 +1588,23 @@ static inline void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp)
 }
 #endif /* CONFIG_IP_VS_NFCT */
 
+/* Really using conntrack? */
+static inline bool ip_vs_conn_uses_conntrack(struct ip_vs_conn *cp,
+					     struct sk_buff *skb)
+{
+#ifdef CONFIG_IP_VS_NFCT
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct;
+
+	if (!(cp->flags & IP_VS_CONN_F_NFCT))
+		return false;
+	ct = nf_ct_get(skb, &ctinfo);
+	if (ct && !nf_ct_is_untracked(ct))
+		return true;
+#endif
+	return false;
+}
+
 static inline int
 ip_vs_dest_conn_overhead(struct ip_vs_dest *dest)
 {
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index f57b4dcdb233..4da560005b0e 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1757,15 +1757,34 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int
 	cp = pp->conn_in_get(ipvs, af, skb, &iph);
 
 	conn_reuse_mode = sysctl_conn_reuse_mode(ipvs);
-	if (conn_reuse_mode && !iph.fragoffs &&
-	    is_new_conn(skb, &iph) && cp &&
-	    ((unlikely(sysctl_expire_nodest_conn(ipvs)) && cp->dest &&
-	      unlikely(!atomic_read(&cp->dest->weight))) ||
-	     unlikely(is_new_conn_expected(cp, conn_reuse_mode)))) {
-		if (!atomic_read(&cp->n_control))
-			ip_vs_conn_expire_now(cp);
-		__ip_vs_conn_put(cp);
-		cp = NULL;
+	if (conn_reuse_mode && !iph.fragoffs && is_new_conn(skb, &iph) && cp) {
+		bool uses_ct = false, resched = false;
+
+		if (unlikely(sysctl_expire_nodest_conn(ipvs)) && cp->dest &&
+		    unlikely(!atomic_read(&cp->dest->weight))) {
+			resched = true;
+			uses_ct = ip_vs_conn_uses_conntrack(cp, skb);
+		} else if (is_new_conn_expected(cp, conn_reuse_mode)) {
+			uses_ct = ip_vs_conn_uses_conntrack(cp, skb);
+			if (!atomic_read(&cp->n_control)) {
+				resched = true;
+			} else {
+				/* Do not reschedule controlling connection
+				 * that uses conntrack while it is still
+				 * referenced by controlled connection(s).
+				 */
+				resched = !uses_ct;
+			}
+		}
+
+		if (resched) {
+			if (!atomic_read(&cp->n_control))
+				ip_vs_conn_expire_now(cp);
+			__ip_vs_conn_put(cp);
+			if (uses_ct)
+				return NF_DROP;
+			cp = NULL;
+		}
 	}
 
 	if (unlikely(!cp)) {

From 8cc8381397b44d8888c02ad1995496d709cd541a Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 22 Jan 2016 16:48:46 +0200
Subject: [PATCH 207/424] mfd: intel-lpss: Remove clock tree on error path

commit 84cb36cac581c915ef4e8b70abb73e084325df92 upstream.

We forgot to remove the clock tree if something goes wrong in ->probe(). Add a
call to intel_lpss_unregister_clock() on error path in ->probe() to fix the
potential issue.

Fixes: 4b45efe85263 (mfd: Add support for Intel Sunrisepoint LPSS devices)
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mfd/intel-lpss.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/mfd/intel-lpss.c b/drivers/mfd/intel-lpss.c
index 6255513f54c7..68aa31ae553a 100644
--- a/drivers/mfd/intel-lpss.c
+++ b/drivers/mfd/intel-lpss.c
@@ -445,6 +445,7 @@ int intel_lpss_probe(struct device *dev,
 err_remove_ltr:
 	intel_lpss_debugfs_remove(lpss);
 	intel_lpss_ltr_hide(lpss);
+	intel_lpss_unregister_clock(lpss);
 
 err_clk_register:
 	ida_simple_remove(&intel_lpss_devid_ida, lpss->devid);

From be0860081ab64f99bcdaeaaf106778b1a16a3198 Mon Sep 17 00:00:00 2001
From: Dan Streetman <dan.streetman@canonical.com>
Date: Thu, 14 Jan 2016 13:42:32 -0500
Subject: [PATCH 208/424] nbd: ratelimit error msgs after socket close

commit da6ccaaa79caca4f38b540b651238f87215217a2 upstream.

Make the "Attempted send on closed socket" error messages generated in
nbd_request_handler() ratelimited.

When the nbd socket is shutdown, the nbd_request_handler() function emits
an error message for every request remaining in its queue.  If the queue
is large, this will spam a large amount of messages to the log.  There's
no need for a separate error message for each request, so this patch
ratelimits it.

In the specific case this was found, the system was virtual and the error
messages were logged to the serial port, which overwhelmed it.

Fixes: 4d48a542b427 ("nbd: fix I/O hang on disconnected nbds")
Signed-off-by: Dan Streetman <dan.streetman@canonical.com>
Signed-off-by: Markus Pargmann <mpa@pengutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/block/nbd.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 93b3f99b6865..8f1ce6d57a08 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -618,8 +618,8 @@ static void nbd_request_handler(struct request_queue *q)
 			req, req->cmd_type);
 
 		if (unlikely(!nbd->sock)) {
-			dev_err(disk_to_dev(nbd->disk),
-				"Attempted send on closed socket\n");
+			dev_err_ratelimited(disk_to_dev(nbd->disk),
+					    "Attempted send on closed socket\n");
 			req->errors++;
 			nbd_end_request(nbd, req);
 			spin_lock_irq(q->queue_lock);

From 5dd660ee0ebedf9aea7bbf7360584668af3cecce Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 10 Mar 2016 10:45:32 +0300
Subject: [PATCH 209/424] ata: ahci_xgene: dereferencing uninitialized pointer
 in probe

commit 8134233e8d346aaa1c929dc510e75482ae318bce upstream.

If the call to acpi_get_object_info() fails then "info" hasn't been
initialized.  In that situation, we already know that "version" should
be XGENE_AHCI_V1 so we don't actually need to dereference "info".

Fixes: c9802a4be661 ('ata: ahci_xgene: Add AHCI Support for 2nd HW version of APM X-Gene SoC AHCI SATA Host controller.')
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/ata/ahci_xgene.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/ata/ahci_xgene.c b/drivers/ata/ahci_xgene.c
index e2c6d9e0c5ac..e916bff6cee8 100644
--- a/drivers/ata/ahci_xgene.c
+++ b/drivers/ata/ahci_xgene.c
@@ -739,9 +739,9 @@ static int xgene_ahci_probe(struct platform_device *pdev)
 				dev_warn(&pdev->dev, "%s: Error reading device info. Assume version1\n",
 					__func__);
 				version = XGENE_AHCI_V1;
-			}
-			if (info->valid & ACPI_VALID_CID)
+			} else if (info->valid & ACPI_VALID_CID) {
 				version = XGENE_AHCI_V2;
+			}
 		}
 	}
 #endif

From c94897e820a85375c4cb7447ac429f6d1550b331 Mon Sep 17 00:00:00 2001
From: Amitkumar Karwar <akarwar@marvell.com>
Date: Tue, 23 Feb 2016 05:16:17 -0800
Subject: [PATCH 210/424] mwifiex: fix corner case association failure

commit a6139b6271f9f95377fe3486aed6120c9142779b upstream.

This patch corrects the error case in association path by returning
-1. Earlier "media_connected" used to remain on in this error case
causing failure for further association attempts.

Signed-off-by: Amitkumar Karwar <akarwar@marvell.com>
Fixes: b887664d882ee4 ('mwifiex: channel switch handling for station')
Signed-off-by: Cathy Luo <cluo@marvell.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/mwifiex/sta_ioctl.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/wireless/mwifiex/sta_ioctl.c b/drivers/net/wireless/mwifiex/sta_ioctl.c
index a6c8a4f7bfe9..d6c4f0f60839 100644
--- a/drivers/net/wireless/mwifiex/sta_ioctl.c
+++ b/drivers/net/wireless/mwifiex/sta_ioctl.c
@@ -313,6 +313,7 @@ int mwifiex_bss_start(struct mwifiex_private *priv, struct cfg80211_bss *bss,
 			mwifiex_dbg(adapter, ERROR,
 				    "Attempt to reconnect on csa closed chan(%d)\n",
 				    bss_desc->channel);
+			ret = -1;
 			goto done;
 		}
 

From d65bf4e2407824aecba454c1b0bc7908a5113047 Mon Sep 17 00:00:00 2001
From: Krzysztof Halasa <khalasa@piap.pl>
Date: Fri, 11 Mar 2016 12:32:14 +0100
Subject: [PATCH 211/424] CNS3xxx: Fix PCI cns3xxx_write_config()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 88e9da9a2a70b6f1a171fbf30a681d6bc4031c4d upstream.

The "where" offset was added twice, fix it.

Signed-off-by: Krzysztof Hałasa <khalasa@piap.pl>
Fixes: 498a92d42596 ("ARM: cns3xxx: pci: avoid potential stack overflow")
Signed-off-by: Olof Johansson <olof@lixom.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mach-cns3xxx/pcie.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm/mach-cns3xxx/pcie.c b/arch/arm/mach-cns3xxx/pcie.c
index 47905a50e075..318394ed5c7a 100644
--- a/arch/arm/mach-cns3xxx/pcie.c
+++ b/arch/arm/mach-cns3xxx/pcie.c
@@ -220,13 +220,13 @@ static void cns3xxx_write_config(struct cns3xxx_pcie *cnspci,
 	u32 mask = (0x1ull << (size * 8)) - 1;
 	int shift = (where % 4) * 8;
 
-	v = readl_relaxed(base + (where & 0xffc));
+	v = readl_relaxed(base);
 
 	v &= ~(mask << shift);
 	v |= (val & mask) << shift;
 
-	writel_relaxed(v, base + (where & 0xffc));
-	readl_relaxed(base + (where & 0xffc));
+	writel_relaxed(v, base);
+	readl_relaxed(base);
 }
 
 static void __init cns3xxx_pcie_hw_init(struct cns3xxx_pcie *cnspci)

From f0e92143b8e2e6fa1e854385667427011cfe1059 Mon Sep 17 00:00:00 2001
From: Heiko Stuebner <heiko@sntech.de>
Date: Thu, 21 Jan 2016 21:53:09 +0100
Subject: [PATCH 212/424] clk-divider: make sure read-only dividers do not
 write to their register

commit 50359819794b4a16ae35051cd80f2dab025f6019 upstream.

Commit e6d5e7d90be9 ("clk-divider: Fix READ_ONLY when divider > 1") removed
the special ops struct for read-only clocks and instead opted to handle
them inside the regular ops.

On the rk3368 this results in breakage as aclkm now gets set a value.
While it is the same divider value, the A53 core still doesn't like it,
which can result in the cpu ending up in a hang.
The reason being that "ACLKENMasserts one clock cycle before the rising
edge of ACLKM" and the clock should only be touched when STANDBYWFIL2
is asserted.

To fix this, reintroduce the read-only ops but do include the round_rate
callback. That way no writes that may be unsafe are done to the divider
register in any case.

The Rockchip use of the clk_divider_ops is adapted to this split again,
as is the nxp, lpc18xx-ccu driver that was included since the original
commit. On lpc18xx-ccu the divider seems to always be read-only
so only uses the new ops now.

Fixes: e6d5e7d90be9 ("clk-divider: Fix READ_ONLY when divider > 1")
Reported-by: Zhang Qing <zhangqing@rock-chips.com>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/clk/clk-divider.c         | 11 ++++++++++-
 drivers/clk/nxp/clk-lpc18xx-ccu.c |  2 +-
 drivers/clk/rockchip/clk.c        |  4 +++-
 include/linux/clk-provider.h      |  1 +
 4 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/drivers/clk/clk-divider.c b/drivers/clk/clk-divider.c
index 3ace102a2a0a..bbf206e3da0d 100644
--- a/drivers/clk/clk-divider.c
+++ b/drivers/clk/clk-divider.c
@@ -422,6 +422,12 @@ const struct clk_ops clk_divider_ops = {
 };
 EXPORT_SYMBOL_GPL(clk_divider_ops);
 
+const struct clk_ops clk_divider_ro_ops = {
+	.recalc_rate = clk_divider_recalc_rate,
+	.round_rate = clk_divider_round_rate,
+};
+EXPORT_SYMBOL_GPL(clk_divider_ro_ops);
+
 static struct clk *_register_divider(struct device *dev, const char *name,
 		const char *parent_name, unsigned long flags,
 		void __iomem *reg, u8 shift, u8 width,
@@ -445,7 +451,10 @@ static struct clk *_register_divider(struct device *dev, const char *name,
 		return ERR_PTR(-ENOMEM);
 
 	init.name = name;
-	init.ops = &clk_divider_ops;
+	if (clk_divider_flags & CLK_DIVIDER_READ_ONLY)
+		init.ops = &clk_divider_ro_ops;
+	else
+		init.ops = &clk_divider_ops;
 	init.flags = flags | CLK_IS_BASIC;
 	init.parent_names = (parent_name ? &parent_name: NULL);
 	init.num_parents = (parent_name ? 1 : 0);
diff --git a/drivers/clk/nxp/clk-lpc18xx-ccu.c b/drivers/clk/nxp/clk-lpc18xx-ccu.c
index 13aabbb3acbe..558da89555af 100644
--- a/drivers/clk/nxp/clk-lpc18xx-ccu.c
+++ b/drivers/clk/nxp/clk-lpc18xx-ccu.c
@@ -222,7 +222,7 @@ static void lpc18xx_ccu_register_branch_gate_div(struct lpc18xx_clk_branch *bran
 		div->width = 1;
 
 		div_hw = &div->hw;
-		div_ops = &clk_divider_ops;
+		div_ops = &clk_divider_ro_ops;
 	}
 
 	branch->gate.reg = branch->offset + reg_base;
diff --git a/drivers/clk/rockchip/clk.c b/drivers/clk/rockchip/clk.c
index be6c7fd8315d..e37eee819df9 100644
--- a/drivers/clk/rockchip/clk.c
+++ b/drivers/clk/rockchip/clk.c
@@ -90,7 +90,9 @@ static struct clk *rockchip_clk_register_branch(const char *name,
 		div->width = div_width;
 		div->lock = lock;
 		div->table = div_table;
-		div_ops = &clk_divider_ops;
+		div_ops = (div_flags & CLK_DIVIDER_READ_ONLY)
+						? &clk_divider_ro_ops
+						: &clk_divider_ops;
 	}
 
 	clk = clk_register_composite(NULL, name, parent_names, num_parents,
diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index c56988ac63f7..7cd0171963ae 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -384,6 +384,7 @@ struct clk_divider {
 #define CLK_DIVIDER_MAX_AT_ZERO		BIT(6)
 
 extern const struct clk_ops clk_divider_ops;
+extern const struct clk_ops clk_divider_ro_ops;
 
 unsigned long divider_recalc_rate(struct clk_hw *hw, unsigned long parent_rate,
 		unsigned int val, const struct clk_div_table *table,

From e6ce6ce062650eda95bc2b44420a4a7151c42d8a Mon Sep 17 00:00:00 2001
From: Shawn Lin <shawn.lin@rock-chips.com>
Date: Mon, 1 Feb 2016 16:18:40 +0800
Subject: [PATCH 213/424] soc: rockchip: power-domain: fix err handle while
 probing

commit 1d961f11a108af9f7fbe89cc950a8d16ddbdbb28 upstream.

If we fail to probe the driver, we should not directly break
from the for_each_available_child_of_node since it calls of_node_get
while iterating. This patch add of_node_put to fix the unbalanced
call pair.

Fixes: 7c696693a4f5 ("soc: rockchip: power-domain: Add power domain driver")
Signed-off-by: Shawn Lin <shawn.lin@rock-chips.com>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/soc/rockchip/pm_domains.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/soc/rockchip/pm_domains.c b/drivers/soc/rockchip/pm_domains.c
index 534c58937a56..4a65c5bda146 100644
--- a/drivers/soc/rockchip/pm_domains.c
+++ b/drivers/soc/rockchip/pm_domains.c
@@ -419,6 +419,7 @@ static int rockchip_pm_domain_probe(struct platform_device *pdev)
 		if (error) {
 			dev_err(dev, "failed to handle node %s: %d\n",
 				node->name, error);
+			of_node_put(node);
 			goto err_out;
 		}
 	}

From c7ea1f7642d7d77503804086af9b2336621b31e8 Mon Sep 17 00:00:00 2001
From: Shawn Lin <shawn.lin@rock-chips.com>
Date: Tue, 2 Feb 2016 11:37:50 +0800
Subject: [PATCH 214/424] clk: rockchip: free memory in error cases when
 registering clock branches

commit 2467b6745e0ae9c6cdccff24c4cceeb14b1cce3f upstream.

Add free memeory if rockchip_clk_register_branch fails.

Fixes: a245fecbb806 ("clk: rockchip: add basic infrastructure...")
Signed-off-by: Shawn Lin <shawn.lin@rock-chips.com>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/clk/rockchip/clk.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/clk/rockchip/clk.c b/drivers/clk/rockchip/clk.c
index e37eee819df9..9b6c8188efac 100644
--- a/drivers/clk/rockchip/clk.c
+++ b/drivers/clk/rockchip/clk.c
@@ -70,7 +70,7 @@ static struct clk *rockchip_clk_register_branch(const char *name,
 	if (gate_offset >= 0) {
 		gate = kzalloc(sizeof(*gate), GFP_KERNEL);
 		if (!gate)
-			return ERR_PTR(-ENOMEM);
+			goto err_gate;
 
 		gate->flags = gate_flags;
 		gate->reg = base + gate_offset;
@@ -82,7 +82,7 @@ static struct clk *rockchip_clk_register_branch(const char *name,
 	if (div_width > 0) {
 		div = kzalloc(sizeof(*div), GFP_KERNEL);
 		if (!div)
-			return ERR_PTR(-ENOMEM);
+			goto err_div;
 
 		div->flags = div_flags;
 		div->reg = base + muxdiv_offset;
@@ -102,6 +102,11 @@ static struct clk *rockchip_clk_register_branch(const char *name,
 				     flags);
 
 	return clk;
+err_div:
+	kfree(gate);
+err_gate:
+	kfree(mux);
+	return ERR_PTR(-ENOMEM);
 }
 
 static struct clk *rockchip_clk_register_frac_branch(const char *name,

From cf5281ef53856c5c9e4e9b0728805416502e466a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andreas=20F=C3=A4rber?= <afaerber@suse.de>
Date: Sun, 7 Feb 2016 22:13:03 +0100
Subject: [PATCH 215/424] clk: meson: Fix meson_clk_register_clks() signature
 type mismatch
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit bb473593c8099302bfd7befc23de67df907e3a99 upstream.

As preparation for arm64 based mesongxbb, which pulls in this code once
enabling ARCH_MESON, fix a size_t vs. unsigned int type mismatch.
The loop uses a local unsigned int variable, so adopt that type,
matching the header.

Fixes: 7a29a869434e ("clk: meson: Add support for Meson clock controller")
Signed-off-by: Andreas Färber <afaerber@suse.de>
Acked-by: Carlo Caione <carlo@endlessm.com>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/clk/meson/clkc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clk/meson/clkc.c b/drivers/clk/meson/clkc.c
index c83ae1367abc..d920d410b51d 100644
--- a/drivers/clk/meson/clkc.c
+++ b/drivers/clk/meson/clkc.c
@@ -198,7 +198,7 @@ meson_clk_register_fixed_rate(const struct clk_conf *clk_conf,
 }
 
 void __init meson_clk_register_clks(const struct clk_conf *clk_confs,
-				    size_t nr_confs,
+				    unsigned int nr_confs,
 				    void __iomem *clk_base)
 {
 	unsigned int i;

From faaf496612c39c8ca6d46fec5a6af78b85689f65 Mon Sep 17 00:00:00 2001
From: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Date: Mon, 22 Feb 2016 11:43:39 +0000
Subject: [PATCH 216/424] clk: qcom: msm8960: fix ce3_core clk enable register

commit 732d6913691848db9fabaa6a25b4d6fad10ddccf upstream.

This patch corrects the enable register offset which is actually 0x36cc
instead of 0x36c4

Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Fixes: 5f775498bdc4 ("clk: qcom: Fully support apq8064 global clock control")
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/clk/qcom/gcc-msm8960.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clk/qcom/gcc-msm8960.c b/drivers/clk/qcom/gcc-msm8960.c
index 66c18bc97857..2c83c03309cb 100644
--- a/drivers/clk/qcom/gcc-msm8960.c
+++ b/drivers/clk/qcom/gcc-msm8960.c
@@ -2769,7 +2769,7 @@ static struct clk_branch ce3_core_clk = {
 	.halt_reg = 0x2fdc,
 	.halt_bit = 5,
 	.clkr = {
-		.enable_reg = 0x36c4,
+		.enable_reg = 0x36cc,
 		.enable_mask = BIT(4),
 		.hw.init = &(struct clk_init_data){
 			.name = "ce3_core_clk",

From 5a9a5671011a3732daf1d300d5205aaae82ee558 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Wed, 24 Feb 2016 09:39:11 +0100
Subject: [PATCH 217/424] clk: versatile: sp810: support reentrance

commit ec7957a6aa0aaf981fb8356dc47a2cdd01cde03c upstream.

Despite care take to allocate clocks state containers the
SP810 driver actually just supports creating one instance:
all clocks registered for every instance will end up with the
exact same name and __clk_init() will fail.

Rename the timclken<0> .. timclken<n> to sp810_<instance>_<n>
so every clock on every instance gets a unique name.

This is necessary for the RealView PBA8 which has two SP810
blocks: the second block will not register its clocks unless
every clock on every instance is unique and results in boot
logs like this:

------------[ cut here ]------------
WARNING: CPU: 0 PID: 0 at ../drivers/clk/versatile/clk-sp810.c:137
  clk_sp810_of_setup+0x110/0x154()
Modules linked in:
CPU: 0 PID: 0 Comm: swapper/0 Not tainted
4.5.0-rc2-00030-g352718fc39f6-dirty #225
Hardware name: ARM RealView Machine (Device Tree Support)
[<c00167f8>] (unwind_backtrace) from [<c0013204>]
             (show_stack+0x10/0x14)
[<c0013204>] (show_stack) from [<c01a049c>]
             (dump_stack+0x84/0x9c)
[<c01a049c>] (dump_stack) from [<c0024990>]
             (warn_slowpath_common+0x74/0xb0)
[<c0024990>] (warn_slowpath_common) from [<c0024a68>]
             (warn_slowpath_null+0x1c/0x24)
[<c0024a68>] (warn_slowpath_null) from [<c051eb44>]
             (clk_sp810_of_setup+0x110/0x154)
[<c051eb44>] (clk_sp810_of_setup) from [<c051e3a4>]
             (of_clk_init+0x12c/0x1c8)
[<c051e3a4>] (of_clk_init) from [<c0504714>]
             (time_init+0x20/0x2c)
[<c0504714>] (time_init) from [<c0501b18>]
             (start_kernel+0x244/0x3c4)
[<c0501b18>] (start_kernel) from [<7000807c>] (0x7000807c)
---[ end trace cb88537fdc8fa200 ]---

Cc: Michael Turquette <mturquette@baylibre.com>
Cc: Pawel Moll <pawel.moll@arm.com>
Fixes: 6e973d2c4385 "clk: vexpress: Add separate SP810 driver"
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/clk/versatile/clk-sp810.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/clk/versatile/clk-sp810.c b/drivers/clk/versatile/clk-sp810.c
index a1cdef6b0f90..897c36c1754a 100644
--- a/drivers/clk/versatile/clk-sp810.c
+++ b/drivers/clk/versatile/clk-sp810.c
@@ -92,6 +92,7 @@ static void __init clk_sp810_of_setup(struct device_node *node)
 	int num = ARRAY_SIZE(parent_names);
 	char name[12];
 	struct clk_init_data init;
+	static int instance;
 	int i;
 	bool deprecated;
 
@@ -118,7 +119,7 @@ static void __init clk_sp810_of_setup(struct device_node *node)
 	deprecated = !of_find_property(node, "assigned-clock-parents", NULL);
 
 	for (i = 0; i < ARRAY_SIZE(sp810->timerclken); i++) {
-		snprintf(name, ARRAY_SIZE(name), "timerclken%d", i);
+		snprintf(name, sizeof(name), "sp810_%d_%d", instance, i);
 
 		sp810->timerclken[i].sp810 = sp810;
 		sp810->timerclken[i].channel = i;
@@ -139,5 +140,6 @@ static void __init clk_sp810_of_setup(struct device_node *node)
 	}
 
 	of_clk_add_provider(node, clk_sp810_timerclken_of_get, sp810);
+	instance++;
 }
 CLK_OF_DECLARE(sp810, "arm,sp810", clk_sp810_of_setup);

From 0d50da4683464e150961142341c69ea5a578974a Mon Sep 17 00:00:00 2001
From: Stephen Boyd <sboyd@codeaurora.org>
Date: Tue, 1 Mar 2016 17:26:48 -0800
Subject: [PATCH 218/424] clk: qcom: msm8960: Fix ce3_src register offset

commit 0f75e1a370fd843c9e508fc1ccf0662833034827 upstream.

The offset seems to have been copied from the sata clk. Fix it so
that enabling the crypto engine source clk works.

Tested-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Tested-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Fixes: 5f775498bdc4 ("clk: qcom: Fully support apq8064 global clock control")
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/clk/qcom/gcc-msm8960.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clk/qcom/gcc-msm8960.c b/drivers/clk/qcom/gcc-msm8960.c
index 2c83c03309cb..bdc4b2d07a23 100644
--- a/drivers/clk/qcom/gcc-msm8960.c
+++ b/drivers/clk/qcom/gcc-msm8960.c
@@ -2753,7 +2753,7 @@ static struct clk_rcg ce3_src = {
 	},
 	.freq_tbl = clk_tbl_ce3,
 	.clkr = {
-		.enable_reg = 0x2c08,
+		.enable_reg = 0x36c0,
 		.enable_mask = BIT(7),
 		.hw.init = &(struct clk_init_data){
 			.name = "ce3_src",

From 8e9a156140f9fb568ffcbdaaf390862bbfb09d83 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 14 Mar 2016 15:29:44 +0100
Subject: [PATCH 219/424] lpfc: fix misleading indentation

commit aeb6641f8ebdd61939f462a8255b316f9bfab707 upstream.

gcc-6 complains about the indentation of the lpfc_destroy_vport_work_array()
call in lpfc_online(), which clearly doesn't look right:

drivers/scsi/lpfc/lpfc_init.c: In function 'lpfc_online':
drivers/scsi/lpfc/lpfc_init.c:2880:3: warning: statement is indented as if it were guarded by... [-Wmisleading-indentation]
   lpfc_destroy_vport_work_array(phba, vports);
   ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~
drivers/scsi/lpfc/lpfc_init.c:2863:2: note: ...this 'if' clause, but it is not
  if (vports != NULL)
  ^~

Looking at the patch that introduced this code, it's clear that the
behavior is correct and the indentation is wrong.

This fixes the indentation and adds curly braces around the previous
if() block for clarity, as that is most likely what caused the code
to be misindented in the first place.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: 549e55cd2a1b ("[SCSI] lpfc 8.2.2 : Fix locking around HBA's port_list")
Reviewed-by: Sebastian Herbszt <herbszt@gmx.de>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Ewan D. Milne <emilne@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/lpfc/lpfc_init.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index db9446c612da..b0d92b84bcdc 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -2855,7 +2855,7 @@ lpfc_online(struct lpfc_hba *phba)
 	}
 
 	vports = lpfc_create_vport_work_array(phba);
-	if (vports != NULL)
+	if (vports != NULL) {
 		for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) {
 			struct Scsi_Host *shost;
 			shost = lpfc_shost_from_vport(vports[i]);
@@ -2872,7 +2872,8 @@ lpfc_online(struct lpfc_hba *phba)
 			}
 			spin_unlock_irq(shost->host_lock);
 		}
-		lpfc_destroy_vport_work_array(phba, vports);
+	}
+	lpfc_destroy_vport_work_array(phba, vports);
 
 	lpfc_unblock_mgmt_io(phba);
 	return 0;

From ee22885fc1fb49f98bb59e0825cc4ef3b1004f00 Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <linux@rempel-privat.de>
Date: Tue, 12 Apr 2016 19:37:44 +0200
Subject: [PATCH 220/424] ath9k: ar5008_hw_cmn_spur_mitigate: add missing
 mask_m & mask_p initialisation

commit de478a61389cacafe94dc8b035081b681b878f9d upstream.

by moving common code to ar5008_hw_cmn_spur_mitigate i forgot to move
mask_m & mask_p initialisation. This coused a performance regression
on ar9281.

Fixes: f911085ffa88 ("ath9k: split ar5008_hw_spur_mitigate and reuse common code in ar9002_hw_spur_mitigate.")
Reported-by: Gustav Frederiksen <lkml2017@openmailbox.org>
Tested-by: Gustav Frederiksen <lkml2017@openmailbox.org>
Signed-off-by: Oleksij Rempel <linux@rempel-privat.de>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/ath/ath9k/ar5008_phy.c | 8 +++-----
 drivers/net/wireless/ath/ath9k/ar9002_phy.c | 5 -----
 2 files changed, 3 insertions(+), 10 deletions(-)

diff --git a/drivers/net/wireless/ath/ath9k/ar5008_phy.c b/drivers/net/wireless/ath/ath9k/ar5008_phy.c
index 8f8793004b9f..1b271b99c49e 100644
--- a/drivers/net/wireless/ath/ath9k/ar5008_phy.c
+++ b/drivers/net/wireless/ath/ath9k/ar5008_phy.c
@@ -274,6 +274,9 @@ void ar5008_hw_cmn_spur_mitigate(struct ath_hw *ah,
 	};
 	static const int inc[4] = { 0, 100, 0, 0 };
 
+	memset(&mask_m, 0, sizeof(int8_t) * 123);
+	memset(&mask_p, 0, sizeof(int8_t) * 123);
+
 	cur_bin = -6000;
 	upper = bin + 100;
 	lower = bin - 100;
@@ -424,14 +427,9 @@ static void ar5008_hw_spur_mitigate(struct ath_hw *ah,
 	int tmp, new;
 	int i;
 
-	int8_t mask_m[123];
-	int8_t mask_p[123];
 	int cur_bb_spur;
 	bool is2GHz = IS_CHAN_2GHZ(chan);
 
-	memset(&mask_m, 0, sizeof(int8_t) * 123);
-	memset(&mask_p, 0, sizeof(int8_t) * 123);
-
 	for (i = 0; i < AR_EEPROM_MODAL_SPURS; i++) {
 		cur_bb_spur = ah->eep_ops->get_spur_channel(ah, i, is2GHz);
 		if (AR_NO_SPUR == cur_bb_spur)
diff --git a/drivers/net/wireless/ath/ath9k/ar9002_phy.c b/drivers/net/wireless/ath/ath9k/ar9002_phy.c
index db6624527d99..53d7445a5d12 100644
--- a/drivers/net/wireless/ath/ath9k/ar9002_phy.c
+++ b/drivers/net/wireless/ath/ath9k/ar9002_phy.c
@@ -178,14 +178,9 @@ static void ar9002_hw_spur_mitigate(struct ath_hw *ah,
 	int i;
 	struct chan_centers centers;
 
-	int8_t mask_m[123];
-	int8_t mask_p[123];
 	int cur_bb_spur;
 	bool is2GHz = IS_CHAN_2GHZ(chan);
 
-	memset(&mask_m, 0, sizeof(int8_t) * 123);
-	memset(&mask_p, 0, sizeof(int8_t) * 123);
-
 	ath9k_hw_get_channel_centers(ah, chan, &centers);
 	freq = centers.synth_center;
 

From fe9295e05bf878652e8d0e5caef53516d8de1789 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 26 Apr 2016 13:47:08 +0200
Subject: [PATCH 221/424] mac80211: fix statistics leak if dev_alloc_name()
 fails

commit e6436be21e77e3659b4ff7e357ab5a8342d132d2 upstream.

In the case that dev_alloc_name() fails, e.g. because the name was
given by the user and already exists, we need to clean up properly
and free the per-CPU statistics. Fix that.

Fixes: 5a490510ba5f ("mac80211: use per-CPU TX/RX statistics")
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/mac80211/iface.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 7a2b7915093b..bcb0a1b64556 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1750,7 +1750,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
 
 		ret = dev_alloc_name(ndev, ndev->name);
 		if (ret < 0) {
-			free_netdev(ndev);
+			ieee80211_if_free(ndev);
 			return ret;
 		}
 
@@ -1836,7 +1836,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
 
 		ret = register_netdevice(ndev);
 		if (ret) {
-			free_netdev(ndev);
+			ieee80211_if_free(ndev);
 			return ret;
 		}
 	}

From 8d2923930be15a5b295ace2029c76653dc4def13 Mon Sep 17 00:00:00 2001
From: Chunyu Hu <chuhu@redhat.com>
Date: Tue, 3 May 2016 19:34:34 +0800
Subject: [PATCH 222/424] tracing: Don't display trigger file for events that
 can't be enabled

commit 854145e0a8e9a05f7366d240e2f99d9c1ca6d6dd upstream.

Currently register functions for events will be called
through the 'reg' field of event class directly without
any check when seting up triggers.

Triggers for events that don't support register through
debug fs (events under events/ftrace are for trace-cmd to
read event format, and most of them don't have a register
function except events/ftrace/functionx) can't be enabled
at all, and an oops will be hit when setting up trigger
for those events, so just not creating them is an easy way
to avoid the oops.

Link: http://lkml.kernel.org/r/1462275274-3911-1-git-send-email-chuhu@redhat.com

Fixes: 85f2b08268c01 ("tracing: Add basic event trigger framework")
Signed-off-by: Chunyu Hu <chuhu@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/trace/trace_events.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index d202d991edae..996f0fd34312 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -2107,8 +2107,13 @@ event_create_dir(struct dentry *parent, struct trace_event_file *file)
 	trace_create_file("filter", 0644, file->dir, file,
 			  &ftrace_event_filter_fops);
 
-	trace_create_file("trigger", 0644, file->dir, file,
-			  &event_trigger_fops);
+	/*
+	 * Only event directories that can be enabled should have
+	 * triggers.
+	 */
+	if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE))
+		trace_create_file("trigger", 0644, file->dir, file,
+				  &event_trigger_fops);
 
 	trace_create_file("format", 0444, file->dir, call,
 			  &ftrace_event_format_fops);

From f3b51a03bea6dc4cda740481d48b2ff49abdced5 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@fb.com>
Date: Mon, 25 Apr 2016 16:52:38 -0700
Subject: [PATCH 223/424] MD: make bio mergeable

commit 9c573de3283af007ea11c17bde1e4568d9417328 upstream.

blk_queue_split marks bio unmergeable, which makes sense for normal bio.
But if dispatching the bio to underlayer disk, the blk_queue_split
checks are invalid, hence it's possible the bio becomes mergeable.

In the reported bug, this bug causes trim against raid0 performance slash
https://bugzilla.kernel.org/show_bug.cgi?id=117051

Reported-and-tested-by: Park Ju Hyung <qkrwngud825@gmail.com>
Fixes: 6ac45aeb6bca(block: avoid to merge splitted bio)
Cc: Ming Lei <ming.lei@canonical.com>
Cc: Neil Brown <neilb@suse.de>
Reviewed-by: Jens Axboe <axboe@fb.com>
Signed-off-by: Shaohua Li <shli@fb.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/md.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index b1e1f6b95782..c57fdf847b47 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -293,6 +293,8 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
 	 * go away inside make_request
 	 */
 	sectors = bio_sectors(bio);
+	/* bio could be mergeable after passing to underlayer */
+	bio->bi_rw &= ~REQ_NOMERGE;
 	mddev->pers->make_request(mddev, bio);
 
 	cpu = part_stat_lock();

From fe21a25e8c0cc97a080cc73c135e92ddce61a660 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 2 May 2016 12:46:42 -0700
Subject: [PATCH 224/424] Minimal fix-up of bad hashing behavior of hash_64()

commit 689de1d6ca95b3b5bd8ee446863bf81a4883ea25 upstream.

This is a fairly minimal fixup to the horribly bad behavior of hash_64()
with certain input patterns.

In particular, because the multiplicative value used for the 64-bit hash
was intentionally bit-sparse (so that the multiply could be done with
shifts and adds on architectures without hardware multipliers), some
bits did not get spread out very much.  In particular, certain fairly
common bit ranges in the input (roughly bits 12-20: commonly with the
most information in them when you hash things like byte offsets in files
or memory that have block factors that mean that the low bits are often
zero) would not necessarily show up much in the result.

There's a bigger patch-series brewing to fix up things more completely,
but this is the fairly minimal fix for the 64-bit hashing problem.  It
simply picks a much better constant multiplier, spreading the bits out a
lot better.

NOTE! For 32-bit architectures, the bad old hash_64() remains the same
for now, since 64-bit multiplies are expensive.  The bigger hashing
cleanup will replace the 32-bit case with something better.

The new constants were picked by George Spelvin who wrote that bigger
cleanup series.  I just picked out the constants and part of the comment
from that series.

Cc: George Spelvin <linux@horizon.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/hash.h | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/include/linux/hash.h b/include/linux/hash.h
index 1afde47e1528..79c52fa81cac 100644
--- a/include/linux/hash.h
+++ b/include/linux/hash.h
@@ -32,12 +32,28 @@
 #error Wordsize not 32 or 64
 #endif
 
+/*
+ * The above primes are actively bad for hashing, since they are
+ * too sparse. The 32-bit one is mostly ok, the 64-bit one causes
+ * real problems. Besides, the "prime" part is pointless for the
+ * multiplicative hash.
+ *
+ * Although a random odd number will do, it turns out that the golden
+ * ratio phi = (sqrt(5)-1)/2, or its negative, has particularly nice
+ * properties.
+ *
+ * These are the negative, (1 - phi) = (phi^2) = (3 - sqrt(5))/2.
+ * (See Knuth vol 3, section 6.4, exercise 9.)
+ */
+#define GOLDEN_RATIO_32 0x61C88647
+#define GOLDEN_RATIO_64 0x61C8864680B583EBull
+
 static __always_inline u64 hash_64(u64 val, unsigned int bits)
 {
 	u64 hash = val;
 
-#if defined(CONFIG_ARCH_HAS_FAST_MULTIPLIER) && BITS_PER_LONG == 64
-	hash = hash * GOLDEN_RATIO_PRIME_64;
+#if BITS_PER_LONG == 64
+	hash = hash * GOLDEN_RATIO_64;
 #else
 	/*  Sigh, gcc can't optimise this alone like it does for 32 bits. */
 	u64 n = hash;

From d27e2ddc40b632db8f84c4d3236e8191d0eabc69 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Thu, 5 May 2016 16:22:15 -0700
Subject: [PATCH 225/424] mm, cma: prevent nr_isolated_* counters from going
 negative

commit 14af4a5e9b26ad251f81c174e8a43f3e179434a5 upstream.

/proc/sys/vm/stat_refresh warns nr_isolated_anon and nr_isolated_file go
increasingly negative under compaction: which would add delay when
should be none, or no delay when should delay.  The bug in compaction
was due to a recent mmotm patch, but much older instance of the bug was
also noticed in isolate_migratepages_range() which is used for CMA and
gigantic hugepage allocations.

The bug is caused by putback_movable_pages() in an error path
decrementing the isolated counters without them being previously
incremented by acct_isolated().  Fix isolate_migratepages_range() by
removing the error-path putback, thus reaching acct_isolated() with
migratepages still isolated, and leaving putback to caller like most
other places do.

Fixes: edc2ca612496 ("mm, compaction: move pageblock checks up from isolate_migratepages_range()")
[vbabka@suse.cz: expanded the changelog]
Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Michal Hocko <mhocko@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/compaction.c | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/mm/compaction.c b/mm/compaction.c
index de3e1e71cd9f..7881e072dc33 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -880,16 +880,8 @@ isolate_migratepages_range(struct compact_control *cc, unsigned long start_pfn,
 		pfn = isolate_migratepages_block(cc, pfn, block_end_pfn,
 							ISOLATE_UNEVICTABLE);
 
-		/*
-		 * In case of fatal failure, release everything that might
-		 * have been isolated in the previous iteration, and signal
-		 * the failure back to caller.
-		 */
-		if (!pfn) {
-			putback_movable_pages(&cc->migratepages);
-			cc->nr_migratepages = 0;
+		if (!pfn)
 			break;
-		}
 
 		if (cc->nr_migratepages == COMPACT_CLUSTER_MAX)
 			break;

From 851375cc493de34a1443d85c46b026d8aeda715a Mon Sep 17 00:00:00 2001
From: Dan Streetman <ddstreet@ieee.org>
Date: Thu, 5 May 2016 16:22:23 -0700
Subject: [PATCH 226/424] mm/zswap: provide unique zpool name

commit 32a4e169039927bfb6ee9f0ccbbe3a8aaf13a4bc upstream.

Instead of using "zswap" as the name for all zpools created, add an
atomic counter and use "zswap%x" with the counter number for each zpool
created, to provide a unique name for each new zpool.

As zsmalloc, one of the zpool implementations, requires/expects a unique
name for each pool created, zswap should provide a unique name.  The
zsmalloc pool creation does not fail if a new pool with a conflicting
name is created, unless CONFIG_ZSMALLOC_STAT is enabled; in that case,
zsmalloc pool creation fails with -ENOMEM.  Then zswap will be unable to
change its compressor parameter if its zpool is zsmalloc; it also will
be unable to change its zpool parameter back to zsmalloc, if it has any
existing old zpool using zsmalloc with page(s) in it.  Attempts to
change the parameters will result in failure to create the zpool.  This
changes zswap to provide a unique name for each zpool creation.

Fixes: f1c54846ee45 ("zswap: dynamic pool creation")
Signed-off-by: Dan Streetman <ddstreet@ieee.org>
Reported-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Reviewed-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Dan Streetman <dan.streetman@canonical.com>
Cc: Minchan Kim <minchan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/zswap.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/mm/zswap.c b/mm/zswap.c
index bf14508afd64..340261946fda 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -170,6 +170,8 @@ static struct zswap_tree *zswap_trees[MAX_SWAPFILES];
 static LIST_HEAD(zswap_pools);
 /* protects zswap_pools list modification */
 static DEFINE_SPINLOCK(zswap_pools_lock);
+/* pool counter to provide unique names to zpool */
+static atomic_t zswap_pools_count = ATOMIC_INIT(0);
 
 /* used by param callback function */
 static bool zswap_init_started;
@@ -565,6 +567,7 @@ static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
 static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
 {
 	struct zswap_pool *pool;
+	char name[38]; /* 'zswap' + 32 char (max) num + \0 */
 	gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
 
 	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
@@ -573,7 +576,10 @@ static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
 		return NULL;
 	}
 
-	pool->zpool = zpool_create_pool(type, "zswap", gfp, &zswap_zpool_ops);
+	/* unique name for each pool specifically required by zsmalloc */
+	snprintf(name, 38, "zswap%x", atomic_inc_return(&zswap_pools_count));
+
+	pool->zpool = zpool_create_pool(type, name, gfp, &zswap_zpool_ops);
 	if (!pool->zpool) {
 		pr_err("%s zpool not available\n", type);
 		goto error;

From 63e9a60f4357e700a181980d424ffeaff32d5340 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Date: Fri, 22 Apr 2016 09:26:52 +0200
Subject: [PATCH 227/424] ARM: EXYNOS: Properly skip unitialized parent clock
 in power domain on

commit a0a966b83873f33778710a4fc59240244b0734a5 upstream.

We want to skip reparenting a clock on turning on power domain, if we
do not have the parent yet. The parent is obtained when turning the
domain off. However due to a typo, the loop is continued on IS_ERR() of
clock being reparented, not on the IS_ERR() of the parent.

Theoretically this could lead to OOPS on first turn on of a power
domain, if there was no turn off before. Practically that should never
happen because all power domains are turned on by default (reset value,
bootloader does not turn off them usually) so the first action will be
always turn off.

Fixes: 29e5eea06bc1 ("ARM: EXYNOS: Get current parent clock for power domain on/off")
Reported-by: Vladimir Zapolskiy <vz@mleia.com>
Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mach-exynos/pm_domains.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/mach-exynos/pm_domains.c b/arch/arm/mach-exynos/pm_domains.c
index 7c21760f590f..875a2bab64f6 100644
--- a/arch/arm/mach-exynos/pm_domains.c
+++ b/arch/arm/mach-exynos/pm_domains.c
@@ -92,7 +92,7 @@ static int exynos_pd_power(struct generic_pm_domain *domain, bool power_on)
 			if (IS_ERR(pd->clk[i]))
 				break;
 
-			if (IS_ERR(pd->clk[i]))
+			if (IS_ERR(pd->pclk[i]))
 				continue; /* Skip on first power up */
 			if (clk_set_parent(pd->clk[i], pd->pclk[i]))
 				pr_err("%s: error setting parent to clock%d\n",

From dd86efc570e528de7931f9eb039fa0c91bb3c1bd Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Wed, 20 Apr 2016 13:34:31 +0000
Subject: [PATCH 228/424] ARM: SoCFPGA: Fix secondary CPU startup in thumb2
 kernel

commit 5616f36713ea77f57ae908bf2fef641364403c9f upstream.

The secondary CPU starts up in ARM mode. When the kernel is compiled in
thumb2 mode we have to explicitly compile the secondary startup
trampoline in ARM mode, otherwise the CPU will go to Nirvana.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Reported-by: Steffen Trumtrar <s.trumtrar@pengutronix.de>
Suggested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Dinh Nguyen <dinguyen@opensource.altera.com>
Signed-off-by: Kevin Hilman <khilman@baylibre.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mach-socfpga/headsmp.S | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm/mach-socfpga/headsmp.S b/arch/arm/mach-socfpga/headsmp.S
index 5d94b7a2fb10..c160fa3007e9 100644
--- a/arch/arm/mach-socfpga/headsmp.S
+++ b/arch/arm/mach-socfpga/headsmp.S
@@ -13,6 +13,7 @@
 #include <asm/assembler.h>
 
 	.arch	armv7-a
+	.arm
 
 ENTRY(secondary_trampoline)
 	/* CPU1 will always fetch from 0x0 when it is brought out of reset.

From 0f7ea0699ac02fb7c5d67e8eac8f8581912f4988 Mon Sep 17 00:00:00 2001
From: Ross Lagerwall <ross.lagerwall@citrix.com>
Date: Thu, 17 Mar 2016 16:51:59 +0000
Subject: [PATCH 229/424] xen: Fix page <-> pfn conversion on 32 bit systems

commit 60901df3aed230d4565dca003f11b6a95fbf30d9 upstream.

Commit 1084b1988d22dc165c9dbbc2b0e057f9248ac4db (xen: Add Xen specific
page definition) caused a regression in 4.4.

The xen functions to convert between pages and pfns fail due to an
overflow on systems where a physical address may not fit in an
unsigned long (e.g. x86 32 bit PAE systems). Rework the conversion to
avoid overflow. This should also result in simpler object code.

This bug manifested itself as disk corruption with Linux 4.4 when
using blkfront in a Xen HVM x86 32 bit guest with more than 4 GiB of
memory.

Signed-off-by: Ross Lagerwall <ross.lagerwall@citrix.com>
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/xen/page.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/xen/page.h b/include/xen/page.h
index 96294ac93755..9dc46cb8a0fd 100644
--- a/include/xen/page.h
+++ b/include/xen/page.h
@@ -15,9 +15,9 @@
  */
 
 #define xen_pfn_to_page(xen_pfn)	\
-	((pfn_to_page(((unsigned long)(xen_pfn) << XEN_PAGE_SHIFT) >> PAGE_SHIFT)))
+	(pfn_to_page((unsigned long)(xen_pfn) >> (PAGE_SHIFT - XEN_PAGE_SHIFT)))
 #define page_to_xen_pfn(page)		\
-	(((page_to_pfn(page)) << PAGE_SHIFT) >> XEN_PAGE_SHIFT)
+	((page_to_pfn(page)) << (PAGE_SHIFT - XEN_PAGE_SHIFT))
 
 #define XEN_PFN_PER_PAGE	(PAGE_SIZE / XEN_PAGE_SIZE)
 

From 58d378b24b7f615b779ec20e7b7eebed8a3d8011 Mon Sep 17 00:00:00 2001
From: Ross Lagerwall <ross.lagerwall@citrix.com>
Date: Thu, 17 Mar 2016 16:52:00 +0000
Subject: [PATCH 230/424] xen/balloon: Fix crash when ballooning on x86 32 bit
 PAE

commit dfd74a1edfaba5864276a2859190a8d242d18952 upstream.

Commit 55b3da98a40dbb3776f7454daf0d95dde25c33d2 (xen/balloon: find
non-conflicting regions to place hotplugged memory) caused a
regression in 4.4.

When ballooning on an x86 32 bit PAE system with close to 64 GiB of
memory, the address returned by allocate_resource may be above 64 GiB.
When using CONFIG_SPARSEMEM, this setup is limited to using physical
addresses < 64 GiB.  When adding memory at this address, it runs off
the end of the mem_section array and causes a crash.  Instead, fail
the ballooning request.

Signed-off-by: Ross Lagerwall <ross.lagerwall@citrix.com>
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/xen/balloon.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 12eab503efd1..364bc44610c1 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -152,6 +152,8 @@ static DECLARE_WAIT_QUEUE_HEAD(balloon_wq);
 static void balloon_process(struct work_struct *work);
 static DECLARE_DELAYED_WORK(balloon_worker, balloon_process);
 
+static void release_memory_resource(struct resource *resource);
+
 /* When ballooning out (allocating memory to return to Xen) we don't really
    want the kernel to try too hard since that can trigger the oom killer. */
 #define GFP_BALLOON \
@@ -268,6 +270,20 @@ static struct resource *additional_memory_resource(phys_addr_t size)
 		return NULL;
 	}
 
+#ifdef CONFIG_SPARSEMEM
+	{
+		unsigned long limit = 1UL << (MAX_PHYSMEM_BITS - PAGE_SHIFT);
+		unsigned long pfn = res->start >> PAGE_SHIFT;
+
+		if (pfn > limit) {
+			pr_err("New System RAM resource outside addressable RAM (%lu > %lu)\n",
+			       pfn, limit);
+			release_memory_resource(res);
+			return NULL;
+		}
+	}
+#endif
+
 	return res;
 }
 

From 11dc8042c691244a085c16396fce4a3a3e9bc186 Mon Sep 17 00:00:00 2001
From: Jan Beulich <JBeulich@suse.com>
Date: Wed, 4 May 2016 07:02:36 -0600
Subject: [PATCH 231/424] xen/evtchn: fix ring resize when binding new events

commit 27e0e6385377c4dc68a4ddaf1a35a2dfa951f3c5 upstream.

The copying of ring data was wrong for two cases: For a full ring
nothing got copied at all (as in that case the canonicalized producer
and consumer indexes are identical). And in case one or both of the
canonicalized (after the resize) indexes would point into the second
half of the buffer, the copied data ended up in the wrong (free) part
of the new buffer. In both cases uninitialized data would get passed
back to the caller.

Fix this by simply copying the old ring contents twice: Once to the
low half of the new buffer, and a second time to the high half.

This addresses the inability to boot a HVM guest with 64 or more
vCPUs.  This regression was caused by 8620015499101090 (xen/evtchn:
dynamically grow pending event channel ring).

Reported-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/xen/evtchn.c | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
index 38272ad24551..f4edd6df3df2 100644
--- a/drivers/xen/evtchn.c
+++ b/drivers/xen/evtchn.c
@@ -316,7 +316,6 @@ static int evtchn_resize_ring(struct per_user_data *u)
 {
 	unsigned int new_size;
 	evtchn_port_t *new_ring, *old_ring;
-	unsigned int p, c;
 
 	/*
 	 * Ensure the ring is large enough to capture all possible
@@ -346,20 +345,17 @@ static int evtchn_resize_ring(struct per_user_data *u)
 	/*
 	 * Copy the old ring contents to the new ring.
 	 *
-	 * If the ring contents crosses the end of the current ring,
-	 * it needs to be copied in two chunks.
+	 * To take care of wrapping, a full ring, and the new index
+	 * pointing into the second half, simply copy the old contents
+	 * twice.
 	 *
 	 * +---------+    +------------------+
-	 * |34567  12| -> |       1234567    |
-	 * +-----p-c-+    +------------------+
+	 * |34567  12| -> |34567  1234567  12|
+	 * +-----p-c-+    +-------c------p---+
 	 */
-	p = evtchn_ring_offset(u, u->ring_prod);
-	c = evtchn_ring_offset(u, u->ring_cons);
-	if (p < c) {
-		memcpy(new_ring + c, u->ring + c, (u->ring_size - c) * sizeof(*u->ring));
-		memcpy(new_ring + u->ring_size, u->ring, p * sizeof(*u->ring));
-	} else
-		memcpy(new_ring + c, u->ring + c, (p - c) * sizeof(*u->ring));
+	memcpy(new_ring, old_ring, u->ring_size * sizeof(*u->ring));
+	memcpy(new_ring + u->ring_size, old_ring,
+	       u->ring_size * sizeof(*u->ring));
 
 	u->ring = new_ring;
 	u->ring_size = new_size;

From bba1e81824e04c76c14fe614b3d440b1a4d65db9 Mon Sep 17 00:00:00 2001
From: Ping Cheng <pinglinux@gmail.com>
Date: Tue, 12 Apr 2016 13:37:45 -0700
Subject: [PATCH 232/424] HID: wacom: Add support for DTK-1651

commit e1123fe975852cc0970b4e53ea65ca917e54c923 upstream.

DTK-1651 is a display pen-only tablet

Signed-off-by: Ping Cheng <pingc@wacom.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hid/wacom_wac.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c
index 3c0f47ac8e53..5c02d7bbc7f2 100644
--- a/drivers/hid/wacom_wac.c
+++ b/drivers/hid/wacom_wac.c
@@ -3449,6 +3449,10 @@ static const struct wacom_features wacom_features_0x33E =
 	{ "Wacom Intuos PT M 2", 21600, 13500, 2047, 63,
 	  INTUOSHT2, WACOM_INTUOS_RES, WACOM_INTUOS_RES, .touch_max = 16,
 	  .check_for_hid_type = true, .hid_type = HID_TYPE_USBNONE };
+static const struct wacom_features wacom_features_0x343 =
+	{ "Wacom DTK1651", 34616, 19559, 1023, 0,
+	  DTUS, WACOM_INTUOS_RES, WACOM_INTUOS_RES, 4,
+	  WACOM_DTU_OFFSET, WACOM_DTU_OFFSET };
 
 static const struct wacom_features wacom_features_HID_ANY_ID =
 	{ "Wacom HID", .type = HID_GENERIC };
@@ -3614,6 +3618,7 @@ const struct hid_device_id wacom_ids[] = {
 	{ USB_DEVICE_WACOM(0x33C) },
 	{ USB_DEVICE_WACOM(0x33D) },
 	{ USB_DEVICE_WACOM(0x33E) },
+	{ USB_DEVICE_WACOM(0x343) },
 	{ USB_DEVICE_WACOM(0x4001) },
 	{ USB_DEVICE_WACOM(0x4004) },
 	{ USB_DEVICE_WACOM(0x5000) },

From 5844e4cdacc5e002dfceb2872352af20cff40742 Mon Sep 17 00:00:00 2001
From: Nazar Mokrynskyi <nazar@mokrynskyi.com>
Date: Mon, 25 Apr 2016 17:01:56 +0300
Subject: [PATCH 233/424] HID: Fix boot delay for Creative SB Omni Surround 5.1
 with quirk

commit 567a44ecb44eb2584ddb93e962cfb133ce77e0bb upstream.

Needed for v2 of the device firmware, otherwise kernel will stuck for few
seconds and throw "usb_submit_urb(ctrl) failed: -1" early on system boot.

Signed-off-by: Nazar Mokrynskyi <nazar@mokrynskyi.com>
Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hid/hid-ids.h           | 1 +
 drivers/hid/usbhid/hid-quirks.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 8b78a7f1f779..909ab0176ef2 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -255,6 +255,7 @@
 #define USB_DEVICE_ID_CORSAIR_K90	0x1b02
 
 #define USB_VENDOR_ID_CREATIVELABS	0x041e
+#define USB_DEVICE_ID_CREATIVE_SB_OMNI_SURROUND_51	0x322c
 #define USB_DEVICE_ID_PRODIKEYS_PCMIDI	0x2801
 
 #define USB_VENDOR_ID_CVTOUCH		0x1ff7
diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c
index 7dd0953cd70f..dc8e6adf95a4 100644
--- a/drivers/hid/usbhid/hid-quirks.c
+++ b/drivers/hid/usbhid/hid-quirks.c
@@ -70,6 +70,7 @@ static const struct hid_blacklist {
 	{ USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_3AXIS_5BUTTON_STICK, HID_QUIRK_NOGET },
 	{ USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_AXIS_295, HID_QUIRK_NOGET },
 	{ USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_PIXART_USB_OPTICAL_MOUSE, HID_QUIRK_ALWAYS_POLL },
+	{ USB_VENDOR_ID_CREATIVELABS, USB_DEVICE_ID_CREATIVE_SB_OMNI_SURROUND_51, HID_QUIRK_NOGET },
 	{ USB_VENDOR_ID_DMI, USB_DEVICE_ID_DMI_ENC, HID_QUIRK_NOGET },
 	{ USB_VENDOR_ID_DRAGONRISE, USB_DEVICE_ID_DRAGONRISE_WIIU, HID_QUIRK_MULTI_INPUT },
 	{ USB_VENDOR_ID_ELAN, HID_ANY_ID, HID_QUIRK_ALWAYS_POLL },

From 5fd407ad088227ee030e93246cffe757541483f0 Mon Sep 17 00:00:00 2001
From: Knut Wohlrab <Knut.Wohlrab@de.bosch.com>
Date: Mon, 25 Apr 2016 14:08:25 -0700
Subject: [PATCH 234/424] Input: zforce_ts - fix dual touch recognition

commit 6984ab1ab35f422292b7781c65284038bcc0f6a6 upstream.

A wrong decoding of the touch coordinate message causes a wrong touch
ID. Touch ID for dual touch must be 0 or 1.

According to the actual Neonode nine byte touch coordinate coding,
the state is transported in the lower nibble and the touch ID in
the higher nibble of payload byte five.

Signed-off-by: Knut Wohlrab <Knut.Wohlrab@de.bosch.com>
Signed-off-by: Oleksij Rempel <linux@rempel-privat.de>
Signed-off-by: Dirk Behme <dirk.behme@de.bosch.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/input/touchscreen/zforce_ts.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/input/touchscreen/zforce_ts.c b/drivers/input/touchscreen/zforce_ts.c
index 9bbadaaf6bc3..7b3845aa5983 100644
--- a/drivers/input/touchscreen/zforce_ts.c
+++ b/drivers/input/touchscreen/zforce_ts.c
@@ -370,8 +370,8 @@ static int zforce_touch_event(struct zforce_ts *ts, u8 *payload)
 			point.coord_x = point.coord_y = 0;
 		}
 
-		point.state = payload[9 * i + 5] & 0x03;
-		point.id = (payload[9 * i + 5] & 0xfc) >> 2;
+		point.state = payload[9 * i + 5] & 0x0f;
+		point.id = (payload[9 * i + 5] & 0xf0) >> 4;
 
 		/* determine touch major, minor and orientation */
 		point.area_major = max(payload[9 * i + 6],

From 898149d10b855a0d0a5a9f8f05e4359970919eb9 Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Thu, 5 May 2016 16:22:26 -0700
Subject: [PATCH 235/424] proc: prevent accessing /proc/<PID>/environ until
 it's ready

commit 8148a73c9901a8794a50f950083c00ccf97d43b3 upstream.

If /proc/<PID>/environ gets read before the envp[] array is fully set up
in create_{aout,elf,elf_fdpic,flat}_tables(), we might end up trying to
read more bytes than are actually written, as env_start will already be
set but env_end will still be zero, making the range calculation
underflow, allowing to read beyond the end of what has been written.

Fix this as it is done for /proc/<PID>/cmdline by testing env_end for
zero.  It is, apparently, intentionally set last in create_*_tables().

This bug was found by the PaX size_overflow plugin that detected the
arithmetic underflow of 'this_len = env_end - (env_start + src)' when
env_end is still zero.

The expected consequence is that userland trying to access
/proc/<PID>/environ of a not yet fully set up process may get
inconsistent data as we're in the middle of copying in the environment
variables.

Fixes: https://forums.grsecurity.net/viewtopic.php?f=3&t=4363
Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=116461
Signed-off-by: Mathias Krause <minipli@googlemail.com>
Cc: Emese Revfy <re.emese@gmail.com>
Cc: Pax Team <pageexec@freemail.hu>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Mateusz Guzik <mguzik@redhat.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Jarod Wilson <jarod@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/proc/base.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index b7de324bec11..e8bbf6cdb437 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -954,7 +954,8 @@ static ssize_t environ_read(struct file *file, char __user *buf,
 	int ret = 0;
 	struct mm_struct *mm = file->private_data;
 
-	if (!mm)
+	/* Ensure the process spawned far enough to have an environment. */
+	if (!mm || !mm->env_end)
 		return 0;
 
 	page = (char *)__get_free_page(GFP_TEMPORARY);

From 24b8a175a66946ccb4ca227df52f517e1d8f5ef6 Mon Sep 17 00:00:00 2001
From: Jason Baron <jbaron@akamai.com>
Date: Thu, 5 May 2016 16:22:12 -0700
Subject: [PATCH 236/424] mm: update min_free_kbytes from khugepaged after core
 initialization

commit bc22af74f271ef76b2e6f72f3941f91f0da3f5f8 upstream.

Khugepaged attempts to raise min_free_kbytes if its set too low.
However, on boot khugepaged sets min_free_kbytes first from
subsys_initcall(), and then the mm 'core' over-rides min_free_kbytes
after from init_per_zone_wmark_min(), via a module_init() call.

Khugepaged used to use a late_initcall() to set min_free_kbytes (such
that it occurred after the core initialization), however this was
removed when the initialization of min_free_kbytes was integrated into
the starting of the khugepaged thread.

The fix here is simply to invoke the core initialization using a
core_initcall() instead of module_init(), such that the previous
initialization ordering is restored.  I didn't restore the
late_initcall() since start_stop_khugepaged() already sets
min_free_kbytes via set_recommended_min_free_kbytes().

This was noticed when we had a number of page allocation failures when
moving a workload to a kernel with this new initialization ordering.  On
an 8GB system this restores min_free_kbytes back to 67584 from 11365
when CONFIG_TRANSPARENT_HUGEPAGE=y is set and either
CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS=y or
CONFIG_TRANSPARENT_HUGEPAGE_MADVISE=y.

Fixes: 79553da293d3 ("thp: cleanup khugepaged startup")
Signed-off-by: Jason Baron <jbaron@akamai.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/page_alloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c69531afbd8f..6cf5cadeaef7 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6193,7 +6193,7 @@ int __meminit init_per_zone_wmark_min(void)
 	setup_per_zone_inactive_ratio();
 	return 0;
 }
-module_init(init_per_zone_wmark_min)
+core_initcall(init_per_zone_wmark_min)
 
 /*
  * min_free_kbytes_sysctl_handler - just a wrapper around proc_dointvec() so

From f9d46494887e1494c3a6b40434ab425f74b15148 Mon Sep 17 00:00:00 2001
From: Antonio Quartulli <a@unstable.cc>
Date: Sat, 12 Mar 2016 11:12:59 +0100
Subject: [PATCH 237/424] batman-adv: fix DAT candidate selection (must use
 vid)

commit 2871734e85e920503d49b3a8bc0afbe0773b6036 upstream.

Now that DAT is VLAN aware, it must use the VID when
computing the DHT address of the candidate nodes where
an entry is going to be stored/retrieved.

Fixes: be1db4f6615b ("batman-adv: make the Distributed ARP Table vlan aware")
Signed-off-by: Antonio Quartulli <a@unstable.cc>
[sven@narfation.org: fix conflicts with current version]
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/batman-adv/distributed-arp-table.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index a49c705fb86b..5f19133c5530 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -553,6 +553,7 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv,
  * be sent to
  * @bat_priv: the bat priv with all the soft interface information
  * @ip_dst: ipv4 to look up in the DHT
+ * @vid: VLAN identifier
  *
  * An originator O is selected if and only if its DHT_ID value is one of three
  * closest values (from the LEFT, with wrap around if needed) then the hash
@@ -561,7 +562,8 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv,
  * Returns the candidate array of size BATADV_DAT_CANDIDATE_NUM.
  */
 static struct batadv_dat_candidate *
-batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst)
+batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst,
+			     unsigned short vid)
 {
 	int select;
 	batadv_dat_addr_t last_max = BATADV_DAT_ADDR_MAX, ip_key;
@@ -577,7 +579,7 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst)
 		return NULL;
 
 	dat.ip = ip_dst;
-	dat.vid = 0;
+	dat.vid = vid;
 	ip_key = (batadv_dat_addr_t)batadv_hash_dat(&dat,
 						    BATADV_DAT_ADDR_MAX);
 
@@ -597,6 +599,7 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst)
  * @bat_priv: the bat priv with all the soft interface information
  * @skb: payload to send
  * @ip: the DHT key
+ * @vid: VLAN identifier
  * @packet_subtype: unicast4addr packet subtype to use
  *
  * This function copies the skb with pskb_copy() and is sent as unicast packet
@@ -607,7 +610,7 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst)
  */
 static bool batadv_dat_send_data(struct batadv_priv *bat_priv,
 				 struct sk_buff *skb, __be32 ip,
-				 int packet_subtype)
+				 unsigned short vid, int packet_subtype)
 {
 	int i;
 	bool ret = false;
@@ -616,7 +619,7 @@ static bool batadv_dat_send_data(struct batadv_priv *bat_priv,
 	struct sk_buff *tmp_skb;
 	struct batadv_dat_candidate *cand;
 
-	cand = batadv_dat_select_candidates(bat_priv, ip);
+	cand = batadv_dat_select_candidates(bat_priv, ip, vid);
 	if (!cand)
 		goto out;
 
@@ -1004,7 +1007,7 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
 		ret = true;
 	} else {
 		/* Send the request to the DHT */
-		ret = batadv_dat_send_data(bat_priv, skb, ip_dst,
+		ret = batadv_dat_send_data(bat_priv, skb, ip_dst, vid,
 					   BATADV_P_DAT_DHT_GET);
 	}
 out:
@@ -1132,8 +1135,8 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv,
 	/* Send the ARP reply to the candidates for both the IP addresses that
 	 * the node obtained from the ARP reply
 	 */
-	batadv_dat_send_data(bat_priv, skb, ip_src, BATADV_P_DAT_DHT_PUT);
-	batadv_dat_send_data(bat_priv, skb, ip_dst, BATADV_P_DAT_DHT_PUT);
+	batadv_dat_send_data(bat_priv, skb, ip_src, vid, BATADV_P_DAT_DHT_PUT);
+	batadv_dat_send_data(bat_priv, skb, ip_dst, vid, BATADV_P_DAT_DHT_PUT);
 }
 
 /**

From c6865db3a49a8f80052489fc6e1848df56f12ade Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Fri, 26 Feb 2016 17:56:13 +0100
Subject: [PATCH 238/424] batman-adv: Check skb size before using encapsulated
 ETH+VLAN header

commit c78296665c3d81f040117432ab9e1cb125521b0c upstream.

The encapsulated ethernet and VLAN header may be outside the received
ethernet frame. Thus the skb buffer size has to be checked before it can be
parsed to find out if it encapsulates another batman-adv packet.

Fixes: 420193573f11 ("batman-adv: softif bridge loop avoidance")
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
Signed-off-by: Antonio Quartulli <a@unstable.cc>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/batman-adv/soft-interface.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index ac4d08de5df4..720f1a5b81ac 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -407,11 +407,17 @@ void batadv_interface_rx(struct net_device *soft_iface,
 	 */
 	nf_reset(skb);
 
+	if (unlikely(!pskb_may_pull(skb, ETH_HLEN)))
+		goto dropped;
+
 	vid = batadv_get_vid(skb, 0);
 	ethhdr = eth_hdr(skb);
 
 	switch (ntohs(ethhdr->h_proto)) {
 	case ETH_P_8021Q:
+		if (!pskb_may_pull(skb, VLAN_ETH_HLEN))
+			goto dropped;
+
 		vhdr = (struct vlan_ethhdr *)skb->data;
 
 		if (vhdr->h_vlan_encapsulated_proto != ethertype)
@@ -423,8 +429,6 @@ void batadv_interface_rx(struct net_device *soft_iface,
 	}
 
 	/* skb->dev & skb->pkt_type are set here */
-	if (unlikely(!pskb_may_pull(skb, ETH_HLEN)))
-		goto dropped;
 	skb->protocol = eth_type_trans(skb, soft_iface);
 
 	/* should not be necessary anymore as we use skb_pull_rcsum()

From e426a835c1da093b2804825bfb4bfd503492e543 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Linus=20L=C3=BCssing?= <linus.luessing@c0d3.blue>
Date: Fri, 11 Mar 2016 14:04:49 +0100
Subject: [PATCH 239/424] batman-adv: Fix broadcast/ogm queue limit on a
 removed interface
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit c4fdb6cff2aa0ae740c5f19b6f745cbbe786d42f upstream.

When removing a single interface while a broadcast or ogm packet is
still pending then we will free the forward packet without releasing the
queue slots again.

This patch is supposed to fix this issue.

Fixes: 6d5808d4ae1b ("batman-adv: Add missing hardif_free_ref in forw_packet_free")
Signed-off-by: Linus Lüssing <linus.luessing@c0d3.blue>
[sven@narfation.org: fix conflicts with current version]
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
Signed-off-by: Antonio Quartulli <a@unstable.cc>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/batman-adv/send.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index f664324805eb..0e0c3b8ed927 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -630,6 +630,9 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv,
 
 		if (pending) {
 			hlist_del(&forw_packet->list);
+			if (!forw_packet->own)
+				atomic_inc(&bat_priv->bcast_queue_left);
+
 			batadv_forw_packet_free(forw_packet);
 		}
 	}
@@ -657,6 +660,9 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv,
 
 		if (pending) {
 			hlist_del(&forw_packet->list);
+			if (!forw_packet->own)
+				atomic_inc(&bat_priv->batman_queue_left);
+
 			batadv_forw_packet_free(forw_packet);
 		}
 	}

From 639ddeaee4f4111d80699452de5b70db29e96054 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Sun, 20 Mar 2016 12:27:53 +0100
Subject: [PATCH 240/424] batman-adv: Reduce refcnt of removed router when
 updating route

commit d1a65f1741bfd9c69f9e4e2ad447a89b6810427d upstream.

_batadv_update_route rcu_derefences orig_ifinfo->router outside of a
spinlock protected region to print some information messages to the debug
log. But this pointer is not checked again when the new pointer is assigned
in the spinlock protected region. Thus is can happen that the value of
orig_ifinfo->router changed in the meantime and thus the reference counter
of the wrong router gets reduced after the spinlock protected region.

Just rcu_dereferencing the value of orig_ifinfo->router inside the spinlock
protected region (which also set the new pointer) is enough to get the
correct old router object.

Fixes: e1a5382f978b ("batman-adv: Make orig_node->router an rcu protected pointer")
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Marek Lindner <mareklindner@neomailbox.ch>
Signed-off-by: Antonio Quartulli <a@unstable.cc>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/batman-adv/routing.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 3207667e69de..d8a2f33e60e5 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -104,6 +104,15 @@ static void _batadv_update_route(struct batadv_priv *bat_priv,
 		neigh_node = NULL;
 
 	spin_lock_bh(&orig_node->neigh_list_lock);
+	/* curr_router used earlier may not be the current orig_ifinfo->router
+	 * anymore because it was dereferenced outside of the neigh_list_lock
+	 * protected region. After the new best neighbor has replace the current
+	 * best neighbor the reference counter needs to decrease. Consequently,
+	 * the code needs to ensure the curr_router variable contains a pointer
+	 * to the replaced best neighbor.
+	 */
+	curr_router = rcu_dereference_protected(orig_ifinfo->router, true);
+
 	rcu_assign_pointer(orig_ifinfo->router, neigh_node);
 	spin_unlock_bh(&orig_node->neigh_list_lock);
 	batadv_orig_ifinfo_free_ref(orig_ifinfo);

From 4bc9468f1680e799e3036a6e816ed9ecfc7d98a3 Mon Sep 17 00:00:00 2001
From: Howard Cochran <hcochran@kernelspring.com>
Date: Thu, 10 Mar 2016 01:12:39 -0500
Subject: [PATCH 241/424] writeback: Fix performance regression in
 wb_over_bg_thresh()

commit 74d369443325063a5f0260e63971decb950fd8fa upstream.

Commit 947e9762a8dd ("writeback: update wb_over_bg_thresh() to use
wb_domain aware operations") unintentionally changed this function's
meaning from "are there more dirty pages than the background writeback
threshold" to "are there more dirty pages than the writeback threshold".
The background writeback threshold is typically half of the writeback
threshold, so this had the effect of raising the number of dirty pages
required to cause a writeback worker to perform background writeout.

This can cause a very severe performance regression when a BDI uses
BDI_CAP_STRICTLIMIT because balance_dirty_pages() and the writeback worker
can now disagree on whether writeback should be initiated.

For example, in a system having 1GB of RAM, a single spinning disk, and a
"pass-through" FUSE filesystem mounted over the disk, application code
mmapped a 128MB file on the disk and was randomly dirtying pages in that
mapping.

Because FUSE uses strictlimit and has a default max_ratio of only 1%, in
balance_dirty_pages, thresh is ~200, bg_thresh is ~100, and the
dirty_freerun_ceiling is the average of those, ~150. So, it pauses the
dirtying processes when we have 151 dirty pages and wakes up a background
writeback worker. But the worker tests the wrong threshold (200 instead of
100), so it does not initiate writeback and just returns.

Thus, balance_dirty_pages keeps looping, sleeping and then waking up the
worker who will do nothing. It remains stuck in this state until the few
dirty pages that we have finally expire and we write them back for that
reason. Then the whole process repeats, resulting in near-zero throughput
through the FUSE BDI.

The fix is to call the parameterized variant of wb_calc_thresh, so that the
worker will do writeback if the bg_thresh is exceeded which was the
behavior before the referenced commit.

Fixes: 947e9762a8dd ("writeback: update wb_over_bg_thresh() to use wb_domain aware operations")
Signed-off-by: Howard Cochran <hcochran@kernelspring.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Tested-by Sedat Dilek <sedat.dilek@gmail.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/page-writeback.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index d15d88c8efa1..e40c9364582d 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1899,7 +1899,8 @@ bool wb_over_bg_thresh(struct bdi_writeback *wb)
 	if (gdtc->dirty > gdtc->bg_thresh)
 		return true;
 
-	if (wb_stat(wb, WB_RECLAIMABLE) > __wb_calc_thresh(gdtc))
+	if (wb_stat(wb, WB_RECLAIMABLE) >
+	    wb_calc_thresh(gdtc->wb, gdtc->bg_thresh))
 		return true;
 
 	if (mdtc) {
@@ -1913,7 +1914,8 @@ bool wb_over_bg_thresh(struct bdi_writeback *wb)
 		if (mdtc->dirty > mdtc->bg_thresh)
 			return true;
 
-		if (wb_stat(wb, WB_RECLAIMABLE) > __wb_calc_thresh(mdtc))
+		if (wb_stat(wb, WB_RECLAIMABLE) >
+		    wb_calc_thresh(mdtc->wb, mdtc->bg_thresh))
 			return true;
 	}
 

From a7ebd7f5d87b33f36041239f3c2087a0572db4fb Mon Sep 17 00:00:00 2001
From: Matt Fleming <matt@codeblueprint.co.uk>
Date: Tue, 3 May 2016 20:29:39 +0100
Subject: [PATCH 242/424] MAINTAINERS: Remove asterisk from EFI directory names

commit e8dfe6d8f6762d515fcd4f30577f7bfcf7659887 upstream.

Mark reported that having asterisks on the end of directory names
confuses get_maintainer.pl when it encounters subdirectories, and that
my name does not appear when run on drivers/firmware/efi/libstub.

Reported-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-efi@vger.kernel.org
Link: http://lkml.kernel.org/r/1462303781-8686-2-git-send-email-matt@codeblueprint.co.uk
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 MAINTAINERS | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 4c3e1d2ac31b..ab65bbecb159 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4097,8 +4097,8 @@ F:	Documentation/efi-stub.txt
 F:	arch/ia64/kernel/efi.c
 F:	arch/x86/boot/compressed/eboot.[ch]
 F:	arch/x86/include/asm/efi.h
-F:	arch/x86/platform/efi/*
-F:	drivers/firmware/efi/*
+F:	arch/x86/platform/efi/
+F:	drivers/firmware/efi/
 F:	include/linux/efi*.h
 
 EFI VARIABLE FILESYSTEM

From 73c1fd0aa105bdea4768f9a11c850574fb9091f9 Mon Sep 17 00:00:00 2001
From: Chen Yu <yu.c.chen@intel.com>
Date: Fri, 6 May 2016 11:33:39 +0800
Subject: [PATCH 243/424] x86/tsc: Read all ratio bits from MSR_PLATFORM_INFO

commit 886123fb3a8656699dff40afa0573df359abeb18 upstream.

Currently we read the tsc radio: ratio = (MSR_PLATFORM_INFO >> 8) & 0x1f;

Thus we get bit 8-12 of MSR_PLATFORM_INFO, however according to the SDM
(35.5), the ratio bits are bit 8-15.

Ignoring the upper bits can result in an incorrect tsc ratio, which causes the
TSC calibration and the Local APIC timer frequency to be incorrect.

Fix this problem by masking 0xff instead.

[ tglx: Massaged changelog ]

Fixes: 7da7c1561366 "x86, tsc: Add static (MSR) TSC calibration on Intel Atom SoCs"
Signed-off-by: Chen Yu <yu.c.chen@intel.com>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Bin Gao <bin.gao@intel.com>
Cc: Len Brown <lenb@kernel.org>
Link: http://lkml.kernel.org/r/1462505619-5516-1-git-send-email-yu.c.chen@intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/tsc_msr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c
index 92ae6acac8a7..6aa0f4d9eea6 100644
--- a/arch/x86/kernel/tsc_msr.c
+++ b/arch/x86/kernel/tsc_msr.c
@@ -92,7 +92,7 @@ unsigned long try_msr_calibrate_tsc(void)
 
 	if (freq_desc_tables[cpu_index].msr_plat) {
 		rdmsr(MSR_PLATFORM_INFO, lo, hi);
-		ratio = (lo >> 8) & 0x1f;
+		ratio = (lo >> 8) & 0xff;
 	} else {
 		rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
 		ratio = (hi >> 8) & 0x1f;

From ddd5c3139de87400a6c6601ad3f54621e9d238fb Mon Sep 17 00:00:00 2001
From: Maxim Patlasov <mpatlasov@virtuozzo.com>
Date: Tue, 16 Feb 2016 11:45:33 -0800
Subject: [PATCH 244/424] fs/pnode.c: treat zero mnt_group_id-s as unequal

commit 7ae8fd0351f912b075149a1e03a017be8b903b9a upstream.

propagate_one(m) calculates "type" argument for copy_tree() like this:

>    if (m->mnt_group_id == last_dest->mnt_group_id) {
>        type = CL_MAKE_SHARED;
>    } else {
>        type = CL_SLAVE;
>        if (IS_MNT_SHARED(m))
>           type |= CL_MAKE_SHARED;
>   }

The "type" argument then governs clone_mnt() behavior with respect to flags
and mnt_master of new mount. When we iterate through a slave group, it is
possible that both current "m" and "last_dest" are not shared (although,
both are slaves, i.e. have non-NULL mnt_master-s). Then the comparison
above erroneously makes new mount shared and sets its mnt_master to
last_source->mnt_master. The patch fixes the problem by handling zero
mnt_group_id-s as though they are unequal.

The similar problem exists in the implementation of "else" clause above
when we have to ascend upward in the master/slave tree by calling:

>    last_source = last_source->mnt_master;
>    last_dest = last_source->mnt_parent;

proper number of times. The last step is governed by
"n->mnt_group_id != last_dest->mnt_group_id" condition that may lie if
both are zero. The patch fixes this case in the same way as the former one.

[AV: don't open-code an obvious helper...]

Signed-off-by: Maxim Patlasov <mpatlasov@virtuozzo.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: Seth Forshee <seth.forshee@canonical.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/pnode.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/fs/pnode.c b/fs/pnode.c
index 6367e1e435c6..c524fdddc7fb 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -202,6 +202,11 @@ static struct mount *last_dest, *last_source, *dest_master;
 static struct mountpoint *mp;
 static struct hlist_head *list;
 
+static inline bool peers(struct mount *m1, struct mount *m2)
+{
+	return m1->mnt_group_id == m2->mnt_group_id && m1->mnt_group_id;
+}
+
 static int propagate_one(struct mount *m)
 {
 	struct mount *child;
@@ -212,7 +217,7 @@ static int propagate_one(struct mount *m)
 	/* skip if mountpoint isn't covered by it */
 	if (!is_subdir(mp->m_dentry, m->mnt.mnt_root))
 		return 0;
-	if (m->mnt_group_id == last_dest->mnt_group_id) {
+	if (peers(m, last_dest)) {
 		type = CL_MAKE_SHARED;
 	} else {
 		struct mount *n, *p;
@@ -223,7 +228,7 @@ static int propagate_one(struct mount *m)
 					last_source = last_source->mnt_master;
 					last_dest = last_source->mnt_parent;
 				}
-				if (n->mnt_group_id != last_dest->mnt_group_id) {
+				if (!peers(n, last_dest)) {
 					last_source = last_source->mnt_master;
 					last_dest = last_source->mnt_parent;
 				}

From b17580a3cb901c56e9b9a3dea4d12153f5fc879e Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 5 May 2016 09:29:29 -0500
Subject: [PATCH 245/424] propogate_mnt: Handle the first propogated copy being
 a slave

commit 5ec0811d30378ae104f250bfc9b3640242d81e3f upstream.

When the first propgated copy was a slave the following oops would result:
> BUG: unable to handle kernel NULL pointer dereference at 0000000000000010
> IP: [<ffffffff811fba4e>] propagate_one+0xbe/0x1c0
> PGD bacd4067 PUD bac66067 PMD 0
> Oops: 0000 [#1] SMP
> Modules linked in:
> CPU: 1 PID: 824 Comm: mount Not tainted 4.6.0-rc5userns+ #1523
> Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007
> task: ffff8800bb0a8000 ti: ffff8800bac3c000 task.ti: ffff8800bac3c000
> RIP: 0010:[<ffffffff811fba4e>]  [<ffffffff811fba4e>] propagate_one+0xbe/0x1c0
> RSP: 0018:ffff8800bac3fd38  EFLAGS: 00010283
> RAX: 0000000000000000 RBX: ffff8800bb77ec00 RCX: 0000000000000010
> RDX: 0000000000000000 RSI: ffff8800bb58c000 RDI: ffff8800bb58c480
> RBP: ffff8800bac3fd48 R08: 0000000000000001 R09: 0000000000000000
> R10: 0000000000001ca1 R11: 0000000000001c9d R12: 0000000000000000
> R13: ffff8800ba713800 R14: ffff8800bac3fda0 R15: ffff8800bb77ec00
> FS:  00007f3c0cd9b7e0(0000) GS:ffff8800bfb00000(0000) knlGS:0000000000000000
> CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> CR2: 0000000000000010 CR3: 00000000bb79d000 CR4: 00000000000006e0
> Stack:
>  ffff8800bb77ec00 0000000000000000 ffff8800bac3fd88 ffffffff811fbf85
>  ffff8800bac3fd98 ffff8800bb77f080 ffff8800ba713800 ffff8800bb262b40
>  0000000000000000 0000000000000000 ffff8800bac3fdd8 ffffffff811f1da0
> Call Trace:
>  [<ffffffff811fbf85>] propagate_mnt+0x105/0x140
>  [<ffffffff811f1da0>] attach_recursive_mnt+0x120/0x1e0
>  [<ffffffff811f1ec3>] graft_tree+0x63/0x70
>  [<ffffffff811f1f6b>] do_add_mount+0x9b/0x100
>  [<ffffffff811f2c1a>] do_mount+0x2aa/0xdf0
>  [<ffffffff8117efbe>] ? strndup_user+0x4e/0x70
>  [<ffffffff811f3a45>] SyS_mount+0x75/0xc0
>  [<ffffffff8100242b>] do_syscall_64+0x4b/0xa0
>  [<ffffffff81988f3c>] entry_SYSCALL64_slow_path+0x25/0x25
> Code: 00 00 75 ec 48 89 0d 02 22 22 01 8b 89 10 01 00 00 48 89 05 fd 21 22 01 39 8e 10 01 00 00 0f 84 e0 00 00 00 48 8b 80 d8 00 00 00 <48> 8b 50 10 48 89 05 df 21 22 01 48 89 15 d0 21 22 01 8b 53 30
> RIP  [<ffffffff811fba4e>] propagate_one+0xbe/0x1c0
>  RSP <ffff8800bac3fd38>
> CR2: 0000000000000010
> ---[ end trace 2725ecd95164f217 ]---

This oops happens with the namespace_sem held and can be triggered by
non-root users.  An all around not pleasant experience.

To avoid this scenario when finding the appropriate source mount to
copy stop the walk up the mnt_master chain when the first source mount
is encountered.

Further rewrite the walk up the last_source mnt_master chain so that
it is clear what is going on.

The reason why the first source mount is special is that it it's
mnt_parent is not a mount in the dest_mnt propagation tree, and as
such termination conditions based up on the dest_mnt mount propgation
tree do not make sense.

To avoid other kinds of confusion last_dest is not changed when
computing last_source.  last_dest is only used once in propagate_one
and that is above the point of the code being modified, so changing
the global variable is meaningless and confusing.

fixes: f2ebb3a921c1ca1e2ddd9242e95a1989a50c4c68 ("smarter propagate_mnt()")
Reported-by: Tycho Andersen <tycho.andersen@canonical.com>
Reviewed-by: Seth Forshee <seth.forshee@canonical.com>
Tested-by: Seth Forshee <seth.forshee@canonical.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/pnode.c | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/fs/pnode.c b/fs/pnode.c
index c524fdddc7fb..99899705b105 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -198,7 +198,7 @@ static struct mount *next_group(struct mount *m, struct mount *origin)
 
 /* all accesses are serialized by namespace_sem */
 static struct user_namespace *user_ns;
-static struct mount *last_dest, *last_source, *dest_master;
+static struct mount *last_dest, *first_source, *last_source, *dest_master;
 static struct mountpoint *mp;
 static struct hlist_head *list;
 
@@ -221,20 +221,22 @@ static int propagate_one(struct mount *m)
 		type = CL_MAKE_SHARED;
 	} else {
 		struct mount *n, *p;
+		bool done;
 		for (n = m; ; n = p) {
 			p = n->mnt_master;
-			if (p == dest_master || IS_MNT_MARKED(p)) {
-				while (last_dest->mnt_master != p) {
-					last_source = last_source->mnt_master;
-					last_dest = last_source->mnt_parent;
-				}
-				if (!peers(n, last_dest)) {
-					last_source = last_source->mnt_master;
-					last_dest = last_source->mnt_parent;
-				}
+			if (p == dest_master || IS_MNT_MARKED(p))
 				break;
-			}
 		}
+		do {
+			struct mount *parent = last_source->mnt_parent;
+			if (last_source == first_source)
+				break;
+			done = parent->mnt_master == p;
+			if (done && peers(n, parent))
+				break;
+			last_source = last_source->mnt_master;
+		} while (!done);
+
 		type = CL_SLAVE;
 		/* beginning of peer group among the slaves? */
 		if (IS_MNT_SHARED(m))
@@ -286,6 +288,7 @@ int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp,
 	 */
 	user_ns = current->nsproxy->mnt_ns->user_ns;
 	last_dest = dest_mnt;
+	first_source = source_mnt;
 	last_source = source_mnt;
 	mp = dest_mp;
 	list = tree_list;

From 303fa967e0a3cf1f9116ceb009d10e196f899142 Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Tue, 26 Apr 2016 12:15:01 +0100
Subject: [PATCH 246/424] ARM: cpuidle: Pass on arm_cpuidle_suspend()'s return
 value

commit 625fe4f8ffc1b915248558481bb94249f6bd411c upstream.

arm_cpuidle_suspend() may return -EOPNOTSUPP, or any value returned
by the cpu_ops/cpuidle_ops suspend call. arm_enter_idle_state() doesn't
update 'ret' with this value, meaning we always signal success to
cpuidle_enter_state(), causing it to update the usage counters as if we
succeeded.

Fixes: 191de17aa3c1 ("ARM64: cpuidle: Replace cpu_suspend by the common ARM/ARM64 function")
Signed-off-by: James Morse <james.morse@arm.com>
Acked-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Acked-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/cpuidle/cpuidle-arm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/cpuidle/cpuidle-arm.c b/drivers/cpuidle/cpuidle-arm.c
index 545069d5fdfb..e342565e8715 100644
--- a/drivers/cpuidle/cpuidle-arm.c
+++ b/drivers/cpuidle/cpuidle-arm.c
@@ -50,7 +50,7 @@ static int arm_enter_idle_state(struct cpuidle_device *dev,
 		 * call the CPU ops suspend protocol with idle index as a
 		 * parameter.
 		 */
-		arm_cpuidle_suspend(idx);
+		ret = arm_cpuidle_suspend(idx);
 
 		cpu_pm_exit();
 	}

From f0f21f80609c7e1da91e34face5b86547bd7401a Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Thu, 5 May 2016 13:32:34 +0530
Subject: [PATCH 247/424] ARC: Add missing io barriers to
 io{read,write}{16,32}be()

commit e5bc0478ab6cf565619224536d75ecb2aedca43b upstream.

While reviewing a different change to asm-generic/io.h Arnd spotted that
ARC ioread32 and ioread32be both of which come from asm-generic versions
are not symmetrical in terms of calling the io barriers.

generic ioread32   -> ARC readl()                  [ has barriers]
generic ioread32be -> __be32_to_cpu(__raw_readl()) [ lacks barriers]

While generic ioread32be is being remediated to call readl(), that involves
a swab32(), causing double swaps on ioread32be() on Big Endian systems.

So provide our versions of big endian IO accessors to ensure io barrier
calls while also keeping them optimal

Suggested-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arc/include/asm/io.h | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/arch/arc/include/asm/io.h b/arch/arc/include/asm/io.h
index 27b17adea50d..cb69299a492e 100644
--- a/arch/arc/include/asm/io.h
+++ b/arch/arc/include/asm/io.h
@@ -13,6 +13,15 @@
 #include <asm/byteorder.h>
 #include <asm/page.h>
 
+#ifdef CONFIG_ISA_ARCV2
+#include <asm/barrier.h>
+#define __iormb()		rmb()
+#define __iowmb()		wmb()
+#else
+#define __iormb()		do { } while (0)
+#define __iowmb()		do { } while (0)
+#endif
+
 extern void __iomem *ioremap(unsigned long physaddr, unsigned long size);
 extern void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size,
 				  unsigned long flags);
@@ -22,6 +31,15 @@ extern void iounmap(const void __iomem *addr);
 #define ioremap_wc(phy, sz)		ioremap(phy, sz)
 #define ioremap_wt(phy, sz)		ioremap(phy, sz)
 
+/*
+ * io{read,write}{16,32}be() macros
+ */
+#define ioread16be(p)		({ u16 __v = be16_to_cpu((__force __be16)__raw_readw(p)); __iormb(); __v; })
+#define ioread32be(p)		({ u32 __v = be32_to_cpu((__force __be32)__raw_readl(p)); __iormb(); __v; })
+
+#define iowrite16be(v,p)	({ __iowmb(); __raw_writew((__force u16)cpu_to_be16(v), p); })
+#define iowrite32be(v,p)	({ __iowmb(); __raw_writel((__force u32)cpu_to_be32(v), p); })
+
 /* Change struct page to physical address */
 #define page_to_phys(page)		(page_to_pfn(page) << PAGE_SHIFT)
 
@@ -99,15 +117,6 @@ static inline void __raw_writel(u32 w, volatile void __iomem *addr)
 
 }
 
-#ifdef CONFIG_ISA_ARCV2
-#include <asm/barrier.h>
-#define __iormb()		rmb()
-#define __iowmb()		wmb()
-#else
-#define __iormb()		do { } while (0)
-#define __iowmb()		do { } while (0)
-#endif
-
 /*
  * MMIO can also get buffered/optimized in micro-arch, so barriers needed
  * Based on ARM model for the typical use case

From ac8fc72dec814226cfcb96cbe3023b89cc386428 Mon Sep 17 00:00:00 2001
From: Wang YanQing <udknight@gmail.com>
Date: Thu, 5 May 2016 14:14:21 +0100
Subject: [PATCH 248/424] x86/sysfb_efi: Fix valid BAR address range check

commit c10fcb14c7afd6688c7b197a814358fecf244222 upstream.

The code for checking whether a BAR address range is valid will break
out of the loop when a start address of 0x0 is encountered.

This behaviour is wrong since by breaking out of the loop we may miss
the BAR that describes the EFI frame buffer in a later iteration.

Because of this bug I can't use video=efifb: boot parameter to get
efifb on my new ThinkPad E550 for my old linux system hard disk with
3.10 kernel. In 3.10, efifb is the only choice due to DRM/I915 not
supporting the GPU.

This patch also add a trivial optimization to break out after we find
the frame buffer address range without testing later BARs.

Signed-off-by: Wang YanQing <udknight@gmail.com>
[ Rewrote changelog. ]
Signed-off-by: Matt Fleming <matt@codeblueprint.co.uk>
Reviewed-by: Peter Jones <pjones@redhat.com>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: David Herrmann <dh.herrmann@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tomi Valkeinen <tomi.valkeinen@ti.com>
Cc: linux-efi@vger.kernel.org
Link: http://lkml.kernel.org/r/1462454061-21561-2-git-send-email-matt@codeblueprint.co.uk
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/sysfb_efi.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/sysfb_efi.c b/arch/x86/kernel/sysfb_efi.c
index b285d4e8c68e..5da924bbf0a0 100644
--- a/arch/x86/kernel/sysfb_efi.c
+++ b/arch/x86/kernel/sysfb_efi.c
@@ -106,14 +106,24 @@ static int __init efifb_set_system(const struct dmi_system_id *id)
 					continue;
 				for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
 					resource_size_t start, end;
+					unsigned long flags;
+
+					flags = pci_resource_flags(dev, i);
+					if (!(flags & IORESOURCE_MEM))
+						continue;
+
+					if (flags & IORESOURCE_UNSET)
+						continue;
+
+					if (pci_resource_len(dev, i) == 0)
+						continue;
 
 					start = pci_resource_start(dev, i);
-					if (start == 0)
-						break;
 					end = pci_resource_end(dev, i);
 					if (screen_info.lfb_base >= start &&
 					    screen_info.lfb_base < end) {
 						found_bar = 1;
+						break;
 					}
 				}
 			}

From ee3e27f14e40bc3c95a175af482d6bbf35ab78bc Mon Sep 17 00:00:00 2001
From: Prarit Bhargava <prarit@redhat.com>
Date: Wed, 4 May 2016 13:48:56 +0800
Subject: [PATCH 249/424] ACPICA: Dispatcher: Update thread ID for recursive
 method calls

commit 93d68841a23a5779cef6fb9aa0ef32e7c5bd00da upstream.

ACPICA commit 7a3bd2d962f221809f25ddb826c9e551b916eb25

Set the mutex owner thread ID.
Original patch from: Prarit Bhargava <prarit@redhat.com>

Link: https://bugzilla.kernel.org/show_bug.cgi?id=115121
Link: https://github.com/acpica/acpica/commit/7a3bd2d9
Signed-off-by: Prarit Bhargava <prarit@redhat.com>
Tested-by: Andy Lutomirski <luto@kernel.org> # On a Dell XPS 13 9350
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Lv Zheng <lv.zheng@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/acpi/acpica/dsmethod.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/acpi/acpica/dsmethod.c b/drivers/acpi/acpica/dsmethod.c
index bc32f3194afe..28c50c6b5f45 100644
--- a/drivers/acpi/acpica/dsmethod.c
+++ b/drivers/acpi/acpica/dsmethod.c
@@ -417,6 +417,9 @@ acpi_ds_begin_method_execution(struct acpi_namespace_node *method_node,
 				obj_desc->method.mutex->mutex.
 				    original_sync_level =
 				    obj_desc->method.mutex->mutex.sync_level;
+
+				obj_desc->method.mutex->mutex.thread_id =
+				    acpi_os_get_thread_id();
 			}
 		}
 

From beac678d0908ee0a14200e1412f98a89b765c0aa Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Sat, 30 Apr 2016 08:29:27 +1000
Subject: [PATCH 250/424] powerpc: Fix bad inline asm constraint in
 create_zero_mask()

commit b4c112114aab9aff5ed4568ca5e662bb02cdfe74 upstream.

In create_zero_mask() we have:

	addi	%1,%2,-1
	andc	%1,%1,%2
	popcntd	%0,%1

using the "r" constraint for %2. r0 is a valid register in the "r" set,
but addi X,r0,X turns it into an li:

	li	r7,-1
	andc	r7,r7,r0
	popcntd	r4,r7

Fix this by using the "b" constraint, for which r0 is not a valid
register.

This was found with a kernel build using gcc trunk, narrowed down to
when -frename-registers was enabled at -O2. It is just luck however
that we aren't seeing this on older toolchains.

Thanks to Segher for working with me to find this issue.

Fixes: d0cebfa650a0 ("powerpc: word-at-a-time optimization for 64-bit Little Endian")
Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/include/asm/word-at-a-time.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/word-at-a-time.h b/arch/powerpc/include/asm/word-at-a-time.h
index e4396a7d0f7c..4afe66aa1400 100644
--- a/arch/powerpc/include/asm/word-at-a-time.h
+++ b/arch/powerpc/include/asm/word-at-a-time.h
@@ -82,7 +82,7 @@ static inline unsigned long create_zero_mask(unsigned long bits)
 	    "andc	%1,%1,%2\n\t"
 	    "popcntd	%0,%1"
 		: "=r" (leading_zero_bits), "=&r" (trailing_zero_bit_mask)
-		: "r" (bits));
+		: "b" (bits));
 
 	return leading_zero_bits;
 }

From a7fa0a478a625039ef0852e5606d1248cba093e4 Mon Sep 17 00:00:00 2001
From: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Date: Fri, 1 Apr 2016 08:52:56 +0100
Subject: [PATCH 251/424] libahci: save port map for forced port map

commit 2fd0f46cb1b82587c7ae4a616d69057fb9bd0af7 upstream.

In usecases where force_port_map is used saved_port_map is never set,
resulting in not programming the PORTS_IMPL register as part of initial
config. This patch fixes this by setting it to port_map even in case
where force_port_map is used, making it more inline with other parts of
the code.

Fixes: 566d1827df2e ("libata: disable forced PORTS_IMPL for >= AHCI 1.3")
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Acked-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Andy Gross <andy.gross@linaro.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/ata/libahci.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c
index 998c6a85ad89..9628fa131757 100644
--- a/drivers/ata/libahci.c
+++ b/drivers/ata/libahci.c
@@ -467,6 +467,7 @@ void ahci_save_initial_config(struct device *dev, struct ahci_host_priv *hpriv)
 		dev_info(dev, "forcing port_map 0x%x -> 0x%x\n",
 			 port_map, hpriv->force_port_map);
 		port_map = hpriv->force_port_map;
+		hpriv->saved_port_map = port_map;
 	}
 
 	if (hpriv->mask_port_map) {

From 6e337a05df8adfc54540ca2a2b9d621836697796 Mon Sep 17 00:00:00 2001
From: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Date: Fri, 1 Apr 2016 08:52:57 +0100
Subject: [PATCH 252/424] ata: ahci-platform: Add ports-implemented DT
 bindings.

commit 17dcc37e3e847bc0e67a5b1ec52471fcc6c18682 upstream.

On some SOCs PORTS_IMPL register value is never programmed by the
firmware and left at zero value. Which means that no sata ports are
available for software. AHCI driver used to cope up with this by
fabricating the port_map if the PORTS_IMPL register is read zero,
but recent patch broke this workaround as zero value was valid for
NVMe disks.

This patch adds ports-implemented DT bindings as workaround for this issue
in a way that DT can can override the PORTS_IMPL register in cases where
the firmware did not program it already.

Fixes: 566d1827df2e ("libata: disable forced PORTS_IMPL for >= AHCI 1.3")
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Acked-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Andy Gross <andy.gross@linaro.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/devicetree/bindings/ata/ahci-platform.txt | 4 ++++
 drivers/ata/ahci_platform.c                             | 3 +++
 2 files changed, 7 insertions(+)

diff --git a/Documentation/devicetree/bindings/ata/ahci-platform.txt b/Documentation/devicetree/bindings/ata/ahci-platform.txt
index c2340eeeb97f..c000832a7fb9 100644
--- a/Documentation/devicetree/bindings/ata/ahci-platform.txt
+++ b/Documentation/devicetree/bindings/ata/ahci-platform.txt
@@ -30,6 +30,10 @@ Optional properties:
 - target-supply     : regulator for SATA target power
 - phys              : reference to the SATA PHY node
 - phy-names         : must be "sata-phy"
+- ports-implemented : Mask that indicates which ports that the HBA supports
+		      are available for software to use. Useful if PORTS_IMPL
+		      is not programmed by the BIOS, which is true with
+		      some embedded SOC's.
 
 Required properties when using sub-nodes:
 - #address-cells    : number of cells to encode an address
diff --git a/drivers/ata/ahci_platform.c b/drivers/ata/ahci_platform.c
index 04975b851c23..639adb1f8abd 100644
--- a/drivers/ata/ahci_platform.c
+++ b/drivers/ata/ahci_platform.c
@@ -51,6 +51,9 @@ static int ahci_probe(struct platform_device *pdev)
 	if (rc)
 		return rc;
 
+	of_property_read_u32(dev->of_node,
+			     "ports-implemented", &hpriv->force_port_map);
+
 	if (of_device_is_compatible(dev->of_node, "hisilicon,hisi-ahci"))
 		hpriv->flags |= AHCI_HFLAG_NO_FBS | AHCI_HFLAG_NO_NCQ;
 

From c8f8a515ae418498e73fff210d0b0c23e2193e6a Mon Sep 17 00:00:00 2001
From: Mike Manning <michael@bsch.com.au>
Date: Mon, 18 Apr 2016 12:13:23 +0000
Subject: [PATCH 253/424] USB: serial: cp210x: add ID for Link ECU

commit 1d377f4d690637a0121eac8701f84a0aa1e69a69 upstream.

The Link ECU is an aftermarket ECU computer for vehicles that provides
full tuning abilities as well as datalogging and displaying capabilities
via the USB to Serial adapter built into the device.

Signed-off-by: Mike Manning <michael@bsch.com.au>
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/cp210x.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index bdc0f2f24f19..7f45d00bf2ff 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -140,6 +140,8 @@ static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE(0x10C4, 0xF004) }, /* Elan Digital Systems USBcount50 */
 	{ USB_DEVICE(0x10C5, 0xEA61) }, /* Silicon Labs MobiData GPRS USB Modem */
 	{ USB_DEVICE(0x10CE, 0xEA6A) }, /* Silicon Labs MobiData GPRS USB Modem 100EU */
+	{ USB_DEVICE(0x12B8, 0xEC60) }, /* Link G4 ECU */
+	{ USB_DEVICE(0x12B8, 0xEC62) }, /* Link G4+ ECU */
 	{ USB_DEVICE(0x13AD, 0x9999) }, /* Baltech card reader */
 	{ USB_DEVICE(0x1555, 0x0004) }, /* Owen AC4 USB-RS485 Converter */
 	{ USB_DEVICE(0x166A, 0x0201) }, /* Clipsal 5500PACA C-Bus Pascal Automation Controller */

From e5dd50f5729d6c94a0732fdfacac6ad7a1c0eb64 Mon Sep 17 00:00:00 2001
From: Jasem Mutlaq <mutlaqja@ikarustech.com>
Date: Tue, 19 Apr 2016 10:38:27 +0300
Subject: [PATCH 254/424] USB: serial: cp210x: add Straizona Focusers device
 ids

commit 613ac23a46e10d4d4339febdd534fafadd68e059 upstream.

Adding VID:PID for Straizona Focusers to cp210x driver.

Signed-off-by: Jasem Mutlaq <mutlaqja@ikarustech.com>
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/cp210x.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index 7f45d00bf2ff..a2b43a6e7fa7 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -108,6 +108,7 @@ static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE(0x10C4, 0x826B) }, /* Cygnal Integrated Products, Inc., Fasttrax GPS demonstration module */
 	{ USB_DEVICE(0x10C4, 0x8281) }, /* Nanotec Plug & Drive */
 	{ USB_DEVICE(0x10C4, 0x8293) }, /* Telegesis ETRX2USB */
+	{ USB_DEVICE(0x10C4, 0x82F4) }, /* Starizona MicroTouch */
 	{ USB_DEVICE(0x10C4, 0x82F9) }, /* Procyon AVS */
 	{ USB_DEVICE(0x10C4, 0x8341) }, /* Siemens MC35PU GPRS Modem */
 	{ USB_DEVICE(0x10C4, 0x8382) }, /* Cygnal Integrated Products, Inc. */
@@ -117,6 +118,7 @@ static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE(0x10C4, 0x8418) }, /* IRZ Automation Teleport SG-10 GSM/GPRS Modem */
 	{ USB_DEVICE(0x10C4, 0x846E) }, /* BEI USB Sensor Interface (VCP) */
 	{ USB_DEVICE(0x10C4, 0x8477) }, /* Balluff RFID */
+	{ USB_DEVICE(0x10C4, 0x84B6) }, /* Starizona Hyperion */
 	{ USB_DEVICE(0x10C4, 0x85EA) }, /* AC-Services IBUS-IF */
 	{ USB_DEVICE(0x10C4, 0x85EB) }, /* AC-Services CIS-IBUS */
 	{ USB_DEVICE(0x10C4, 0x85F8) }, /* Virtenio Preon32 */

From 6e9544fb236325423d5066ebdeed577fb92be315 Mon Sep 17 00:00:00 2001
From: Stanislav Meduna <stano@meduna.org>
Date: Mon, 2 May 2016 16:05:11 +0100
Subject: [PATCH 255/424] nvmem: mxs-ocotp: fix buffer overflow in read

commit d1306eb675ad7a9a760b6b8e8e189824b8db89e7 upstream.

This patch fixes the issue where the mxs_ocotp_read is reading
the ocotp in reg_size steps but decrements the remaining size
by 1. The number of iterations is thus four times higher,
overwriting the area behind the output buffer.

Fixes: c01e9a11ab6f ("nvmem: add driver for ocotp in i.MX23 and i.MX28")
Tested-by: Stefan Wahren <stefan.wahren@i2se.com>
Signed-off-by: Stanislav Meduna <stano@meduna.org>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/nvmem/mxs-ocotp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/nvmem/mxs-ocotp.c b/drivers/nvmem/mxs-ocotp.c
index 8ba19bba3156..2bb3c5799ac4 100644
--- a/drivers/nvmem/mxs-ocotp.c
+++ b/drivers/nvmem/mxs-ocotp.c
@@ -94,7 +94,7 @@ static int mxs_ocotp_read(void *context, const void *reg, size_t reg_size,
 	if (ret)
 		goto close_banks;
 
-	while (val_size) {
+	while (val_size >= reg_size) {
 		if ((offset < OCOTP_DATA_OFFSET) || (offset % 16)) {
 			/* fill up non-data register */
 			*buf = 0;
@@ -103,7 +103,7 @@ static int mxs_ocotp_read(void *context, const void *reg, size_t reg_size,
 		}
 
 		buf++;
-		val_size--;
+		val_size -= reg_size;
 		offset += reg_size;
 	}
 

From c04e6e9730e5613ae2d5bd75ead2493eee0dabde Mon Sep 17 00:00:00 2001
From: Philipp Zabel <p.zabel@pengutronix.de>
Date: Wed, 27 Apr 2016 10:17:51 +0200
Subject: [PATCH 256/424] gpu: ipu-v3: Fix imx-ipuv3-crtc module autoloading

commit 503fe87bd0a8346ba9d8b7f49115dcd0a4185226 upstream.

If of_node is set before calling platform_device_add, the driver core
will try to use of: modalias matching, which fails because the device
tree nodes don't have a compatible property set. This patch fixes
imx-ipuv3-crtc module autoloading by setting the of_node property only
after the platform modalias is set.

Fixes: 304e6be652e2 ("gpu: ipu-v3: Assign of_node of child platform devices to corresponding ports")
Reported-by: Dennis Gilmore <dennis@ausil.us>
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
Tested-By: Dennis Gilmore <dennis@ausil.us>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/ipu-v3/ipu-common.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c
index a0e28f3a278d..0585fd2031dd 100644
--- a/drivers/gpu/ipu-v3/ipu-common.c
+++ b/drivers/gpu/ipu-v3/ipu-common.c
@@ -1068,7 +1068,6 @@ static int ipu_add_client_devices(struct ipu_soc *ipu, unsigned long ipu_base)
 			goto err_register;
 		}
 
-		pdev->dev.of_node = of_node;
 		pdev->dev.parent = dev;
 
 		ret = platform_device_add_data(pdev, &reg->pdata,
@@ -1079,6 +1078,12 @@ static int ipu_add_client_devices(struct ipu_soc *ipu, unsigned long ipu_base)
 			platform_device_put(pdev);
 			goto err_register;
 		}
+
+		/*
+		 * Set of_node only after calling platform_device_add. Otherwise
+		 * the platform:imx-ipuv3-crtc modalias won't be used.
+		 */
+		pdev->dev.of_node = of_node;
 	}
 
 	return 0;

From 3d2ef4c1a725f185db7c25d186567f207813e74d Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Mon, 2 May 2016 18:54:39 -0400
Subject: [PATCH 257/424] drm/amdgpu: make sure vertical front porch is at
 least 1

commit 0126d4b9a516256f2432ca0dc78ab293a8255378 upstream.

hw doesn't like a 0 value.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/amd/amdgpu/atombios_encoders.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
index 1e0bba29e167..1cd6de575305 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
@@ -298,6 +298,10 @@ bool amdgpu_atombios_encoder_mode_fixup(struct drm_encoder *encoder,
 	    && (mode->crtc_vsync_start < (mode->crtc_vdisplay + 2)))
 		adjusted_mode->crtc_vsync_start = adjusted_mode->crtc_vdisplay + 2;
 
+	/* vertical FP must be at least 1 */
+	if (mode->crtc_vsync_start == mode->crtc_vdisplay)
+		adjusted_mode->crtc_vsync_start++;
+
 	/* get the native mode for scaling */
 	if (amdgpu_encoder->active_device & (ATOM_DEVICE_LCD_SUPPORT))
 		amdgpu_panel_mode_fixup(encoder, adjusted_mode);

From a71718ded5b74876097822f31416e6210795879a Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Tue, 3 May 2016 12:44:29 +1000
Subject: [PATCH 258/424] drm/amdgpu: set metadata pointer to NULL after
 freeing.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 0092d3edcb23fcdb8cbe4159ba94a534290ff982 upstream.

Without this there was a double free of the metadata,
which ended up freeing the fd table for me here, and taking
out the machine more often than not.

I reproduced with X.org + modesetting DDX + latest llvm/mesa,
also required using dri3.

Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index b8fbbd7699e4..73628c7599e7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -540,6 +540,7 @@ int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, void *metadata,
 	if (!metadata_size) {
 		if (bo->metadata_size) {
 			kfree(bo->metadata);
+			bo->metadata = NULL;
 			bo->metadata_size = 0;
 		}
 		return 0;

From d3cd04a8a94ab3fc02eef4f861aac0f494b2366e Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Date: Mon, 4 Apr 2016 14:54:59 +0900
Subject: [PATCH 259/424] iio: ak8975: Fix NULL pointer exception on early
 interrupt

commit 07d2390e36ee5b3265e9cc8305f2a106c8721e16 upstream.

In certain probe conditions the interrupt came right after registering
the handler causing a NULL pointer exception because of uninitialized
waitqueue:

$ udevadm trigger
i2c-gpio i2c-gpio-1: using pins 143 (SDA) and 144 (SCL)
i2c-gpio i2c-gpio-3: using pins 53 (SDA) and 52 (SCL)
Unable to handle kernel NULL pointer dereference at virtual address 00000000
pgd = e8b38000
[00000000] *pgd=00000000
Internal error: Oops: 5 [#1] SMP ARM
Modules linked in: snd_soc_i2s(+) i2c_gpio(+) snd_soc_idma snd_soc_s3c_dma snd_soc_core snd_pcm_dmaengine snd_pcm snd_timer snd soundcore ac97_bus spi_s3c64xx pwm_samsung dwc2 exynos_adc phy_exynos_usb2 exynosdrm exynos_rng rng_core rtc_s3c
CPU: 0 PID: 717 Comm: data-provider-m Not tainted 4.6.0-rc1-next-20160401-00011-g1b8d87473b9e-dirty #101
Hardware name: SAMSUNG EXYNOS (Flattened Device Tree)
(...)
(__wake_up_common) from [<c0379624>] (__wake_up+0x38/0x4c)
(__wake_up) from [<c0a41d30>] (ak8975_irq_handler+0x28/0x30)
(ak8975_irq_handler) from [<c0386720>] (handle_irq_event_percpu+0x88/0x140)
(handle_irq_event_percpu) from [<c038681c>] (handle_irq_event+0x44/0x68)
(handle_irq_event) from [<c0389c40>] (handle_edge_irq+0xf0/0x19c)
(handle_edge_irq) from [<c0385e04>] (generic_handle_irq+0x24/0x34)
(generic_handle_irq) from [<c05ee360>] (exynos_eint_gpio_irq+0x50/0x68)
(exynos_eint_gpio_irq) from [<c0386720>] (handle_irq_event_percpu+0x88/0x140)
(handle_irq_event_percpu) from [<c038681c>] (handle_irq_event+0x44/0x68)
(handle_irq_event) from [<c0389a70>] (handle_fasteoi_irq+0xb4/0x194)
(handle_fasteoi_irq) from [<c0385e04>] (generic_handle_irq+0x24/0x34)
(generic_handle_irq) from [<c03860b4>] (__handle_domain_irq+0x5c/0xb4)
(__handle_domain_irq) from [<c0301774>] (gic_handle_irq+0x54/0x94)
(gic_handle_irq) from [<c030c910>] (__irq_usr+0x50/0x80)

The bug was reproduced on exynos4412-trats2 (with a max77693 device also
using i2c-gpio) after building max77693 as a module.

Fixes: 94a6d5cf7caa ("iio:ak8975 Implement data ready interrupt handling")
Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Tested-by: Gregor Boirie <gregor.boirie@parrot.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/iio/magnetometer/ak8975.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/iio/magnetometer/ak8975.c b/drivers/iio/magnetometer/ak8975.c
index b13936dacc78..fd780bbcd07e 100644
--- a/drivers/iio/magnetometer/ak8975.c
+++ b/drivers/iio/magnetometer/ak8975.c
@@ -462,6 +462,8 @@ static int ak8975_setup_irq(struct ak8975_data *data)
 	int rc;
 	int irq;
 
+	init_waitqueue_head(&data->data_ready_queue);
+	clear_bit(0, &data->flags);
 	if (client->irq)
 		irq = client->irq;
 	else
@@ -477,8 +479,6 @@ static int ak8975_setup_irq(struct ak8975_data *data)
 		return rc;
 	}
 
-	init_waitqueue_head(&data->data_ready_queue);
-	clear_bit(0, &data->flags);
 	data->eoc_irq = irq;
 
 	return rc;

From 0f5c3afc750715fb644d9b234a7b05afb11dfe54 Mon Sep 17 00:00:00 2001
From: Richard Leitner <dev@g0hl1n.net>
Date: Tue, 5 Apr 2016 15:03:48 +0200
Subject: [PATCH 260/424] iio: ak8975: fix maybe-uninitialized warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 05be8d4101d960bad271d32b4f6096af1ccb1534 upstream.

If i2c_device_id *id is NULL and acpi_match_device returns NULL too,
then chipset may be unitialized when accessing &ak_def_array[chipset] in
ak8975_probe. Therefore initialize chipset to AK_MAX_TYPE, which will
return an error when not changed.

This patch fixes the following maybe-uninitialized warning:

drivers/iio/magnetometer/ak8975.c: In function ‘ak8975_probe’:
drivers/iio/magnetometer/ak8975.c:788:14: warning: ‘chipset’ may be used
uninitialized in this function [-Wmaybe-uninitialized]
  data->def = &ak_def_array[chipset];

Signed-off-by: Richard Leitner <dev@g0hl1n.net>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/iio/magnetometer/ak8975.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iio/magnetometer/ak8975.c b/drivers/iio/magnetometer/ak8975.c
index fd780bbcd07e..f2a7f72f7aa6 100644
--- a/drivers/iio/magnetometer/ak8975.c
+++ b/drivers/iio/magnetometer/ak8975.c
@@ -732,7 +732,7 @@ static int ak8975_probe(struct i2c_client *client,
 	int eoc_gpio;
 	int err;
 	const char *name = NULL;
-	enum asahi_compass_chipset chipset;
+	enum asahi_compass_chipset chipset = AK_MAX_TYPE;
 
 	/* Grab and set up the supplied GPIO. */
 	if (client->dev.platform_data)

From 6b5f7a680d9804f0f441229ce1278efe6f22f8a5 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Mon, 2 May 2016 18:53:27 -0400
Subject: [PATCH 261/424] drm/radeon: make sure vertical front porch is at
 least 1

commit 3104b8128d4d646a574ed9d5b17c7d10752cd70b upstream.

hw doesn't like a 0 value.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/radeon/atombios_encoders.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c
index adf74f4366bb..0b04b9282f56 100644
--- a/drivers/gpu/drm/radeon/atombios_encoders.c
+++ b/drivers/gpu/drm/radeon/atombios_encoders.c
@@ -310,6 +310,10 @@ static bool radeon_atom_mode_fixup(struct drm_encoder *encoder,
 	    && (mode->crtc_vsync_start < (mode->crtc_vdisplay + 2)))
 		adjusted_mode->crtc_vsync_start = adjusted_mode->crtc_vdisplay + 2;
 
+	/* vertical FP must be at least 1 */
+	if (mode->crtc_vsync_start == mode->crtc_vdisplay)
+		adjusted_mode->crtc_vsync_start++;
+
 	/* get the native mode for scaling */
 	if (radeon_encoder->active_device & (ATOM_DEVICE_LCD_SUPPORT)) {
 		radeon_panel_mode_fixup(encoder, adjusted_mode);

From cf26f675dbd9369a2f28555a6d241208cdc71c6e Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Mon, 18 Apr 2016 10:04:21 +0300
Subject: [PATCH 262/424] drm/i915/ddi: Fix eDP VDD handling during booting and
 suspend/resume
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 5eaa60c7109b40f17ac81090bc8b90482da76cd1 upstream.

The driver's VDD on/off logic assumes that whenever the VDD is on we
also hold an AUX power domain reference. Since BIOS can leave the VDD on
during booting and resuming and on DDI platforms we won't take a
corresponding power reference, the above assumption won't hold on those
platforms and an eventual delayed VDD off work will do an extraneous AUX
power domain put resulting in a refcount underflow. Fix this the same
way we did this for non-DDI DP encoders:

commit 6d93c0c41760c0 ("drm/i915: fix VDD state tracking after system
resume")

At the same time call the DP encoder suspend handler the same way as the
non-DDI DP encoders do to flush any pending VDD off work. Leaving the
work running may cause a HW access where we don't expect this (at a point
where power domains are suspended already).

While at it remove an unnecessary function call indirection.

This fixed for me AUX refcount underflow problems on BXT during
suspend/resume.

CC: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1460963062-13211-4-git-send-email-imre.deak@intel.com
(cherry picked from commit bf93ba67e9c05882f05b7ca2d773cfc8bf462c2a)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/i915/intel_ddi.c | 10 +++-------
 drivers/gpu/drm/i915/intel_dp.c  |  4 ++--
 drivers/gpu/drm/i915/intel_drv.h |  2 ++
 3 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c
index 7e6158b889da..241252de7186 100644
--- a/drivers/gpu/drm/i915/intel_ddi.c
+++ b/drivers/gpu/drm/i915/intel_ddi.c
@@ -3188,12 +3188,6 @@ void intel_ddi_get_config(struct intel_encoder *encoder,
 	intel_ddi_clock_get(encoder, pipe_config);
 }
 
-static void intel_ddi_destroy(struct drm_encoder *encoder)
-{
-	/* HDMI has nothing special to destroy, so we can go with this. */
-	intel_dp_encoder_destroy(encoder);
-}
-
 static bool intel_ddi_compute_config(struct intel_encoder *encoder,
 				     struct intel_crtc_state *pipe_config)
 {
@@ -3212,7 +3206,8 @@ static bool intel_ddi_compute_config(struct intel_encoder *encoder,
 }
 
 static const struct drm_encoder_funcs intel_ddi_funcs = {
-	.destroy = intel_ddi_destroy,
+	.reset = intel_dp_encoder_reset,
+	.destroy = intel_dp_encoder_destroy,
 };
 
 static struct intel_connector *
@@ -3284,6 +3279,7 @@ void intel_ddi_init(struct drm_device *dev, enum port port)
 	intel_encoder->post_disable = intel_ddi_post_disable;
 	intel_encoder->get_hw_state = intel_ddi_get_hw_state;
 	intel_encoder->get_config = intel_ddi_get_config;
+	intel_encoder->suspend = intel_dp_encoder_suspend;
 
 	intel_dig_port->port = port;
 	intel_dig_port->saved_port_bits = I915_READ(DDI_BUF_CTL(port)) &
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 78b8ec84d576..e55a82a99e7f 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -5035,7 +5035,7 @@ void intel_dp_encoder_destroy(struct drm_encoder *encoder)
 	kfree(intel_dig_port);
 }
 
-static void intel_dp_encoder_suspend(struct intel_encoder *intel_encoder)
+void intel_dp_encoder_suspend(struct intel_encoder *intel_encoder)
 {
 	struct intel_dp *intel_dp = enc_to_intel_dp(&intel_encoder->base);
 
@@ -5077,7 +5077,7 @@ static void intel_edp_panel_vdd_sanitize(struct intel_dp *intel_dp)
 	edp_panel_vdd_schedule_off(intel_dp);
 }
 
-static void intel_dp_encoder_reset(struct drm_encoder *encoder)
+void intel_dp_encoder_reset(struct drm_encoder *encoder)
 {
 	struct intel_dp *intel_dp;
 
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 0d00f07b7163..f34a219ec5c4 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1204,6 +1204,8 @@ void intel_dp_set_link_params(struct intel_dp *intel_dp,
 void intel_dp_start_link_train(struct intel_dp *intel_dp);
 void intel_dp_stop_link_train(struct intel_dp *intel_dp);
 void intel_dp_sink_dpms(struct intel_dp *intel_dp, int mode);
+void intel_dp_encoder_reset(struct drm_encoder *encoder);
+void intel_dp_encoder_suspend(struct intel_encoder *intel_encoder);
 void intel_dp_encoder_destroy(struct drm_encoder *encoder);
 int intel_dp_sink_crc(struct intel_dp *intel_dp, u8 *crc);
 bool intel_dp_compute_config(struct intel_encoder *encoder,

From fa26a3c6c25bceed402055e06c7e0a2e4e13ebe5 Mon Sep 17 00:00:00 2001
From: Mika Kahola <mika.kahola@intel.com>
Date: Wed, 20 Apr 2016 15:39:02 +0300
Subject: [PATCH 263/424] drm/i915: Fix eDP low vswing for Broadwell
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 992e7a41f9fcc7bcd10e7d346aee5ed7a2c241cb upstream.

It was noticed on bug #94087 that module parameter
i915.edp_vswing=2 that should override the VBT setting
to use default voltage swing (400 mV) was not applied
for Broadwell.

This patch provides a fix for this by checking if default
i.e. higher voltage swing is requested to be used and
applies the DDI translations table for DP instead of eDP
(low vswing) table.

v2: Combine two if statements into one (Jani)
v3: Change dev_priv->edp_low_vswing to use dev_priv->vbt.edp.low_vswing

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=94087
Signed-off-by: Mika Kahola <mika.kahola@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1461155942-7749-1-git-send-email-mika.kahola@intel.com
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
(cherry picked from commit 00983519214b61c1b9371ec2ed55a4dde773e384)
[Jani: s/dev_priv->vbt.edp.low_vswing/dev_priv->edp_low_vswing/ to backport]
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/i915/intel_ddi.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c
index 241252de7186..3c6b07683bd9 100644
--- a/drivers/gpu/drm/i915/intel_ddi.c
+++ b/drivers/gpu/drm/i915/intel_ddi.c
@@ -464,9 +464,17 @@ static void intel_prepare_ddi_buffers(struct drm_device *dev, enum port port,
 	} else if (IS_BROADWELL(dev)) {
 		ddi_translations_fdi = bdw_ddi_translations_fdi;
 		ddi_translations_dp = bdw_ddi_translations_dp;
-		ddi_translations_edp = bdw_ddi_translations_edp;
+
+		if (dev_priv->edp_low_vswing) {
+			ddi_translations_edp = bdw_ddi_translations_edp;
+			n_edp_entries = ARRAY_SIZE(bdw_ddi_translations_edp);
+		} else {
+			ddi_translations_edp = bdw_ddi_translations_dp;
+			n_edp_entries = ARRAY_SIZE(bdw_ddi_translations_dp);
+		}
+
 		ddi_translations_hdmi = bdw_ddi_translations_hdmi;
-		n_edp_entries = ARRAY_SIZE(bdw_ddi_translations_edp);
+
 		n_dp_entries = ARRAY_SIZE(bdw_ddi_translations_dp);
 		n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi);
 		hdmi_default_entry = 7;

From bc631165a1b6583b3e96404fec4ddc8efb2f4392 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Wed, 20 Apr 2016 16:43:56 +0300
Subject: [PATCH 264/424] drm/i915: Make RPS EI/thresholds multiple of 25 on
 SNB-BDW
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 4ea3959018d09edfa36a9e7b5ccdbd4ec4b99e49 upstream.

Somehow my SNB GT1 (Dell XPS 8300) gets very unhappy around
GPU hangs if the RPS EI/thresholds aren't suitably aligned.
It seems like scheduling/timer interupts stop working somehow
and things get stuck eg. in usleep_range().

I bisected the problem down to
commit 8a5864377b12 ("drm/i915/skl: Restructured the gen6_set_rps_thresholds function")
I observed that before all the values were at least multiples of 25,
but afterwards they are not. And rounding things up to the next multiple
of 25 does seem to help, so lets' do that. I also tried roundup(..., 5)
but that wasn't sufficient. Also I have no idea if we might need this sort of
thing on gen9+ as well.

These are the original EI/thresholds:
 LOW_POWER
  GEN6_RP_UP_EI          12500
  GEN6_RP_UP_THRESHOLD   11800
  GEN6_RP_DOWN_EI        25000
  GEN6_RP_DOWN_THRESHOLD 21250
 BETWEEN
  GEN6_RP_UP_EI          10250
  GEN6_RP_UP_THRESHOLD    9225
  GEN6_RP_DOWN_EI        25000
  GEN6_RP_DOWN_THRESHOLD 18750
 HIGH_POWER
  GEN6_RP_UP_EI           8000
  GEN6_RP_UP_THRESHOLD    6800
  GEN6_RP_DOWN_EI        25000
  GEN6_RP_DOWN_THRESHOLD 15000

These are after 8a5864377b12:
 LOW_POWER
  GEN6_RP_UP_EI          12500
  GEN6_RP_UP_THRESHOLD   11875
  GEN6_RP_DOWN_EI        25000
  GEN6_RP_DOWN_THRESHOLD 21250
 BETWEEN
  GEN6_RP_UP_EI          10156
  GEN6_RP_UP_THRESHOLD    9140
  GEN6_RP_DOWN_EI        25000
  GEN6_RP_DOWN_THRESHOLD 18750
 HIGH_POWER
  GEN6_RP_UP_EI           7812
  GEN6_RP_UP_THRESHOLD    6640
  GEN6_RP_DOWN_EI        25000
  GEN6_RP_DOWN_THRESHOLD 15000

And these are what we have after this patch:
 LOW_POWER
  GEN6_RP_UP_EI          12500
  GEN6_RP_UP_THRESHOLD   11875
  GEN6_RP_DOWN_EI        25000
  GEN6_RP_DOWN_THRESHOLD 21250
 BETWEEN
  GEN6_RP_UP_EI          10175
  GEN6_RP_UP_THRESHOLD    9150
  GEN6_RP_DOWN_EI        25000
  GEN6_RP_DOWN_THRESHOLD 18750
 HIGH_POWER
  GEN6_RP_UP_EI           7825
  GEN6_RP_UP_THRESHOLD    6650
  GEN6_RP_DOWN_EI        25000
  GEN6_RP_DOWN_THRESHOLD 15000

Cc: Akash Goel <akash.goel@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Testcase: igt/kms_pipe_crc_basic/hang-read-crc-pipe-B
Fixes: 8a5864377b12 ("drm/i915/skl: Restructured the gen6_set_rps_thresholds function")
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1461159836-9108-1-git-send-email-ville.syrjala@linux.intel.com
Acked-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Patrik Jakobsson <patrik.jakobsson@linux.intel.com>
(cherry picked from commit 8a292d016d1cc4938ff14b4df25328230b08a408)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/i915/i915_reg.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index bc7b8faba84d..7e461dca564c 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -2838,7 +2838,14 @@ enum skl_disp_power_wells {
 #define GEN6_RP_STATE_CAP	(MCHBAR_MIRROR_BASE_SNB + 0x5998)
 #define BXT_RP_STATE_CAP        0x138170
 
-#define INTERVAL_1_28_US(us)	(((us) * 100) >> 7)
+/*
+ * Make these a multiple of magic 25 to avoid SNB (eg. Dell XPS
+ * 8300) freezing up around GPU hangs. Looks as if even
+ * scheduling/timer interrupts start misbehaving if the RPS
+ * EI/thresholds are "bad", leading to a very sluggish or even
+ * frozen machine.
+ */
+#define INTERVAL_1_28_US(us)	roundup(((us) * 100) >> 7, 25)
 #define INTERVAL_1_33_US(us)	(((us) * 3)   >> 2)
 #define INTERVAL_0_833_US(us)	(((us) * 6) / 5)
 #define GT_INTERVAL_FROM_US(dev_priv, us) (IS_GEN9(dev_priv) ? \

From 8e1001c5638e244ab9a2ddddf5466b05ddf6af77 Mon Sep 17 00:00:00 2001
From: Shashank Sharma <shashank.sharma@intel.com>
Date: Thu, 21 Apr 2016 16:48:32 +0530
Subject: [PATCH 265/424] drm/i915: Fake HDMI live status
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 60b3143c7cac7e8d2ca65c0b347466c5776395d1 upstream.

This patch does the following:
- Fakes live status of HDMI as connected (even if that's not).
  While testing certain (monitor + cable) combinations with
  various intel  platforms, it seems that live status register
  doesn't work reliably on some older devices. So limit the
  live_status check for HDMI detection, only for platforms
  from gen7 onwards.

V2: restrict faking live_status to certain platforms
V3: (Ville)
   - keep the debug message for !live_status case
   - fix indentation of comment
   - remove "warning" from the debug message

    (Jani)
   - Change format of fix details in the commit message

Fixes: 237ed86c693d ("drm/i915: Check live status before reading edid")
Suggested-by: Ville Syrjala <ville.syrjala@linux.intel.com>
Signed-off-by: Shashank Sharma <shashank.sharma@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1461237606-16491-1-git-send-email-shashank.sharma@intel.com
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
(cherry picked from commit 4f4a8185011773f7520d9916c6857db946e7f9d1)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/i915/intel_hdmi.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c
index e6c035b0fc1c..4b8ed9f2dabc 100644
--- a/drivers/gpu/drm/i915/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/intel_hdmi.c
@@ -1388,8 +1388,16 @@ intel_hdmi_detect(struct drm_connector *connector, bool force)
 				hdmi_to_dig_port(intel_hdmi));
 	}
 
-	if (!live_status)
-		DRM_DEBUG_KMS("Live status not up!");
+	if (!live_status) {
+		DRM_DEBUG_KMS("HDMI live status down\n");
+		/*
+		 * Live status register is not reliable on all intel platforms.
+		 * So consider live_status only for certain platforms, for
+		 * others, read EDID to determine presence of sink.
+		 */
+		if (INTEL_INFO(dev_priv)->gen < 7 || IS_IVYBRIDGE(dev_priv))
+			live_status = true;
+	}
 
 	intel_hdmi_unset_edid(connector);
 

From dfa11d586248a21ce2c7fae02c02964c3a4a8379 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Wed, 23 Mar 2016 21:07:39 -0700
Subject: [PATCH 266/424] ACPI / processor: Request native thermal interrupt
 handling via _OSC

commit a21211672c9a1d730a39aa65d4a5b3414700adfb upstream.

There are several reports of freeze on enabling HWP (Hardware PStates)
feature on Skylake-based systems by the Intel P-states driver. The root
cause is identified as the HWP interrupts causing BIOS code to freeze.

HWP interrupts use the thermal LVT which can be handled by Linux
natively, but on the affected Skylake-based systems SMM will respond
to it by default.  This is a problem for several reasons:
 - On the affected systems the SMM thermal LVT handler is broken (it
   will crash when invoked) and a BIOS update is necessary to fix it.
 - With thermal interrupt handled in SMM we lose all of the reporting
   features of the arch/x86/kernel/cpu/mcheck/therm_throt driver.
 - Some thermal drivers like x86-package-temp depend on the thermal
   threshold interrupts signaled via the thermal LVT.
 - The HWP interrupts are useful for debugging and tuning
   performance (if the kernel can handle them).
The native handling of thermal interrupts needs to be enabled
because of that.

This requires some way to tell SMM that the OS can handle thermal
interrupts.  That can be done by using _OSC/_PDC in processor
scope very early during ACPI initialization.

The meaning of _OSC/_PDC bit 12 in processor scope is whether or
not the OS supports native handling of interrupts for Collaborative
Processor Performance Control (CPPC) notifications.  Since on
HWP-capable systems CPPC is a firmware interface to HWP, setting
this bit effectively tells the firmware that the OS will handle
thermal interrupts natively going forward.

For details on _OSC/_PDC refer to:
http://www.intel.com/content/www/us/en/standards/processor-vendor-specific-acpi-specification.html

To implement the _OSC/_PDC handshake as described, introduce a new
function, acpi_early_processor_osc(), that walks the ACPI
namespace looking for ACPI processor objects and invokes _OSC for
them with bit 12 in the capabilities buffer set and terminates the
namespace walk on the first success.

Also modify intel_thermal_interrupt() to clear HWP status bits in
the HWP_STATUS MSR to acknowledge HWP interrupts (which prevents
them from firing continuously).

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
[ rjw: Subject & changelog, function rename ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/mcheck/therm_throt.c |  3 ++
 drivers/acpi/acpi_processor.c            | 52 ++++++++++++++++++++++++
 drivers/acpi/bus.c                       |  3 ++
 drivers/acpi/internal.h                  |  6 +++
 4 files changed, 64 insertions(+)

diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 2c5aaf8c2e2f..05538582a809 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -385,6 +385,9 @@ static void intel_thermal_interrupt(void)
 {
 	__u64 msr_val;
 
+	if (static_cpu_has(X86_FEATURE_HWP))
+		wrmsrl_safe(MSR_HWP_STATUS, 0);
+
 	rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
 
 	/* Check for violation of core thermal thresholds*/
diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c
index 6979186dbd4b..9f77943653fb 100644
--- a/drivers/acpi/acpi_processor.c
+++ b/drivers/acpi/acpi_processor.c
@@ -491,6 +491,58 @@ static void acpi_processor_remove(struct acpi_device *device)
 }
 #endif /* CONFIG_ACPI_HOTPLUG_CPU */
 
+#ifdef CONFIG_X86
+static bool acpi_hwp_native_thermal_lvt_set;
+static acpi_status __init acpi_hwp_native_thermal_lvt_osc(acpi_handle handle,
+							  u32 lvl,
+							  void *context,
+							  void **rv)
+{
+	u8 sb_uuid_str[] = "4077A616-290C-47BE-9EBD-D87058713953";
+	u32 capbuf[2];
+	struct acpi_osc_context osc_context = {
+		.uuid_str = sb_uuid_str,
+		.rev = 1,
+		.cap.length = 8,
+		.cap.pointer = capbuf,
+	};
+
+	if (acpi_hwp_native_thermal_lvt_set)
+		return AE_CTRL_TERMINATE;
+
+	capbuf[0] = 0x0000;
+	capbuf[1] = 0x1000; /* set bit 12 */
+
+	if (ACPI_SUCCESS(acpi_run_osc(handle, &osc_context))) {
+		if (osc_context.ret.pointer && osc_context.ret.length > 1) {
+			u32 *capbuf_ret = osc_context.ret.pointer;
+
+			if (capbuf_ret[1] & 0x1000) {
+				acpi_handle_info(handle,
+					"_OSC native thermal LVT Acked\n");
+				acpi_hwp_native_thermal_lvt_set = true;
+			}
+		}
+		kfree(osc_context.ret.pointer);
+	}
+
+	return AE_OK;
+}
+
+void __init acpi_early_processor_osc(void)
+{
+	if (boot_cpu_has(X86_FEATURE_HWP)) {
+		acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT,
+				    ACPI_UINT32_MAX,
+				    acpi_hwp_native_thermal_lvt_osc,
+				    NULL, NULL, NULL);
+		acpi_get_devices(ACPI_PROCESSOR_DEVICE_HID,
+				 acpi_hwp_native_thermal_lvt_osc,
+				 NULL, NULL);
+	}
+}
+#endif
+
 /*
  * The following ACPI IDs are known to be suitable for representing as
  * processor devices.
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index a212cefae524..ca4f28432d87 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -1004,6 +1004,9 @@ static int __init acpi_bus_init(void)
 		goto error1;
 	}
 
+	/* Set capability bits for _OSC under processor scope */
+	acpi_early_processor_osc();
+
 	/*
 	 * _OSC method may exist in module level code,
 	 * so it must be run after ACPI_FULL_INITIALIZATION
diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index 11d87bf67e73..0f3f41c13b38 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -130,6 +130,12 @@ void acpi_early_processor_set_pdc(void);
 static inline void acpi_early_processor_set_pdc(void) {}
 #endif
 
+#ifdef CONFIG_X86
+void acpi_early_processor_osc(void);
+#else
+static inline void acpi_early_processor_osc(void) {}
+#endif
+
 /* --------------------------------------------------------------------------
                                   Embedded Controller
    -------------------------------------------------------------------------- */

From f6ff7398220d7fda0f4d02b9c9755406d8169bc2 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Tue, 2 Feb 2016 16:57:18 -0800
Subject: [PATCH 267/424] lib/test-string_helpers.c: fix and improve
 string_get_size() tests

commit 72676bb53f33fd0ef3a1484fc1ecfd306dc6ff40 upstream.

Recently added commit 564b026fbd0d ("string_helpers: fix precision loss
for some inputs") fixed precision issues for string_get_size() and broke
tests.

Fix and improve them: test both STRING_UNITS_2 and STRING_UNITS_10 at a
time, better failure reporting, test small an huge values.

Fixes: 564b026fbd0d28e9 ("string_helpers: fix precision loss for some inputs")
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
Cc: James Bottomley <JBottomley@Odin.com>
Cc: "James E.J. Bottomley" <jejb@parisc-linux.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 lib/test-string_helpers.c | 67 ++++++++++++++++++++++++++++-----------
 1 file changed, 49 insertions(+), 18 deletions(-)

diff --git a/lib/test-string_helpers.c b/lib/test-string_helpers.c
index 98866a770770..25b5cbfb7615 100644
--- a/lib/test-string_helpers.c
+++ b/lib/test-string_helpers.c
@@ -327,36 +327,67 @@ out:
 }
 
 #define string_get_size_maxbuf 16
-#define test_string_get_size_one(size, blk_size, units, exp_result)            \
+#define test_string_get_size_one(size, blk_size, exp_result10, exp_result2)    \
 	do {                                                                   \
-		BUILD_BUG_ON(sizeof(exp_result) >= string_get_size_maxbuf);    \
-		__test_string_get_size((size), (blk_size), (units),            \
-				       (exp_result));                          \
+		BUILD_BUG_ON(sizeof(exp_result10) >= string_get_size_maxbuf);  \
+		BUILD_BUG_ON(sizeof(exp_result2) >= string_get_size_maxbuf);   \
+		__test_string_get_size((size), (blk_size), (exp_result10),     \
+				       (exp_result2));                         \
 	} while (0)
 
 
-static __init void __test_string_get_size(const u64 size, const u64 blk_size,
-					  const enum string_size_units units,
-					  const char *exp_result)
+static __init void test_string_get_size_check(const char *units,
+					      const char *exp,
+					      char *res,
+					      const u64 size,
+					      const u64 blk_size)
 {
-	char buf[string_get_size_maxbuf];
-
-	string_get_size(size, blk_size, units, buf, sizeof(buf));
-	if (!memcmp(buf, exp_result, strlen(exp_result) + 1))
+	if (!memcmp(res, exp, strlen(exp) + 1))
 		return;
 
-	buf[sizeof(buf) - 1] = '\0';
-	pr_warn("Test 'test_string_get_size_one' failed!\n");
-	pr_warn("string_get_size(size = %llu, blk_size = %llu, units = %d\n",
+	res[string_get_size_maxbuf - 1] = '\0';
+
+	pr_warn("Test 'test_string_get_size' failed!\n");
+	pr_warn("string_get_size(size = %llu, blk_size = %llu, units = %s)\n",
 		size, blk_size, units);
-	pr_warn("expected: '%s', got '%s'\n", exp_result, buf);
+	pr_warn("expected: '%s', got '%s'\n", exp, res);
+}
+
+static __init void __test_string_get_size(const u64 size, const u64 blk_size,
+					  const char *exp_result10,
+					  const char *exp_result2)
+{
+	char buf10[string_get_size_maxbuf];
+	char buf2[string_get_size_maxbuf];
+
+	string_get_size(size, blk_size, STRING_UNITS_10, buf10, sizeof(buf10));
+	string_get_size(size, blk_size, STRING_UNITS_2, buf2, sizeof(buf2));
+
+	test_string_get_size_check("STRING_UNITS_10", exp_result10, buf10,
+				   size, blk_size);
+
+	test_string_get_size_check("STRING_UNITS_2", exp_result2, buf2,
+				   size, blk_size);
 }
 
 static __init void test_string_get_size(void)
 {
-	test_string_get_size_one(16384, 512, STRING_UNITS_2, "8.00 MiB");
-	test_string_get_size_one(8192, 4096, STRING_UNITS_10, "32.7 MB");
-	test_string_get_size_one(1, 512, STRING_UNITS_10, "512 B");
+	/* small values */
+	test_string_get_size_one(0, 512, "0 B", "0 B");
+	test_string_get_size_one(1, 512, "512 B", "512 B");
+	test_string_get_size_one(1100, 1, "1.10 kB", "1.07 KiB");
+
+	/* normal values */
+	test_string_get_size_one(16384, 512, "8.39 MB", "8.00 MiB");
+	test_string_get_size_one(500118192, 512, "256 GB", "238 GiB");
+	test_string_get_size_one(8192, 4096, "33.6 MB", "32.0 MiB");
+
+	/* weird block sizes */
+	test_string_get_size_one(3000, 1900, "5.70 MB", "5.44 MiB");
+
+	/* huge values */
+	test_string_get_size_one(U64_MAX, 4096, "75.6 ZB", "64.0 ZiB");
+	test_string_get_size_one(4096, U64_MAX, "75.6 ZB", "64.0 ZiB");
 }
 
 static int __init test_string_helpers_init(void)

From 945b6ec05a475fc80bcb79ef006ee5c0263c7b3a Mon Sep 17 00:00:00 2001
From: Mat Martineau <mathew.j.martineau@linux.intel.com>
Date: Thu, 28 Jan 2016 15:19:23 -0800
Subject: [PATCH 268/424] drm/i915/skl: Fix DMC load on Skylake J0 and K0

commit a41c8882592fb80458959b10e37632ce030b68ca upstream.

The driver does not load firmware for unknown steppings, so these new
steppings must be added to the list.

Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1454023163-25469-1-git-send-email-mathew.j.martineau@linux.intel.com
Cc: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/i915/intel_csr.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c
index 9e530a739354..fc28c512ece3 100644
--- a/drivers/gpu/drm/i915/intel_csr.c
+++ b/drivers/gpu/drm/i915/intel_csr.c
@@ -180,7 +180,8 @@ struct stepping_info {
 static const struct stepping_info skl_stepping_info[] = {
 		{'A', '0'}, {'B', '0'}, {'C', '0'},
 		{'D', '0'}, {'E', '0'}, {'F', '0'},
-		{'G', '0'}, {'H', '0'}, {'I', '0'}
+		{'G', '0'}, {'H', '0'}, {'I', '0'},
+		{'J', '0'}, {'K', '0'}
 };
 
 static struct stepping_info bxt_stepping_info[] = {

From 4c2795dd50f98fa162cb53190eb557be44f92f58 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 11 May 2016 11:23:26 +0200
Subject: [PATCH 269/424] Linux 4.4.10

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 0722cdf52152..5b5f462f834c 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 4
-SUBLEVEL = 9
+SUBLEVEL = 10
 EXTRAVERSION =
 NAME = Blurry Fish Butt
 

From 32b06020f36dd2dcfd7832ffd34a84f254b14e46 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Fri, 15 Jan 2016 13:28:57 +0100
Subject: [PATCH 270/424] arm64: hide __efistub_ aliases from kallsyms

Commit e8f3010f7326 ("arm64/efi: isolate EFI stub from the kernel
proper") isolated the EFI stub code from the kernel proper by prefixing
all of its symbols with __efistub_, and selectively allowing access to
core kernel symbols from the stub by emitting __efistub_ aliases for
functions and variables that the stub can access legally.

As an unintended side effect, these aliases are emitted into the
kallsyms symbol table, which means they may turn up in backtraces,
e.g.,

  ...
  PC is at __efistub_memset+0x108/0x200
  LR is at fixup_init+0x3c/0x48
  ...
  [<ffffff8008328608>] __efistub_memset+0x108/0x200
  [<ffffff8008094dcc>] free_initmem+0x2c/0x40
  [<ffffff8008645198>] kernel_init+0x20/0xe0
  [<ffffff8008085cd0>] ret_from_fork+0x10/0x40

The backtrace in question has nothing to do with the EFI stub, but
simply returns one of the several aliases of memset() that have been
recorded in the kallsyms table. This is undesirable, since it may
suggest to people who are not aware of this that the issue they are
seeing is somehow EFI related.

So hide the __efistub_ aliases from kallsyms, by emitting them as
absolute linker symbols explicitly. The distinction between those
and section relative symbols is completely irrelevant to these
definitions, and to the final link we are performing when these
definitions are being taken into account (the distinction is only
relevant to symbols defined inside a section definition when performing
a partial link), and so the resulting values are identical to the
original ones. Since absolute symbols are ignored by kallsyms, this
will result in these values to be omitted from its symbol table.

After this patch, the backtrace generated from the same address looks
like this:
  ...
  PC is at __memset+0x108/0x200
  LR is at fixup_init+0x3c/0x48
  ...
  [<ffffff8008328608>] __memset+0x108/0x200
  [<ffffff8008094dcc>] free_initmem+0x2c/0x40
  [<ffffff8008645198>] kernel_init+0x20/0xe0
  [<ffffff8008085cd0>] ret_from_fork+0x10/0x40

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit 75feee3d9d51775072d3a04f47d4a439a4c4590e)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/kernel/image.h | 40 ++++++++++++++++++++++++---------------
 1 file changed, 25 insertions(+), 15 deletions(-)

diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h
index bc2abb8b1599..999633bd7294 100644
--- a/arch/arm64/kernel/image.h
+++ b/arch/arm64/kernel/image.h
@@ -64,6 +64,16 @@
 
 #ifdef CONFIG_EFI
 
+/*
+ * Prevent the symbol aliases below from being emitted into the kallsyms
+ * table, by forcing them to be absolute symbols (which are conveniently
+ * ignored by scripts/kallsyms) rather than section relative symbols.
+ * The distinction is only relevant for partial linking, and only for symbols
+ * that are defined within a section declaration (which is not the case for
+ * the definitions below) so the resulting values will be identical.
+ */
+#define KALLSYMS_HIDE(sym)	ABSOLUTE(sym)
+
 /*
  * The EFI stub has its own symbol namespace prefixed by __efistub_, to
  * isolate it from the kernel proper. The following symbols are legally
@@ -73,25 +83,25 @@
  * linked at. The routines below are all implemented in assembler in a
  * position independent manner
  */
-__efistub_memcmp		= __pi_memcmp;
-__efistub_memchr		= __pi_memchr;
-__efistub_memcpy		= __pi_memcpy;
-__efistub_memmove		= __pi_memmove;
-__efistub_memset		= __pi_memset;
-__efistub_strlen		= __pi_strlen;
-__efistub_strcmp		= __pi_strcmp;
-__efistub_strncmp		= __pi_strncmp;
-__efistub___flush_dcache_area	= __pi___flush_dcache_area;
+__efistub_memcmp		= KALLSYMS_HIDE(__pi_memcmp);
+__efistub_memchr		= KALLSYMS_HIDE(__pi_memchr);
+__efistub_memcpy		= KALLSYMS_HIDE(__pi_memcpy);
+__efistub_memmove		= KALLSYMS_HIDE(__pi_memmove);
+__efistub_memset		= KALLSYMS_HIDE(__pi_memset);
+__efistub_strlen		= KALLSYMS_HIDE(__pi_strlen);
+__efistub_strcmp		= KALLSYMS_HIDE(__pi_strcmp);
+__efistub_strncmp		= KALLSYMS_HIDE(__pi_strncmp);
+__efistub___flush_dcache_area	= KALLSYMS_HIDE(__pi___flush_dcache_area);
 
 #ifdef CONFIG_KASAN
-__efistub___memcpy		= __pi_memcpy;
-__efistub___memmove		= __pi_memmove;
-__efistub___memset		= __pi_memset;
+__efistub___memcpy		= KALLSYMS_HIDE(__pi_memcpy);
+__efistub___memmove		= KALLSYMS_HIDE(__pi_memmove);
+__efistub___memset		= KALLSYMS_HIDE(__pi_memset);
 #endif
 
-__efistub__text			= _text;
-__efistub__end			= _end;
-__efistub__edata		= _edata;
+__efistub__text			= KALLSYMS_HIDE(_text);
+__efistub__end			= KALLSYMS_HIDE(_end);
+__efistub__edata		= KALLSYMS_HIDE(_edata);
 
 #endif
 

From b87cf8adbe0d3b1998a7dafd01b70e9b118f641d Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@kernel.org>
Date: Fri, 15 Jan 2016 16:55:37 -0800
Subject: [PATCH 271/424] arch/arm64/include/asm/pgtable.h: add pmd_mkclean for
 THP

MADV_FREE needs pmd_dirty and pmd_mkclean for detecting recent overwrite
of the contents since MADV_FREE syscall is called for THP page.

This patch adds pmd_mkclean for THP page MADV_FREE support.

Signed-off-by: Minchan Kim <minchan@kernel.org>
Cc: "James E.J. Bottomley" <jejb@parisc-linux.org>
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Cc: Shaohua Li <shli@kernel.org>
Cc: <yalin.wang2010@gmail.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chen Gang <gang.chen.5i5j@gmail.com>
Cc: Chris Zankel <chris@zankel.net>
Cc: Daniel Micay <danielmicay@gmail.com>
Cc: Darrick J. Wong <darrick.wong@oracle.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Helge Deller <deller@gmx.de>
Cc: Hugh Dickins <hughd@google.com>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Jason Evans <je@fb.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Kirill A. Shutemov <kirill@shutemov.name>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mika Penttil <mika.penttila@nextfour.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Rik van Riel <riel@redhat.com>
Cc: Roland Dreier <roland@kernel.org>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: Shaohua Li <shli@kernel.org>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
(cherry picked from commit 05ee26d9e7e29ab026995eab79be3c6e8351908c)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/pgtable.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 76ff5d93c6c3..2daf88970731 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -369,6 +369,7 @@ void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address,
 #define pmd_mksplitting(pmd)	pte_pmd(pte_mkspecial(pmd_pte(pmd)))
 #define pmd_mkold(pmd)		pte_pmd(pte_mkold(pmd_pte(pmd)))
 #define pmd_mkwrite(pmd)	pte_pmd(pte_mkwrite(pmd_pte(pmd)))
+#define pmd_mkclean(pmd)       pte_pmd(pte_mkclean(pmd_pte(pmd)))
 #define pmd_mkdirty(pmd)	pte_pmd(pte_mkdirty(pmd_pte(pmd)))
 #define pmd_mkyoung(pmd)	pte_pmd(pte_mkyoung(pmd_pte(pmd)))
 #define pmd_mknotpresent(pmd)	(__pmd(pmd_val(pmd) & ~PMD_TYPE_MASK))

From 035fdc46d48ae8a7cbf7199c74bac1de36cca626 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 11 Jan 2016 14:50:21 +0100
Subject: [PATCH 272/424] arm64: kasan: ensure that the KASAN zero page is
 mapped read-only

When switching from the early KASAN shadow region, which maps the
entire shadow space read-write, to the permanent KASAN shadow region,
which uses a zero page to shadow regions that are not subject to
instrumentation, the lowest level table kasan_zero_pte[] may be
reused unmodified, which means that the mappings of the zero page
that it contains will still be read-write.

So update it explicitly to map the zero page read only when we
activate the permanent mapping.

Acked-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit 7b1af9795773d745c2a8c7d4ca5f2936e8b6adfb)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/mm/kasan_init.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index cf038c7d9fa9..cab7a5be40aa 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -120,6 +120,7 @@ static void __init cpu_set_ttbr1(unsigned long ttbr1)
 void __init kasan_init(void)
 {
 	struct memblock_region *reg;
+	int i;
 
 	/*
 	 * We are going to perform proper setup of shadow memory.
@@ -155,6 +156,14 @@ void __init kasan_init(void)
 				pfn_to_nid(virt_to_pfn(start)));
 	}
 
+	/*
+	 * KAsan may reuse the contents of kasan_zero_pte directly, so we
+	 * should make sure that it maps the zero page read-only.
+	 */
+	for (i = 0; i < PTRS_PER_PTE; i++)
+		set_pte(&kasan_zero_pte[i],
+			pfn_pte(virt_to_pfn(kasan_zero_page), PAGE_KERNEL_RO));
+
 	memset(kasan_zero_page, 0, PAGE_SIZE);
 	cpu_set_ttbr1(__pa(swapper_pg_dir));
 	flush_tlb_all();

From 8c226342ae45c7a2029af59cf81edc9147a60d56 Mon Sep 17 00:00:00 2001
From: Masanari Iida <standby24x7@gmail.com>
Date: Sun, 24 Jan 2016 15:24:12 +0900
Subject: [PATCH 273/424] arm64: Fix an enum typo in mm/dump.c

This patch fixes a typo in mm/dump.c:
"MODUELS_END_NR" should be "MODULES_END_NR".

Signed-off-by: Masanari Iida <standby24x7@gmail.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit b3122023df935cf14bf951da98ca598d71b9f826)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/mm/dump.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c
index 5a22a119a74c..0adbebbc2803 100644
--- a/arch/arm64/mm/dump.c
+++ b/arch/arm64/mm/dump.c
@@ -46,7 +46,7 @@ enum address_markers_idx {
 	PCI_START_NR,
 	PCI_END_NR,
 	MODULES_START_NR,
-	MODUELS_END_NR,
+	MODULES_END_NR,
 	KERNEL_SPACE_NR,
 };
 

From d2779548f7c7156686d4d88ce4ae904460952a8c Mon Sep 17 00:00:00 2001
From: William Cohen <wcohen@redhat.com>
Date: Thu, 21 Jan 2016 22:56:26 -0500
Subject: [PATCH 274/424] Eliminate the .eh_frame sections from the aarch64
 vmlinux and kernel modules

By default the aarch64 gcc generates .eh_frame sections.  Unlike
.debug_frame sections, the .eh_frame sections are loaded into memory
when the associated code is loaded.  On an example kernel being built
with this default the .eh_frame section in vmlinux used an extra 1.7MB
of memory.  The x86 disables the creation of the .eh_frame section.
The aarch64 should probably do the same to save some memory.

Signed-off-by: William Cohen <wcohen@redhat.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit 728dabd6d1751cf5e0f8e0535891393da62396e9)
Signed-off-by: Alex Shi <alex.shi@linaro.org>

Conflicts:
	pick 67dfa1751 arm64: errata: Add -mpc-relative-literal-loads
	in arch/arm64/Makefile
---
 arch/arm64/Makefile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index b6c90e5006e4..548a2939d7e6 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -28,6 +28,7 @@ endif
 
 KBUILD_CFLAGS	+= -mgeneral-regs-only $(lseinstr)
 KBUILD_CFLAGS	+= $(call cc-option, -mpc-relative-literal-loads)
+KBUILD_CFLAGS	+= -fno-asynchronous-unwind-tables
 KBUILD_AFLAGS	+= $(lseinstr)
 
 ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)

From 7c584b74f039645457bb762f5171e2de515720e4 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 25 Jan 2016 11:44:55 +0000
Subject: [PATCH 275/424] asm-generic: Fix local variable shadow in
 __set_fixmap_offset

Currently __set_fixmap_offset is a macro function which has a local
variable called 'addr'. If a caller passes a 'phys' parameter which is
derived from a variable also called 'addr', the local variable will
shadow this, and the compiler will complain about the use of an
uninitialized variable. To avoid the issue with namespace clashes,
'addr' is prefixed with a liberal sprinkling of underscores.

Turning __set_fixmap_offset into a static inline breaks the build for
several architectures. Fixing this properly requires updates to a number
of architectures to make them agree on the prototype of __set_fixmap (it
could be done as a subsequent patch series).

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Arnd Bergmann <arnd@arndb.de>
[catalin.marinas@arm.com: squashed the original function patch and macro fixup]
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

(cherry picked from commit 3694bd76781b76c4f8d2ecd85018feeb1609f0e5)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 include/asm-generic/fixmap.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/include/asm-generic/fixmap.h b/include/asm-generic/fixmap.h
index 1cbb8338edf3..827e4d3bbc7a 100644
--- a/include/asm-generic/fixmap.h
+++ b/include/asm-generic/fixmap.h
@@ -70,12 +70,12 @@ static inline unsigned long virt_to_fix(const unsigned long vaddr)
 #endif
 
 /* Return a pointer with offset calculated */
-#define __set_fixmap_offset(idx, phys, flags)		      \
-({							      \
-	unsigned long addr;				      \
-	__set_fixmap(idx, phys, flags);			      \
-	addr = fix_to_virt(idx) + ((phys) & (PAGE_SIZE - 1)); \
-	addr;						      \
+#define __set_fixmap_offset(idx, phys, flags)				\
+({									\
+	unsigned long ________addr;					\
+	__set_fixmap(idx, phys, flags);					\
+	________addr = fix_to_virt(idx) + ((phys) & (PAGE_SIZE - 1));	\
+	________addr;							\
 })
 
 #define set_fixmap_offset(idx, phys) \

From c8403d828ed9741a4a9d820c829d71211685b659 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 25 Jan 2016 11:44:56 +0000
Subject: [PATCH 276/424] arm64: mm: specialise pagetable allocators

We pass a size parameter to early_alloc and late_alloc, but these are
only ever used to allocate single pages. In late_alloc we always
allocate a single page.

Both allocators provide us with zeroed pages (such that all entries are
invalid), but we have no barriers between allocating a page and adding
that page to existing (live) tables. A concurrent page table walk may
see stale data, leading to a number of issues.

This patch specialises the two allocators for page tables. The size
parameter is removed and the necessary dsb(ishst) is folded into each.
To make it clear that the functions are intended for use for page table
allocation, they are renamed to {early,late}_pgtable_alloc, with the
related function pointed renamed to pgtable_alloc.

As the dsb(ishst) is now in the allocator, the existing barrier for the
zero page is redundant and thus is removed. The previously missing
include of barrier.h is added.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Jeremy Linton <jeremy.linton@arm.com>
Cc: Laura Abbott <labbott@fedoraproject.org>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 21ab99c289d350f4ae454bc069870009db6df20e)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/mm/mmu.c | 52 +++++++++++++++++++++++----------------------
 1 file changed, 27 insertions(+), 25 deletions(-)

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index c5bd5bca8e3d..3ed128c96618 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -30,6 +30,7 @@
 #include <linux/slab.h>
 #include <linux/stop_machine.h>
 
+#include <asm/barrier.h>
 #include <asm/cputype.h>
 #include <asm/fixmap.h>
 #include <asm/kernel-pgtable.h>
@@ -62,15 +63,18 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 }
 EXPORT_SYMBOL(phys_mem_access_prot);
 
-static void __init *early_alloc(unsigned long sz)
+static void __init *early_pgtable_alloc(void)
 {
 	phys_addr_t phys;
 	void *ptr;
 
-	phys = memblock_alloc(sz, sz);
+	phys = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
 	BUG_ON(!phys);
 	ptr = __va(phys);
-	memset(ptr, 0, sz);
+	memset(ptr, 0, PAGE_SIZE);
+
+	/* Ensure the zeroed page is visible to the page table walker */
+	dsb(ishst);
 	return ptr;
 }
 
@@ -95,12 +99,12 @@ static void split_pmd(pmd_t *pmd, pte_t *pte)
 static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
 				  unsigned long end, unsigned long pfn,
 				  pgprot_t prot,
-				  void *(*alloc)(unsigned long size))
+				  void *(*pgtable_alloc)(void))
 {
 	pte_t *pte;
 
 	if (pmd_none(*pmd) || pmd_sect(*pmd)) {
-		pte = alloc(PTRS_PER_PTE * sizeof(pte_t));
+		pte = pgtable_alloc();
 		if (pmd_sect(*pmd))
 			split_pmd(pmd, pte);
 		__pmd_populate(pmd, __pa(pte), PMD_TYPE_TABLE);
@@ -130,7 +134,7 @@ static void split_pud(pud_t *old_pud, pmd_t *pmd)
 static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
 				  unsigned long addr, unsigned long end,
 				  phys_addr_t phys, pgprot_t prot,
-				  void *(*alloc)(unsigned long size))
+				  void *(*pgtable_alloc)(void))
 {
 	pmd_t *pmd;
 	unsigned long next;
@@ -139,7 +143,7 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
 	 * Check for initial section mappings in the pgd/pud and remove them.
 	 */
 	if (pud_none(*pud) || pud_sect(*pud)) {
-		pmd = alloc(PTRS_PER_PMD * sizeof(pmd_t));
+		pmd = pgtable_alloc();
 		if (pud_sect(*pud)) {
 			/*
 			 * need to have the 1G of mappings continue to be
@@ -174,7 +178,7 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
 			}
 		} else {
 			alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys),
-				       prot, alloc);
+				       prot, pgtable_alloc);
 		}
 		phys += next - addr;
 	} while (pmd++, addr = next, addr != end);
@@ -195,13 +199,13 @@ static inline bool use_1G_block(unsigned long addr, unsigned long next,
 static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd,
 				  unsigned long addr, unsigned long end,
 				  phys_addr_t phys, pgprot_t prot,
-				  void *(*alloc)(unsigned long size))
+				  void *(*pgtable_alloc)(void))
 {
 	pud_t *pud;
 	unsigned long next;
 
 	if (pgd_none(*pgd)) {
-		pud = alloc(PTRS_PER_PUD * sizeof(pud_t));
+		pud = pgtable_alloc();
 		pgd_populate(mm, pgd, pud);
 	}
 	BUG_ON(pgd_bad(*pgd));
@@ -234,7 +238,8 @@ static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd,
 				}
 			}
 		} else {
-			alloc_init_pmd(mm, pud, addr, next, phys, prot, alloc);
+			alloc_init_pmd(mm, pud, addr, next, phys, prot,
+				       pgtable_alloc);
 		}
 		phys += next - addr;
 	} while (pud++, addr = next, addr != end);
@@ -247,7 +252,7 @@ static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd,
 static void  __create_mapping(struct mm_struct *mm, pgd_t *pgd,
 				    phys_addr_t phys, unsigned long virt,
 				    phys_addr_t size, pgprot_t prot,
-				    void *(*alloc)(unsigned long size))
+				    void *(*pgtable_alloc)(void))
 {
 	unsigned long addr, length, end, next;
 
@@ -265,18 +270,18 @@ static void  __create_mapping(struct mm_struct *mm, pgd_t *pgd,
 	end = addr + length;
 	do {
 		next = pgd_addr_end(addr, end);
-		alloc_init_pud(mm, pgd, addr, next, phys, prot, alloc);
+		alloc_init_pud(mm, pgd, addr, next, phys, prot, pgtable_alloc);
 		phys += next - addr;
 	} while (pgd++, addr = next, addr != end);
 }
 
-static void *late_alloc(unsigned long size)
+static void *late_pgtable_alloc(void)
 {
-	void *ptr;
-
-	BUG_ON(size > PAGE_SIZE);
-	ptr = (void *)__get_free_page(PGALLOC_GFP);
+	void *ptr = (void *)__get_free_page(PGALLOC_GFP);
 	BUG_ON(!ptr);
+
+	/* Ensure the zeroed page is visible to the page table walker */
+	dsb(ishst);
 	return ptr;
 }
 
@@ -289,7 +294,7 @@ static void __init create_mapping(phys_addr_t phys, unsigned long virt,
 		return;
 	}
 	__create_mapping(&init_mm, pgd_offset_k(virt), phys, virt,
-			 size, prot, early_alloc);
+			 size, prot, early_pgtable_alloc);
 }
 
 void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
@@ -297,7 +302,7 @@ void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
 			       pgprot_t prot)
 {
 	__create_mapping(mm, pgd_offset(mm, virt), phys, virt, size, prot,
-				late_alloc);
+				late_pgtable_alloc);
 }
 
 static void create_mapping_late(phys_addr_t phys, unsigned long virt,
@@ -310,7 +315,7 @@ static void create_mapping_late(phys_addr_t phys, unsigned long virt,
 	}
 
 	return __create_mapping(&init_mm, pgd_offset_k(virt),
-				phys, virt, size, prot, late_alloc);
+				phys, virt, size, prot, late_pgtable_alloc);
 }
 
 #ifdef CONFIG_DEBUG_RODATA
@@ -458,15 +463,12 @@ void __init paging_init(void)
 	fixup_executable();
 
 	/* allocate the zero page. */
-	zero_page = early_alloc(PAGE_SIZE);
+	zero_page = early_pgtable_alloc();
 
 	bootmem_init();
 
 	empty_zero_page = virt_to_page(zero_page);
 
-	/* Ensure the zero page is visible to the page table walker */
-	dsb(ishst);
-
 	/*
 	 * TTBR0 is only used for the identity mapping at this stage. Make it
 	 * point to zero page to avoid speculatively fetching new entries.

From a4593c91bbb59e89df1aefe677493ff364a7ddb2 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 25 Jan 2016 11:44:57 +0000
Subject: [PATCH 277/424] arm64: mm: place empty_zero_page in bss

Currently the zero page is set up in paging_init, and thus we cannot use
the zero page earlier. We use the zero page as a reserved TTBR value
from which no TLB entries may be allocated (e.g. when uninstalling the
idmap). To enable such usage earlier (as may be required for invasive
changes to the kernel page tables), and to minimise the time that the
idmap is active, we need to be able to use the zero page before
paging_init.

This patch follows the example set by x86, by allocating the zero page
at compile time, in .bss. This means that the zero page itself is
available immediately upon entry to start_kernel (as we zero .bss before
this), and also means that the zero page takes up no space in the raw
Image binary. The associated struct page is allocated in bootmem_init,
and remains unavailable until this time.

Outside of arch code, the only users of empty_zero_page assume that the
empty_zero_page symbol refers to the zeroed memory itself, and that
ZERO_PAGE(x) must be used to acquire the associated struct page,
following the example of x86. This patch also brings arm64 inline with
these assumptions.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Jeremy Linton <jeremy.linton@arm.com>
Cc: Laura Abbott <labbott@fedoraproject.org>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 5227cfa71f9e8574373f4d0e9e754942d76cdf67)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/mmu_context.h | 2 +-
 arch/arm64/include/asm/pgtable.h     | 4 ++--
 arch/arm64/kernel/head.S             | 1 +
 arch/arm64/mm/mmu.c                  | 9 +--------
 4 files changed, 5 insertions(+), 11 deletions(-)

diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index 24165784b803..600eacb9f7d5 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -48,7 +48,7 @@ static inline void contextidr_thread_switch(struct task_struct *next)
  */
 static inline void cpu_set_reserved_ttbr0(void)
 {
-	unsigned long ttbr = page_to_phys(empty_zero_page);
+	unsigned long ttbr = virt_to_phys(empty_zero_page);
 
 	asm(
 	"	msr	ttbr0_el1, %0			// set TTBR0\n"
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 2daf88970731..8a76e603d737 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -123,8 +123,8 @@ extern void __pgd_error(const char *file, int line, unsigned long val);
  * ZERO_PAGE is a global shared page that is always zero: used
  * for zero-mapped memory areas etc..
  */
-extern struct page *empty_zero_page;
-#define ZERO_PAGE(vaddr)	(empty_zero_page)
+extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
+#define ZERO_PAGE(vaddr)	virt_to_page(empty_zero_page)
 
 #define pte_ERROR(pte)		__pte_error(__FILE__, __LINE__, pte_val(pte))
 
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 917d98108b3f..53b9f9f128c2 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -421,6 +421,7 @@ __mmap_switched:
 	adr_l	x2, __bss_stop
 	sub	x2, x2, x0
 	bl	__pi_memset
+	dsb	ishst				// Make zero page visible to PTW
 
 	adr_l	sp, initial_sp, x4
 	mov	x4, sp
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 3ed128c96618..e4932aa6c6e9 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -49,7 +49,7 @@ u64 idmap_t0sz = TCR_T0SZ(VA_BITS);
  * Empty_zero_page is a special page that is used for zero-initialized data
  * and COW.
  */
-struct page *empty_zero_page;
+unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss;
 EXPORT_SYMBOL(empty_zero_page);
 
 pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
@@ -457,18 +457,11 @@ void fixup_init(void)
  */
 void __init paging_init(void)
 {
-	void *zero_page;
-
 	map_mem();
 	fixup_executable();
 
-	/* allocate the zero page. */
-	zero_page = early_pgtable_alloc();
-
 	bootmem_init();
 
-	empty_zero_page = virt_to_page(zero_page);
-
 	/*
 	 * TTBR0 is only used for the identity mapping at this stage. Make it
 	 * point to zero page to avoid speculatively fetching new entries.

From 0dedc3948e315aff0f382a58c0d387547d8a35b5 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 25 Jan 2016 11:44:58 +0000
Subject: [PATCH 278/424] arm64: unify idmap removal

We currently open-code the removal of the idmap and restoration of the
current task's MMU state in a few places.

Before introducing yet more copies of this sequence, unify these to call
a new helper, cpu_uninstall_idmap.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Jeremy Linton <jeremy.linton@arm.com>
Cc: Laura Abbott <labbott@fedoraproject.org>
Cc: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 9e8e865bbe294a69666a1996bda3e87825b258c0)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/mmu_context.h | 25 +++++++++++++++++++++++++
 arch/arm64/kernel/setup.c            |  1 +
 arch/arm64/kernel/smp.c              |  4 +---
 arch/arm64/kernel/suspend.c          | 20 ++++----------------
 arch/arm64/mm/mmu.c                  |  4 +---
 5 files changed, 32 insertions(+), 22 deletions(-)

diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index 600eacb9f7d5..b1b2514d8883 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -27,6 +27,7 @@
 #include <asm-generic/mm_hooks.h>
 #include <asm/cputype.h>
 #include <asm/pgtable.h>
+#include <asm/tlbflush.h>
 
 #ifdef CONFIG_PID_IN_CONTEXTIDR
 static inline void contextidr_thread_switch(struct task_struct *next)
@@ -89,6 +90,30 @@ static inline void cpu_set_default_tcr_t0sz(void)
 	: "r"(TCR_T0SZ(VA_BITS)), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH));
 }
 
+/*
+ * Remove the idmap from TTBR0_EL1 and install the pgd of the active mm.
+ *
+ * The idmap lives in the same VA range as userspace, but uses global entries
+ * and may use a different TCR_EL1.T0SZ. To avoid issues resulting from
+ * speculative TLB fetches, we must temporarily install the reserved page
+ * tables while we invalidate the TLBs and set up the correct TCR_EL1.T0SZ.
+ *
+ * If current is a not a user task, the mm covers the TTBR1_EL1 page tables,
+ * which should not be installed in TTBR0_EL1. In this case we can leave the
+ * reserved page tables in place.
+ */
+static inline void cpu_uninstall_idmap(void)
+{
+	struct mm_struct *mm = current->active_mm;
+
+	cpu_set_reserved_ttbr0();
+	local_flush_tlb_all();
+	cpu_set_default_tcr_t0sz();
+
+	if (mm != &init_mm)
+		cpu_switch_mm(mm->pgd, mm);
+}
+
 /*
  * It would be nice to return ASIDs back to the allocator, but unfortunately
  * that introduces a race with a generation rollover where we could erroneously
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 8119479147db..f6621ba071f9 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -62,6 +62,7 @@
 #include <asm/memblock.h>
 #include <asm/efi.h>
 #include <asm/xen/hypervisor.h>
+#include <asm/mmu_context.h>
 
 phys_addr_t __fdt_pointer __initdata;
 
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index b1adc51b2c2e..68e7f79630d4 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -149,9 +149,7 @@ asmlinkage void secondary_start_kernel(void)
 	 * TTBR0 is only used for the identity mapping at this stage. Make it
 	 * point to zero page to avoid speculatively fetching new entries.
 	 */
-	cpu_set_reserved_ttbr0();
-	local_flush_tlb_all();
-	cpu_set_default_tcr_t0sz();
+	cpu_uninstall_idmap();
 
 	preempt_disable();
 	trace_hardirqs_off();
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index 1095aa483a1c..66055392f445 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -60,7 +60,6 @@ void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *))
  */
 int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
 {
-	struct mm_struct *mm = current->active_mm;
 	int ret;
 	unsigned long flags;
 
@@ -87,22 +86,11 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
 	ret = __cpu_suspend_enter(arg, fn);
 	if (ret == 0) {
 		/*
-		 * We are resuming from reset with TTBR0_EL1 set to the
-		 * idmap to enable the MMU; set the TTBR0 to the reserved
-		 * page tables to prevent speculative TLB allocations, flush
-		 * the local tlb and set the default tcr_el1.t0sz so that
-		 * the TTBR0 address space set-up is properly restored.
-		 * If the current active_mm != &init_mm we entered cpu_suspend
-		 * with mappings in TTBR0 that must be restored, so we switch
-		 * them back to complete the address space configuration
-		 * restoration before returning.
+		 * We are resuming from reset with the idmap active in TTBR0_EL1.
+		 * We must uninstall the idmap and restore the expected MMU
+		 * state before we can possibly return to userspace.
 		 */
-		cpu_set_reserved_ttbr0();
-		local_flush_tlb_all();
-		cpu_set_default_tcr_t0sz();
-
-		if (mm != &init_mm)
-			cpu_switch_mm(mm->pgd, mm);
+		cpu_uninstall_idmap();
 
 		/*
 		 * Restore per-cpu offset before any kernel
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index e4932aa6c6e9..dcc06b23b37f 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -466,9 +466,7 @@ void __init paging_init(void)
 	 * TTBR0 is only used for the identity mapping at this stage. Make it
 	 * point to zero page to avoid speculatively fetching new entries.
 	 */
-	cpu_set_reserved_ttbr0();
-	local_flush_tlb_all();
-	cpu_set_default_tcr_t0sz();
+	cpu_uninstall_idmap();
 }
 
 /*

From 7ed029beef4adcab0ad59356579f6fea71655895 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 25 Jan 2016 11:44:59 +0000
Subject: [PATCH 279/424] arm64: unmap idmap earlier

During boot we leave the idmap in place until paging_init, as we
previously had to wait for the zero page to become allocated and
accessible.

Now that we have a statically-allocated zero page, we can uninstall the
idmap much earlier in the boot process, making it far easier to spot
accidental use of physical addresses. This also brings the cold boot
path in line with the secondary boot path.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Jeremy Linton <jeremy.linton@arm.com>
Cc: Laura Abbott <labbott@fedoraproject.org>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 86ccce896cb0aa800a7a6dcd29b41ffc4eeb1a75)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/kernel/setup.c | 6 ++++++
 arch/arm64/mm/mmu.c       | 6 ------
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index f6621ba071f9..cfed56f0ad26 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -314,6 +314,12 @@ void __init setup_arch(char **cmdline_p)
 	 */
 	local_async_enable();
 
+	/*
+	 * TTBR0 is only used for the identity mapping at this stage. Make it
+	 * point to zero page to avoid speculatively fetching new entries.
+	 */
+	cpu_uninstall_idmap();
+
 	efi_init();
 	arm64_memblock_init();
 
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index dcc06b23b37f..8587ed9d81b6 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -461,12 +461,6 @@ void __init paging_init(void)
 	fixup_executable();
 
 	bootmem_init();
-
-	/*
-	 * TTBR0 is only used for the identity mapping at this stage. Make it
-	 * point to zero page to avoid speculatively fetching new entries.
-	 */
-	cpu_uninstall_idmap();
 }
 
 /*

From 34903bb8c69405ec6eb2b2d437fabd0571df94bb Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 25 Jan 2016 11:45:00 +0000
Subject: [PATCH 280/424] arm64: add function to install the idmap

In some cases (e.g. when making invasive changes to the kernel page
tables) we will need to execute code from the idmap.

Add a new helper which may be used to install the idmap, complementing
the existing cpu_uninstall_idmap.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Jeremy Linton <jeremy.linton@arm.com>
Cc: Laura Abbott <labbott@fedoraproject.org>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 609116d202a8c5fd3fe393eb85373cbee906df68)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/mmu_context.h | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index b1b2514d8883..944f2730a940 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -74,7 +74,7 @@ static inline bool __cpu_uses_extended_idmap(void)
 /*
  * Set TCR.T0SZ to its default value (based on VA_BITS)
  */
-static inline void cpu_set_default_tcr_t0sz(void)
+static inline void __cpu_set_tcr_t0sz(unsigned long t0sz)
 {
 	unsigned long tcr;
 
@@ -87,9 +87,12 @@ static inline void cpu_set_default_tcr_t0sz(void)
 	"	msr	tcr_el1, %0	;"
 	"	isb"
 	: "=&r" (tcr)
-	: "r"(TCR_T0SZ(VA_BITS)), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH));
+	: "r"(t0sz), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH));
 }
 
+#define cpu_set_default_tcr_t0sz()	__cpu_set_tcr_t0sz(TCR_T0SZ(VA_BITS))
+#define cpu_set_idmap_tcr_t0sz()	__cpu_set_tcr_t0sz(idmap_t0sz)
+
 /*
  * Remove the idmap from TTBR0_EL1 and install the pgd of the active mm.
  *
@@ -114,6 +117,15 @@ static inline void cpu_uninstall_idmap(void)
 		cpu_switch_mm(mm->pgd, mm);
 }
 
+static inline void cpu_install_idmap(void)
+{
+	cpu_set_reserved_ttbr0();
+	local_flush_tlb_all();
+	cpu_set_idmap_tcr_t0sz();
+
+	cpu_switch_mm(idmap_pg_dir, &init_mm);
+}
+
 /*
  * It would be nice to return ASIDs back to the allocator, but unfortunately
  * that introduces a race with a generation rollover where we could erroneously

From 34fc059805c7dfa19d0d9d1e008fe83f7744c0ed Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 25 Jan 2016 11:45:01 +0000
Subject: [PATCH 281/424] arm64: mm: add code to safely replace TTBR1_EL1

If page tables are modified without suitable TLB maintenance, the ARM
architecture permits multiple TLB entries to be allocated for the same
VA. When this occurs, it is permitted that TLB conflict aborts are
raised in response to synchronous data/instruction accesses, and/or and
amalgamation of the TLB entries may be used as a result of a TLB lookup.

The presence of conflicting TLB entries may result in a variety of
behaviours detrimental to the system (e.g. erroneous physical addresses
may be used by I-cache fetches and/or page table walks). Some of these
cases may result in unexpected changes of hardware state, and/or result
in the (asynchronous) delivery of SError.

To avoid these issues, we must avoid situations where conflicting
entries may be allocated into TLBs. For user and module mappings we can
follow a strict break-before-make approach, but this cannot work for
modifications to the swapper page tables that cover the kernel text and
data.

Instead, this patch adds code which is intended to be executed from the
idmap, which can safely unmap the swapper page tables as it only
requires the idmap to be active. This enables us to uninstall the active
TTBR1_EL1 entry, invalidate TLBs, then install a new TTBR1_EL1 entry
without potentially unmapping code or data required for the sequence.
This avoids the risk of conflict, but requires that updates are staged
in a copy of the swapper page tables prior to being installed.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Jeremy Linton <jeremy.linton@arm.com>
Cc: Laura Abbott <labbott@fedoraproject.org>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 50e1881ddde2a986c7d0d2150985239e5e3d7d96)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/mmu_context.h | 19 +++++++++++++++++++
 arch/arm64/mm/proc.S                 | 28 ++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+)

diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index 944f2730a940..a00f7cf35bbd 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -126,6 +126,25 @@ static inline void cpu_install_idmap(void)
 	cpu_switch_mm(idmap_pg_dir, &init_mm);
 }
 
+/*
+ * Atomically replaces the active TTBR1_EL1 PGD with a new VA-compatible PGD,
+ * avoiding the possibility of conflicting TLB entries being allocated.
+ */
+static inline void cpu_replace_ttbr1(pgd_t *pgd)
+{
+	typedef void (ttbr_replace_func)(phys_addr_t);
+	extern ttbr_replace_func idmap_cpu_replace_ttbr1;
+	ttbr_replace_func *replace_phys;
+
+	phys_addr_t pgd_phys = virt_to_phys(pgd);
+
+	replace_phys = (void *)virt_to_phys(idmap_cpu_replace_ttbr1);
+
+	cpu_install_idmap();
+	replace_phys(pgd_phys);
+	cpu_uninstall_idmap();
+}
+
 /*
  * It would be nice to return ASIDs back to the allocator, but unfortunately
  * that introduces a race with a generation rollover where we could erroneously
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index c164d2cb35c0..0c19534a901e 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -140,6 +140,34 @@ ENTRY(cpu_do_switch_mm)
 	ret
 ENDPROC(cpu_do_switch_mm)
 
+	.pushsection ".idmap.text", "ax"
+/*
+ * void idmap_cpu_replace_ttbr1(phys_addr_t new_pgd)
+ *
+ * This is the low-level counterpart to cpu_replace_ttbr1, and should not be
+ * called by anything else. It can only be executed from a TTBR0 mapping.
+ */
+ENTRY(idmap_cpu_replace_ttbr1)
+	mrs	x2, daif
+	msr	daifset, #0xf
+
+	adrp	x1, empty_zero_page
+	msr	ttbr1_el1, x1
+	isb
+
+	tlbi	vmalle1
+	dsb	nsh
+	isb
+
+	msr	ttbr1_el1, x0
+	isb
+
+	msr	daif, x2
+
+	ret
+ENDPROC(idmap_cpu_replace_ttbr1)
+	.popsection
+
 /*
  *	__cpu_setup
  *

From 8b8513de45f823e10fce3b1563953be70483941b Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 25 Jan 2016 11:45:02 +0000
Subject: [PATCH 282/424] arm64: kasan: avoid TLB conflicts

The page table modification performed during the KASAN init risks the
allocation of conflicting TLB entries, as it swaps a set of valid global
entries for another without suitable TLB maintenance.

The presence of conflicting TLB entries can result in the delivery of
synchronous TLB conflict aborts, or may result in the use of erroneous
data being returned in response to a TLB lookup. This can affect
explicit data accesses from software as well as translations performed
asynchronously (e.g. as part of page table walks or speculative I-cache
fetches), and can therefore result in a wide variety of problems.

To avoid this, use cpu_replace_ttbr1 to swap the page tables. This
ensures that when the new tables are installed there are no stale
entries from the old tables which may conflict. As all updates are made
to the tables while they are not active, the updates themselves are
safe.

At the same time, add the missing barrier to ensure that the tmp_pg_dir
entries updated via memcpy are visible to the page table walkers at the
point the tmp_pg_dir is installed. All other page table updates made as
part of KASAN initialisation have the requisite barriers due to the use
of the standard page table accessors.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Jeremy Linton <jeremy.linton@arm.com>
Cc: Laura Abbott <labbott@fedoraproject.org>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit c1a88e9124a499939ebd8069d5e4d3937f019157)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/mm/kasan_init.c | 17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index cab7a5be40aa..263b59020500 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -16,6 +16,7 @@
 #include <linux/memblock.h>
 #include <linux/start_kernel.h>
 
+#include <asm/mmu_context.h>
 #include <asm/page.h>
 #include <asm/pgalloc.h>
 #include <asm/pgtable.h>
@@ -108,15 +109,6 @@ static void __init clear_pgds(unsigned long start,
 		set_pgd(pgd_offset_k(start), __pgd(0));
 }
 
-static void __init cpu_set_ttbr1(unsigned long ttbr1)
-{
-	asm(
-	"	msr	ttbr1_el1, %0\n"
-	"	isb"
-	:
-	: "r" (ttbr1));
-}
-
 void __init kasan_init(void)
 {
 	struct memblock_region *reg;
@@ -130,8 +122,8 @@ void __init kasan_init(void)
 	 * setup will be finished.
 	 */
 	memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir));
-	cpu_set_ttbr1(__pa(tmp_pg_dir));
-	flush_tlb_all();
+	dsb(ishst);
+	cpu_replace_ttbr1(tmp_pg_dir);
 
 	clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
 
@@ -165,8 +157,7 @@ void __init kasan_init(void)
 			pfn_pte(virt_to_pfn(kasan_zero_page), PAGE_KERNEL_RO));
 
 	memset(kasan_zero_page, 0, PAGE_SIZE);
-	cpu_set_ttbr1(__pa(swapper_pg_dir));
-	flush_tlb_all();
+	cpu_replace_ttbr1(swapper_pg_dir);
 
 	/* At this point kasan is fully initialized. Enable error messages */
 	init_task.kasan_depth = 0;

From febef18a360df3f8b4b364167e8fe050b24ccf65 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 25 Jan 2016 11:45:03 +0000
Subject: [PATCH 283/424] arm64: mm: move pte_* macros

For pmd, pud, and pgd levels of table, functions including p?d_index and
p?d_offset are defined after the p?d_page_vaddr function for the
immediately higher level of table.

The pte functions however are defined much earlier, even though several
rely on the later definition of pmd_page_vaddr. While this isn't
currently a problem as these are macros, it prevents the logical
grouping of later C functions (which cannot rely on prototypes for
functions not yet defined).

Move these definitions after pmd_page_vaddr, for consistency with the
placement of these functions for other levels of table.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Jeremy Linton <jeremy.linton@arm.com>
Cc: Laura Abbott <labbott@fedoraproject.org>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 053520f7d3923cc6d37afb28f9887cb1e7d77454)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/pgtable.h | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 8a76e603d737..d439523a7910 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -136,16 +136,6 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
 #define pte_clear(mm,addr,ptep)	set_pte(ptep, __pte(0))
 #define pte_page(pte)		(pfn_to_page(pte_pfn(pte)))
 
-/* Find an entry in the third-level page table. */
-#define pte_index(addr)		(((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
-
-#define pte_offset_kernel(dir,addr)	(pmd_page_vaddr(*(dir)) + pte_index(addr))
-
-#define pte_offset_map(dir,addr)	pte_offset_kernel((dir), (addr))
-#define pte_offset_map_nested(dir,addr)	pte_offset_kernel((dir), (addr))
-#define pte_unmap(pte)			do { } while (0)
-#define pte_unmap_nested(pte)		do { } while (0)
-
 /*
  * The following only work if pte_present(). Undefined behaviour otherwise.
  */
@@ -447,6 +437,16 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd)
 	return __va(pmd_val(pmd) & PHYS_MASK & (s32)PAGE_MASK);
 }
 
+/* Find an entry in the third-level page table. */
+#define pte_index(addr)		(((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
+
+#define pte_offset_kernel(dir,addr)	(pmd_page_vaddr(*(dir)) + pte_index(addr))
+
+#define pte_offset_map(dir,addr)	pte_offset_kernel((dir), (addr))
+#define pte_offset_map_nested(dir,addr)	pte_offset_kernel((dir), (addr))
+#define pte_unmap(pte)			do { } while (0)
+#define pte_unmap_nested(pte)		do { } while (0)
+
 #define pmd_page(pmd)		pfn_to_page(__phys_to_pfn(pmd_val(pmd) & PHYS_MASK))
 
 /*

From 77ea11473be30e0b90b32deb650b6403ad291a12 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 25 Jan 2016 11:45:04 +0000
Subject: [PATCH 284/424] arm64: mm: add functions to walk page tables by PA

To allow us to walk tables allocated into the fixmap, we need to acquire
the physical address of a page, rather than the virtual address in the
linear map.

This patch adds new p??_page_paddr and p??_offset_phys functions to
acquire the physical address of a next-level table, and changes
p??_offset* into macros which simply convert this to a linear map VA.
This renders p??_page_vaddr unused, and hence they are removed.

At the pgd level, a new pgd_offset_raw function is added to find the
relevant PGD entry given the base of a PGD and a virtual address.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Jeremy Linton <jeremy.linton@arm.com>
Cc: Laura Abbott <labbott@fedoraproject.org>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit dca56dca7124709f3dfca81afe61b4d98eb9cacf)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/pgtable.h | 39 +++++++++++++++++++-------------
 1 file changed, 23 insertions(+), 16 deletions(-)

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index d439523a7910..db608e7984c6 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -432,15 +432,16 @@ static inline void pmd_clear(pmd_t *pmdp)
 	set_pmd(pmdp, __pmd(0));
 }
 
-static inline pte_t *pmd_page_vaddr(pmd_t pmd)
+static inline phys_addr_t pmd_page_paddr(pmd_t pmd)
 {
-	return __va(pmd_val(pmd) & PHYS_MASK & (s32)PAGE_MASK);
+	return pmd_val(pmd) & PHYS_MASK & (s32)PAGE_MASK;
 }
 
 /* Find an entry in the third-level page table. */
 #define pte_index(addr)		(((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
 
-#define pte_offset_kernel(dir,addr)	(pmd_page_vaddr(*(dir)) + pte_index(addr))
+#define pte_offset_phys(dir,addr)	(pmd_page_paddr(*(dir)) + pte_index(addr) * sizeof(pte_t))
+#define pte_offset_kernel(dir,addr)	((pte_t *)__va(pte_offset_phys((dir), (addr))))
 
 #define pte_offset_map(dir,addr)	pte_offset_kernel((dir), (addr))
 #define pte_offset_map_nested(dir,addr)	pte_offset_kernel((dir), (addr))
@@ -475,21 +476,23 @@ static inline void pud_clear(pud_t *pudp)
 	set_pud(pudp, __pud(0));
 }
 
-static inline pmd_t *pud_page_vaddr(pud_t pud)
+static inline phys_addr_t pud_page_paddr(pud_t pud)
 {
-	return __va(pud_val(pud) & PHYS_MASK & (s32)PAGE_MASK);
+	return pud_val(pud) & PHYS_MASK & (s32)PAGE_MASK;
 }
 
 /* Find an entry in the second-level page table. */
 #define pmd_index(addr)		(((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
 
-static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
-{
-	return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(addr);
-}
+#define pmd_offset_phys(dir, addr)	(pud_page_paddr(*(dir)) + pmd_index(addr) * sizeof(pmd_t))
+#define pmd_offset(dir, addr)		((pmd_t *)__va(pmd_offset_phys((dir), (addr))))
 
 #define pud_page(pud)		pfn_to_page(__phys_to_pfn(pud_val(pud) & PHYS_MASK))
 
+#else
+
+#define pud_page_paddr(pud)	({ BUILD_BUG(); 0; })
+
 #endif	/* CONFIG_PGTABLE_LEVELS > 2 */
 
 #if CONFIG_PGTABLE_LEVELS > 3
@@ -511,21 +514,23 @@ static inline void pgd_clear(pgd_t *pgdp)
 	set_pgd(pgdp, __pgd(0));
 }
 
-static inline pud_t *pgd_page_vaddr(pgd_t pgd)
+static inline phys_addr_t pgd_page_paddr(pgd_t pgd)
 {
-	return __va(pgd_val(pgd) & PHYS_MASK & (s32)PAGE_MASK);
+	return pgd_val(pgd) & PHYS_MASK & (s32)PAGE_MASK;
 }
 
 /* Find an entry in the frst-level page table. */
 #define pud_index(addr)		(((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
 
-static inline pud_t *pud_offset(pgd_t *pgd, unsigned long addr)
-{
-	return (pud_t *)pgd_page_vaddr(*pgd) + pud_index(addr);
-}
+#define pud_offset_phys(dir, addr)	(pgd_page_paddr(*(dir)) + pud_index(addr) * sizeof(pud_t))
+#define pud_offset(dir, addr)		((pud_t *)__va(pud_offset_phys((dir), (addr))))
 
 #define pgd_page(pgd)		pfn_to_page(__phys_to_pfn(pgd_val(pgd) & PHYS_MASK))
 
+#else
+
+#define pgd_page_paddr(pgd)	({ BUILD_BUG(); 0;})
+
 #endif  /* CONFIG_PGTABLE_LEVELS > 3 */
 
 #define pgd_ERROR(pgd)		__pgd_error(__FILE__, __LINE__, pgd_val(pgd))
@@ -533,7 +538,9 @@ static inline pud_t *pud_offset(pgd_t *pgd, unsigned long addr)
 /* to find an entry in a page-table-directory */
 #define pgd_index(addr)		(((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
 
-#define pgd_offset(mm, addr)	((mm)->pgd+pgd_index(addr))
+#define pgd_offset_raw(pgd, addr)	((pgd) + pgd_index(addr))
+
+#define pgd_offset(mm, addr)	(pgd_offset_raw((mm)->pgd, (addr)))
 
 /* to find an entry in a kernel page-table-directory */
 #define pgd_offset_k(addr)	pgd_offset(&init_mm, addr)

From 6f74a379d1593fe728141747c6c594758cc5d328 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 25 Jan 2016 11:45:05 +0000
Subject: [PATCH 285/424] arm64: mm: avoid redundant __pa(__va(x))

When we "upgrade" to a section mapping, we free any table we made
redundant by giving it back to memblock. To get the PA, we acquire the
physical address and convert this to a VA, then subsequently convert
this back to a PA.

This works currently, but will not work if the tables are not accessed
via linear map VAs (e.g. is we use fixmap slots).

This patch uses {pmd,pud}_page_paddr to acquire the PA. This avoids the
__pa(__va()) round trip, saving some work and avoiding reliance on the
linear mapping.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Jeremy Linton <jeremy.linton@arm.com>
Cc: Laura Abbott <labbott@fedoraproject.org>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 316b39db06718d59d82736df9fc65cf05b467cc7)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/mm/mmu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 8587ed9d81b6..0b6c8727bcd1 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -171,7 +171,7 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
 			if (!pmd_none(old_pmd)) {
 				flush_tlb_all();
 				if (pmd_table(old_pmd)) {
-					phys_addr_t table = __pa(pte_offset_map(&old_pmd, 0));
+					phys_addr_t table = pmd_page_paddr(old_pmd);
 					if (!WARN_ON_ONCE(slab_is_available()))
 						memblock_free(table, PAGE_SIZE);
 				}
@@ -232,7 +232,7 @@ static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd,
 			if (!pud_none(old_pud)) {
 				flush_tlb_all();
 				if (pud_table(old_pud)) {
-					phys_addr_t table = __pa(pmd_offset(&old_pud, 0));
+					phys_addr_t table = pud_page_paddr(old_pud);
 					if (!WARN_ON_ONCE(slab_is_available()))
 						memblock_free(table, PAGE_SIZE);
 				}

From 0061f781a0fe83adfe76f78f89653cbd41249a93 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 25 Jan 2016 11:45:06 +0000
Subject: [PATCH 286/424] arm64: mm: add __{pud,pgd}_populate

We currently have __pmd_populate for creating a pmd table entry given
the physical address of a pte, but don't have equivalents for the pud or
pgd levels of table.

To enable us to manipulate tables which are mapped outside of the linear
mapping (where we have a PA, but not a linear map VA), it is useful to
have these functions.

This patch adds __{pud,pgd}_populate. As these should not be called when
the kernel uses folded {pmd,pud}s, in these cases they expand to
BUILD_BUG(). So long as the appropriate checks are made on the {pud,pgd}
entry prior to attempting population, these should be optimized out at
compile time.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Jeremy Linton <jeremy.linton@arm.com>
Cc: Laura Abbott <labbott@fedoraproject.org>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 1e531cce68c92b46c7d29f36a72f9a3e5886678f)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/pgalloc.h | 26 ++++++++++++++++++++++----
 1 file changed, 22 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index c15053902942..ff98585d085a 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -42,11 +42,20 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 	free_page((unsigned long)pmd);
 }
 
-static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+static inline void __pud_populate(pud_t *pud, phys_addr_t pmd, pudval_t prot)
 {
-	set_pud(pud, __pud(__pa(pmd) | PMD_TYPE_TABLE));
+	set_pud(pud, __pud(pmd | prot));
 }
 
+static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+{
+	__pud_populate(pud, __pa(pmd), PMD_TYPE_TABLE);
+}
+#else
+static inline void __pud_populate(pud_t *pud, phys_addr_t pmd, pudval_t prot)
+{
+	BUILD_BUG();
+}
 #endif	/* CONFIG_PGTABLE_LEVELS > 2 */
 
 #if CONFIG_PGTABLE_LEVELS > 3
@@ -62,11 +71,20 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud)
 	free_page((unsigned long)pud);
 }
 
-static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
+static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pud, pgdval_t prot)
 {
-	set_pgd(pgd, __pgd(__pa(pud) | PUD_TYPE_TABLE));
+	set_pgd(pgdp, __pgd(pud | prot));
 }
 
+static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
+{
+	__pgd_populate(pgd, __pa(pud), PUD_TYPE_TABLE);
+}
+#else
+static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pud, pgdval_t prot)
+{
+	BUILD_BUG();
+}
 #endif	/* CONFIG_PGTABLE_LEVELS > 3 */
 
 extern pgd_t *pgd_alloc(struct mm_struct *mm);

From 9568281b80e91974c04f6aaac50ecfd4dcf31df1 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 25 Jan 2016 11:45:07 +0000
Subject: [PATCH 287/424] arm64: mm: add functions to walk tables in fixmap

As a preparatory step to allow us to allocate early page tables from
unmapped memory using memblock_alloc, add new p??_{set,clear}_fixmap*
functions which can be used to walk page tables outside of the linear
mapping by using fixmap slots.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Jeremy Linton <jeremy.linton@arm.com>
Cc: Laura Abbott <labbott@fedoraproject.org>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 961faac114819a01e627fe9c9c82b830bb3849d4)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/fixmap.h  | 10 ++++++++++
 arch/arm64/include/asm/pgtable.h | 26 ++++++++++++++++++++++++++
 2 files changed, 36 insertions(+)

diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h
index 309704544d22..1a617d46fce9 100644
--- a/arch/arm64/include/asm/fixmap.h
+++ b/arch/arm64/include/asm/fixmap.h
@@ -62,6 +62,16 @@ enum fixed_addresses {
 
 	FIX_BTMAP_END = __end_of_permanent_fixed_addresses,
 	FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1,
+
+	/*
+	 * Used for kernel page table creation, so unmapped memory may be used
+	 * for tables.
+	 */
+	FIX_PTE,
+	FIX_PMD,
+	FIX_PUD,
+	FIX_PGD,
+
 	__end_of_fixed_addresses
 };
 
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index db608e7984c6..c99dfc588deb 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -59,6 +59,7 @@
 
 #ifndef __ASSEMBLY__
 
+#include <asm/fixmap.h>
 #include <linux/mmdebug.h>
 
 extern void __pte_error(const char *file, int line, unsigned long val);
@@ -448,6 +449,10 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd)
 #define pte_unmap(pte)			do { } while (0)
 #define pte_unmap_nested(pte)		do { } while (0)
 
+#define pte_set_fixmap(addr)		((pte_t *)set_fixmap_offset(FIX_PTE, addr))
+#define pte_set_fixmap_offset(pmd, addr)	pte_set_fixmap(pte_offset_phys(pmd, addr))
+#define pte_clear_fixmap()		clear_fixmap(FIX_PTE)
+
 #define pmd_page(pmd)		pfn_to_page(__phys_to_pfn(pmd_val(pmd) & PHYS_MASK))
 
 /*
@@ -487,12 +492,21 @@ static inline phys_addr_t pud_page_paddr(pud_t pud)
 #define pmd_offset_phys(dir, addr)	(pud_page_paddr(*(dir)) + pmd_index(addr) * sizeof(pmd_t))
 #define pmd_offset(dir, addr)		((pmd_t *)__va(pmd_offset_phys((dir), (addr))))
 
+#define pmd_set_fixmap(addr)		((pmd_t *)set_fixmap_offset(FIX_PMD, addr))
+#define pmd_set_fixmap_offset(pud, addr)	pmd_set_fixmap(pmd_offset_phys(pud, addr))
+#define pmd_clear_fixmap()		clear_fixmap(FIX_PMD)
+
 #define pud_page(pud)		pfn_to_page(__phys_to_pfn(pud_val(pud) & PHYS_MASK))
 
 #else
 
 #define pud_page_paddr(pud)	({ BUILD_BUG(); 0; })
 
+/* Match pmd_offset folding in <asm/generic/pgtable-nopmd.h> */
+#define pmd_set_fixmap(addr)		NULL
+#define pmd_set_fixmap_offset(pudp, addr)	((pmd_t *)pudp)
+#define pmd_clear_fixmap()
+
 #endif	/* CONFIG_PGTABLE_LEVELS > 2 */
 
 #if CONFIG_PGTABLE_LEVELS > 3
@@ -525,12 +539,21 @@ static inline phys_addr_t pgd_page_paddr(pgd_t pgd)
 #define pud_offset_phys(dir, addr)	(pgd_page_paddr(*(dir)) + pud_index(addr) * sizeof(pud_t))
 #define pud_offset(dir, addr)		((pud_t *)__va(pud_offset_phys((dir), (addr))))
 
+#define pud_set_fixmap(addr)		((pud_t *)set_fixmap_offset(FIX_PUD, addr))
+#define pud_set_fixmap_offset(pgd, addr)	pud_set_fixmap(pud_offset_phys(pgd, addr))
+#define pud_clear_fixmap()		clear_fixmap(FIX_PUD)
+
 #define pgd_page(pgd)		pfn_to_page(__phys_to_pfn(pgd_val(pgd) & PHYS_MASK))
 
 #else
 
 #define pgd_page_paddr(pgd)	({ BUILD_BUG(); 0;})
 
+/* Match pud_offset folding in <asm/generic/pgtable-nopud.h> */
+#define pud_set_fixmap(addr)		NULL
+#define pud_set_fixmap_offset(pgdp, addr)	((pud_t *)pgdp)
+#define pud_clear_fixmap()
+
 #endif  /* CONFIG_PGTABLE_LEVELS > 3 */
 
 #define pgd_ERROR(pgd)		__pgd_error(__FILE__, __LINE__, pgd_val(pgd))
@@ -545,6 +568,9 @@ static inline phys_addr_t pgd_page_paddr(pgd_t pgd)
 /* to find an entry in a kernel page-table-directory */
 #define pgd_offset_k(addr)	pgd_offset(&init_mm, addr)
 
+#define pgd_set_fixmap(addr)	((pgd_t *)set_fixmap_offset(FIX_PGD, addr))
+#define pgd_clear_fixmap()	clear_fixmap(FIX_PGD)
+
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
 	const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY |

From c790554bd459b56042e784162b5017c41120538e Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 25 Jan 2016 11:45:08 +0000
Subject: [PATCH 288/424] arm64: mm: use fixmap when creating page tables

As a preparatory step to allow us to allocate early page tables from
unmapped memory using memblock_alloc, modify the __create_mapping
callees to map and unmap the tables they modify using fixmap entries.

All but the top-level pgd initialisation is performed via the fixmap.
Subsequent patches will inject the pgd physical address, and migrate to
using the FIX_PGD slot.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Jeremy Linton <jeremy.linton@arm.com>
Cc: Laura Abbott <labbott@fedoraproject.org>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit f4710445458c0a1bd1c3c014ada2e7d7dc7b882f)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/mm/mmu.c | 61 ++++++++++++++++++++++++++++++---------------
 1 file changed, 41 insertions(+), 20 deletions(-)

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 0b6c8727bcd1..4f5a5fa3f8f4 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -63,19 +63,30 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 }
 EXPORT_SYMBOL(phys_mem_access_prot);
 
-static void __init *early_pgtable_alloc(void)
+static phys_addr_t __init early_pgtable_alloc(void)
 {
 	phys_addr_t phys;
 	void *ptr;
 
 	phys = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
 	BUG_ON(!phys);
-	ptr = __va(phys);
+
+	/*
+	 * The FIX_{PGD,PUD,PMD} slots may be in active use, but the FIX_PTE
+	 * slot will be free, so we can (ab)use the FIX_PTE slot to initialise
+	 * any level of table.
+	 */
+	ptr = pte_set_fixmap(phys);
+
 	memset(ptr, 0, PAGE_SIZE);
 
-	/* Ensure the zeroed page is visible to the page table walker */
-	dsb(ishst);
-	return ptr;
+	/*
+	 * Implicit barriers also ensure the zeroed page is visible to the page
+	 * table walker
+	 */
+	pte_clear_fixmap();
+
+	return phys;
 }
 
 /*
@@ -99,24 +110,28 @@ static void split_pmd(pmd_t *pmd, pte_t *pte)
 static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
 				  unsigned long end, unsigned long pfn,
 				  pgprot_t prot,
-				  void *(*pgtable_alloc)(void))
+				  phys_addr_t (*pgtable_alloc)(void))
 {
 	pte_t *pte;
 
 	if (pmd_none(*pmd) || pmd_sect(*pmd)) {
-		pte = pgtable_alloc();
+		phys_addr_t pte_phys = pgtable_alloc();
+		pte = pte_set_fixmap(pte_phys);
 		if (pmd_sect(*pmd))
 			split_pmd(pmd, pte);
-		__pmd_populate(pmd, __pa(pte), PMD_TYPE_TABLE);
+		__pmd_populate(pmd, pte_phys, PMD_TYPE_TABLE);
 		flush_tlb_all();
+		pte_clear_fixmap();
 	}
 	BUG_ON(pmd_bad(*pmd));
 
-	pte = pte_offset_kernel(pmd, addr);
+	pte = pte_set_fixmap_offset(pmd, addr);
 	do {
 		set_pte(pte, pfn_pte(pfn, prot));
 		pfn++;
 	} while (pte++, addr += PAGE_SIZE, addr != end);
+
+	pte_clear_fixmap();
 }
 
 static void split_pud(pud_t *old_pud, pmd_t *pmd)
@@ -134,7 +149,7 @@ static void split_pud(pud_t *old_pud, pmd_t *pmd)
 static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
 				  unsigned long addr, unsigned long end,
 				  phys_addr_t phys, pgprot_t prot,
-				  void *(*pgtable_alloc)(void))
+				  phys_addr_t (*pgtable_alloc)(void))
 {
 	pmd_t *pmd;
 	unsigned long next;
@@ -143,7 +158,8 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
 	 * Check for initial section mappings in the pgd/pud and remove them.
 	 */
 	if (pud_none(*pud) || pud_sect(*pud)) {
-		pmd = pgtable_alloc();
+		phys_addr_t pmd_phys = pgtable_alloc();
+		pmd = pmd_set_fixmap(pmd_phys);
 		if (pud_sect(*pud)) {
 			/*
 			 * need to have the 1G of mappings continue to be
@@ -151,12 +167,13 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
 			 */
 			split_pud(pud, pmd);
 		}
-		pud_populate(mm, pud, pmd);
+		__pud_populate(pud, pmd_phys, PUD_TYPE_TABLE);
 		flush_tlb_all();
+		pmd_clear_fixmap();
 	}
 	BUG_ON(pud_bad(*pud));
 
-	pmd = pmd_offset(pud, addr);
+	pmd = pmd_set_fixmap_offset(pud, addr);
 	do {
 		next = pmd_addr_end(addr, end);
 		/* try section mapping first */
@@ -182,6 +199,8 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
 		}
 		phys += next - addr;
 	} while (pmd++, addr = next, addr != end);
+
+	pmd_clear_fixmap();
 }
 
 static inline bool use_1G_block(unsigned long addr, unsigned long next,
@@ -199,18 +218,18 @@ static inline bool use_1G_block(unsigned long addr, unsigned long next,
 static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd,
 				  unsigned long addr, unsigned long end,
 				  phys_addr_t phys, pgprot_t prot,
-				  void *(*pgtable_alloc)(void))
+				  phys_addr_t (*pgtable_alloc)(void))
 {
 	pud_t *pud;
 	unsigned long next;
 
 	if (pgd_none(*pgd)) {
-		pud = pgtable_alloc();
-		pgd_populate(mm, pgd, pud);
+		phys_addr_t pud_phys = pgtable_alloc();
+		__pgd_populate(pgd, pud_phys, PUD_TYPE_TABLE);
 	}
 	BUG_ON(pgd_bad(*pgd));
 
-	pud = pud_offset(pgd, addr);
+	pud = pud_set_fixmap_offset(pgd, addr);
 	do {
 		next = pud_addr_end(addr, end);
 
@@ -243,6 +262,8 @@ static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd,
 		}
 		phys += next - addr;
 	} while (pud++, addr = next, addr != end);
+
+	pud_clear_fixmap();
 }
 
 /*
@@ -252,7 +273,7 @@ static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd,
 static void  __create_mapping(struct mm_struct *mm, pgd_t *pgd,
 				    phys_addr_t phys, unsigned long virt,
 				    phys_addr_t size, pgprot_t prot,
-				    void *(*pgtable_alloc)(void))
+				    phys_addr_t (*pgtable_alloc)(void))
 {
 	unsigned long addr, length, end, next;
 
@@ -275,14 +296,14 @@ static void  __create_mapping(struct mm_struct *mm, pgd_t *pgd,
 	} while (pgd++, addr = next, addr != end);
 }
 
-static void *late_pgtable_alloc(void)
+static phys_addr_t late_pgtable_alloc(void)
 {
 	void *ptr = (void *)__get_free_page(PGALLOC_GFP);
 	BUG_ON(!ptr);
 
 	/* Ensure the zeroed page is visible to the page table walker */
 	dsb(ishst);
-	return ptr;
+	return __pa(ptr);
 }
 
 static void __init create_mapping(phys_addr_t phys, unsigned long virt,

From 8f64994ff3068b62b867188a12e366ee237da3b5 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 25 Jan 2016 11:45:09 +0000
Subject: [PATCH 289/424] arm64: mm: allocate pagetables anywhere

Now that create_mapping uses fixmap slots to modify pte, pmd, and pud
entries, we can access page tables anywhere in physical memory,
regardless of the extent of the linear mapping.

Given that, we no longer need to limit memblock allocations during page
table creation, and can leave the limit as its default
MEMBLOCK_ALLOC_ANYWHERE.

We never add memory which will fall outside of the linear map range
given phys_offset and MAX_MEMBLOCK_ADDR are configured appropriately, so
any tables we create will fall in the linear map of the final tables.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Jeremy Linton <jeremy.linton@arm.com>
Cc: Laura Abbott <labbott@fedoraproject.org>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit cdef5f6e9e0e5ee397759b664a9f875ff59ccf01)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/mm/mmu.c | 35 -----------------------------------
 1 file changed, 35 deletions(-)

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 4f5a5fa3f8f4..d50535a07c6e 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -384,20 +384,6 @@ static void __init __map_memblock(phys_addr_t start, phys_addr_t end)
 static void __init map_mem(void)
 {
 	struct memblock_region *reg;
-	phys_addr_t limit;
-
-	/*
-	 * Temporarily limit the memblock range. We need to do this as
-	 * create_mapping requires puds, pmds and ptes to be allocated from
-	 * memory addressable from the initial direct kernel mapping.
-	 *
-	 * The initial direct kernel mapping, located at swapper_pg_dir, gives
-	 * us PUD_SIZE (with SECTION maps) or PMD_SIZE (without SECTION maps,
-	 * memory starting from PHYS_OFFSET (which must be aligned to 2MB as
-	 * per Documentation/arm64/booting.txt).
-	 */
-	limit = PHYS_OFFSET + SWAPPER_INIT_MAP_SIZE;
-	memblock_set_current_limit(limit);
 
 	/* map all the memory banks */
 	for_each_memblock(memory, reg) {
@@ -407,29 +393,8 @@ static void __init map_mem(void)
 		if (start >= end)
 			break;
 
-		if (ARM64_SWAPPER_USES_SECTION_MAPS) {
-			/*
-			 * For the first memory bank align the start address and
-			 * current memblock limit to prevent create_mapping() from
-			 * allocating pte page tables from unmapped memory. With
-			 * the section maps, if the first block doesn't end on section
-			 * size boundary, create_mapping() will try to allocate a pte
-			 * page, which may be returned from an unmapped area.
-			 * When section maps are not used, the pte page table for the
-			 * current limit is already present in swapper_pg_dir.
-			 */
-			if (start < limit)
-				start = ALIGN(start, SECTION_SIZE);
-			if (end < limit) {
-				limit = end & SECTION_MASK;
-				memblock_set_current_limit(limit);
-			}
-		}
 		__map_memblock(start, end);
 	}
-
-	/* Limit no longer required. */
-	memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
 }
 
 static void __init fixup_executable(void)

From 55ce0af58717f7651d5a393f687f2ea9a109e4f5 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 25 Jan 2016 11:45:10 +0000
Subject: [PATCH 290/424] arm64: mm: allow passing a pgdir to alloc_init_*

To allow us to initialise pgdirs which are fixmapped, allow explicitly
passing a pgdir rather than an mm. A new __create_pgd_mapping function
is added for this, with existing __create_mapping callers migrated to
this.

The mm argument was previously only used at the top level. Now that it
is redundant at all levels, it is removed. To indicate its new found
similarity to alloc_init_{pud,pmd,pte}, __create_mapping is renamed to
init_pgd.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Jeremy Linton <jeremy.linton@arm.com>
Cc: Laura Abbott <labbott@fedoraproject.org>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 11509a306bb6ea595878b2d246d2d56b1783e040)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/mm/mmu.c | 33 +++++++++++++++++++--------------
 1 file changed, 19 insertions(+), 14 deletions(-)

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index d50535a07c6e..570ba3e3d362 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -146,8 +146,7 @@ static void split_pud(pud_t *old_pud, pmd_t *pmd)
 	} while (pmd++, i++, i < PTRS_PER_PMD);
 }
 
-static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud,
-				  unsigned long addr, unsigned long end,
+static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end,
 				  phys_addr_t phys, pgprot_t prot,
 				  phys_addr_t (*pgtable_alloc)(void))
 {
@@ -215,8 +214,7 @@ static inline bool use_1G_block(unsigned long addr, unsigned long next,
 	return true;
 }
 
-static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd,
-				  unsigned long addr, unsigned long end,
+static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
 				  phys_addr_t phys, pgprot_t prot,
 				  phys_addr_t (*pgtable_alloc)(void))
 {
@@ -257,7 +255,7 @@ static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd,
 				}
 			}
 		} else {
-			alloc_init_pmd(mm, pud, addr, next, phys, prot,
+			alloc_init_pmd(pud, addr, next, phys, prot,
 				       pgtable_alloc);
 		}
 		phys += next - addr;
@@ -270,8 +268,7 @@ static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd,
  * Create the page directory entries and any necessary page tables for the
  * mapping specified by 'md'.
  */
-static void  __create_mapping(struct mm_struct *mm, pgd_t *pgd,
-				    phys_addr_t phys, unsigned long virt,
+static void init_pgd(pgd_t *pgd, phys_addr_t phys, unsigned long virt,
 				    phys_addr_t size, pgprot_t prot,
 				    phys_addr_t (*pgtable_alloc)(void))
 {
@@ -291,7 +288,7 @@ static void  __create_mapping(struct mm_struct *mm, pgd_t *pgd,
 	end = addr + length;
 	do {
 		next = pgd_addr_end(addr, end);
-		alloc_init_pud(mm, pgd, addr, next, phys, prot, pgtable_alloc);
+		alloc_init_pud(pgd, addr, next, phys, prot, pgtable_alloc);
 		phys += next - addr;
 	} while (pgd++, addr = next, addr != end);
 }
@@ -306,6 +303,14 @@ static phys_addr_t late_pgtable_alloc(void)
 	return __pa(ptr);
 }
 
+static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
+				 unsigned long virt, phys_addr_t size,
+				 pgprot_t prot,
+				 phys_addr_t (*alloc)(void))
+{
+	init_pgd(pgd_offset_raw(pgdir, virt), phys, virt, size, prot, alloc);
+}
+
 static void __init create_mapping(phys_addr_t phys, unsigned long virt,
 				  phys_addr_t size, pgprot_t prot)
 {
@@ -314,16 +319,16 @@ static void __init create_mapping(phys_addr_t phys, unsigned long virt,
 			&phys, virt);
 		return;
 	}
-	__create_mapping(&init_mm, pgd_offset_k(virt), phys, virt,
-			 size, prot, early_pgtable_alloc);
+	__create_pgd_mapping(init_mm.pgd, phys, virt, size, prot,
+			     early_pgtable_alloc);
 }
 
 void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
 			       unsigned long virt, phys_addr_t size,
 			       pgprot_t prot)
 {
-	__create_mapping(mm, pgd_offset(mm, virt), phys, virt, size, prot,
-				late_pgtable_alloc);
+	__create_pgd_mapping(mm->pgd, phys, virt, size, prot,
+			     late_pgtable_alloc);
 }
 
 static void create_mapping_late(phys_addr_t phys, unsigned long virt,
@@ -335,8 +340,8 @@ static void create_mapping_late(phys_addr_t phys, unsigned long virt,
 		return;
 	}
 
-	return __create_mapping(&init_mm, pgd_offset_k(virt),
-				phys, virt, size, prot, late_pgtable_alloc);
+	__create_pgd_mapping(init_mm.pgd, phys, virt, size, prot,
+			     late_pgtable_alloc);
 }
 
 #ifdef CONFIG_DEBUG_RODATA

From 0060e7a78b1a3b208d178b285fb3912f4fd4d9ee Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 25 Jan 2016 11:45:11 +0000
Subject: [PATCH 291/424] arm64: ensure _stext and _etext are page-aligned

Currently we have separate ALIGN_DEBUG_RO{,_MIN} directives to align
_etext and __init_begin. While we ensure that __init_begin is
page-aligned, we do not provide the same guarantee for _etext. This is
not problematic currently as the alignment of __init_begin is sufficient
to prevent issues when we modify permissions.

Subsequent patches will assume page alignment of segments of the kernel
we wish to map with different permissions. To ensure this, move _etext
after the ALIGN_DEBUG_RO_MIN for the init section. This renders the
prior ALIGN_DEBUG_RO irrelevant, and hence it is removed. Likewise,
upgrade to ALIGN_DEBUG_RO_MIN(PAGE_SIZE) for _stext.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Jeremy Linton <jeremy.linton@arm.com>
Cc: Laura Abbott <labbott@fedoraproject.org>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit fca082bfb543ccaaff864fc0892379ccaa1711cd)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/kernel/vmlinux.lds.S | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index e3928f578891..b78a3c772294 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -95,7 +95,7 @@ SECTIONS
 		_text = .;
 		HEAD_TEXT
 	}
-	ALIGN_DEBUG_RO
+	ALIGN_DEBUG_RO_MIN(PAGE_SIZE)
 	.text : {			/* Real text segment		*/
 		_stext = .;		/* Text and read-only data	*/
 			__exception_text_start = .;
@@ -116,10 +116,9 @@ SECTIONS
 	RO_DATA(PAGE_SIZE)
 	EXCEPTION_TABLE(8)
 	NOTES
-	ALIGN_DEBUG_RO
-	_etext = .;			/* End of text and rodata section */
 
 	ALIGN_DEBUG_RO_MIN(PAGE_SIZE)
+	_etext = .;			/* End of text and rodata section */
 	__init_begin = .;
 
 	INIT_TEXT_SECTION(8)

From a8a81d65140e0bdfe27e53ae3529546b5dce6bd4 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 25 Jan 2016 11:45:12 +0000
Subject: [PATCH 292/424] arm64: mm: create new fine-grained mappings at boot

At boot we may change the granularity of the tables mapping the kernel
(by splitting or making sections). This may happen when we create the
linear mapping (in __map_memblock), or at any point we try to apply
fine-grained permissions to the kernel (e.g. fixup_executable,
mark_rodata_ro, fixup_init).

Changing the active page tables in this manner may result in multiple
entries for the same address being allocated into TLBs, risking problems
such as TLB conflict aborts or issues derived from the amalgamation of
TLB entries. Generally, a break-before-make (BBM) approach is necessary
to avoid conflicts, but we cannot do this for the kernel tables as it
risks unmapping text or data being used to do so.

Instead, we can create a new set of tables from scratch in the safety of
the existing mappings, and subsequently migrate over to these using the
new cpu_replace_ttbr1 helper, which avoids the two sets of tables being
active simultaneously.

To avoid issues when we later modify permissions of the page tables
(e.g. in fixup_init), we must create the page tables at a granularity
such that later modification does not result in splitting of tables.

This patch applies this strategy, creating a new set of fine-grained
page tables from scratch, and safely migrating to them. The existing
fixmap and kasan shadow page tables are reused in the new fine-grained
tables.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Tested-by: Jeremy Linton <jeremy.linton@arm.com>
Cc: Laura Abbott <labbott@fedoraproject.org>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 068a17a5805dfbca4bbf03e664ca6b19709cc7a8)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/kasan.h |   3 +
 arch/arm64/mm/kasan_init.c     |  15 ++++
 arch/arm64/mm/mmu.c            | 155 +++++++++++++++++++--------------
 3 files changed, 110 insertions(+), 63 deletions(-)

diff --git a/arch/arm64/include/asm/kasan.h b/arch/arm64/include/asm/kasan.h
index 2774fa384c47..de0d21211c34 100644
--- a/arch/arm64/include/asm/kasan.h
+++ b/arch/arm64/include/asm/kasan.h
@@ -7,6 +7,7 @@
 
 #include <linux/linkage.h>
 #include <asm/memory.h>
+#include <asm/pgtable-types.h>
 
 /*
  * KASAN_SHADOW_START: beginning of the kernel virtual addresses.
@@ -28,10 +29,12 @@
 #define KASAN_SHADOW_OFFSET     (KASAN_SHADOW_END - (1ULL << (64 - 3)))
 
 void kasan_init(void);
+void kasan_copy_shadow(pgd_t *pgdir);
 asmlinkage void kasan_early_init(void);
 
 #else
 static inline void kasan_init(void) { }
+static inline void kasan_copy_shadow(pgd_t *pgdir) { }
 #endif
 
 #endif
diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index 263b59020500..cc569a38bc76 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -97,6 +97,21 @@ asmlinkage void __init kasan_early_init(void)
 	kasan_map_early_shadow();
 }
 
+/*
+ * Copy the current shadow region into a new pgdir.
+ */
+void __init kasan_copy_shadow(pgd_t *pgdir)
+{
+	pgd_t *pgd, *pgd_new, *pgd_end;
+
+	pgd = pgd_offset_k(KASAN_SHADOW_START);
+	pgd_end = pgd_offset_k(KASAN_SHADOW_END);
+	pgd_new = pgd_offset_raw(pgdir, KASAN_SHADOW_START);
+	do {
+		set_pgd(pgd_new, *pgd);
+	} while (pgd++, pgd_new++, pgd != pgd_end);
+}
+
 static void __init clear_pgds(unsigned long start,
 			unsigned long end)
 {
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 570ba3e3d362..4874d2fea1c9 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -33,6 +33,7 @@
 #include <asm/barrier.h>
 #include <asm/cputype.h>
 #include <asm/fixmap.h>
+#include <asm/kasan.h>
 #include <asm/kernel-pgtable.h>
 #include <asm/sections.h>
 #include <asm/setup.h>
@@ -344,49 +345,42 @@ static void create_mapping_late(phys_addr_t phys, unsigned long virt,
 			     late_pgtable_alloc);
 }
 
-#ifdef CONFIG_DEBUG_RODATA
-static void __init __map_memblock(phys_addr_t start, phys_addr_t end)
+static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end)
 {
-	/*
-	 * Set up the executable regions using the existing section mappings
-	 * for now. This will get more fine grained later once all memory
-	 * is mapped
-	 */
-	unsigned long kernel_x_start = round_down(__pa(_stext), SWAPPER_BLOCK_SIZE);
-	unsigned long kernel_x_end = round_up(__pa(__init_end), SWAPPER_BLOCK_SIZE);
 
-	if (end < kernel_x_start) {
-		create_mapping(start, __phys_to_virt(start),
-			end - start, PAGE_KERNEL);
-	} else if (start >= kernel_x_end) {
-		create_mapping(start, __phys_to_virt(start),
-			end - start, PAGE_KERNEL);
-	} else {
-		if (start < kernel_x_start)
-			create_mapping(start, __phys_to_virt(start),
-				kernel_x_start - start,
-				PAGE_KERNEL);
-		create_mapping(kernel_x_start,
-				__phys_to_virt(kernel_x_start),
-				kernel_x_end - kernel_x_start,
-				PAGE_KERNEL_EXEC);
-		if (kernel_x_end < end)
-			create_mapping(kernel_x_end,
-				__phys_to_virt(kernel_x_end),
-				end - kernel_x_end,
-				PAGE_KERNEL);
+	unsigned long kernel_start = __pa(_stext);
+	unsigned long kernel_end = __pa(_end);
+
+	/*
+	 * The kernel itself is mapped at page granularity. Map all other
+	 * memory, making sure we don't overwrite the existing kernel mappings.
+	 */
+
+	/* No overlap with the kernel. */
+	if (end < kernel_start || start >= kernel_end) {
+		__create_pgd_mapping(pgd, start, __phys_to_virt(start),
+				     end - start, PAGE_KERNEL,
+				     early_pgtable_alloc);
+		return;
 	}
 
+	/*
+	 * This block overlaps the kernel mapping. Map the portion(s) which
+	 * don't overlap.
+	 */
+	if (start < kernel_start)
+		__create_pgd_mapping(pgd, start,
+				     __phys_to_virt(start),
+				     kernel_start - start, PAGE_KERNEL,
+				     early_pgtable_alloc);
+	if (kernel_end < end)
+		__create_pgd_mapping(pgd, kernel_end,
+				     __phys_to_virt(kernel_end),
+				     end - kernel_end, PAGE_KERNEL,
+				     early_pgtable_alloc);
 }
-#else
-static void __init __map_memblock(phys_addr_t start, phys_addr_t end)
-{
-	create_mapping(start, __phys_to_virt(start), end - start,
-			PAGE_KERNEL_EXEC);
-}
-#endif
 
-static void __init map_mem(void)
+static void __init map_mem(pgd_t *pgd)
 {
 	struct memblock_region *reg;
 
@@ -398,33 +392,10 @@ static void __init map_mem(void)
 		if (start >= end)
 			break;
 
-		__map_memblock(start, end);
+		__map_memblock(pgd, start, end);
 	}
 }
 
-static void __init fixup_executable(void)
-{
-#ifdef CONFIG_DEBUG_RODATA
-	/* now that we are actually fully mapped, make the start/end more fine grained */
-	if (!IS_ALIGNED((unsigned long)_stext, SWAPPER_BLOCK_SIZE)) {
-		unsigned long aligned_start = round_down(__pa(_stext),
-							 SWAPPER_BLOCK_SIZE);
-
-		create_mapping(aligned_start, __phys_to_virt(aligned_start),
-				__pa(_stext) - aligned_start,
-				PAGE_KERNEL);
-	}
-
-	if (!IS_ALIGNED((unsigned long)__init_end, SWAPPER_BLOCK_SIZE)) {
-		unsigned long aligned_end = round_up(__pa(__init_end),
-							  SWAPPER_BLOCK_SIZE);
-		create_mapping(__pa(__init_end), (unsigned long)__init_end,
-				aligned_end - __pa(__init_end),
-				PAGE_KERNEL);
-	}
-#endif
-}
-
 #ifdef CONFIG_DEBUG_RODATA
 void mark_rodata_ro(void)
 {
@@ -442,14 +413,72 @@ void fixup_init(void)
 			PAGE_KERNEL);
 }
 
+static void __init map_kernel_chunk(pgd_t *pgd, void *va_start, void *va_end,
+				    pgprot_t prot)
+{
+	phys_addr_t pa_start = __pa(va_start);
+	unsigned long size = va_end - va_start;
+
+	BUG_ON(!PAGE_ALIGNED(pa_start));
+	BUG_ON(!PAGE_ALIGNED(size));
+
+	__create_pgd_mapping(pgd, pa_start, (unsigned long)va_start, size, prot,
+			     early_pgtable_alloc);
+}
+
+/*
+ * Create fine-grained mappings for the kernel.
+ */
+static void __init map_kernel(pgd_t *pgd)
+{
+
+	map_kernel_chunk(pgd, _stext, _etext, PAGE_KERNEL_EXEC);
+	map_kernel_chunk(pgd, __init_begin, __init_end, PAGE_KERNEL_EXEC);
+	map_kernel_chunk(pgd, _data, _end, PAGE_KERNEL);
+
+	/*
+	 * The fixmap falls in a separate pgd to the kernel, and doesn't live
+	 * in the carveout for the swapper_pg_dir. We can simply re-use the
+	 * existing dir for the fixmap.
+	 */
+	set_pgd(pgd_offset_raw(pgd, FIXADDR_START), *pgd_offset_k(FIXADDR_START));
+
+	kasan_copy_shadow(pgd);
+}
+
 /*
  * paging_init() sets up the page tables, initialises the zone memory
  * maps and sets up the zero page.
  */
 void __init paging_init(void)
 {
-	map_mem();
-	fixup_executable();
+	phys_addr_t pgd_phys = early_pgtable_alloc();
+	pgd_t *pgd = pgd_set_fixmap(pgd_phys);
+
+	map_kernel(pgd);
+	map_mem(pgd);
+
+	/*
+	 * We want to reuse the original swapper_pg_dir so we don't have to
+	 * communicate the new address to non-coherent secondaries in
+	 * secondary_entry, and so cpu_switch_mm can generate the address with
+	 * adrp+add rather than a load from some global variable.
+	 *
+	 * To do this we need to go via a temporary pgd.
+	 */
+	cpu_replace_ttbr1(__va(pgd_phys));
+	memcpy(swapper_pg_dir, pgd, PAGE_SIZE);
+	cpu_replace_ttbr1(swapper_pg_dir);
+
+	pgd_clear_fixmap();
+	memblock_free(pgd_phys, PAGE_SIZE);
+
+	/*
+	 * We only reuse the PGD from the swapper_pg_dir, not the pud + pmd
+	 * allocated with it.
+	 */
+	memblock_free(__pa(swapper_pg_dir) + PAGE_SIZE,
+		      SWAPPER_DIR_SIZE - PAGE_SIZE);
 
 	bootmem_init();
 }

From f00cf2ba83ca2b1ade50b27dd60d6f4294ddeef7 Mon Sep 17 00:00:00 2001
From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Date: Tue, 26 Jan 2016 11:10:38 +0000
Subject: [PATCH 293/424] arm64: kernel: implement ACPI parking protocol

The SBBR and ACPI specifications allow ACPI based systems that do not
implement PSCI (eg systems with no EL3) to boot through the ACPI parking
protocol specification[1].

This patch implements the ACPI parking protocol CPU operations, and adds
code that eases parsing the parking protocol data structures to the
ARM64 SMP initializion carried out at the same time as cpus enumeration.

To wake-up the CPUs from the parked state, this patch implements a
wakeup IPI for ARM64 (ie arch_send_wakeup_ipi_mask()) that mirrors the
ARM one, so that a specific IPI is sent for wake-up purpose in order
to distinguish it from other IPI sources.

Given the current ACPI MADT parsing API, the patch implements a glue
layer that helps passing MADT GICC data structure from SMP initialization
code to the parking protocol implementation somewhat overriding the CPU
operations interfaces. This to avoid creating a completely trasparent
DT/ACPI CPU operations layer that would require creating opaque
structure handling for CPUs data (DT represents CPU through DT nodes, ACPI
through static MADT table entries), which seems overkill given that ACPI
on ARM64 mandates only two booting protocols (PSCI and parking protocol),
so there is no need for further protocol additions.

Based on the original work by Mark Salter <msalter@redhat.com>

[1] https://acpica.org/sites/acpica/files/MP%20Startup%20for%20ARM%20platforms.docx

Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Tested-by: Loc Ho <lho@apm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Hanjun Guo <hanjun.guo@linaro.org>
Cc: Sudeep Holla <sudeep.holla@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mark Salter <msalter@redhat.com>
Cc: Al Stone <ahs3@redhat.com>
[catalin.marinas@arm.com: Added WARN_ONCE(!acpi_parking_protocol_valid() on the IPI]
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

(cherry picked from commit 5e89c55e4ed81d7abb1ce8828db35fa389dc0e90)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/Kconfig                        |   9 ++
 arch/arm64/include/asm/acpi.h             |  19 ++-
 arch/arm64/include/asm/hardirq.h          |   2 +-
 arch/arm64/include/asm/smp.h              |   9 ++
 arch/arm64/kernel/Makefile                |   1 +
 arch/arm64/kernel/acpi_parking_protocol.c | 153 ++++++++++++++++++++++
 arch/arm64/kernel/cpu_ops.c               |  27 +++-
 arch/arm64/kernel/smp.c                   |  28 ++++
 8 files changed, 242 insertions(+), 6 deletions(-)
 create mode 100644 arch/arm64/kernel/acpi_parking_protocol.c

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index ffa3c549a4ba..4a1b665d90dc 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -706,6 +706,15 @@ endmenu
 
 menu "Boot options"
 
+config ARM64_ACPI_PARKING_PROTOCOL
+	bool "Enable support for the ARM64 ACPI parking protocol"
+	depends on ACPI
+	help
+	  Enable support for the ARM64 ACPI parking protocol. If disabled
+	  the kernel will not allow booting through the ARM64 ACPI parking
+	  protocol even if the corresponding data is present in the ACPI
+	  MADT table.
+
 config CMDLINE
 	string "Default kernel command string"
 	default ""
diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h
index caafd63b8092..aee323b13802 100644
--- a/arch/arm64/include/asm/acpi.h
+++ b/arch/arm64/include/asm/acpi.h
@@ -87,9 +87,26 @@ void __init acpi_init_cpus(void);
 static inline void acpi_init_cpus(void) { }
 #endif /* CONFIG_ACPI */
 
+#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
+bool acpi_parking_protocol_valid(int cpu);
+void __init
+acpi_set_mailbox_entry(int cpu, struct acpi_madt_generic_interrupt *processor);
+#else
+static inline bool acpi_parking_protocol_valid(int cpu) { return false; }
+static inline void
+acpi_set_mailbox_entry(int cpu, struct acpi_madt_generic_interrupt *processor)
+{}
+#endif
+
 static inline const char *acpi_get_enable_method(int cpu)
 {
-	return acpi_psci_present() ? "psci" : NULL;
+	if (acpi_psci_present())
+		return "psci";
+
+	if (acpi_parking_protocol_valid(cpu))
+		return "parking-protocol";
+
+	return NULL;
 }
 
 #ifdef	CONFIG_ACPI_APEI
diff --git a/arch/arm64/include/asm/hardirq.h b/arch/arm64/include/asm/hardirq.h
index a57601f9d17c..8740297dac77 100644
--- a/arch/arm64/include/asm/hardirq.h
+++ b/arch/arm64/include/asm/hardirq.h
@@ -20,7 +20,7 @@
 #include <linux/threads.h>
 #include <asm/irq.h>
 
-#define NR_IPI	5
+#define NR_IPI	6
 
 typedef struct {
 	unsigned int __softirq_pending;
diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index d9c3d6a6100a..2013a4dc5124 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -64,6 +64,15 @@ extern void secondary_entry(void);
 extern void arch_send_call_function_single_ipi(int cpu);
 extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
 
+#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
+extern void arch_send_wakeup_ipi_mask(const struct cpumask *mask);
+#else
+static inline void arch_send_wakeup_ipi_mask(const struct cpumask *mask)
+{
+	BUILD_BUG();
+}
+#endif
+
 extern int __cpu_disable(void);
 
 extern void __cpu_die(unsigned int cpu);
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 474691f8b13a..c4e2f70c0aa0 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -41,6 +41,7 @@ arm64-obj-$(CONFIG_EFI)			+= efi.o efi-entry.stub.o
 arm64-obj-$(CONFIG_PCI)			+= pci.o
 arm64-obj-$(CONFIG_ARMV8_DEPRECATED)	+= armv8_deprecated.o
 arm64-obj-$(CONFIG_ACPI)		+= acpi.o
+arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL)	+= acpi_parking_protocol.o
 
 obj-y					+= $(arm64-obj-y) vdso/
 obj-m					+= $(arm64-obj-m)
diff --git a/arch/arm64/kernel/acpi_parking_protocol.c b/arch/arm64/kernel/acpi_parking_protocol.c
new file mode 100644
index 000000000000..4b1e5a7a98da
--- /dev/null
+++ b/arch/arm64/kernel/acpi_parking_protocol.c
@@ -0,0 +1,153 @@
+/*
+ * ARM64 ACPI Parking Protocol implementation
+ *
+ * Authors: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
+ *	    Mark Salter <msalter@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <linux/acpi.h>
+#include <linux/types.h>
+
+#include <asm/cpu_ops.h>
+
+struct cpu_mailbox_entry {
+	phys_addr_t mailbox_addr;
+	u8 version;
+	u8 gic_cpu_id;
+};
+
+static struct cpu_mailbox_entry cpu_mailbox_entries[NR_CPUS];
+
+void __init acpi_set_mailbox_entry(int cpu,
+				   struct acpi_madt_generic_interrupt *p)
+{
+	struct cpu_mailbox_entry *cpu_entry = &cpu_mailbox_entries[cpu];
+
+	cpu_entry->mailbox_addr = p->parked_address;
+	cpu_entry->version = p->parking_version;
+	cpu_entry->gic_cpu_id = p->cpu_interface_number;
+}
+
+bool acpi_parking_protocol_valid(int cpu)
+{
+	struct cpu_mailbox_entry *cpu_entry = &cpu_mailbox_entries[cpu];
+
+	return cpu_entry->mailbox_addr && cpu_entry->version;
+}
+
+static int acpi_parking_protocol_cpu_init(unsigned int cpu)
+{
+	pr_debug("%s: ACPI parked addr=%llx\n", __func__,
+		  cpu_mailbox_entries[cpu].mailbox_addr);
+
+	return 0;
+}
+
+static int acpi_parking_protocol_cpu_prepare(unsigned int cpu)
+{
+	return 0;
+}
+
+struct parking_protocol_mailbox {
+	__le32 cpu_id;
+	__le32 reserved;
+	__le64 entry_point;
+};
+
+static int acpi_parking_protocol_cpu_boot(unsigned int cpu)
+{
+	struct cpu_mailbox_entry *cpu_entry = &cpu_mailbox_entries[cpu];
+	struct parking_protocol_mailbox __iomem *mailbox;
+	__le32 cpu_id;
+
+	/*
+	 * Map mailbox memory with attribute device nGnRE (ie ioremap -
+	 * this deviates from the parking protocol specifications since
+	 * the mailboxes are required to be mapped nGnRnE; the attribute
+	 * discrepancy is harmless insofar as the protocol specification
+	 * is concerned).
+	 * If the mailbox is mistakenly allocated in the linear mapping
+	 * by FW ioremap will fail since the mapping will be prevented
+	 * by the kernel (it clashes with the linear mapping attributes
+	 * specifications).
+	 */
+	mailbox = ioremap(cpu_entry->mailbox_addr, sizeof(*mailbox));
+	if (!mailbox)
+		return -EIO;
+
+	cpu_id = readl_relaxed(&mailbox->cpu_id);
+	/*
+	 * Check if firmware has set-up the mailbox entry properly
+	 * before kickstarting the respective cpu.
+	 */
+	if (cpu_id != ~0U) {
+		iounmap(mailbox);
+		return -ENXIO;
+	}
+
+	/*
+	 * We write the entry point and cpu id as LE regardless of the
+	 * native endianness of the kernel. Therefore, any boot-loaders
+	 * that read this address need to convert this address to the
+	 * Boot-Loader's endianness before jumping.
+	 */
+	writeq_relaxed(__pa(secondary_entry), &mailbox->entry_point);
+	writel_relaxed(cpu_entry->gic_cpu_id, &mailbox->cpu_id);
+
+	arch_send_wakeup_ipi_mask(cpumask_of(cpu));
+
+	iounmap(mailbox);
+
+	return 0;
+}
+
+static void acpi_parking_protocol_cpu_postboot(void)
+{
+	int cpu = smp_processor_id();
+	struct cpu_mailbox_entry *cpu_entry = &cpu_mailbox_entries[cpu];
+	struct parking_protocol_mailbox __iomem *mailbox;
+	__le64 entry_point;
+
+	/*
+	 * Map mailbox memory with attribute device nGnRE (ie ioremap -
+	 * this deviates from the parking protocol specifications since
+	 * the mailboxes are required to be mapped nGnRnE; the attribute
+	 * discrepancy is harmless insofar as the protocol specification
+	 * is concerned).
+	 * If the mailbox is mistakenly allocated in the linear mapping
+	 * by FW ioremap will fail since the mapping will be prevented
+	 * by the kernel (it clashes with the linear mapping attributes
+	 * specifications).
+	 */
+	mailbox = ioremap(cpu_entry->mailbox_addr, sizeof(*mailbox));
+	if (!mailbox)
+		return;
+
+	entry_point = readl_relaxed(&mailbox->entry_point);
+	/*
+	 * Check if firmware has cleared the entry_point as expected
+	 * by the protocol specification.
+	 */
+	WARN_ON(entry_point);
+
+	iounmap(mailbox);
+}
+
+const struct cpu_operations acpi_parking_protocol_ops = {
+	.name		= "parking-protocol",
+	.cpu_init	= acpi_parking_protocol_cpu_init,
+	.cpu_prepare	= acpi_parking_protocol_cpu_prepare,
+	.cpu_boot	= acpi_parking_protocol_cpu_boot,
+	.cpu_postboot	= acpi_parking_protocol_cpu_postboot
+};
diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c
index b6bd7d447768..c7cfb8fe06f9 100644
--- a/arch/arm64/kernel/cpu_ops.c
+++ b/arch/arm64/kernel/cpu_ops.c
@@ -25,19 +25,30 @@
 #include <asm/smp_plat.h>
 
 extern const struct cpu_operations smp_spin_table_ops;
+extern const struct cpu_operations acpi_parking_protocol_ops;
 extern const struct cpu_operations cpu_psci_ops;
 
 const struct cpu_operations *cpu_ops[NR_CPUS];
 
-static const struct cpu_operations *supported_cpu_ops[] __initconst = {
+static const struct cpu_operations *dt_supported_cpu_ops[] __initconst = {
 	&smp_spin_table_ops,
 	&cpu_psci_ops,
 	NULL,
 };
 
+static const struct cpu_operations *acpi_supported_cpu_ops[] __initconst = {
+#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
+	&acpi_parking_protocol_ops,
+#endif
+	&cpu_psci_ops,
+	NULL,
+};
+
 static const struct cpu_operations * __init cpu_get_ops(const char *name)
 {
-	const struct cpu_operations **ops = supported_cpu_ops;
+	const struct cpu_operations **ops;
+
+	ops = acpi_disabled ? dt_supported_cpu_ops : acpi_supported_cpu_ops;
 
 	while (*ops) {
 		if (!strcmp(name, (*ops)->name))
@@ -75,8 +86,16 @@ static const char *__init cpu_read_enable_method(int cpu)
 		}
 	} else {
 		enable_method = acpi_get_enable_method(cpu);
-		if (!enable_method)
-			pr_err("Unsupported ACPI enable-method\n");
+		if (!enable_method) {
+			/*
+			 * In ACPI systems the boot CPU does not require
+			 * checking the enable method since for some
+			 * boot protocol (ie parking protocol) it need not
+			 * be initialized. Don't warn spuriously.
+			 */
+			if (cpu != 0)
+				pr_err("Unsupported ACPI enable-method\n");
+		}
 	}
 
 	return enable_method;
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 68e7f79630d4..24cb4f800033 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -70,6 +70,7 @@ enum ipi_msg_type {
 	IPI_CPU_STOP,
 	IPI_TIMER,
 	IPI_IRQ_WORK,
+	IPI_WAKEUP
 };
 
 /*
@@ -443,6 +444,17 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor)
 	/* map the logical cpu id to cpu MPIDR */
 	cpu_logical_map(cpu_count) = hwid;
 
+	/*
+	 * Set-up the ACPI parking protocol cpu entries
+	 * while initializing the cpu_logical_map to
+	 * avoid parsing MADT entries multiple times for
+	 * nothing (ie a valid cpu_logical_map entry should
+	 * contain a valid parking protocol data set to
+	 * initialize the cpu if the parking protocol is
+	 * the only available enable method).
+	 */
+	acpi_set_mailbox_entry(cpu_count, processor);
+
 	cpu_count++;
 }
 
@@ -625,6 +637,7 @@ static const char *ipi_types[NR_IPI] __tracepoint_string = {
 	S(IPI_CPU_STOP, "CPU stop interrupts"),
 	S(IPI_TIMER, "Timer broadcast interrupts"),
 	S(IPI_IRQ_WORK, "IRQ work interrupts"),
+	S(IPI_WAKEUP, "CPU wake-up interrupts"),
 };
 
 static void smp_cross_call(const struct cpumask *target, unsigned int ipinr)
@@ -668,6 +681,13 @@ void arch_send_call_function_single_ipi(int cpu)
 	smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC);
 }
 
+#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
+void arch_send_wakeup_ipi_mask(const struct cpumask *mask)
+{
+	smp_cross_call(mask, IPI_WAKEUP);
+}
+#endif
+
 #ifdef CONFIG_IRQ_WORK
 void arch_irq_work_raise(void)
 {
@@ -745,6 +765,14 @@ void handle_IPI(int ipinr, struct pt_regs *regs)
 		break;
 #endif
 
+#ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
+	case IPI_WAKEUP:
+		WARN_ONCE(!acpi_parking_protocol_valid(cpu),
+			  "CPU%u: Wake-up IPI outside the ACPI parking protocol\n",
+			  cpu);
+		break;
+#endif
+
 	default:
 		pr_crit("CPU%u: Unknown IPI message 0x%x\n", cpu, ipinr);
 		break;

From a0e40450cf255994501d3f84081f95b1fb41623d Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Wed, 27 Jan 2016 10:50:19 +0100
Subject: [PATCH 294/424] arm64: allow vmalloc regions to be set with
 set_memory_*

The range of set_memory_* is currently restricted to the module address
range because of difficulties in breaking down larger block sizes.
vmalloc maps PAGE_SIZE pages so it is safe to use as well. Update the
function ranges and add a comment explaining why the range is restricted
the way it is.

Suggested-by: Laura Abbott <labbott@fedoraproject.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit 95f5c80050ad723163aa80dc8bffd48ef4afc6d5)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/mm/pageattr.c | 23 +++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index cf6240741134..0795c3a36d8f 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -14,6 +14,7 @@
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/sched.h>
+#include <linux/vmalloc.h>
 
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
@@ -44,6 +45,7 @@ static int change_memory_common(unsigned long addr, int numpages,
 	unsigned long end = start + size;
 	int ret;
 	struct page_change_data data;
+	struct vm_struct *area;
 
 	if (!PAGE_ALIGNED(addr)) {
 		start &= PAGE_MASK;
@@ -51,10 +53,23 @@ static int change_memory_common(unsigned long addr, int numpages,
 		WARN_ON_ONCE(1);
 	}
 
-	if (start < MODULES_VADDR || start >= MODULES_END)
-		return -EINVAL;
-
-	if (end < MODULES_VADDR || end >= MODULES_END)
+	/*
+	 * Kernel VA mappings are always live, and splitting live section
+	 * mappings into page mappings may cause TLB conflicts. This means
+	 * we have to ensure that changing the permission bits of the range
+	 * we are operating on does not result in such splitting.
+	 *
+	 * Let's restrict ourselves to mappings created by vmalloc (or vmap).
+	 * Those are guaranteed to consist entirely of page mappings, and
+	 * splitting is never needed.
+	 *
+	 * So check whether the [addr, addr + size) interval is entirely
+	 * covered by precisely one VM area that has the VM_ALLOC flag set.
+	 */
+	area = find_vm_area((void *)addr);
+	if (!area ||
+	    end > (unsigned long)area->addr + area->size ||
+	    !(area->flags & VM_ALLOC))
 		return -EINVAL;
 
 	if (!numpages)

From 41cb2829d020e4fdaeb5eb9286153f4c7dc8e7dd Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 2 Feb 2016 12:46:23 +0000
Subject: [PATCH 295/424] arm64: prefetch: don't provide spin_lock_prefetch
 with LSE

The LSE atomics rely on us not dirtying data at L1 if we can avoid it,
otherwise many of the potential scalability benefits are lost.

This patch replaces spin_lock_prefetch with a nop when the LSE atomics
are in use, so that users don't shoot themselves in the foot by causing
needless coherence traffic at L1.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Tested-by: Andrew Pinski <apinski@cavium.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit cd5e10bdf3795d22f10787bb1991c43798c885d5)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/processor.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 4acb7ca94fcd..31b76fce4477 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -29,6 +29,7 @@
 
 #include <linux/string.h>
 
+#include <asm/alternative.h>
 #include <asm/fpsimd.h>
 #include <asm/hw_breakpoint.h>
 #include <asm/pgtable-hwdef.h>
@@ -177,9 +178,11 @@ static inline void prefetchw(const void *ptr)
 }
 
 #define ARCH_HAS_SPINLOCK_PREFETCH
-static inline void spin_lock_prefetch(const void *x)
+static inline void spin_lock_prefetch(const void *ptr)
 {
-	prefetchw(x);
+	asm volatile(ARM64_LSE_ATOMIC_INSN(
+		     "prfm pstl1strm, %a0",
+		     "nop") : : "p" (ptr));
 }
 
 #define HAVE_ARCH_PICK_MMAP_LAYOUT

From 742e490adaa444e9657528ef38dde35f2916c793 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 2 Feb 2016 12:46:24 +0000
Subject: [PATCH 296/424] arm64: prefetch: add alternative pattern for CPUs
 without a prefetcher

Most CPUs have a hardware prefetcher which generally performs better
without explicit prefetch instructions issued by software, however
some CPUs (e.g. Cavium ThunderX) rely solely on explicit prefetch
instructions.

This patch adds an alternative pattern (ARM64_HAS_NO_HW_PREFETCH) to
allow our library code to make use of explicit prefetch instructions
during things like copy routines only when the CPU does not have the
capability to perform the prefetching itself.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Tested-by: Andrew Pinski <apinski@cavium.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit d5370f754875460662abe8561388e019d90dd0c4)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/cpufeature.h |  3 ++-
 arch/arm64/include/asm/cputype.h    | 17 ++++++++++++++++-
 arch/arm64/kernel/cpu_errata.c      | 18 +++---------------
 arch/arm64/kernel/cpufeature.c      | 17 +++++++++++++++++
 4 files changed, 38 insertions(+), 17 deletions(-)

diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 8f271b83f910..8d56bd8550dc 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -30,8 +30,9 @@
 #define ARM64_HAS_LSE_ATOMICS			5
 #define ARM64_WORKAROUND_CAVIUM_23154		6
 #define ARM64_WORKAROUND_834220			7
+#define ARM64_HAS_NO_HW_PREFETCH		8
 
-#define ARM64_NCAPS				8
+#define ARM64_NCAPS				9
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 1a5949364ed0..7540284a17fe 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -57,11 +57,22 @@
 #define MIDR_IMPLEMENTOR(midr)	\
 	(((midr) & MIDR_IMPLEMENTOR_MASK) >> MIDR_IMPLEMENTOR_SHIFT)
 
-#define MIDR_CPU_PART(imp, partnum) \
+#define MIDR_CPU_MODEL(imp, partnum) \
 	(((imp)			<< MIDR_IMPLEMENTOR_SHIFT) | \
 	(0xf			<< MIDR_ARCHITECTURE_SHIFT) | \
 	((partnum)		<< MIDR_PARTNUM_SHIFT))
 
+#define MIDR_CPU_MODEL_MASK (MIDR_IMPLEMENTOR_MASK | MIDR_PARTNUM_MASK | \
+			     MIDR_ARCHITECTURE_MASK)
+
+#define MIDR_IS_CPU_MODEL_RANGE(midr, model, rv_min, rv_max)		\
+({									\
+	u32 _model = (midr) & MIDR_CPU_MODEL_MASK;			\
+	u32 rv = (midr) & (MIDR_REVISION_MASK | MIDR_VARIANT_MASK);	\
+									\
+	_model == (model) && rv >= (rv_min) && rv <= (rv_max);		\
+ })
+
 #define ARM_CPU_IMP_ARM			0x41
 #define ARM_CPU_IMP_APM			0x50
 #define ARM_CPU_IMP_CAVIUM		0x43
@@ -75,6 +86,10 @@
 
 #define CAVIUM_CPU_PART_THUNDERX	0x0A1
 
+#define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
+#define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
+#define MIDR_THUNDERX	MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
+
 #ifndef __ASSEMBLY__
 
 /*
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index feb6b4efa641..e6bc988e8dbf 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -21,24 +21,12 @@
 #include <asm/cputype.h>
 #include <asm/cpufeature.h>
 
-#define MIDR_CORTEX_A53 MIDR_CPU_PART(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
-#define MIDR_CORTEX_A57 MIDR_CPU_PART(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
-#define MIDR_THUNDERX	MIDR_CPU_PART(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
-
-#define CPU_MODEL_MASK (MIDR_IMPLEMENTOR_MASK | MIDR_PARTNUM_MASK | \
-			MIDR_ARCHITECTURE_MASK)
-
 static bool __maybe_unused
 is_affected_midr_range(const struct arm64_cpu_capabilities *entry)
 {
-	u32 midr = read_cpuid_id();
-
-	if ((midr & CPU_MODEL_MASK) != entry->midr_model)
-		return false;
-
-	midr &= MIDR_REVISION_MASK | MIDR_VARIANT_MASK;
-
-	return (midr >= entry->midr_range_min && midr <= entry->midr_range_max);
+	return MIDR_IS_CPU_MODEL_RANGE(read_cpuid_id(), entry->midr_model,
+				       entry->midr_range_min,
+				       entry->midr_range_max);
 }
 
 #define MIDR_RANGE(model, min, max) \
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 5c90aa490a2b..3615d7d7c9af 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -621,6 +621,18 @@ static bool has_useable_gicv3_cpuif(const struct arm64_cpu_capabilities *entry)
 	return has_sre;
 }
 
+static bool has_no_hw_prefetch(const struct arm64_cpu_capabilities *entry)
+{
+	u32 midr = read_cpuid_id();
+	u32 rv_min, rv_max;
+
+	/* Cavium ThunderX pass 1.x and 2.x */
+	rv_min = 0;
+	rv_max = (1 << MIDR_VARIANT_SHIFT) | MIDR_REVISION_MASK;
+
+	return MIDR_IS_CPU_MODEL_RANGE(midr, MIDR_THUNDERX, rv_min, rv_max);
+}
+
 static const struct arm64_cpu_capabilities arm64_features[] = {
 	{
 		.desc = "GIC system register CPU interface",
@@ -651,6 +663,11 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.min_field_value = 2,
 	},
 #endif /* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */
+	{
+		.desc = "Software prefetching using PRFM",
+		.capability = ARM64_HAS_NO_HW_PREFETCH,
+		.matches = has_no_hw_prefetch,
+	},
 	{},
 };
 

From 93c384820cf3c1db51073e746980866d2bce8af9 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 2 Feb 2016 12:46:25 +0000
Subject: [PATCH 297/424] arm64: lib: improve copy_page to deal with 128 bytes
 at a time

We want to avoid lots of different copy_page implementations, settling
for something that is "good enough" everywhere and hopefully easy to
understand and maintain whilst we're at it.

This patch reworks our copy_page implementation based on discussions
with Cavium on the list and benchmarking on Cortex-A processors so that:

  - The loop is unrolled to copy 128 bytes per iteration

  - The reads are offset so that we read from the next 128-byte block
    in the same iteration that we store the previous block

  - Explicit prefetch instructions are removed for now, since they hurt
    performance on CPUs with hardware prefetching

  - The loop exit condition is calculated at the start of the loop

Signed-off-by: Will Deacon <will.deacon@arm.com>
Tested-by: Andrew Pinski <apinski@cavium.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 223e23e8aa26b0bb62c597637e77295e14f6a62c)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/lib/copy_page.S | 46 +++++++++++++++++++++++++++++++-------
 1 file changed, 38 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S
index 512b9a7b980e..2534533ceb1d 100644
--- a/arch/arm64/lib/copy_page.S
+++ b/arch/arm64/lib/copy_page.S
@@ -27,20 +27,50 @@
  *	x1 - src
  */
 ENTRY(copy_page)
-	/* Assume cache line size is 64 bytes. */
-	prfm	pldl1strm, [x1, #64]
-1:	ldp	x2, x3, [x1]
+	ldp	x2, x3, [x1]
 	ldp	x4, x5, [x1, #16]
 	ldp	x6, x7, [x1, #32]
 	ldp	x8, x9, [x1, #48]
-	add	x1, x1, #64
-	prfm	pldl1strm, [x1, #64]
+	ldp	x10, x11, [x1, #64]
+	ldp	x12, x13, [x1, #80]
+	ldp	x14, x15, [x1, #96]
+	ldp	x16, x17, [x1, #112]
+
+	mov	x18, #(PAGE_SIZE - 128)
+	add	x1, x1, #128
+1:
+	subs	x18, x18, #128
+
+	stnp	x2, x3, [x0]
+	ldp	x2, x3, [x1]
+	stnp	x4, x5, [x0, #16]
+	ldp	x4, x5, [x1, #16]
+	stnp	x6, x7, [x0, #32]
+	ldp	x6, x7, [x1, #32]
+	stnp	x8, x9, [x0, #48]
+	ldp	x8, x9, [x1, #48]
+	stnp	x10, x11, [x0, #64]
+	ldp	x10, x11, [x1, #64]
+	stnp	x12, x13, [x0, #80]
+	ldp	x12, x13, [x1, #80]
+	stnp	x14, x15, [x0, #96]
+	ldp	x14, x15, [x1, #96]
+	stnp	x16, x17, [x0, #112]
+	ldp	x16, x17, [x1, #112]
+
+	add	x0, x0, #128
+	add	x1, x1, #128
+
+	b.gt	1b
+
 	stnp	x2, x3, [x0]
 	stnp	x4, x5, [x0, #16]
 	stnp	x6, x7, [x0, #32]
 	stnp	x8, x9, [x0, #48]
-	add	x0, x0, #64
-	tst	x1, #(PAGE_SIZE - 1)
-	b.ne	1b
+	stnp	x10, x11, [x0, #64]
+	stnp	x12, x13, [x0, #80]
+	stnp	x14, x15, [x0, #96]
+	stnp	x16, x17, [x0, #112]
+
 	ret
 ENDPROC(copy_page)

From e46018fe4fd741b5f58b6c3cf8b94ed34603c325 Mon Sep 17 00:00:00 2001
From: Andrew Pinski <apinski@cavium.com>
Date: Tue, 2 Feb 2016 12:46:26 +0000
Subject: [PATCH 298/424] arm64: lib: patch in prfm for copy_page if requested

On ThunderX T88 pass 1 and pass 2, there is no hardware prefetching so
we need to patch in explicit software prefetching instructions

Prefetching improves this code by 60% over the original code and 2x
over the code without prefetching for the affected hardware using the
benchmark code at https://github.com/apinski-cavium/copy_page_benchmark

Signed-off-by: Andrew Pinski <apinski@cavium.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Tested-by: Andrew Pinski <apinski@cavium.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 60e0a09db24adc8809696307e5d97cc4ba7cb3e0)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/lib/copy_page.S | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S
index 2534533ceb1d..4c1e700840b6 100644
--- a/arch/arm64/lib/copy_page.S
+++ b/arch/arm64/lib/copy_page.S
@@ -18,6 +18,8 @@
 #include <linux/const.h>
 #include <asm/assembler.h>
 #include <asm/page.h>
+#include <asm/cpufeature.h>
+#include <asm/alternative.h>
 
 /*
  * Copy a page from src to dest (both are page aligned)
@@ -27,6 +29,15 @@
  *	x1 - src
  */
 ENTRY(copy_page)
+alternative_if_not ARM64_HAS_NO_HW_PREFETCH
+	nop
+	nop
+alternative_else
+	# Prefetch two cache lines ahead.
+	prfm    pldl1strm, [x1, #128]
+	prfm    pldl1strm, [x1, #256]
+alternative_endif
+
 	ldp	x2, x3, [x1]
 	ldp	x4, x5, [x1, #16]
 	ldp	x6, x7, [x1, #32]
@@ -41,6 +52,12 @@ ENTRY(copy_page)
 1:
 	subs	x18, x18, #128
 
+alternative_if_not ARM64_HAS_NO_HW_PREFETCH
+	nop
+alternative_else
+	prfm    pldl1strm, [x1, #384]
+alternative_endif
+
 	stnp	x2, x3, [x0]
 	ldp	x2, x3, [x1]
 	stnp	x4, x5, [x0, #16]

From a97b93b11bd9f76b0cba55b8ff04a7489b087caf Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Wed, 10 Feb 2016 10:07:30 +0000
Subject: [PATCH 299/424] arm64: prefetch: add missing #include for
 spin_lock_prefetch

As of 52e662326e1e ("arm64: prefetch: don't provide spin_lock_prefetch
with LSE"), spin_lock_prefetch is patched at runtime when the LSE atomics
are in use. This relies on the ARM64_LSE_ATOMIC_INSN macro to drive
the alternatives framework, but that macro is only available via
asm/lse.h, which isn't explicitly included in processor.h. Consequently,
drivers can run into build failures such as:

   In file included from include/linux/prefetch.h:14:0,
                    from drivers/net/ethernet/intel/i40e/i40e_txrx.c:27:
   arch/arm64/include/asm/processor.h: In function 'spin_lock_prefetch':
   arch/arm64/include/asm/processor.h:183:15: error: expected string literal before 'ARM64_LSE_ATOMIC_INSN'
     asm volatile(ARM64_LSE_ATOMIC_INSN(

This patch add the missing include and gets things building again.

Reported-by: kbuild test robot <fengguang.wu@intel.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit afb83cc3f0e4f86ea0e1cc3db7a90f58f1abd4d5)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/processor.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 31b76fce4477..5bb1d763d17a 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -32,6 +32,7 @@
 #include <asm/alternative.h>
 #include <asm/fpsimd.h>
 #include <asm/hw_breakpoint.h>
+#include <asm/lse.h>
 #include <asm/pgtable-hwdef.h>
 #include <asm/ptrace.h>
 #include <asm/types.h>

From 65e670213029f7df5dd20407eecbe691aa078930 Mon Sep 17 00:00:00 2001
From: Laura Abbott <labbott@fedoraproject.org>
Date: Fri, 5 Feb 2016 16:24:46 -0800
Subject: [PATCH 300/424] arm64: Drop alloc function from create_mapping

create_mapping is only used in fixmap_remap_fdt. All the create_mapping
calls need to happen on existing translation table pages without
additional allocations. Rather than have an alloc function be called
and fail, just set it to NULL and catch its use. Also change
the name to create_mapping_noalloc to better capture what exactly is
going on.

Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Laura Abbott <labbott@fedoraproject.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 132233a759580f5ce9b1bfaac9073e47d03c460d)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/mm/mmu.c | 29 ++++++++++++++++++++---------
 1 file changed, 20 insertions(+), 9 deletions(-)

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 4874d2fea1c9..3096240e6eb8 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -116,7 +116,9 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
 	pte_t *pte;
 
 	if (pmd_none(*pmd) || pmd_sect(*pmd)) {
-		phys_addr_t pte_phys = pgtable_alloc();
+		phys_addr_t pte_phys;
+		BUG_ON(!pgtable_alloc);
+		pte_phys = pgtable_alloc();
 		pte = pte_set_fixmap(pte_phys);
 		if (pmd_sect(*pmd))
 			split_pmd(pmd, pte);
@@ -158,7 +160,9 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end,
 	 * Check for initial section mappings in the pgd/pud and remove them.
 	 */
 	if (pud_none(*pud) || pud_sect(*pud)) {
-		phys_addr_t pmd_phys = pgtable_alloc();
+		phys_addr_t pmd_phys;
+		BUG_ON(!pgtable_alloc);
+		pmd_phys = pgtable_alloc();
 		pmd = pmd_set_fixmap(pmd_phys);
 		if (pud_sect(*pud)) {
 			/*
@@ -223,7 +227,9 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
 	unsigned long next;
 
 	if (pgd_none(*pgd)) {
-		phys_addr_t pud_phys = pgtable_alloc();
+		phys_addr_t pud_phys;
+		BUG_ON(!pgtable_alloc);
+		pud_phys = pgtable_alloc();
 		__pgd_populate(pgd, pud_phys, PUD_TYPE_TABLE);
 	}
 	BUG_ON(pgd_bad(*pgd));
@@ -312,7 +318,12 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
 	init_pgd(pgd_offset_raw(pgdir, virt), phys, virt, size, prot, alloc);
 }
 
-static void __init create_mapping(phys_addr_t phys, unsigned long virt,
+/*
+ * This function can only be used to modify existing table entries,
+ * without allocating new levels of table. Note that this permits the
+ * creation of new section or page entries.
+ */
+static void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt,
 				  phys_addr_t size, pgprot_t prot)
 {
 	if (virt < VMALLOC_START) {
@@ -321,7 +332,7 @@ static void __init create_mapping(phys_addr_t phys, unsigned long virt,
 		return;
 	}
 	__create_pgd_mapping(init_mm.pgd, phys, virt, size, prot,
-			     early_pgtable_alloc);
+			     NULL);
 }
 
 void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
@@ -678,7 +689,7 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
 	/*
 	 * Make sure that the FDT region can be mapped without the need to
 	 * allocate additional translation table pages, so that it is safe
-	 * to call create_mapping() this early.
+	 * to call create_mapping_noalloc() this early.
 	 *
 	 * On 64k pages, the FDT will be mapped using PTEs, so we need to
 	 * be in the same PMD as the rest of the fixmap.
@@ -694,8 +705,8 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
 	dt_virt = (void *)dt_virt_base + offset;
 
 	/* map the first chunk so we can read the size from the header */
-	create_mapping(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base,
-		       SWAPPER_BLOCK_SIZE, prot);
+	create_mapping_noalloc(round_down(dt_phys, SWAPPER_BLOCK_SIZE),
+			dt_virt_base, SWAPPER_BLOCK_SIZE, prot);
 
 	if (fdt_check_header(dt_virt) != 0)
 		return NULL;
@@ -705,7 +716,7 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
 		return NULL;
 
 	if (offset + size > SWAPPER_BLOCK_SIZE)
-		create_mapping(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base,
+		create_mapping_noalloc(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base,
 			       round_up(offset + size, SWAPPER_BLOCK_SIZE), prot);
 
 	memblock_reserve(dt_phys, size);

From e4d0298cdff23a21e532291df7a4fb6e0a908be4 Mon Sep 17 00:00:00 2001
From: Laura Abbott <labbott@fedoraproject.org>
Date: Fri, 5 Feb 2016 16:24:47 -0800
Subject: [PATCH 301/424] arm64: Add support for ARCH_SUPPORTS_DEBUG_PAGEALLOC

ARCH_SUPPORTS_DEBUG_PAGEALLOC provides a hook to map and unmap
pages for debugging purposes. This requires memory be mapped
with PAGE_SIZE mappings since breaking down larger mappings
at runtime will lead to TLB conflicts. Check if debug_pagealloc
is enabled at runtime and if so, map everyting with PAGE_SIZE
pages. Implement the functions to actually map/unmap the
pages at runtime.

Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Tested-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Laura Abbott <labbott@fedoraproject.org>
[catalin.marinas@arm.com: static annotation block_mappings_allowed() and #ifdef]
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

(cherry picked from commit 83863f25e4b8214e994ef8b5647aad614d74b45d)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/Kconfig       |  3 +++
 arch/arm64/mm/mmu.c      | 26 +++++++++++++++++++++--
 arch/arm64/mm/pageattr.c | 46 +++++++++++++++++++++++++++++++---------
 3 files changed, 63 insertions(+), 12 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 4a1b665d90dc..98992dee9a29 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -507,6 +507,9 @@ config HOTPLUG_CPU
 source kernel/Kconfig.preempt
 source kernel/Kconfig.hz
 
+config ARCH_SUPPORTS_DEBUG_PAGEALLOC
+	def_bool y
+
 config ARCH_HAS_HOLES_MEMORYMODEL
 	def_bool y if SPARSEMEM
 
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 3096240e6eb8..d1fa678355c9 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -149,6 +149,26 @@ static void split_pud(pud_t *old_pud, pmd_t *pmd)
 	} while (pmd++, i++, i < PTRS_PER_PMD);
 }
 
+#ifdef CONFIG_DEBUG_PAGEALLOC
+static bool block_mappings_allowed(phys_addr_t (*pgtable_alloc)(void))
+{
+
+	/*
+	 * If debug_page_alloc is enabled we must map the linear map
+	 * using pages. However, other mappings created by
+	 * create_mapping_noalloc must use sections in some cases. Allow
+	 * sections to be used in those cases, where no pgtable_alloc
+	 * function is provided.
+	 */
+	return !pgtable_alloc || !debug_pagealloc_enabled();
+}
+#else
+static bool block_mappings_allowed(phys_addr_t (*pgtable_alloc)(void))
+{
+	return true;
+}
+#endif
+
 static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end,
 				  phys_addr_t phys, pgprot_t prot,
 				  phys_addr_t (*pgtable_alloc)(void))
@@ -181,7 +201,8 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end,
 	do {
 		next = pmd_addr_end(addr, end);
 		/* try section mapping first */
-		if (((addr | next | phys) & ~SECTION_MASK) == 0) {
+		if (((addr | next | phys) & ~SECTION_MASK) == 0 &&
+		      block_mappings_allowed(pgtable_alloc)) {
 			pmd_t old_pmd =*pmd;
 			set_pmd(pmd, __pmd(phys |
 					   pgprot_val(mk_sect_prot(prot))));
@@ -241,7 +262,8 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
 		/*
 		 * For 4K granule only, attempt to put down a 1GB block
 		 */
-		if (use_1G_block(addr, next, phys)) {
+		if (use_1G_block(addr, next, phys) &&
+		    block_mappings_allowed(pgtable_alloc)) {
 			pud_t old_pud = *pud;
 			set_pud(pud, __pud(phys |
 					   pgprot_val(mk_sect_prot(prot))));
diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index 0795c3a36d8f..ca6d268e3313 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -37,14 +37,31 @@ static int change_page_range(pte_t *ptep, pgtable_t token, unsigned long addr,
 	return 0;
 }
 
+/*
+ * This function assumes that the range is mapped with PAGE_SIZE pages.
+ */
+static int __change_memory_common(unsigned long start, unsigned long size,
+				pgprot_t set_mask, pgprot_t clear_mask)
+{
+	struct page_change_data data;
+	int ret;
+
+	data.set_mask = set_mask;
+	data.clear_mask = clear_mask;
+
+	ret = apply_to_page_range(&init_mm, start, size, change_page_range,
+					&data);
+
+	flush_tlb_kernel_range(start, start + size);
+	return ret;
+}
+
 static int change_memory_common(unsigned long addr, int numpages,
 				pgprot_t set_mask, pgprot_t clear_mask)
 {
 	unsigned long start = addr;
 	unsigned long size = PAGE_SIZE*numpages;
 	unsigned long end = start + size;
-	int ret;
-	struct page_change_data data;
 	struct vm_struct *area;
 
 	if (!PAGE_ALIGNED(addr)) {
@@ -75,14 +92,7 @@ static int change_memory_common(unsigned long addr, int numpages,
 	if (!numpages)
 		return 0;
 
-	data.set_mask = set_mask;
-	data.clear_mask = clear_mask;
-
-	ret = apply_to_page_range(&init_mm, start, size, change_page_range,
-					&data);
-
-	flush_tlb_kernel_range(start, end);
-	return ret;
+	return __change_memory_common(start, size, set_mask, clear_mask);
 }
 
 int set_memory_ro(unsigned long addr, int numpages)
@@ -114,3 +124,19 @@ int set_memory_x(unsigned long addr, int numpages)
 					__pgprot(PTE_PXN));
 }
 EXPORT_SYMBOL_GPL(set_memory_x);
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+void __kernel_map_pages(struct page *page, int numpages, int enable)
+{
+	unsigned long addr = (unsigned long) page_address(page);
+
+	if (enable)
+		__change_memory_common(addr, PAGE_SIZE * numpages,
+					__pgprot(PTE_VALID),
+					__pgprot(0));
+	else
+		__change_memory_common(addr, PAGE_SIZE * numpages,
+					__pgprot(0),
+					__pgprot(PTE_VALID));
+}
+#endif

From 5ca7d16080e32bb34f8d2ead86adde49d1c4c652 Mon Sep 17 00:00:00 2001
From: Laura Abbott <labbott@fedoraproject.org>
Date: Fri, 5 Feb 2016 16:24:48 -0800
Subject: [PATCH 302/424] arm64: ptdump: Indicate whether memory should be
 faulting

With CONFIG_DEBUG_PAGEALLOC, pages do not have the valid bit
set when free in the buddy allocator. Add an indiciation to
the page table dumping code that the valid bit is not set,
'F' for fault, to make this easier to understand.

Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Tested-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Laura Abbott <labbott@fedoraproject.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit d7e9d59494a9a5d83274f5af2148b82ca22dff3f)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/mm/dump.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c
index 0adbebbc2803..0841b2bf0e6a 100644
--- a/arch/arm64/mm/dump.c
+++ b/arch/arm64/mm/dump.c
@@ -90,6 +90,11 @@ struct prot_bits {
 
 static const struct prot_bits pte_bits[] = {
 	{
+		.mask	= PTE_VALID,
+		.val	= PTE_VALID,
+		.set	= " ",
+		.clear	= "F",
+	}, {
 		.mask	= PTE_USER,
 		.val	= PTE_USER,
 		.set	= "USR",

From 1d1e6a82d643537e9b80cf58f887b2be616aa515 Mon Sep 17 00:00:00 2001
From: Yang Shi <yang.shi@linaro.org>
Date: Fri, 5 Feb 2016 15:50:18 -0800
Subject: [PATCH 303/424] arm64: ubsan: select ARCH_HAS_UBSAN_SANITIZE_ALL

To enable UBSAN on arm64, ARCH_HAS_UBSAN_SANITIZE_ALL need to be selected.

Basic kernel bootup test is passed on arm64 with CONFIG_UBSAN_SANITIZE_ALL
enabled.

Signed-off-by: Yang Shi <yang.shi@linaro.org>
Acked-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Tested-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit f0b7f8a4b44657386273a67179dd901c81cd11a6)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 98992dee9a29..1420102341d0 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -13,6 +13,7 @@ config ARM64
 	select ARCH_WANT_OPTIONAL_GPIOLIB
 	select ARCH_WANT_COMPAT_IPC_PARSE_VERSION
 	select ARCH_WANT_FRAME_POINTERS
+	select ARCH_HAS_UBSAN_SANITIZE_ALL
 	select ARM_AMBA
 	select ARM_ARCH_TIMER
 	select ARM_GIC

From 391a428880ad64f0ab9dcf48f4666ba2a6aa7e76 Mon Sep 17 00:00:00 2001
From: David Brown <david.brown@linaro.org>
Date: Wed, 10 Feb 2016 13:52:22 -0800
Subject: [PATCH 304/424] arm64: vdso: Mark vDSO code as read-only

Although the arm64 vDSO is cleanly separated by code/data with the
code being read-only in userspace mappings, the code page is still
writable from the kernel.  There have been exploits (such as
http://itszn.com/blog/?p=21) that take advantage of this on x86 to go
from a bad kernel write to full root.

Prevent this specific exploit on arm64 by putting the vDSO code page
in read-only memory as well.

Before the change:
[    3.138366] vdso: 2 pages (1 code @ ffffffc000a71000, 1 data @ ffffffc000a70000)
---[ Kernel Mapping ]---
0xffffffc000000000-0xffffffc000082000         520K     RW NX SHD AF            UXN MEM/NORMAL
0xffffffc000082000-0xffffffc000200000        1528K     ro x  SHD AF            UXN MEM/NORMAL
0xffffffc000200000-0xffffffc000800000           6M     ro x  SHD AF        BLK UXN MEM/NORMAL
0xffffffc000800000-0xffffffc0009b6000        1752K     ro x  SHD AF            UXN MEM/NORMAL
0xffffffc0009b6000-0xffffffc000c00000        2344K     RW NX SHD AF            UXN MEM/NORMAL
0xffffffc000c00000-0xffffffc008000000         116M     RW NX SHD AF        BLK UXN MEM/NORMAL
0xffffffc00c000000-0xffffffc07f000000        1840M     RW NX SHD AF        BLK UXN MEM/NORMAL
0xffffffc800000000-0xffffffc840000000           1G     RW NX SHD AF        BLK UXN MEM/NORMAL
0xffffffc840000000-0xffffffc87ae00000         942M     RW NX SHD AF        BLK UXN MEM/NORMAL
0xffffffc87ae00000-0xffffffc87ae70000         448K     RW NX SHD AF            UXN MEM/NORMAL
0xffffffc87af80000-0xffffffc87af8a000          40K     RW NX SHD AF            UXN MEM/NORMAL
0xffffffc87af8b000-0xffffffc87b000000         468K     RW NX SHD AF            UXN MEM/NORMAL
0xffffffc87b000000-0xffffffc87fe00000          78M     RW NX SHD AF        BLK UXN MEM/NORMAL
0xffffffc87fe00000-0xffffffc87ff50000        1344K     RW NX SHD AF            UXN MEM/NORMAL
0xffffffc87ff90000-0xffffffc87ffa0000          64K     RW NX SHD AF            UXN MEM/NORMAL
0xffffffc87fff0000-0xffffffc880000000          64K     RW NX SHD AF            UXN MEM/NORMAL

After:
[    3.138368] vdso: 2 pages (1 code @ ffffffc0006de000, 1 data @ ffffffc000a74000)
---[ Kernel Mapping ]---
0xffffffc000000000-0xffffffc000082000         520K     RW NX SHD AF            UXN MEM/NORMAL
0xffffffc000082000-0xffffffc000200000        1528K     ro x  SHD AF            UXN MEM/NORMAL
0xffffffc000200000-0xffffffc000800000           6M     ro x  SHD AF        BLK UXN MEM/NORMAL
0xffffffc000800000-0xffffffc0009b8000        1760K     ro x  SHD AF            UXN MEM/NORMAL
0xffffffc0009b8000-0xffffffc000c00000        2336K     RW NX SHD AF            UXN MEM/NORMAL
0xffffffc000c00000-0xffffffc008000000         116M     RW NX SHD AF        BLK UXN MEM/NORMAL
0xffffffc00c000000-0xffffffc07f000000        1840M     RW NX SHD AF        BLK UXN MEM/NORMAL
0xffffffc800000000-0xffffffc840000000           1G     RW NX SHD AF        BLK UXN MEM/NORMAL
0xffffffc840000000-0xffffffc87ae00000         942M     RW NX SHD AF        BLK UXN MEM/NORMAL
0xffffffc87ae00000-0xffffffc87ae70000         448K     RW NX SHD AF            UXN MEM/NORMAL
0xffffffc87af80000-0xffffffc87af8a000          40K     RW NX SHD AF            UXN MEM/NORMAL
0xffffffc87af8b000-0xffffffc87b000000         468K     RW NX SHD AF            UXN MEM/NORMAL
0xffffffc87b000000-0xffffffc87fe00000          78M     RW NX SHD AF        BLK UXN MEM/NORMAL
0xffffffc87fe00000-0xffffffc87ff50000        1344K     RW NX SHD AF            UXN MEM/NORMAL
0xffffffc87ff90000-0xffffffc87ffa0000          64K     RW NX SHD AF            UXN MEM/NORMAL
0xffffffc87fff0000-0xffffffc880000000          64K     RW NX SHD AF            UXN MEM/NORMAL

Inspired by https://lkml.org/lkml/2016/1/19/494 based on work by the
PaX Team, Brad Spengler, and Kees Cook.

Signed-off-by: David Brown <david.brown@linaro.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
[catalin.marinas@arm.com: removed superfluous __PAGE_ALIGNED_DATA]
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

(cherry picked from commit 88d8a7994e564d209d4b2583496631c2357d386b)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/kernel/vdso/vdso.S | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/arm64/kernel/vdso/vdso.S b/arch/arm64/kernel/vdso/vdso.S
index 60c1db54b41a..82379a70ef03 100644
--- a/arch/arm64/kernel/vdso/vdso.S
+++ b/arch/arm64/kernel/vdso/vdso.S
@@ -21,9 +21,8 @@
 #include <linux/const.h>
 #include <asm/page.h>
 
-	__PAGE_ALIGNED_DATA
-
 	.globl vdso_start, vdso_end
+	.section .rodata
 	.balign PAGE_SIZE
 vdso_start:
 	.incbin "arch/arm64/kernel/vdso/vdso.so"

From 866817f9f155879d1d28f0944958a542bb70475c Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 15 Feb 2016 09:51:49 +0100
Subject: [PATCH 305/424] arm64: use local label prefixes for __reg_num symbols

The __reg_num_xNN symbols that are used to implement the msr_s and
mrs_s macros are recorded in the ELF metadata of each object file.
This does not affect the size of the final binary, but it does clutter
the output of tools like readelf, i.e.,

  $ readelf -a vmlinux |grep -c __reg_num_x
  50976

So let's use symbols with the .L prefix, these are strictly local,
and don't end up in the object files.

  $ readelf -a vmlinux |grep -c __reg_num_x
  0

Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 7abc7d833c9eb16efc8a59239d3771a6e30be367)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/sysreg.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index d48ab5b41f52..76907c94b11f 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -194,32 +194,32 @@
 #ifdef __ASSEMBLY__
 
 	.irp	num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
-	.equ	__reg_num_x\num, \num
+	.equ	.L__reg_num_x\num, \num
 	.endr
-	.equ	__reg_num_xzr, 31
+	.equ	.L__reg_num_xzr, 31
 
 	.macro	mrs_s, rt, sreg
-	.inst	0xd5200000|(\sreg)|(__reg_num_\rt)
+	.inst	0xd5200000|(\sreg)|(.L__reg_num_\rt)
 	.endm
 
 	.macro	msr_s, sreg, rt
-	.inst	0xd5000000|(\sreg)|(__reg_num_\rt)
+	.inst	0xd5000000|(\sreg)|(.L__reg_num_\rt)
 	.endm
 
 #else
 
 asm(
 "	.irp	num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n"
-"	.equ	__reg_num_x\\num, \\num\n"
+"	.equ	.L__reg_num_x\\num, \\num\n"
 "	.endr\n"
-"	.equ	__reg_num_xzr, 31\n"
+"	.equ	.L__reg_num_xzr, 31\n"
 "\n"
 "	.macro	mrs_s, rt, sreg\n"
-"	.inst	0xd5200000|(\\sreg)|(__reg_num_\\rt)\n"
+"	.inst	0xd5200000|(\\sreg)|(.L__reg_num_\\rt)\n"
 "	.endm\n"
 "\n"
 "	.macro	msr_s, sreg, rt\n"
-"	.inst	0xd5000000|(\\sreg)|(__reg_num_\\rt)\n"
+"	.inst	0xd5000000|(\\sreg)|(.L__reg_num_\\rt)\n"
 "	.endm\n"
 );
 

From 0b3419007e096a706ac4894a2cf28cd97b6028e1 Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Fri, 5 Feb 2016 14:58:46 +0000
Subject: [PATCH 306/424] arm64: cpufeature: Change read_cpuid() to use
 sysreg's mrs_s macro

Older assemblers may not have support for newer feature registers. To get
round this, sysreg.h provides a 'mrs_s' macro that takes a register
encoding and generates the raw instruction.

Change read_cpuid() to use mrs_s in all cases so that new registers
don't have to be a special case. Including sysreg.h means we need to move
the include and definition of read_cpuid() after the #ifndef __ASSEMBLY__
to avoid syntax errors in vmlinux.lds.

Signed-off-by: James Morse <james.morse@arm.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 0f54b14e76f5302afe164dc911b049b5df836ff5)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/cpufeature.h |  2 +-
 arch/arm64/include/asm/cputype.h    | 20 ++++++-----
 arch/arm64/kernel/cpufeature.c      | 54 ++++++++++++++---------------
 arch/arm64/kernel/cpuinfo.c         | 50 +++++++++++++-------------
 arch/arm64/mm/context.c             |  2 +-
 5 files changed, 65 insertions(+), 63 deletions(-)

diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 8d56bd8550dc..8131abfabb0a 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -177,7 +177,7 @@ u64 read_system_reg(u32 id);
 
 static inline bool cpu_supports_mixed_endian_el0(void)
 {
-	return id_aa64mmfr0_mixed_endian_el0(read_cpuid(ID_AA64MMFR0_EL1));
+	return id_aa64mmfr0_mixed_endian_el0(read_cpuid(SYS_ID_AA64MMFR0_EL1));
 }
 
 static inline bool system_supports_mixed_endian_el0(void)
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 7540284a17fe..b3a83da152a7 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -32,12 +32,6 @@
 #define MPIDR_AFFINITY_LEVEL(mpidr, level) \
 	((mpidr >> MPIDR_LEVEL_SHIFT(level)) & MPIDR_LEVEL_MASK)
 
-#define read_cpuid(reg) ({						\
-	u64 __val;							\
-	asm("mrs	%0, " #reg : "=r" (__val));			\
-	__val;								\
-})
-
 #define MIDR_REVISION_MASK	0xf
 #define MIDR_REVISION(midr)	((midr) & MIDR_REVISION_MASK)
 #define MIDR_PARTNUM_SHIFT	4
@@ -92,6 +86,14 @@
 
 #ifndef __ASSEMBLY__
 
+#include <asm/sysreg.h>
+
+#define read_cpuid(reg) ({						\
+	u64 __val;							\
+	asm("mrs_s	%0, " __stringify(reg) : "=r" (__val));		\
+	__val;								\
+})
+
 /*
  * The CPU ID never changes at run time, so we might as well tell the
  * compiler that it's constant.  Use this function to read the CPU ID
@@ -99,12 +101,12 @@
  */
 static inline u32 __attribute_const__ read_cpuid_id(void)
 {
-	return read_cpuid(MIDR_EL1);
+	return read_cpuid(SYS_MIDR_EL1);
 }
 
 static inline u64 __attribute_const__ read_cpuid_mpidr(void)
 {
-	return read_cpuid(MPIDR_EL1);
+	return read_cpuid(SYS_MPIDR_EL1);
 }
 
 static inline unsigned int __attribute_const__ read_cpuid_implementor(void)
@@ -119,7 +121,7 @@ static inline unsigned int __attribute_const__ read_cpuid_part_number(void)
 
 static inline u32 __attribute_const__ read_cpuid_cachetype(void)
 {
-	return read_cpuid(CTR_EL0);
+	return read_cpuid(SYS_CTR_EL0);
 }
 #endif /* __ASSEMBLY__ */
 
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 3615d7d7c9af..1ef10e784031 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -808,35 +808,35 @@ static inline void set_sys_caps_initialised(void)
 static u64 __raw_read_system_reg(u32 sys_id)
 {
 	switch (sys_id) {
-	case SYS_ID_PFR0_EL1:		return (u64)read_cpuid(ID_PFR0_EL1);
-	case SYS_ID_PFR1_EL1:		return (u64)read_cpuid(ID_PFR1_EL1);
-	case SYS_ID_DFR0_EL1:		return (u64)read_cpuid(ID_DFR0_EL1);
-	case SYS_ID_MMFR0_EL1:		return (u64)read_cpuid(ID_MMFR0_EL1);
-	case SYS_ID_MMFR1_EL1:		return (u64)read_cpuid(ID_MMFR1_EL1);
-	case SYS_ID_MMFR2_EL1:		return (u64)read_cpuid(ID_MMFR2_EL1);
-	case SYS_ID_MMFR3_EL1:		return (u64)read_cpuid(ID_MMFR3_EL1);
-	case SYS_ID_ISAR0_EL1:		return (u64)read_cpuid(ID_ISAR0_EL1);
-	case SYS_ID_ISAR1_EL1:		return (u64)read_cpuid(ID_ISAR1_EL1);
-	case SYS_ID_ISAR2_EL1:		return (u64)read_cpuid(ID_ISAR2_EL1);
-	case SYS_ID_ISAR3_EL1:		return (u64)read_cpuid(ID_ISAR3_EL1);
-	case SYS_ID_ISAR4_EL1:		return (u64)read_cpuid(ID_ISAR4_EL1);
-	case SYS_ID_ISAR5_EL1:		return (u64)read_cpuid(ID_ISAR4_EL1);
-	case SYS_MVFR0_EL1:		return (u64)read_cpuid(MVFR0_EL1);
-	case SYS_MVFR1_EL1:		return (u64)read_cpuid(MVFR1_EL1);
-	case SYS_MVFR2_EL1:		return (u64)read_cpuid(MVFR2_EL1);
+	case SYS_ID_PFR0_EL1:		return read_cpuid(SYS_ID_PFR0_EL1);
+	case SYS_ID_PFR1_EL1:		return read_cpuid(SYS_ID_PFR1_EL1);
+	case SYS_ID_DFR0_EL1:		return read_cpuid(SYS_ID_DFR0_EL1);
+	case SYS_ID_MMFR0_EL1:		return read_cpuid(SYS_ID_MMFR0_EL1);
+	case SYS_ID_MMFR1_EL1:		return read_cpuid(SYS_ID_MMFR1_EL1);
+	case SYS_ID_MMFR2_EL1:		return read_cpuid(SYS_ID_MMFR2_EL1);
+	case SYS_ID_MMFR3_EL1:		return read_cpuid(SYS_ID_MMFR3_EL1);
+	case SYS_ID_ISAR0_EL1:		return read_cpuid(SYS_ID_ISAR0_EL1);
+	case SYS_ID_ISAR1_EL1:		return read_cpuid(SYS_ID_ISAR1_EL1);
+	case SYS_ID_ISAR2_EL1:		return read_cpuid(SYS_ID_ISAR2_EL1);
+	case SYS_ID_ISAR3_EL1:		return read_cpuid(SYS_ID_ISAR3_EL1);
+	case SYS_ID_ISAR4_EL1:		return read_cpuid(SYS_ID_ISAR4_EL1);
+	case SYS_ID_ISAR5_EL1:		return read_cpuid(SYS_ID_ISAR4_EL1);
+	case SYS_MVFR0_EL1:		return read_cpuid(SYS_MVFR0_EL1);
+	case SYS_MVFR1_EL1:		return read_cpuid(SYS_MVFR1_EL1);
+	case SYS_MVFR2_EL1:		return read_cpuid(SYS_MVFR2_EL1);
 
-	case SYS_ID_AA64PFR0_EL1:	return (u64)read_cpuid(ID_AA64PFR0_EL1);
-	case SYS_ID_AA64PFR1_EL1:	return (u64)read_cpuid(ID_AA64PFR0_EL1);
-	case SYS_ID_AA64DFR0_EL1:	return (u64)read_cpuid(ID_AA64DFR0_EL1);
-	case SYS_ID_AA64DFR1_EL1:	return (u64)read_cpuid(ID_AA64DFR0_EL1);
-	case SYS_ID_AA64MMFR0_EL1:	return (u64)read_cpuid(ID_AA64MMFR0_EL1);
-	case SYS_ID_AA64MMFR1_EL1:	return (u64)read_cpuid(ID_AA64MMFR1_EL1);
-	case SYS_ID_AA64ISAR0_EL1:	return (u64)read_cpuid(ID_AA64ISAR0_EL1);
-	case SYS_ID_AA64ISAR1_EL1:	return (u64)read_cpuid(ID_AA64ISAR1_EL1);
+	case SYS_ID_AA64PFR0_EL1:	return read_cpuid(SYS_ID_AA64PFR0_EL1);
+	case SYS_ID_AA64PFR1_EL1:	return read_cpuid(SYS_ID_AA64PFR0_EL1);
+	case SYS_ID_AA64DFR0_EL1:	return read_cpuid(SYS_ID_AA64DFR0_EL1);
+	case SYS_ID_AA64DFR1_EL1:	return read_cpuid(SYS_ID_AA64DFR0_EL1);
+	case SYS_ID_AA64MMFR0_EL1:	return read_cpuid(SYS_ID_AA64MMFR0_EL1);
+	case SYS_ID_AA64MMFR1_EL1:	return read_cpuid(SYS_ID_AA64MMFR1_EL1);
+	case SYS_ID_AA64ISAR0_EL1:	return read_cpuid(SYS_ID_AA64ISAR0_EL1);
+	case SYS_ID_AA64ISAR1_EL1:	return read_cpuid(SYS_ID_AA64ISAR1_EL1);
 
-	case SYS_CNTFRQ_EL0:		return (u64)read_cpuid(CNTFRQ_EL0);
-	case SYS_CTR_EL0:		return (u64)read_cpuid(CTR_EL0);
-	case SYS_DCZID_EL0:		return (u64)read_cpuid(DCZID_EL0);
+	case SYS_CNTFRQ_EL0:		return read_cpuid(SYS_CNTFRQ_EL0);
+	case SYS_CTR_EL0:		return read_cpuid(SYS_CTR_EL0);
+	case SYS_DCZID_EL0:		return read_cpuid(SYS_DCZID_EL0);
 	default:
 		BUG();
 		return 0;
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 212ae6361d8b..76df22272804 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -201,35 +201,35 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
 {
 	info->reg_cntfrq = arch_timer_get_cntfrq();
 	info->reg_ctr = read_cpuid_cachetype();
-	info->reg_dczid = read_cpuid(DCZID_EL0);
+	info->reg_dczid = read_cpuid(SYS_DCZID_EL0);
 	info->reg_midr = read_cpuid_id();
 
-	info->reg_id_aa64dfr0 = read_cpuid(ID_AA64DFR0_EL1);
-	info->reg_id_aa64dfr1 = read_cpuid(ID_AA64DFR1_EL1);
-	info->reg_id_aa64isar0 = read_cpuid(ID_AA64ISAR0_EL1);
-	info->reg_id_aa64isar1 = read_cpuid(ID_AA64ISAR1_EL1);
-	info->reg_id_aa64mmfr0 = read_cpuid(ID_AA64MMFR0_EL1);
-	info->reg_id_aa64mmfr1 = read_cpuid(ID_AA64MMFR1_EL1);
-	info->reg_id_aa64pfr0 = read_cpuid(ID_AA64PFR0_EL1);
-	info->reg_id_aa64pfr1 = read_cpuid(ID_AA64PFR1_EL1);
+	info->reg_id_aa64dfr0 = read_cpuid(SYS_ID_AA64DFR0_EL1);
+	info->reg_id_aa64dfr1 = read_cpuid(SYS_ID_AA64DFR1_EL1);
+	info->reg_id_aa64isar0 = read_cpuid(SYS_ID_AA64ISAR0_EL1);
+	info->reg_id_aa64isar1 = read_cpuid(SYS_ID_AA64ISAR1_EL1);
+	info->reg_id_aa64mmfr0 = read_cpuid(SYS_ID_AA64MMFR0_EL1);
+	info->reg_id_aa64mmfr1 = read_cpuid(SYS_ID_AA64MMFR1_EL1);
+	info->reg_id_aa64pfr0 = read_cpuid(SYS_ID_AA64PFR0_EL1);
+	info->reg_id_aa64pfr1 = read_cpuid(SYS_ID_AA64PFR1_EL1);
 
-	info->reg_id_dfr0 = read_cpuid(ID_DFR0_EL1);
-	info->reg_id_isar0 = read_cpuid(ID_ISAR0_EL1);
-	info->reg_id_isar1 = read_cpuid(ID_ISAR1_EL1);
-	info->reg_id_isar2 = read_cpuid(ID_ISAR2_EL1);
-	info->reg_id_isar3 = read_cpuid(ID_ISAR3_EL1);
-	info->reg_id_isar4 = read_cpuid(ID_ISAR4_EL1);
-	info->reg_id_isar5 = read_cpuid(ID_ISAR5_EL1);
-	info->reg_id_mmfr0 = read_cpuid(ID_MMFR0_EL1);
-	info->reg_id_mmfr1 = read_cpuid(ID_MMFR1_EL1);
-	info->reg_id_mmfr2 = read_cpuid(ID_MMFR2_EL1);
-	info->reg_id_mmfr3 = read_cpuid(ID_MMFR3_EL1);
-	info->reg_id_pfr0 = read_cpuid(ID_PFR0_EL1);
-	info->reg_id_pfr1 = read_cpuid(ID_PFR1_EL1);
+	info->reg_id_dfr0 = read_cpuid(SYS_ID_DFR0_EL1);
+	info->reg_id_isar0 = read_cpuid(SYS_ID_ISAR0_EL1);
+	info->reg_id_isar1 = read_cpuid(SYS_ID_ISAR1_EL1);
+	info->reg_id_isar2 = read_cpuid(SYS_ID_ISAR2_EL1);
+	info->reg_id_isar3 = read_cpuid(SYS_ID_ISAR3_EL1);
+	info->reg_id_isar4 = read_cpuid(SYS_ID_ISAR4_EL1);
+	info->reg_id_isar5 = read_cpuid(SYS_ID_ISAR5_EL1);
+	info->reg_id_mmfr0 = read_cpuid(SYS_ID_MMFR0_EL1);
+	info->reg_id_mmfr1 = read_cpuid(SYS_ID_MMFR1_EL1);
+	info->reg_id_mmfr2 = read_cpuid(SYS_ID_MMFR2_EL1);
+	info->reg_id_mmfr3 = read_cpuid(SYS_ID_MMFR3_EL1);
+	info->reg_id_pfr0 = read_cpuid(SYS_ID_PFR0_EL1);
+	info->reg_id_pfr1 = read_cpuid(SYS_ID_PFR1_EL1);
 
-	info->reg_mvfr0 = read_cpuid(MVFR0_EL1);
-	info->reg_mvfr1 = read_cpuid(MVFR1_EL1);
-	info->reg_mvfr2 = read_cpuid(MVFR2_EL1);
+	info->reg_mvfr0 = read_cpuid(SYS_MVFR0_EL1);
+	info->reg_mvfr1 = read_cpuid(SYS_MVFR1_EL1);
+	info->reg_mvfr2 = read_cpuid(SYS_MVFR2_EL1);
 
 	cpuinfo_detect_icache_policy(info);
 
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index e87f53ff5f58..7275628ba59f 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -187,7 +187,7 @@ switch_mm_fastpath:
 
 static int asids_init(void)
 {
-	int fld = cpuid_feature_extract_field(read_cpuid(ID_AA64MMFR0_EL1), 4);
+	int fld = cpuid_feature_extract_field(read_cpuid(SYS_ID_AA64MMFR0_EL1), 4);
 
 	switch (fld) {
 	default:

From f6c5d808273093c8220da4163c548932d18e8e36 Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Fri, 5 Feb 2016 14:58:47 +0000
Subject: [PATCH 307/424] arm64: add ARMv8.2 id_aa64mmfr2 boiler plate

ARMv8.2 adds a new feature register id_aa64mmfr2. This patch adds the
cpu feature boiler plate used by the actual features in later patches.

Signed-off-by: James Morse <james.morse@arm.com>
Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 406e308770a92bd33995b2e5b681e86358328bb0)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/cpu.h    |  1 +
 arch/arm64/include/asm/sysreg.h |  4 ++++
 arch/arm64/kernel/cpufeature.c  | 10 ++++++++++
 arch/arm64/kernel/cpuinfo.c     |  1 +
 4 files changed, 16 insertions(+)

diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h
index b5e9cee4b5f8..13a6103130cd 100644
--- a/arch/arm64/include/asm/cpu.h
+++ b/arch/arm64/include/asm/cpu.h
@@ -36,6 +36,7 @@ struct cpuinfo_arm64 {
 	u64		reg_id_aa64isar1;
 	u64		reg_id_aa64mmfr0;
 	u64		reg_id_aa64mmfr1;
+	u64		reg_id_aa64mmfr2;
 	u64		reg_id_aa64pfr0;
 	u64		reg_id_aa64pfr1;
 
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 76907c94b11f..4bc8655529df 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -70,6 +70,7 @@
 
 #define SYS_ID_AA64MMFR0_EL1		sys_reg(3, 0, 0, 7, 0)
 #define SYS_ID_AA64MMFR1_EL1		sys_reg(3, 0, 0, 7, 1)
+#define SYS_ID_AA64MMFR2_EL1		sys_reg(3, 0, 0, 7, 2)
 
 #define SYS_CNTFRQ_EL0			sys_reg(3, 3, 14, 0, 0)
 #define SYS_CTR_EL0			sys_reg(3, 3, 0, 0, 1)
@@ -135,6 +136,9 @@
 #define ID_AA64MMFR1_VMIDBITS_SHIFT	4
 #define ID_AA64MMFR1_HADBS_SHIFT	0
 
+/* id_aa64mmfr2 */
+#define ID_AA64MMFR2_UAO_SHIFT		4
+
 /* id_aa64dfr0 */
 #define ID_AA64DFR0_CTX_CMPS_SHIFT	28
 #define ID_AA64DFR0_WRPS_SHIFT		20
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 1ef10e784031..42918c797e8e 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -123,6 +123,11 @@ static struct arm64_ftr_bits ftr_id_aa64mmfr1[] = {
 	ARM64_FTR_END,
 };
 
+static struct arm64_ftr_bits ftr_id_aa64mmfr2[] = {
+	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_UAO_SHIFT, 4, 0),
+	ARM64_FTR_END,
+};
+
 static struct arm64_ftr_bits ftr_ctr[] = {
 	U_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 31, 1, 1),	/* RAO */
 	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 3, 0),
@@ -284,6 +289,7 @@ static struct arm64_ftr_reg arm64_ftr_regs[] = {
 	/* Op1 = 0, CRn = 0, CRm = 7 */
 	ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0),
 	ARM64_FTR_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1),
+	ARM64_FTR_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2),
 
 	/* Op1 = 3, CRn = 0, CRm = 0 */
 	ARM64_FTR_REG(SYS_CTR_EL0, ftr_ctr),
@@ -408,6 +414,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
 	init_cpu_ftr_reg(SYS_ID_AA64ISAR1_EL1, info->reg_id_aa64isar1);
 	init_cpu_ftr_reg(SYS_ID_AA64MMFR0_EL1, info->reg_id_aa64mmfr0);
 	init_cpu_ftr_reg(SYS_ID_AA64MMFR1_EL1, info->reg_id_aa64mmfr1);
+	init_cpu_ftr_reg(SYS_ID_AA64MMFR2_EL1, info->reg_id_aa64mmfr2);
 	init_cpu_ftr_reg(SYS_ID_AA64PFR0_EL1, info->reg_id_aa64pfr0);
 	init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1);
 	init_cpu_ftr_reg(SYS_ID_DFR0_EL1, info->reg_id_dfr0);
@@ -517,6 +524,8 @@ void update_cpu_features(int cpu,
 				      info->reg_id_aa64mmfr0, boot->reg_id_aa64mmfr0);
 	taint |= check_update_ftr_reg(SYS_ID_AA64MMFR1_EL1, cpu,
 				      info->reg_id_aa64mmfr1, boot->reg_id_aa64mmfr1);
+	taint |= check_update_ftr_reg(SYS_ID_AA64MMFR2_EL1, cpu,
+				      info->reg_id_aa64mmfr2, boot->reg_id_aa64mmfr2);
 
 	/*
 	 * EL3 is not our concern.
@@ -831,6 +840,7 @@ static u64 __raw_read_system_reg(u32 sys_id)
 	case SYS_ID_AA64DFR1_EL1:	return read_cpuid(SYS_ID_AA64DFR0_EL1);
 	case SYS_ID_AA64MMFR0_EL1:	return read_cpuid(SYS_ID_AA64MMFR0_EL1);
 	case SYS_ID_AA64MMFR1_EL1:	return read_cpuid(SYS_ID_AA64MMFR1_EL1);
+	case SYS_ID_AA64MMFR2_EL1:	return read_cpuid(SYS_ID_AA64MMFR2_EL1);
 	case SYS_ID_AA64ISAR0_EL1:	return read_cpuid(SYS_ID_AA64ISAR0_EL1);
 	case SYS_ID_AA64ISAR1_EL1:	return read_cpuid(SYS_ID_AA64ISAR1_EL1);
 
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 76df22272804..966fbd52550b 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -210,6 +210,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
 	info->reg_id_aa64isar1 = read_cpuid(SYS_ID_AA64ISAR1_EL1);
 	info->reg_id_aa64mmfr0 = read_cpuid(SYS_ID_AA64MMFR0_EL1);
 	info->reg_id_aa64mmfr1 = read_cpuid(SYS_ID_AA64MMFR1_EL1);
+	info->reg_id_aa64mmfr2 = read_cpuid(SYS_ID_AA64MMFR2_EL1);
 	info->reg_id_aa64pfr0 = read_cpuid(SYS_ID_AA64PFR0_EL1);
 	info->reg_id_aa64pfr1 = read_cpuid(SYS_ID_AA64PFR1_EL1);
 

From 13e05550e107e46ef982e5c4347e4986aeeee7ec Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Fri, 5 Feb 2016 14:58:48 +0000
Subject: [PATCH 308/424] arm64: kernel: Add support for User Access Override

'User Access Override' is a new ARMv8.2 feature which allows the
unprivileged load and store instructions to be overridden to behave in
the normal way.

This patch converts {get,put}_user() and friends to use ldtr*/sttr*
instructions - so that they can only access EL0 memory, then enables
UAO when fs==KERNEL_DS so that these functions can access kernel memory.

This allows user space's read/write permissions to be checked against the
page tables, instead of testing addr<USER_DS, then using the kernel's
read/write permissions.

Signed-off-by: James Morse <james.morse@arm.com>
[catalin.marinas@arm.com: move uao_thread_switch() above dsb()]
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

(cherry picked from commit 57f4959bad0a154aeca125b7d38d1d9471a12422)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/Kconfig                   | 21 ++++++++
 arch/arm64/include/asm/alternative.h | 72 ++++++++++++++++++++++++++++
 arch/arm64/include/asm/cpufeature.h  |  3 +-
 arch/arm64/include/asm/processor.h   |  1 +
 arch/arm64/include/asm/sysreg.h      |  3 ++
 arch/arm64/include/asm/thread_info.h |  6 +++
 arch/arm64/include/asm/uaccess.h     | 44 ++++++++++++-----
 arch/arm64/include/uapi/asm/ptrace.h |  1 +
 arch/arm64/kernel/cpufeature.c       | 11 +++++
 arch/arm64/kernel/process.c          | 19 ++++++++
 arch/arm64/lib/clear_user.S          |  8 ++--
 arch/arm64/lib/copy_from_user.S      |  8 ++--
 arch/arm64/lib/copy_in_user.S        | 16 +++----
 arch/arm64/lib/copy_to_user.S        |  8 ++--
 arch/arm64/mm/fault.c                | 31 +++++++++---
 15 files changed, 213 insertions(+), 39 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 1420102341d0..4df85b5a2045 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -706,6 +706,27 @@ config ARM64_LSE_ATOMICS
 
 endmenu
 
+config ARM64_UAO
+	bool "Enable support for User Access Override (UAO)"
+	default y
+	help
+	  User Access Override (UAO; part of the ARMv8.2 Extensions)
+	  causes the 'unprivileged' variant of the load/store instructions to
+	  be overriden to be privileged.
+
+	  This option changes get_user() and friends to use the 'unprivileged'
+	  variant of the load/store instructions. This ensures that user-space
+	  really did have access to the supplied memory. When addr_limit is
+	  set to kernel memory the UAO bit will be set, allowing privileged
+	  access to kernel memory.
+
+	  Choosing this option will cause copy_to_user() et al to use user-space
+	  memory permissions.
+
+	  The feature is detected at runtime, the kernel will use the
+	  regular load/store instructions if the cpu does not implement the
+	  feature.
+
 endmenu
 
 menu "Boot options"
diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h
index e4962f04201e..a9fc24ec1aa9 100644
--- a/arch/arm64/include/asm/alternative.h
+++ b/arch/arm64/include/asm/alternative.h
@@ -1,6 +1,8 @@
 #ifndef __ASM_ALTERNATIVE_H
 #define __ASM_ALTERNATIVE_H
 
+#include <asm/cpufeature.h>
+
 #ifndef __ASSEMBLY__
 
 #include <linux/init.h>
@@ -63,6 +65,8 @@ void apply_alternatives(void *start, size_t length);
 
 #else
 
+#include <asm/assembler.h>
+
 .macro altinstruction_entry orig_offset alt_offset feature orig_len alt_len
 	.word \orig_offset - .
 	.word \alt_offset - .
@@ -136,6 +140,74 @@ void apply_alternatives(void *start, size_t length);
 	alternative_insn insn1, insn2, cap, IS_ENABLED(cfg)
 
 
+/*
+ * Generate the assembly for UAO alternatives with exception table entries.
+ * This is complicated as there is no post-increment or pair versions of the
+ * unprivileged instructions, and USER() only works for single instructions.
+ */
+#ifdef CONFIG_ARM64_UAO
+	.macro uao_ldp l, reg1, reg2, addr, post_inc
+		alternative_if_not ARM64_HAS_UAO
+8888:			ldp	\reg1, \reg2, [\addr], \post_inc;
+8889:			nop;
+			nop;
+		alternative_else
+			ldtr	\reg1, [\addr];
+			ldtr	\reg2, [\addr, #8];
+			add	\addr, \addr, \post_inc;
+		alternative_endif
+
+		.section __ex_table,"a";
+		.align	3;
+		.quad	8888b,\l;
+		.quad	8889b,\l;
+		.previous;
+	.endm
+
+	.macro uao_stp l, reg1, reg2, addr, post_inc
+		alternative_if_not ARM64_HAS_UAO
+8888:			stp	\reg1, \reg2, [\addr], \post_inc;
+8889:			nop;
+			nop;
+		alternative_else
+			sttr	\reg1, [\addr];
+			sttr	\reg2, [\addr, #8];
+			add	\addr, \addr, \post_inc;
+		alternative_endif
+
+		.section __ex_table,"a";
+		.align	3;
+		.quad	8888b,\l;
+		.quad	8889b,\l;
+		.previous
+	.endm
+
+	.macro uao_user_alternative l, inst, alt_inst, reg, addr, post_inc
+		alternative_if_not ARM64_HAS_UAO
+8888:			\inst	\reg, [\addr], \post_inc;
+			nop;
+		alternative_else
+			\alt_inst	\reg, [\addr];
+			add		\addr, \addr, \post_inc;
+		alternative_endif
+
+		.section __ex_table,"a";
+		.align	3;
+		.quad	8888b,\l;
+		.previous
+	.endm
+#else
+	.macro uao_ldp l, reg1, reg2, addr, post_inc
+		USER(\l, ldp \reg1, \reg2, [\addr], \post_inc)
+	.endm
+	.macro uao_stp l, reg1, reg2, addr, post_inc
+		USER(\l, stp \reg1, \reg2, [\addr], \post_inc)
+	.endm
+	.macro uao_user_alternative l, inst, alt_inst, reg, addr, post_inc
+		USER(\l, \inst \reg, [\addr], \post_inc)
+	.endm
+#endif
+
 #endif  /*  __ASSEMBLY__  */
 
 /*
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 8131abfabb0a..a5df7cde616b 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -31,8 +31,9 @@
 #define ARM64_WORKAROUND_CAVIUM_23154		6
 #define ARM64_WORKAROUND_834220			7
 #define ARM64_HAS_NO_HW_PREFETCH		8
+#define ARM64_HAS_UAO				9
 
-#define ARM64_NCAPS				9
+#define ARM64_NCAPS				10
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 5bb1d763d17a..cef1cf398356 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -191,5 +191,6 @@ static inline void spin_lock_prefetch(const void *ptr)
 #endif
 
 void cpu_enable_pan(void *__unused);
+void cpu_enable_uao(void *__unused);
 
 #endif /* __ASM_PROCESSOR_H */
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 4bc8655529df..b9fd8ec79033 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -77,9 +77,12 @@
 #define SYS_DCZID_EL0			sys_reg(3, 3, 0, 0, 7)
 
 #define REG_PSTATE_PAN_IMM		sys_reg(0, 0, 4, 0, 4)
+#define REG_PSTATE_UAO_IMM		sys_reg(0, 0, 4, 0, 3)
 
 #define SET_PSTATE_PAN(x) __inst_arm(0xd5000000 | REG_PSTATE_PAN_IMM |\
 				     (!!x)<<8 | 0x1f)
+#define SET_PSTATE_UAO(x) __inst_arm(0xd5000000 | REG_PSTATE_UAO_IMM |\
+				     (!!x)<<8 | 0x1f)
 
 /* SCTLR_EL1 */
 #define SCTLR_EL1_CP15BEN	(0x1 << 5)
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index abd64bd1f6d9..eba8db6838af 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -85,6 +85,12 @@ static inline struct thread_info *current_thread_info(void)
 	return (struct thread_info *)sp_el0;
 }
 
+/* Access struct thread_info of another thread */
+static inline struct thread_info *get_thread_info(unsigned long thread_stack)
+{
+	return (struct thread_info *)(thread_stack & ~(THREAD_SIZE - 1));
+}
+
 #define thread_saved_pc(tsk)	\
 	((unsigned long)(tsk->thread.cpu_context.pc))
 #define thread_saved_sp(tsk)	\
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index b2ede967fe7d..f973bdce8410 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -64,6 +64,16 @@ extern int fixup_exception(struct pt_regs *regs);
 static inline void set_fs(mm_segment_t fs)
 {
 	current_thread_info()->addr_limit = fs;
+
+	/*
+	 * Enable/disable UAO so that copy_to_user() etc can access
+	 * kernel memory with the unprivileged instructions.
+	 */
+	if (IS_ENABLED(CONFIG_ARM64_UAO) && fs == KERNEL_DS)
+		asm(ALTERNATIVE("nop", SET_PSTATE_UAO(1), ARM64_HAS_UAO));
+	else
+		asm(ALTERNATIVE("nop", SET_PSTATE_UAO(0), ARM64_HAS_UAO,
+				CONFIG_ARM64_UAO));
 }
 
 #define segment_eq(a, b)	((a) == (b))
@@ -113,9 +123,10 @@ static inline void set_fs(mm_segment_t fs)
  * The "__xxx_error" versions set the third argument to -EFAULT if an error
  * occurs, and leave it unchanged on success.
  */
-#define __get_user_asm(instr, reg, x, addr, err)			\
+#define __get_user_asm(instr, alt_instr, reg, x, addr, err, feature)	\
 	asm volatile(							\
-	"1:	" instr "	" reg "1, [%2]\n"			\
+	"1:"ALTERNATIVE(instr "     " reg "1, [%2]\n",			\
+			alt_instr " " reg "1, [%2]\n", feature)		\
 	"2:\n"								\
 	"	.section .fixup, \"ax\"\n"				\
 	"	.align	2\n"						\
@@ -138,16 +149,20 @@ do {									\
 			CONFIG_ARM64_PAN));				\
 	switch (sizeof(*(ptr))) {					\
 	case 1:								\
-		__get_user_asm("ldrb", "%w", __gu_val, (ptr), (err));	\
+		__get_user_asm("ldrb", "ldtrb", "%w", __gu_val, (ptr),  \
+			       (err), ARM64_HAS_UAO);			\
 		break;							\
 	case 2:								\
-		__get_user_asm("ldrh", "%w", __gu_val, (ptr), (err));	\
+		__get_user_asm("ldrh", "ldtrh", "%w", __gu_val, (ptr),  \
+			       (err), ARM64_HAS_UAO);			\
 		break;							\
 	case 4:								\
-		__get_user_asm("ldr", "%w", __gu_val, (ptr), (err));	\
+		__get_user_asm("ldr", "ldtr", "%w", __gu_val, (ptr),	\
+			       (err), ARM64_HAS_UAO);			\
 		break;							\
 	case 8:								\
-		__get_user_asm("ldr", "%",  __gu_val, (ptr), (err));	\
+		__get_user_asm("ldr", "ldtr", "%",  __gu_val, (ptr),	\
+			       (err), ARM64_HAS_UAO);			\
 		break;							\
 	default:							\
 		BUILD_BUG();						\
@@ -181,9 +196,10 @@ do {									\
 		((x) = 0, -EFAULT);					\
 })
 
-#define __put_user_asm(instr, reg, x, addr, err)			\
+#define __put_user_asm(instr, alt_instr, reg, x, addr, err, feature)	\
 	asm volatile(							\
-	"1:	" instr "	" reg "1, [%2]\n"			\
+	"1:"ALTERNATIVE(instr "     " reg "1, [%2]\n",			\
+			alt_instr " " reg "1, [%2]\n", feature)		\
 	"2:\n"								\
 	"	.section .fixup,\"ax\"\n"				\
 	"	.align	2\n"						\
@@ -205,16 +221,20 @@ do {									\
 			CONFIG_ARM64_PAN));				\
 	switch (sizeof(*(ptr))) {					\
 	case 1:								\
-		__put_user_asm("strb", "%w", __pu_val, (ptr), (err));	\
+		__put_user_asm("strb", "sttrb", "%w", __pu_val, (ptr),	\
+			       (err), ARM64_HAS_UAO);			\
 		break;							\
 	case 2:								\
-		__put_user_asm("strh", "%w", __pu_val, (ptr), (err));	\
+		__put_user_asm("strh", "sttrh", "%w", __pu_val, (ptr),	\
+			       (err), ARM64_HAS_UAO);			\
 		break;							\
 	case 4:								\
-		__put_user_asm("str",  "%w", __pu_val, (ptr), (err));	\
+		__put_user_asm("str", "sttr", "%w", __pu_val, (ptr),	\
+			       (err), ARM64_HAS_UAO);			\
 		break;							\
 	case 8:								\
-		__put_user_asm("str",  "%", __pu_val, (ptr), (err));	\
+		__put_user_asm("str", "sttr", "%", __pu_val, (ptr),	\
+			       (err), ARM64_HAS_UAO);			\
 		break;							\
 	default:							\
 		BUILD_BUG();						\
diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h
index 208db3df135a..b5c3933ed441 100644
--- a/arch/arm64/include/uapi/asm/ptrace.h
+++ b/arch/arm64/include/uapi/asm/ptrace.h
@@ -45,6 +45,7 @@
 #define PSR_A_BIT	0x00000100
 #define PSR_D_BIT	0x00000200
 #define PSR_PAN_BIT	0x00400000
+#define PSR_UAO_BIT	0x00800000
 #define PSR_Q_BIT	0x08000000
 #define PSR_V_BIT	0x10000000
 #define PSR_C_BIT	0x20000000
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 42918c797e8e..ae22edf9d3c9 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -677,6 +677,17 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.capability = ARM64_HAS_NO_HW_PREFETCH,
 		.matches = has_no_hw_prefetch,
 	},
+#ifdef CONFIG_ARM64_UAO
+	{
+		.desc = "User Access Override",
+		.capability = ARM64_HAS_UAO,
+		.matches = has_cpuid_feature,
+		.sys_reg = SYS_ID_AA64MMFR2_EL1,
+		.field_pos = ID_AA64MMFR2_UAO_SHIFT,
+		.min_field_value = 1,
+		.enable = cpu_enable_uao,
+	},
+#endif /* CONFIG_ARM64_UAO */
 	{},
 };
 
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 88d742ba19d5..c1ca4ea065d4 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -46,6 +46,7 @@
 #include <linux/notifier.h>
 #include <trace/events/power.h>
 
+#include <asm/alternative.h>
 #include <asm/compat.h>
 #include <asm/cacheflush.h>
 #include <asm/fpsimd.h>
@@ -280,6 +281,9 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
 	} else {
 		memset(childregs, 0, sizeof(struct pt_regs));
 		childregs->pstate = PSR_MODE_EL1h;
+		if (IS_ENABLED(CONFIG_ARM64_UAO) &&
+		    cpus_have_cap(ARM64_HAS_UAO))
+			childregs->pstate |= PSR_UAO_BIT;
 		p->thread.cpu_context.x19 = stack_start;
 		p->thread.cpu_context.x20 = stk_sz;
 	}
@@ -308,6 +312,20 @@ static void tls_thread_switch(struct task_struct *next)
 	: : "r" (tpidr), "r" (tpidrro));
 }
 
+/* Restore the UAO state depending on next's addr_limit */
+static void uao_thread_switch(struct task_struct *next)
+{
+	unsigned long next_sp = next->thread.cpu_context.sp;
+
+	if (IS_ENABLED(CONFIG_ARM64_UAO) &&
+	    get_thread_info(next_sp)->addr_limit == KERNEL_DS)
+		asm(ALTERNATIVE("nop", SET_PSTATE_UAO(1), ARM64_HAS_UAO,
+			        CONFIG_ARM64_UAO));
+	else
+		asm(ALTERNATIVE("nop", SET_PSTATE_UAO(0), ARM64_HAS_UAO,
+				CONFIG_ARM64_UAO));
+}
+
 /*
  * Thread switching.
  */
@@ -320,6 +338,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
 	tls_thread_switch(next);
 	hw_breakpoint_thread_switch(next);
 	contextidr_thread_switch(next);
+	uao_thread_switch(next);
 
 	/*
 	 * Complete any pending TLB or cache maintenance on this CPU in case
diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S
index a9723c71c52b..3f950b677c07 100644
--- a/arch/arm64/lib/clear_user.S
+++ b/arch/arm64/lib/clear_user.S
@@ -39,20 +39,20 @@ ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \
 	subs	x1, x1, #8
 	b.mi	2f
 1:
-USER(9f, str	xzr, [x0], #8	)
+uao_user_alternative 9f, str, sttr, xzr, x0, 8
 	subs	x1, x1, #8
 	b.pl	1b
 2:	adds	x1, x1, #4
 	b.mi	3f
-USER(9f, str	wzr, [x0], #4	)
+uao_user_alternative 9f, str, sttr, wzr, x0, 4
 	sub	x1, x1, #4
 3:	adds	x1, x1, #2
 	b.mi	4f
-USER(9f, strh	wzr, [x0], #2	)
+uao_user_alternative 9f, strh, sttrh, wzr, x0, 2
 	sub	x1, x1, #2
 4:	adds	x1, x1, #1
 	b.mi	5f
-USER(9f, strb	wzr, [x0]	)
+uao_user_alternative 9f, strb, sttrb, wzr, x0, 0
 5:	mov	x0, #0
 ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \
 	    CONFIG_ARM64_PAN)
diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S
index 4699cd74f87e..1d982d64f1a7 100644
--- a/arch/arm64/lib/copy_from_user.S
+++ b/arch/arm64/lib/copy_from_user.S
@@ -34,7 +34,7 @@
  */
 
 	.macro ldrb1 ptr, regB, val
-	USER(9998f, ldrb  \ptr, [\regB], \val)
+	uao_user_alternative 9998f, ldrb, ldtrb, \ptr, \regB, \val
 	.endm
 
 	.macro strb1 ptr, regB, val
@@ -42,7 +42,7 @@
 	.endm
 
 	.macro ldrh1 ptr, regB, val
-	USER(9998f, ldrh  \ptr, [\regB], \val)
+	uao_user_alternative 9998f, ldrh, ldtrh, \ptr, \regB, \val
 	.endm
 
 	.macro strh1 ptr, regB, val
@@ -50,7 +50,7 @@
 	.endm
 
 	.macro ldr1 ptr, regB, val
-	USER(9998f, ldr \ptr, [\regB], \val)
+	uao_user_alternative 9998f, ldr, ldtr, \ptr, \regB, \val
 	.endm
 
 	.macro str1 ptr, regB, val
@@ -58,7 +58,7 @@
 	.endm
 
 	.macro ldp1 ptr, regB, regC, val
-	USER(9998f, ldp \ptr, \regB, [\regC], \val)
+	uao_ldp 9998f, \ptr, \regB, \regC, \val
 	.endm
 
 	.macro stp1 ptr, regB, regC, val
diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S
index 81c8fc93c100..feaad1520dc1 100644
--- a/arch/arm64/lib/copy_in_user.S
+++ b/arch/arm64/lib/copy_in_user.S
@@ -35,35 +35,35 @@
  *	x0 - bytes not copied
  */
 	.macro ldrb1 ptr, regB, val
-	USER(9998f, ldrb  \ptr, [\regB], \val)
+	uao_user_alternative 9998f, ldrb, ldtrb, \ptr, \regB, \val
 	.endm
 
 	.macro strb1 ptr, regB, val
-	USER(9998f, strb \ptr, [\regB], \val)
+	uao_user_alternative 9998f, strb, sttrb, \ptr, \regB, \val
 	.endm
 
 	.macro ldrh1 ptr, regB, val
-	USER(9998f, ldrh  \ptr, [\regB], \val)
+	uao_user_alternative 9998f, ldrh, ldtrh, \ptr, \regB, \val
 	.endm
 
 	.macro strh1 ptr, regB, val
-	USER(9998f, strh \ptr, [\regB], \val)
+	uao_user_alternative 9998f, strh, sttrh, \ptr, \regB, \val
 	.endm
 
 	.macro ldr1 ptr, regB, val
-	USER(9998f, ldr \ptr, [\regB], \val)
+	uao_user_alternative 9998f, ldr, ldtr, \ptr, \regB, \val
 	.endm
 
 	.macro str1 ptr, regB, val
-	USER(9998f, str \ptr, [\regB], \val)
+	uao_user_alternative 9998f, str, sttr, \ptr, \regB, \val
 	.endm
 
 	.macro ldp1 ptr, regB, regC, val
-	USER(9998f, ldp \ptr, \regB, [\regC], \val)
+	uao_ldp 9998f, \ptr, \regB, \regC, \val
 	.endm
 
 	.macro stp1 ptr, regB, regC, val
-	USER(9998f, stp \ptr, \regB, [\regC], \val)
+	uao_stp 9998f, \ptr, \regB, \regC, \val
 	.endm
 
 end	.req	x5
diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S
index 7512bbbc07ac..2dae2cd2c481 100644
--- a/arch/arm64/lib/copy_to_user.S
+++ b/arch/arm64/lib/copy_to_user.S
@@ -37,7 +37,7 @@
 	.endm
 
 	.macro strb1 ptr, regB, val
-	USER(9998f, strb \ptr, [\regB], \val)
+	uao_user_alternative 9998f, strb, sttrb, \ptr, \regB, \val
 	.endm
 
 	.macro ldrh1 ptr, regB, val
@@ -45,7 +45,7 @@
 	.endm
 
 	.macro strh1 ptr, regB, val
-	USER(9998f, strh \ptr, [\regB], \val)
+	uao_user_alternative 9998f, strh, sttrh, \ptr, \regB, \val
 	.endm
 
 	.macro ldr1 ptr, regB, val
@@ -53,7 +53,7 @@
 	.endm
 
 	.macro str1 ptr, regB, val
-	USER(9998f, str \ptr, [\regB], \val)
+	uao_user_alternative 9998f, str, sttr, \ptr, \regB, \val
 	.endm
 
 	.macro ldp1 ptr, regB, regC, val
@@ -61,7 +61,7 @@
 	.endm
 
 	.macro stp1 ptr, regB, regC, val
-	USER(9998f, stp \ptr, \regB, [\regC], \val)
+	uao_stp 9998f, \ptr, \regB, \regC, \val
 	.endm
 
 end	.req	x5
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 92ddac1e8ca2..820d47353cf0 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -192,6 +192,14 @@ out:
 	return fault;
 }
 
+static inline int permission_fault(unsigned int esr)
+{
+	unsigned int ec       = (esr & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT;
+	unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE;
+
+	return (ec == ESR_ELx_EC_DABT_CUR && fsc_type == ESR_ELx_FSC_PERM);
+}
+
 static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
 				   struct pt_regs *regs)
 {
@@ -225,12 +233,10 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
 		mm_flags |= FAULT_FLAG_WRITE;
 	}
 
-	/*
-	 * PAN bit set implies the fault happened in kernel space, but not
-	 * in the arch's user access functions.
-	 */
-	if (IS_ENABLED(CONFIG_ARM64_PAN) && (regs->pstate & PSR_PAN_BIT))
-		goto no_context;
+	if (permission_fault(esr) && (addr < USER_DS)) {
+		if (!search_exception_tables(regs->pc))
+			panic("Accessing user space memory outside uaccess.h routines");
+	}
 
 	/*
 	 * As per x86, we may deadlock here. However, since the kernel only
@@ -561,3 +567,16 @@ void cpu_enable_pan(void *__unused)
 	config_sctlr_el1(SCTLR_EL1_SPAN, 0);
 }
 #endif /* CONFIG_ARM64_PAN */
+
+#ifdef CONFIG_ARM64_UAO
+/*
+ * Kernel threads have fs=KERNEL_DS by default, and don't need to call
+ * set_fs(), devtmpfs in particular relies on this behaviour.
+ * We need to enable the feature at runtime (instead of adding it to
+ * PSR_MODE_EL1h) as the feature may not be implemented by the cpu.
+ */
+void cpu_enable_uao(void *__unused)
+{
+	asm(SET_PSTATE_UAO(1));
+}
+#endif /* CONFIG_ARM64_UAO */

From 31b28ec1f1051bf82515fe38d0ae1884f40783cf Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Fri, 5 Feb 2016 14:58:49 +0000
Subject: [PATCH 309/424] arm64: cpufeature: Test 'matches' pointer to find the
 end of the list

CPU feature code uses the desc field as a test to find the end of the list,
this means every entry must have a description. This generates noise for
entries in the list that aren't really features, but combinations of them.
e.g.
> CPU features: detected feature: Privileged Access Never
> CPU features: detected feature: PAN and not UAO

These combination features are needed for corner cases with alternatives,
where cpu features interact.

Change all walkers of the arm64_features[] and arm64_hwcaps[] lists to test
'matches' not 'desc', and only print 'desc' if it is non-NULL.

Signed-off-by: James Morse <james.morse@arm.com>
Reviewed-by : Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

(cherry picked from commit 644c2ae198412c956700e55a2acf80b2541f6aa5)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/kernel/cpufeature.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index ae22edf9d3c9..9cc8186cd14b 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -771,7 +771,7 @@ static void __init setup_cpu_hwcaps(void)
 	int i;
 	const struct arm64_cpu_capabilities *hwcaps = arm64_hwcaps;
 
-	for (i = 0; hwcaps[i].desc; i++)
+	for (i = 0; hwcaps[i].matches; i++)
 		if (hwcaps[i].matches(&hwcaps[i]))
 			cap_set_hwcap(&hwcaps[i]);
 }
@@ -781,11 +781,11 @@ void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
 {
 	int i;
 
-	for (i = 0; caps[i].desc; i++) {
+	for (i = 0; caps[i].matches; i++) {
 		if (!caps[i].matches(&caps[i]))
 			continue;
 
-		if (!cpus_have_cap(caps[i].capability))
+		if (!cpus_have_cap(caps[i].capability) && caps[i].desc)
 			pr_info("%s %s\n", info, caps[i].desc);
 		cpus_set_cap(caps[i].capability);
 	}
@@ -800,7 +800,7 @@ enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps)
 {
 	int i;
 
-	for (i = 0; caps[i].desc; i++)
+	for (i = 0; caps[i].matches; i++)
 		if (caps[i].enable && cpus_have_cap(caps[i].capability))
 			on_each_cpu(caps[i].enable, NULL, true);
 }
@@ -907,7 +907,7 @@ void verify_local_cpu_capabilities(void)
 		return;
 
 	caps = arm64_features;
-	for (i = 0; caps[i].desc; i++) {
+	for (i = 0; caps[i].matches; i++) {
 		if (!cpus_have_cap(caps[i].capability) || !caps[i].sys_reg)
 			continue;
 		/*
@@ -920,7 +920,7 @@ void verify_local_cpu_capabilities(void)
 			caps[i].enable(NULL);
 	}
 
-	for (i = 0, caps = arm64_hwcaps; caps[i].desc; i++) {
+	for (i = 0, caps = arm64_hwcaps; caps[i].matches; i++) {
 		if (!cpus_have_hwcap(&caps[i]))
 			continue;
 		if (!feature_matches(__raw_read_system_reg(caps[i].sys_reg), &caps[i]))

From cdfec5aaf4a886521b7f54dfe2db61735558d546 Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Fri, 5 Feb 2016 14:58:50 +0000
Subject: [PATCH 310/424] arm64: kernel: Don't toggle PAN on systems with UAO

If a CPU supports both Privileged Access Never (PAN) and User Access
Override (UAO), we don't need to disable/re-enable PAN round all
copy_to_user() like calls.

UAO alternatives cause these calls to use the 'unprivileged' load/store
instructions, which are overridden to be the privileged kind when
fs==KERNEL_DS.

This patch changes the copy_to_user() calls to have their PAN toggling
depend on a new composite 'feature' ARM64_ALT_PAN_NOT_UAO.

If both features are detected, PAN will be enabled, but the copy_to_user()
alternatives will not be applied. This means PAN will be enabled all the
time for these functions. If only PAN is detected, the toggling will be
enabled as normal.

This will save the time taken to disable/re-enable PAN, and allow us to
catch copy_to_user() accesses that occur with fs==KERNEL_DS.

Futex and swp-emulation code continue to hang their PAN toggling code on
ARM64_HAS_PAN.

Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 705441960033e66b63524521f153fbb28c99ddbd)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/cpufeature.h |  3 ++-
 arch/arm64/include/asm/uaccess.h    |  8 ++++----
 arch/arm64/kernel/cpufeature.c      | 16 ++++++++++++++++
 arch/arm64/lib/clear_user.S         |  4 ++--
 arch/arm64/lib/copy_from_user.S     |  4 ++--
 arch/arm64/lib/copy_in_user.S       |  4 ++--
 arch/arm64/lib/copy_to_user.S       |  4 ++--
 arch/arm64/mm/fault.c               |  3 +++
 8 files changed, 33 insertions(+), 13 deletions(-)

diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index a5df7cde616b..37a53fc6b384 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -32,8 +32,9 @@
 #define ARM64_WORKAROUND_834220			7
 #define ARM64_HAS_NO_HW_PREFETCH		8
 #define ARM64_HAS_UAO				9
+#define ARM64_ALT_PAN_NOT_UAO			10
 
-#define ARM64_NCAPS				10
+#define ARM64_NCAPS				11
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index f973bdce8410..16ba0d5c9740 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -145,7 +145,7 @@ static inline void set_fs(mm_segment_t fs)
 do {									\
 	unsigned long __gu_val;						\
 	__chk_user_ptr(ptr);						\
-	asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN,	\
+	asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_ALT_PAN_NOT_UAO,\
 			CONFIG_ARM64_PAN));				\
 	switch (sizeof(*(ptr))) {					\
 	case 1:								\
@@ -168,7 +168,7 @@ do {									\
 		BUILD_BUG();						\
 	}								\
 	(x) = (__force __typeof__(*(ptr)))__gu_val;			\
-	asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN,	\
+	asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_ALT_PAN_NOT_UAO,\
 			CONFIG_ARM64_PAN));				\
 } while (0)
 
@@ -217,7 +217,7 @@ do {									\
 do {									\
 	__typeof__(*(ptr)) __pu_val = (x);				\
 	__chk_user_ptr(ptr);						\
-	asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN,	\
+	asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_ALT_PAN_NOT_UAO,\
 			CONFIG_ARM64_PAN));				\
 	switch (sizeof(*(ptr))) {					\
 	case 1:								\
@@ -239,7 +239,7 @@ do {									\
 	default:							\
 		BUILD_BUG();						\
 	}								\
-	asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN,	\
+	asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_ALT_PAN_NOT_UAO,\
 			CONFIG_ARM64_PAN));				\
 } while (0)
 
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 9cc8186cd14b..7566cad9fa1d 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -67,6 +67,10 @@ DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
 		.width = 0,				\
 	}
 
+/* meta feature for alternatives */
+static bool __maybe_unused
+cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry);
+
 static struct arm64_ftr_bits ftr_id_aa64isar0[] = {
 	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
 	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_RDM_SHIFT, 4, 0),
@@ -688,6 +692,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.enable = cpu_enable_uao,
 	},
 #endif /* CONFIG_ARM64_UAO */
+#ifdef CONFIG_ARM64_PAN
+	{
+		.capability = ARM64_ALT_PAN_NOT_UAO,
+		.matches = cpufeature_pan_not_uao,
+	},
+#endif /* CONFIG_ARM64_PAN */
 	{},
 };
 
@@ -966,3 +976,9 @@ void __init setup_cpu_features(void)
 		pr_warn("L1_CACHE_BYTES smaller than the Cache Writeback Granule (%d < %d)\n",
 			L1_CACHE_BYTES, cls);
 }
+
+static bool __maybe_unused
+cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry)
+{
+	return (cpus_have_cap(ARM64_HAS_PAN) && !cpus_have_cap(ARM64_HAS_UAO));
+}
diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S
index 3f950b677c07..5d1cad3ce6d6 100644
--- a/arch/arm64/lib/clear_user.S
+++ b/arch/arm64/lib/clear_user.S
@@ -33,7 +33,7 @@
  * Alignment fixed up by hardware.
  */
 ENTRY(__clear_user)
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \
+ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \
 	    CONFIG_ARM64_PAN)
 	mov	x2, x1			// save the size for fixup return
 	subs	x1, x1, #8
@@ -54,7 +54,7 @@ uao_user_alternative 9f, strh, sttrh, wzr, x0, 2
 	b.mi	5f
 uao_user_alternative 9f, strb, sttrb, wzr, x0, 0
 5:	mov	x0, #0
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \
+ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \
 	    CONFIG_ARM64_PAN)
 	ret
 ENDPROC(__clear_user)
diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S
index 1d982d64f1a7..17e8306dca29 100644
--- a/arch/arm64/lib/copy_from_user.S
+++ b/arch/arm64/lib/copy_from_user.S
@@ -67,11 +67,11 @@
 
 end	.req	x5
 ENTRY(__copy_from_user)
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \
+ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \
 	    CONFIG_ARM64_PAN)
 	add	end, x0, x2
 #include "copy_template.S"
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \
+ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \
 	    CONFIG_ARM64_PAN)
 	mov	x0, #0				// Nothing to copy
 	ret
diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S
index feaad1520dc1..f7292dd08c84 100644
--- a/arch/arm64/lib/copy_in_user.S
+++ b/arch/arm64/lib/copy_in_user.S
@@ -68,11 +68,11 @@
 
 end	.req	x5
 ENTRY(__copy_in_user)
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \
+ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \
 	    CONFIG_ARM64_PAN)
 	add	end, x0, x2
 #include "copy_template.S"
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \
+ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \
 	    CONFIG_ARM64_PAN)
 	mov	x0, #0
 	ret
diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S
index 2dae2cd2c481..21faae60f988 100644
--- a/arch/arm64/lib/copy_to_user.S
+++ b/arch/arm64/lib/copy_to_user.S
@@ -66,11 +66,11 @@
 
 end	.req	x5
 ENTRY(__copy_to_user)
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \
+ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \
 	    CONFIG_ARM64_PAN)
 	add	end, x0, x2
 #include "copy_template.S"
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \
+ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \
 	    CONFIG_ARM64_PAN)
 	mov	x0, #0
 	ret
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 820d47353cf0..d0762a729d01 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -234,6 +234,9 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
 	}
 
 	if (permission_fault(esr) && (addr < USER_DS)) {
+		if (get_thread_info(regs->sp)->addr_limit == KERNEL_DS)
+			panic("Accessing user space memory with fs=KERNEL_DS");
+
 		if (!search_exception_tables(regs->pc))
 			panic("Accessing user space memory outside uaccess.h routines");
 	}

From 9193df45aa67b769ee8af97609537a876a81baca Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Thu, 18 Feb 2016 15:50:04 +0000
Subject: [PATCH 311/424] arm64: Remove the get_thread_info() function

This function was introduced by previous commits implementing UAO.
However, it can be replaced with task_thread_info() in
uao_thread_switch() or get_fs() in do_page_fault() (the latter being
called only on the current context, so no need for using the saved
pt_regs).

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit e950631e84e7e38892ffbeee5e1816b270026b0e)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/thread_info.h |  6 ------
 arch/arm64/kernel/process.c          | 15 ++++++---------
 arch/arm64/mm/fault.c                |  2 +-
 3 files changed, 7 insertions(+), 16 deletions(-)

diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index eba8db6838af..abd64bd1f6d9 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -85,12 +85,6 @@ static inline struct thread_info *current_thread_info(void)
 	return (struct thread_info *)sp_el0;
 }
 
-/* Access struct thread_info of another thread */
-static inline struct thread_info *get_thread_info(unsigned long thread_stack)
-{
-	return (struct thread_info *)(thread_stack & ~(THREAD_SIZE - 1));
-}
-
 #define thread_saved_pc(tsk)	\
 	((unsigned long)(tsk->thread.cpu_context.pc))
 #define thread_saved_sp(tsk)	\
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index c1ca4ea065d4..80624829db61 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -315,15 +315,12 @@ static void tls_thread_switch(struct task_struct *next)
 /* Restore the UAO state depending on next's addr_limit */
 static void uao_thread_switch(struct task_struct *next)
 {
-	unsigned long next_sp = next->thread.cpu_context.sp;
-
-	if (IS_ENABLED(CONFIG_ARM64_UAO) &&
-	    get_thread_info(next_sp)->addr_limit == KERNEL_DS)
-		asm(ALTERNATIVE("nop", SET_PSTATE_UAO(1), ARM64_HAS_UAO,
-			        CONFIG_ARM64_UAO));
-	else
-		asm(ALTERNATIVE("nop", SET_PSTATE_UAO(0), ARM64_HAS_UAO,
-				CONFIG_ARM64_UAO));
+	if (IS_ENABLED(CONFIG_ARM64_UAO)) {
+		if (task_thread_info(next)->addr_limit == KERNEL_DS)
+			asm(ALTERNATIVE("nop", SET_PSTATE_UAO(1), ARM64_HAS_UAO));
+		else
+			asm(ALTERNATIVE("nop", SET_PSTATE_UAO(0), ARM64_HAS_UAO));
+	}
 }
 
 /*
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index d0762a729d01..a8eafeceb08a 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -234,7 +234,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
 	}
 
 	if (permission_fault(esr) && (addr < USER_DS)) {
-		if (get_thread_info(regs->sp)->addr_limit == KERNEL_DS)
+		if (get_fs() == KERNEL_DS)
 			panic("Accessing user space memory with fs=KERNEL_DS");
 
 		if (!search_exception_tables(regs->pc))

From c73cfcaf47182e53c779181031f20e6d3793e07b Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 16 Feb 2016 13:52:32 +0100
Subject: [PATCH 312/424] of/fdt: make memblock minimum physical address arch
 configurable

By default, early_init_dt_add_memory_arch() ignores memory below
the base of the kernel image since it won't be addressable via the
linear mapping. However, this is not appropriate anymore once we
decouple the kernel text mapping from the linear mapping, so archs
may want to drop the low limit entirely. So allow the minimum to be
overridden by setting MIN_MEMBLOCK_ADDR.

Acked-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 270522a04f7a9911983878fa37da467f9ff1c938)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 drivers/of/fdt.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 655f79db7899..1f98156f8996 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -976,13 +976,16 @@ int __init early_init_dt_scan_chosen(unsigned long node, const char *uname,
 }
 
 #ifdef CONFIG_HAVE_MEMBLOCK
+#ifndef MIN_MEMBLOCK_ADDR
+#define MIN_MEMBLOCK_ADDR	__pa(PAGE_OFFSET)
+#endif
 #ifndef MAX_MEMBLOCK_ADDR
 #define MAX_MEMBLOCK_ADDR	((phys_addr_t)~0)
 #endif
 
 void __init __weak early_init_dt_add_memory_arch(u64 base, u64 size)
 {
-	const u64 phys_offset = __pa(PAGE_OFFSET);
+	const u64 phys_offset = MIN_MEMBLOCK_ADDR;
 
 	if (!PAGE_ALIGNED(base)) {
 		if (size < PAGE_SIZE - (base & ~PAGE_MASK)) {

From b01c68c7494903dca326579248d3757c715b84f8 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 16 Feb 2016 13:52:33 +0100
Subject: [PATCH 313/424] of/fdt: factor out assignment of
 initrd_start/initrd_end

Since architectures may not yet have their linear mapping up and running
when the initrd address is discovered from the DT, factor out the
assignment of initrd_start and initrd_end, so that an architecture can
override it and use the translation it needs.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 369bc9abf22bf026e8645a4dd746b90649a2f6ee)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 drivers/of/fdt.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 1f98156f8996..3e90bce70545 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -760,6 +760,16 @@ const void * __init of_flat_dt_match_machine(const void *default_match,
 }
 
 #ifdef CONFIG_BLK_DEV_INITRD
+#ifndef __early_init_dt_declare_initrd
+static void __early_init_dt_declare_initrd(unsigned long start,
+					   unsigned long end)
+{
+	initrd_start = (unsigned long)__va(start);
+	initrd_end = (unsigned long)__va(end);
+	initrd_below_start_ok = 1;
+}
+#endif
+
 /**
  * early_init_dt_check_for_initrd - Decode initrd location from flat tree
  * @node: reference to node containing initrd location ('chosen')
@@ -782,9 +792,7 @@ static void __init early_init_dt_check_for_initrd(unsigned long node)
 		return;
 	end = of_read_number(prop, len/4);
 
-	initrd_start = (unsigned long)__va(start);
-	initrd_end = (unsigned long)__va(end);
-	initrd_below_start_ok = 1;
+	__early_init_dt_declare_initrd(start, end);
 
 	pr_debug("initrd_start=0x%llx  initrd_end=0x%llx\n",
 		 (unsigned long long)start, (unsigned long long)end);

From 2894f328e3bbb808606c82fa1e2ea300089df728 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 16 Feb 2016 13:52:34 +0100
Subject: [PATCH 314/424] arm64: prevent potential circular header dependencies
 in asm/bug.h

Currently, using BUG_ON() in header files is cumbersome, due to the fact
that asm/bug.h transitively includes a lot of other header files, resulting
in the actual BUG_ON() invocation appearing before its definition in the
preprocessor input. So let's reverse the #include dependency between
asm/bug.h and asm/debug-monitors.h, by moving the definition of BUG_BRK_IMM
from the latter to the former. Also fix up one user of asm/debug-monitors.h
which relied on a transitive include.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 03336b1df9929e5d9c28fd9768948b6151cb046c)
Signed-off-by: Alex Shi <alex.shi@linaro.org>

Conflicts:
	skip arch/arm64/kvm/hyp/debug-sr.c
---
 arch/arm64/include/asm/bug.h            | 2 +-
 arch/arm64/include/asm/debug-monitors.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/bug.h b/arch/arm64/include/asm/bug.h
index 4a748ce9ba1a..679d49221998 100644
--- a/arch/arm64/include/asm/bug.h
+++ b/arch/arm64/include/asm/bug.h
@@ -18,7 +18,7 @@
 #ifndef _ARCH_ARM64_ASM_BUG_H
 #define _ARCH_ARM64_ASM_BUG_H
 
-#include <asm/debug-monitors.h>
+#define BUG_BRK_IMM			0x800
 
 #ifdef CONFIG_GENERIC_BUG
 #define HAVE_ARCH_BUG
diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h
index 279c85b5ec09..e893a1fca9c2 100644
--- a/arch/arm64/include/asm/debug-monitors.h
+++ b/arch/arm64/include/asm/debug-monitors.h
@@ -20,6 +20,7 @@
 
 #include <linux/errno.h>
 #include <linux/types.h>
+#include <asm/bug.h>
 #include <asm/esr.h>
 #include <asm/insn.h>
 #include <asm/ptrace.h>
@@ -57,7 +58,6 @@
 #define FAULT_BRK_IMM			0x100
 #define KGDB_DYN_DBG_BRK_IMM		0x400
 #define KGDB_COMPILED_DBG_BRK_IMM	0x401
-#define BUG_BRK_IMM			0x800
 
 /*
  * BRK instruction encoding

From 37cbc7db8e4fa9b66e15cf8661383a6b51c9a3e7 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 16 Feb 2016 13:52:35 +0100
Subject: [PATCH 315/424] arm64: add support for ioremap() block mappings

This wires up the existing generic huge-vmap feature, which allows
ioremap() to use PMD or PUD sized block mappings. It also adds support
to the unmap path for dealing with block mappings, which will allow us
to unmap the __init region using unmap_kernel_range() in a subsequent
patch.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 324420bf91f60582bb481133db9547111768ef17)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 .../features/vm/huge-vmap/arch-support.txt    |  2 +-
 arch/arm64/Kconfig                            |  1 +
 arch/arm64/include/asm/memory.h               |  6 +++
 arch/arm64/mm/mmu.c                           | 41 +++++++++++++++++++
 4 files changed, 49 insertions(+), 1 deletion(-)

diff --git a/Documentation/features/vm/huge-vmap/arch-support.txt b/Documentation/features/vm/huge-vmap/arch-support.txt
index af6816bccb43..df1d1f3c9af2 100644
--- a/Documentation/features/vm/huge-vmap/arch-support.txt
+++ b/Documentation/features/vm/huge-vmap/arch-support.txt
@@ -9,7 +9,7 @@
     |       alpha: | TODO |
     |         arc: | TODO |
     |         arm: | TODO |
-    |       arm64: | TODO |
+    |       arm64: |  ok  |
     |       avr32: | TODO |
     |    blackfin: | TODO |
     |         c6x: | TODO |
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 4df85b5a2045..8cd8d06ece4a 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -49,6 +49,7 @@ config ARM64
 	select HAVE_ALIGNED_STRUCT_PAGE if SLUB
 	select HAVE_ARCH_AUDITSYSCALL
 	select HAVE_ARCH_BITREVERSE
+	select HAVE_ARCH_HUGE_VMAP
 	select HAVE_ARCH_JUMP_LABEL
 	select HAVE_ARCH_KASAN if SPARSEMEM_VMEMMAP && !(ARM64_16K_PAGES && ARM64_VA_BITS_48)
 	select HAVE_ARCH_KGDB
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 853953cd1f08..c65aad7b13dc 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -100,6 +100,12 @@
 #define MT_S2_NORMAL		0xf
 #define MT_S2_DEVICE_nGnRE	0x1
 
+#ifdef CONFIG_ARM64_4K_PAGES
+#define IOREMAP_MAX_ORDER	(PUD_SHIFT)
+#else
+#define IOREMAP_MAX_ORDER	(PMD_SHIFT)
+#endif
+
 #ifndef __ASSEMBLY__
 
 extern phys_addr_t		memstart_addr;
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index d1fa678355c9..b4afa9fbb00f 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -745,3 +745,44 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
 
 	return dt_virt;
 }
+
+int __init arch_ioremap_pud_supported(void)
+{
+	/* only 4k granule supports level 1 block mappings */
+	return IS_ENABLED(CONFIG_ARM64_4K_PAGES);
+}
+
+int __init arch_ioremap_pmd_supported(void)
+{
+	return 1;
+}
+
+int pud_set_huge(pud_t *pud, phys_addr_t phys, pgprot_t prot)
+{
+	BUG_ON(phys & ~PUD_MASK);
+	set_pud(pud, __pud(phys | PUD_TYPE_SECT | pgprot_val(mk_sect_prot(prot))));
+	return 1;
+}
+
+int pmd_set_huge(pmd_t *pmd, phys_addr_t phys, pgprot_t prot)
+{
+	BUG_ON(phys & ~PMD_MASK);
+	set_pmd(pmd, __pmd(phys | PMD_TYPE_SECT | pgprot_val(mk_sect_prot(prot))));
+	return 1;
+}
+
+int pud_clear_huge(pud_t *pud)
+{
+	if (!pud_sect(*pud))
+		return 0;
+	pud_clear(pud);
+	return 1;
+}
+
+int pmd_clear_huge(pmd_t *pmd)
+{
+	if (!pmd_sect(*pmd))
+		return 0;
+	pmd_clear(pmd);
+	return 1;
+}

From 1dd59fe47656335cd3c913e378718eb49b7b1b38 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 16 Feb 2016 13:52:36 +0100
Subject: [PATCH 316/424] arm64: introduce KIMAGE_VADDR as the virtual base of
 the kernel region

This introduces the preprocessor symbol KIMAGE_VADDR which will serve as
the symbolic virtual base of the kernel region, i.e., the kernel's virtual
offset will be KIMAGE_VADDR + TEXT_OFFSET. For now, we define it as being
equal to PAGE_OFFSET, but in the future, it will be moved below it once
we move the kernel virtual mapping out of the linear mapping.

Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit ab893fb9f1b17f02139bce547bb4b69e96b9ae16)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/memory.h | 10 ++++++++--
 arch/arm64/kernel/head.S        |  2 +-
 arch/arm64/kernel/vmlinux.lds.S |  4 ++--
 3 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index c65aad7b13dc..aebc739f5a11 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -51,7 +51,8 @@
 #define VA_BITS			(CONFIG_ARM64_VA_BITS)
 #define VA_START		(UL(0xffffffffffffffff) << VA_BITS)
 #define PAGE_OFFSET		(UL(0xffffffffffffffff) << (VA_BITS - 1))
-#define MODULES_END		(PAGE_OFFSET)
+#define KIMAGE_VADDR		(PAGE_OFFSET)
+#define MODULES_END		(KIMAGE_VADDR)
 #define MODULES_VADDR		(MODULES_END - SZ_64M)
 #define PCI_IO_END		(MODULES_VADDR - SZ_2M)
 #define PCI_IO_START		(PCI_IO_END - PCI_IO_SIZE)
@@ -75,8 +76,13 @@
  * private definitions which should NOT be used outside memory.h
  * files.  Use virt_to_phys/phys_to_virt/__pa/__va instead.
  */
-#define __virt_to_phys(x)	(((phys_addr_t)(x) - PAGE_OFFSET + PHYS_OFFSET))
+#define __virt_to_phys(x) ({						\
+	phys_addr_t __x = (phys_addr_t)(x);				\
+	__x >= PAGE_OFFSET ? (__x - PAGE_OFFSET + PHYS_OFFSET) :	\
+			     (__x - KIMAGE_VADDR + PHYS_OFFSET); })
+
 #define __phys_to_virt(x)	((unsigned long)((x) - PHYS_OFFSET + PAGE_OFFSET))
+#define __phys_to_kimg(x)	((unsigned long)((x) - PHYS_OFFSET + KIMAGE_VADDR))
 
 /*
  * Convert a page to/from a physical address
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 53b9f9f128c2..04d38a058b19 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -389,7 +389,7 @@ __create_page_tables:
 	 * Map the kernel image (starting with PHYS_OFFSET).
 	 */
 	mov	x0, x26				// swapper_pg_dir
-	mov	x5, #PAGE_OFFSET
+	ldr	x5, =KIMAGE_VADDR
 	create_pgd_entry x0, x5, x3, x6
 	ldr	x6, =KERNEL_END			// __va(KERNEL_END)
 	mov	x3, x24				// phys offset
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index b78a3c772294..282e3e64a17e 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -89,7 +89,7 @@ SECTIONS
 		*(.discard.*)
 	}
 
-	. = PAGE_OFFSET + TEXT_OFFSET;
+	. = KIMAGE_VADDR + TEXT_OFFSET;
 
 	.head.text : {
 		_text = .;
@@ -186,4 +186,4 @@ ASSERT(__idmap_text_end - (__idmap_text_start & ~(SZ_4K - 1)) <= SZ_4K,
 /*
  * If padding is applied before .head.text, virt<->phys conversions will fail.
  */
-ASSERT(_text == (PAGE_OFFSET + TEXT_OFFSET), "HEAD is misaligned")
+ASSERT(_text == (KIMAGE_VADDR + TEXT_OFFSET), "HEAD is misaligned")

From 4545faf8e5b81592ba597141d432ca7e2b52a43e Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 16 Feb 2016 13:52:37 +0100
Subject: [PATCH 317/424] arm64: pgtable: implement static [pte|pmd|pud]_offset
 variants

The page table accessors pte_offset(), pud_offset() and pmd_offset()
rely on __va translations, so they can only be used after the linear
mapping has been installed. For the early fixmap and kasan init routines,
whose page tables are allocated statically in the kernel image, these
functions will return bogus values. So implement pte_offset_kimg(),
pmd_offset_kimg() and pud_offset_kimg(), which can be used instead
before any page tables have been allocated dynamically.

Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 6533945a32c762c5db70d7a3ec251a040b2d9661)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/pgtable.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index c99dfc588deb..9a560b368910 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -455,6 +455,9 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd)
 
 #define pmd_page(pmd)		pfn_to_page(__phys_to_pfn(pmd_val(pmd) & PHYS_MASK))
 
+/* use ONLY for statically allocated translation tables */
+#define pte_offset_kimg(dir,addr)	((pte_t *)__phys_to_kimg(pte_offset_phys((dir), (addr))))
+
 /*
  * Conversion functions: convert a page and protection to a page entry,
  * and a page entry and page directory to the page they refer to.
@@ -498,6 +501,9 @@ static inline phys_addr_t pud_page_paddr(pud_t pud)
 
 #define pud_page(pud)		pfn_to_page(__phys_to_pfn(pud_val(pud) & PHYS_MASK))
 
+/* use ONLY for statically allocated translation tables */
+#define pmd_offset_kimg(dir,addr)	((pmd_t *)__phys_to_kimg(pmd_offset_phys((dir), (addr))))
+
 #else
 
 #define pud_page_paddr(pud)	({ BUILD_BUG(); 0; })
@@ -507,6 +513,8 @@ static inline phys_addr_t pud_page_paddr(pud_t pud)
 #define pmd_set_fixmap_offset(pudp, addr)	((pmd_t *)pudp)
 #define pmd_clear_fixmap()
 
+#define pmd_offset_kimg(dir,addr)	((pmd_t *)dir)
+
 #endif	/* CONFIG_PGTABLE_LEVELS > 2 */
 
 #if CONFIG_PGTABLE_LEVELS > 3
@@ -545,6 +553,9 @@ static inline phys_addr_t pgd_page_paddr(pgd_t pgd)
 
 #define pgd_page(pgd)		pfn_to_page(__phys_to_pfn(pgd_val(pgd) & PHYS_MASK))
 
+/* use ONLY for statically allocated translation tables */
+#define pud_offset_kimg(dir,addr)	((pud_t *)__phys_to_kimg(pud_offset_phys((dir), (addr))))
+
 #else
 
 #define pgd_page_paddr(pgd)	({ BUILD_BUG(); 0;})
@@ -554,6 +565,8 @@ static inline phys_addr_t pgd_page_paddr(pgd_t pgd)
 #define pud_set_fixmap_offset(pgdp, addr)	((pud_t *)pgdp)
 #define pud_clear_fixmap()
 
+#define pud_offset_kimg(dir,addr)	((pud_t *)dir)
+
 #endif  /* CONFIG_PGTABLE_LEVELS > 3 */
 
 #define pgd_ERROR(pgd)		__pgd_error(__FILE__, __LINE__, pgd_val(pgd))

From ade984b5fcdda640ca25a1606f9cdaf279e8b4c7 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 16 Feb 2016 13:52:38 +0100
Subject: [PATCH 318/424] arm64: decouple early fixmap init from linear mapping

Since the early fixmap page tables are populated using pages that are
part of the static footprint of the kernel, they are covered by the
initial kernel mapping, and we can refer to them without using __va/__pa
translations, which are tied to the linear mapping.

Since the fixmap page tables are disjoint from the kernel mapping up
to the top level pgd entry, we can refer to bm_pte[] directly, and there
is no need to walk the page tables and perform __pa()/__va() translations
at each step.

Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 157962f5a8f236cab898b68bdaa69ce68922f0bf)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/mm/mmu.c | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index b4afa9fbb00f..0f58a45df1f3 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -614,7 +614,7 @@ static inline pud_t * fixmap_pud(unsigned long addr)
 
 	BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd));
 
-	return pud_offset(pgd, addr);
+	return pud_offset_kimg(pgd, addr);
 }
 
 static inline pmd_t * fixmap_pmd(unsigned long addr)
@@ -623,16 +623,12 @@ static inline pmd_t * fixmap_pmd(unsigned long addr)
 
 	BUG_ON(pud_none(*pud) || pud_bad(*pud));
 
-	return pmd_offset(pud, addr);
+	return pmd_offset_kimg(pud, addr);
 }
 
 static inline pte_t * fixmap_pte(unsigned long addr)
 {
-	pmd_t *pmd = fixmap_pmd(addr);
-
-	BUG_ON(pmd_none(*pmd) || pmd_bad(*pmd));
-
-	return pte_offset_kernel(pmd, addr);
+	return &bm_pte[pte_index(addr)];
 }
 
 void __init early_fixmap_init(void)
@@ -644,14 +640,14 @@ void __init early_fixmap_init(void)
 
 	pgd = pgd_offset_k(addr);
 	pgd_populate(&init_mm, pgd, bm_pud);
-	pud = pud_offset(pgd, addr);
+	pud = fixmap_pud(addr);
 	pud_populate(&init_mm, pud, bm_pmd);
-	pmd = pmd_offset(pud, addr);
+	pmd = fixmap_pmd(addr);
 	pmd_populate_kernel(&init_mm, pmd, bm_pte);
 
 	/*
 	 * The boot-ioremap range spans multiple pmds, for which
-	 * we are not preparted:
+	 * we are not prepared:
 	 */
 	BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
 		     != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));

From 44b9620e6822e2acb0d65507d89cd0658055843c Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 16 Feb 2016 13:52:39 +0100
Subject: [PATCH 319/424] arm64: kvm: deal with kernel symbols outside of
 linear mapping

KVM on arm64 uses a fixed offset between the linear mapping at EL1 and
the HYP mapping at EL2. Before we can move the kernel virtual mapping
out of the linear mapping, we have to make sure that references to kernel
symbols that are accessed via the HYP mapping are translated to their
linear equivalent.

Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit a0bf9776cd0be4490d4675d4108e13379849fc7f)
Signed-off-by: Alex Shi <alex.shi@linaro.org>

Conflicts:
	skip new funcs create_hyp_mappings(__start_rodata,
	in arch/arm/kvm/arm.c and keep funcs in arch/arm64/kvm/hyp.S
---
 arch/arm/include/asm/kvm_asm.h    |  2 ++
 arch/arm/kvm/arm.c                |  5 +++--
 arch/arm64/include/asm/kvm_asm.h  | 17 +++++++++++++++++
 arch/arm64/include/asm/kvm_host.h |  8 +++++---
 arch/arm64/kvm/hyp.S              |  6 +++---
 5 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h
index 194c91b610ff..c35c349da069 100644
--- a/arch/arm/include/asm/kvm_asm.h
+++ b/arch/arm/include/asm/kvm_asm.h
@@ -79,6 +79,8 @@
 #define rr_lo_hi(a1, a2) a1, a2
 #endif
 
+#define kvm_ksym_ref(kva)	(kva)
+
 #ifndef __ASSEMBLY__
 struct kvm;
 struct kvm_vcpu;
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index e06fd299de08..70e6d557c75f 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -969,7 +969,7 @@ static void cpu_init_hyp_mode(void *dummy)
 	pgd_ptr = kvm_mmu_get_httbr();
 	stack_page = __this_cpu_read(kvm_arm_hyp_stack_page);
 	hyp_stack_ptr = stack_page + PAGE_SIZE;
-	vector_ptr = (unsigned long)__kvm_hyp_vector;
+	vector_ptr = (unsigned long)kvm_ksym_ref(__kvm_hyp_vector);
 
 	__cpu_init_hyp_mode(boot_pgd_ptr, pgd_ptr, hyp_stack_ptr, vector_ptr);
 
@@ -1061,7 +1061,8 @@ static int init_hyp_mode(void)
 	/*
 	 * Map the Hyp-code called directly from the host
 	 */
-	err = create_hyp_mappings(__kvm_hyp_code_start, __kvm_hyp_code_end);
+	err = create_hyp_mappings(kvm_ksym_ref(__kvm_hyp_code_start),
+				  kvm_ksym_ref(__kvm_hyp_code_end));
 	if (err) {
 		kvm_err("Cannot map world-switch code\n");
 		goto out_free_mappings;
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 5e377101f919..e95c39543629 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -102,7 +102,24 @@
 #define KVM_ARM64_DEBUG_DIRTY_SHIFT	0
 #define KVM_ARM64_DEBUG_DIRTY		(1 << KVM_ARM64_DEBUG_DIRTY_SHIFT)
 
+#define kvm_ksym_ref(sym)		((void *)&sym + kvm_ksym_shift)
+
 #ifndef __ASSEMBLY__
+#if __GNUC__ > 4
+#define kvm_ksym_shift			(PAGE_OFFSET - KIMAGE_VADDR)
+#else
+/*
+ * GCC versions 4.9 and older will fold the constant below into the addend of
+ * the reference to 'sym' above if kvm_ksym_shift is declared static or if the
+ * constant is used directly. However, since we use the small code model for
+ * the core kernel, the reference to 'sym' will be emitted as a adrp/add pair,
+ * with a +/- 4 GB range, resulting in linker relocation errors if the shift
+ * is sufficiently large. So prevent the compiler from folding the shift into
+ * the addend, by making the shift a variable with external linkage.
+ */
+__weak u64 kvm_ksym_shift = PAGE_OFFSET - KIMAGE_VADDR;
+#endif
+
 struct kvm;
 struct kvm_vcpu;
 
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index a35ce7266aac..90c6368ad7c8 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -222,7 +222,7 @@ static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
 struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
 struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
 
-u64 kvm_call_hyp(void *hypfn, ...);
+u64 __kvm_call_hyp(void *hypfn, ...);
 void force_vm_exit(const cpumask_t *mask);
 void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
 
@@ -243,8 +243,8 @@ static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr,
 	 * Call initialization code, and switch to the full blown
 	 * HYP code.
 	 */
-	kvm_call_hyp((void *)boot_pgd_ptr, pgd_ptr,
-		     hyp_stack_ptr, vector_ptr);
+	__kvm_call_hyp((void *)boot_pgd_ptr, pgd_ptr,
+		       hyp_stack_ptr, vector_ptr);
 }
 
 static inline void kvm_arch_hardware_disable(void) {}
@@ -258,4 +258,6 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu);
 void kvm_arm_clear_debug(struct kvm_vcpu *vcpu);
 void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu);
 
+#define kvm_call_hyp(f, ...) __kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__)
+
 #endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 86c289832272..309e3479dc2c 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -923,7 +923,7 @@ __hyp_panic_str:
 	.align	2
 
 /*
- * u64 kvm_call_hyp(void *hypfn, ...);
+ * u64 __kvm_call_hyp(void *hypfn, ...);
  *
  * This is not really a variadic function in the classic C-way and care must
  * be taken when calling this to ensure parameters are passed in registers
@@ -940,10 +940,10 @@ __hyp_panic_str:
  * used to implement __hyp_get_vectors in the same way as in
  * arch/arm64/kernel/hyp_stub.S.
  */
-ENTRY(kvm_call_hyp)
+ENTRY(__kvm_call_hyp)
 	hvc	#0
 	ret
-ENDPROC(kvm_call_hyp)
+ENDPROC(__kvm_call_hyp)
 
 .macro invalid_vector	label, target
 	.align	2

From 49d5b2c298815fcaee00779e69a741ad1e80e740 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 16 Feb 2016 13:52:40 +0100
Subject: [PATCH 320/424] arm64: move kernel image to base of vmalloc area

This moves the module area to right before the vmalloc area, and moves
the kernel image to the base of the vmalloc area. This is an intermediate
step towards implementing KASLR, which allows the kernel image to be
located anywhere in the vmalloc area.

Since other subsystems such as hibernate may still need to refer to the
kernel text or data segments via their linears addresses, both are mapped
in the linear region as well. The linear alias of the text region is
mapped read-only/non-executable to prevent inadvertent modification or
execution.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit f9040773b7bbbd9e98eb6184a263512a7cfc133f)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/kasan.h   |   2 +-
 arch/arm64/include/asm/memory.h  |  21 ++++--
 arch/arm64/include/asm/pgtable.h |  10 +--
 arch/arm64/mm/dump.c             |  12 ++--
 arch/arm64/mm/init.c             |  23 +++----
 arch/arm64/mm/kasan_init.c       |  27 +++++++-
 arch/arm64/mm/mmu.c              | 110 +++++++++++++++++++++----------
 7 files changed, 137 insertions(+), 68 deletions(-)

diff --git a/arch/arm64/include/asm/kasan.h b/arch/arm64/include/asm/kasan.h
index de0d21211c34..71ad0f93eb71 100644
--- a/arch/arm64/include/asm/kasan.h
+++ b/arch/arm64/include/asm/kasan.h
@@ -14,7 +14,7 @@
  * KASAN_SHADOW_END: KASAN_SHADOW_START + 1/8 of kernel virtual addresses.
  */
 #define KASAN_SHADOW_START      (VA_START)
-#define KASAN_SHADOW_END        (KASAN_SHADOW_START + (1UL << (VA_BITS - 3)))
+#define KASAN_SHADOW_END        (KASAN_SHADOW_START + KASAN_SHADOW_SIZE)
 
 /*
  * This value is used to map an address to the corresponding shadow
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index aebc739f5a11..4388651d1f0d 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -45,16 +45,15 @@
  * VA_START - the first kernel virtual address.
  * TASK_SIZE - the maximum size of a user space task.
  * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area.
- * The module space lives between the addresses given by TASK_SIZE
- * and PAGE_OFFSET - it must be within 128MB of the kernel text.
  */
 #define VA_BITS			(CONFIG_ARM64_VA_BITS)
 #define VA_START		(UL(0xffffffffffffffff) << VA_BITS)
 #define PAGE_OFFSET		(UL(0xffffffffffffffff) << (VA_BITS - 1))
-#define KIMAGE_VADDR		(PAGE_OFFSET)
-#define MODULES_END		(KIMAGE_VADDR)
-#define MODULES_VADDR		(MODULES_END - SZ_64M)
-#define PCI_IO_END		(MODULES_VADDR - SZ_2M)
+#define KIMAGE_VADDR		(MODULES_END)
+#define MODULES_END		(MODULES_VADDR + MODULES_VSIZE)
+#define MODULES_VADDR		(VA_START + KASAN_SHADOW_SIZE)
+#define MODULES_VSIZE		(SZ_64M)
+#define PCI_IO_END		(PAGE_OFFSET - SZ_2M)
 #define PCI_IO_START		(PCI_IO_END - PCI_IO_SIZE)
 #define FIXADDR_TOP		(PCI_IO_START - SZ_2M)
 #define TASK_SIZE_64		(UL(1) << VA_BITS)
@@ -71,6 +70,16 @@
 
 #define TASK_UNMAPPED_BASE	(PAGE_ALIGN(TASK_SIZE / 4))
 
+/*
+ * The size of the KASAN shadow region. This should be 1/8th of the
+ * size of the entire kernel virtual address space.
+ */
+#ifdef CONFIG_KASAN
+#define KASAN_SHADOW_SIZE	(UL(1) << (VA_BITS - 3))
+#else
+#define KASAN_SHADOW_SIZE	(0)
+#endif
+
 /*
  * Physical vs virtual RAM address space conversion.  These are
  * private definitions which should NOT be used outside memory.h
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 9a560b368910..c3c2518eecfe 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -36,19 +36,13 @@
  *
  * VMEMAP_SIZE: allows the whole linear region to be covered by a struct page array
  *	(rounded up to PUD_SIZE).
- * VMALLOC_START: beginning of the kernel VA space
+ * VMALLOC_START: beginning of the kernel vmalloc space
  * VMALLOC_END: extends to the available space below vmmemmap, PCI I/O space,
  *	fixed mappings and modules
  */
 #define VMEMMAP_SIZE		ALIGN((1UL << (VA_BITS - PAGE_SHIFT)) * sizeof(struct page), PUD_SIZE)
 
-#ifndef CONFIG_KASAN
-#define VMALLOC_START		(VA_START)
-#else
-#include <asm/kasan.h>
-#define VMALLOC_START		(KASAN_SHADOW_END + SZ_64K)
-#endif
-
+#define VMALLOC_START		(MODULES_END)
 #define VMALLOC_END		(PAGE_OFFSET - PUD_SIZE - VMEMMAP_SIZE - SZ_64K)
 
 #define VMEMMAP_START		(VMALLOC_END + SZ_64K)
diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c
index 0841b2bf0e6a..6be918478f85 100644
--- a/arch/arm64/mm/dump.c
+++ b/arch/arm64/mm/dump.c
@@ -35,7 +35,9 @@ struct addr_marker {
 };
 
 enum address_markers_idx {
-	VMALLOC_START_NR = 0,
+	MODULES_START_NR = 0,
+	MODULES_END_NR,
+	VMALLOC_START_NR,
 	VMALLOC_END_NR,
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 	VMEMMAP_START_NR,
@@ -45,12 +47,12 @@ enum address_markers_idx {
 	FIXADDR_END_NR,
 	PCI_START_NR,
 	PCI_END_NR,
-	MODULES_START_NR,
-	MODULES_END_NR,
 	KERNEL_SPACE_NR,
 };
 
 static struct addr_marker address_markers[] = {
+	{ MODULES_VADDR,	"Modules start" },
+	{ MODULES_END,		"Modules end" },
 	{ VMALLOC_START,	"vmalloc() Area" },
 	{ VMALLOC_END,		"vmalloc() End" },
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
@@ -61,9 +63,7 @@ static struct addr_marker address_markers[] = {
 	{ FIXADDR_TOP,		"Fixmap end" },
 	{ PCI_IO_START,		"PCI I/O start" },
 	{ PCI_IO_END,		"PCI I/O end" },
-	{ MODULES_VADDR,	"Modules start" },
-	{ MODULES_END,		"Modules end" },
-	{ PAGE_OFFSET,		"Kernel Mapping" },
+	{ PAGE_OFFSET,		"Linear Mapping" },
 	{ -1,			NULL },
 };
 
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index dba32ceff17a..ac4d8159d6f3 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -36,6 +36,7 @@
 #include <linux/swiotlb.h>
 
 #include <asm/fixmap.h>
+#include <asm/kasan.h>
 #include <asm/memory.h>
 #include <asm/sections.h>
 #include <asm/setup.h>
@@ -302,22 +303,26 @@ void __init mem_init(void)
 #ifdef CONFIG_KASAN
 		  "    kasan   : 0x%16lx - 0x%16lx   (%6ld GB)\n"
 #endif
+		  "    modules : 0x%16lx - 0x%16lx   (%6ld MB)\n"
 		  "    vmalloc : 0x%16lx - 0x%16lx   (%6ld GB)\n"
+		  "      .init : 0x%p" " - 0x%p" "   (%6ld KB)\n"
+		  "      .text : 0x%p" " - 0x%p" "   (%6ld KB)\n"
+		  "      .data : 0x%p" " - 0x%p" "   (%6ld KB)\n"
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 		  "    vmemmap : 0x%16lx - 0x%16lx   (%6ld GB maximum)\n"
 		  "              0x%16lx - 0x%16lx   (%6ld MB actual)\n"
 #endif
 		  "    fixed   : 0x%16lx - 0x%16lx   (%6ld KB)\n"
 		  "    PCI I/O : 0x%16lx - 0x%16lx   (%6ld MB)\n"
-		  "    modules : 0x%16lx - 0x%16lx   (%6ld MB)\n"
-		  "    memory  : 0x%16lx - 0x%16lx   (%6ld MB)\n"
-		  "      .init : 0x%p" " - 0x%p" "   (%6ld KB)\n"
-		  "      .text : 0x%p" " - 0x%p" "   (%6ld KB)\n"
-		  "      .data : 0x%p" " - 0x%p" "   (%6ld KB)\n",
+		  "    memory  : 0x%16lx - 0x%16lx   (%6ld MB)\n",
 #ifdef CONFIG_KASAN
 		  MLG(KASAN_SHADOW_START, KASAN_SHADOW_END),
 #endif
+		  MLM(MODULES_VADDR, MODULES_END),
 		  MLG(VMALLOC_START, VMALLOC_END),
+		  MLK_ROUNDUP(__init_begin, __init_end),
+		  MLK_ROUNDUP(_text, _etext),
+		  MLK_ROUNDUP(_sdata, _edata),
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 		  MLG(VMEMMAP_START,
 		      VMEMMAP_START + VMEMMAP_SIZE),
@@ -326,11 +331,7 @@ void __init mem_init(void)
 #endif
 		  MLK(FIXADDR_START, FIXADDR_TOP),
 		  MLM(PCI_IO_START, PCI_IO_END),
-		  MLM(MODULES_VADDR, MODULES_END),
-		  MLM(PAGE_OFFSET, (unsigned long)high_memory),
-		  MLK_ROUNDUP(__init_begin, __init_end),
-		  MLK_ROUNDUP(_text, _etext),
-		  MLK_ROUNDUP(_sdata, _edata));
+		  MLM(PAGE_OFFSET, (unsigned long)high_memory));
 
 #undef MLK
 #undef MLM
@@ -358,8 +359,8 @@ void __init mem_init(void)
 
 void free_initmem(void)
 {
-	fixup_init();
 	free_initmem_default(0);
+	fixup_init();
 }
 
 #ifdef CONFIG_BLK_DEV_INITRD
diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index cc569a38bc76..7f10cc91fa8a 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -17,9 +17,11 @@
 #include <linux/start_kernel.h>
 
 #include <asm/mmu_context.h>
+#include <asm/kernel-pgtable.h>
 #include <asm/page.h>
 #include <asm/pgalloc.h>
 #include <asm/pgtable.h>
+#include <asm/sections.h>
 #include <asm/tlbflush.h>
 
 static pgd_t tmp_pg_dir[PTRS_PER_PGD] __initdata __aligned(PGD_SIZE);
@@ -33,7 +35,7 @@ static void __init kasan_early_pte_populate(pmd_t *pmd, unsigned long addr,
 	if (pmd_none(*pmd))
 		pmd_populate_kernel(&init_mm, pmd, kasan_zero_pte);
 
-	pte = pte_offset_kernel(pmd, addr);
+	pte = pte_offset_kimg(pmd, addr);
 	do {
 		next = addr + PAGE_SIZE;
 		set_pte(pte, pfn_pte(virt_to_pfn(kasan_zero_page),
@@ -51,7 +53,7 @@ static void __init kasan_early_pmd_populate(pud_t *pud,
 	if (pud_none(*pud))
 		pud_populate(&init_mm, pud, kasan_zero_pmd);
 
-	pmd = pmd_offset(pud, addr);
+	pmd = pmd_offset_kimg(pud, addr);
 	do {
 		next = pmd_addr_end(addr, end);
 		kasan_early_pte_populate(pmd, addr, next);
@@ -68,7 +70,7 @@ static void __init kasan_early_pud_populate(pgd_t *pgd,
 	if (pgd_none(*pgd))
 		pgd_populate(&init_mm, pgd, kasan_zero_pud);
 
-	pud = pud_offset(pgd, addr);
+	pud = pud_offset_kimg(pgd, addr);
 	do {
 		next = pud_addr_end(addr, end);
 		kasan_early_pmd_populate(pud, addr, next);
@@ -126,9 +128,13 @@ static void __init clear_pgds(unsigned long start,
 
 void __init kasan_init(void)
 {
+	u64 kimg_shadow_start, kimg_shadow_end;
 	struct memblock_region *reg;
 	int i;
 
+	kimg_shadow_start = (u64)kasan_mem_to_shadow(_text);
+	kimg_shadow_end = (u64)kasan_mem_to_shadow(_end);
+
 	/*
 	 * We are going to perform proper setup of shadow memory.
 	 * At first we should unmap early shadow (clear_pgds() call bellow).
@@ -142,8 +148,23 @@ void __init kasan_init(void)
 
 	clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
 
+	vmemmap_populate(kimg_shadow_start, kimg_shadow_end, NUMA_NO_NODE);
+
+	/*
+	 * vmemmap_populate() has populated the shadow region that covers the
+	 * kernel image with SWAPPER_BLOCK_SIZE mappings, so we have to round
+	 * the start and end addresses to SWAPPER_BLOCK_SIZE as well, to prevent
+	 * kasan_populate_zero_shadow() from replacing the PMD block mappings
+	 * with PMD table mappings at the edges of the shadow region for the
+	 * kernel image.
+	 */
+	if (ARM64_SWAPPER_USES_SECTION_MAPS)
+		kimg_shadow_end = round_up(kimg_shadow_end, SWAPPER_BLOCK_SIZE);
+
 	kasan_populate_zero_shadow((void *)KASAN_SHADOW_START,
 			kasan_mem_to_shadow((void *)MODULES_VADDR));
+	kasan_populate_zero_shadow((void *)kimg_shadow_end,
+			kasan_mem_to_shadow((void *)PAGE_OFFSET));
 
 	for_each_memblock(memory, reg) {
 		void *start = (void *)__phys_to_virt(reg->base);
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 0f58a45df1f3..895a8457259c 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -53,6 +53,10 @@ u64 idmap_t0sz = TCR_T0SZ(VA_BITS);
 unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss;
 EXPORT_SYMBOL(empty_zero_page);
 
+static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
+static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused;
+static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused;
+
 pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 			      unsigned long size, pgprot_t vma_prot)
 {
@@ -380,16 +384,15 @@ static void create_mapping_late(phys_addr_t phys, unsigned long virt,
 
 static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end)
 {
-
 	unsigned long kernel_start = __pa(_stext);
-	unsigned long kernel_end = __pa(_end);
+	unsigned long kernel_end = __pa(_etext);
 
 	/*
-	 * The kernel itself is mapped at page granularity. Map all other
-	 * memory, making sure we don't overwrite the existing kernel mappings.
+	 * Take care not to create a writable alias for the
+	 * read-only text and rodata sections of the kernel image.
 	 */
 
-	/* No overlap with the kernel. */
+	/* No overlap with the kernel text */
 	if (end < kernel_start || start >= kernel_end) {
 		__create_pgd_mapping(pgd, start, __phys_to_virt(start),
 				     end - start, PAGE_KERNEL,
@@ -398,8 +401,8 @@ static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end
 	}
 
 	/*
-	 * This block overlaps the kernel mapping. Map the portion(s) which
-	 * don't overlap.
+	 * This block overlaps the kernel text mapping.
+	 * Map the portion(s) which don't overlap.
 	 */
 	if (start < kernel_start)
 		__create_pgd_mapping(pgd, start,
@@ -411,6 +414,16 @@ static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end
 				     __phys_to_virt(kernel_end),
 				     end - kernel_end, PAGE_KERNEL,
 				     early_pgtable_alloc);
+
+	/*
+	 * Map the linear alias of the [_stext, _etext) interval as
+	 * read-only/non-executable. This makes the contents of the
+	 * region accessible to subsystems such as hibernate, but
+	 * protects it from inadvertent modification or execution.
+	 */
+	__create_pgd_mapping(pgd, kernel_start, __phys_to_virt(kernel_start),
+			     kernel_end - kernel_start, PAGE_KERNEL_RO,
+			     early_pgtable_alloc);
 }
 
 static void __init map_mem(pgd_t *pgd)
@@ -429,25 +442,28 @@ static void __init map_mem(pgd_t *pgd)
 	}
 }
 
-#ifdef CONFIG_DEBUG_RODATA
 void mark_rodata_ro(void)
 {
+	if (!IS_ENABLED(CONFIG_DEBUG_RODATA))
+		return;
+
 	create_mapping_late(__pa(_stext), (unsigned long)_stext,
 				(unsigned long)_etext - (unsigned long)_stext,
 				PAGE_KERNEL_ROX);
-
 }
-#endif
 
 void fixup_init(void)
 {
-	create_mapping_late(__pa(__init_begin), (unsigned long)__init_begin,
-			(unsigned long)__init_end - (unsigned long)__init_begin,
-			PAGE_KERNEL);
+	/*
+	 * Unmap the __init region but leave the VM area in place. This
+	 * prevents the region from being reused for kernel modules, which
+	 * is not supported by kallsyms.
+	 */
+	unmap_kernel_range((u64)__init_begin, (u64)(__init_end - __init_begin));
 }
 
 static void __init map_kernel_chunk(pgd_t *pgd, void *va_start, void *va_end,
-				    pgprot_t prot)
+				    pgprot_t prot, struct vm_struct *vma)
 {
 	phys_addr_t pa_start = __pa(va_start);
 	unsigned long size = va_end - va_start;
@@ -457,6 +473,14 @@ static void __init map_kernel_chunk(pgd_t *pgd, void *va_start, void *va_end,
 
 	__create_pgd_mapping(pgd, pa_start, (unsigned long)va_start, size, prot,
 			     early_pgtable_alloc);
+
+	vma->addr	= va_start;
+	vma->phys_addr	= pa_start;
+	vma->size	= size;
+	vma->flags	= VM_MAP;
+	vma->caller	= __builtin_return_address(0);
+
+	vm_area_add_early(vma);
 }
 
 /*
@@ -464,17 +488,35 @@ static void __init map_kernel_chunk(pgd_t *pgd, void *va_start, void *va_end,
  */
 static void __init map_kernel(pgd_t *pgd)
 {
+	static struct vm_struct vmlinux_text, vmlinux_init, vmlinux_data;
 
-	map_kernel_chunk(pgd, _stext, _etext, PAGE_KERNEL_EXEC);
-	map_kernel_chunk(pgd, __init_begin, __init_end, PAGE_KERNEL_EXEC);
-	map_kernel_chunk(pgd, _data, _end, PAGE_KERNEL);
+	map_kernel_chunk(pgd, _stext, _etext, PAGE_KERNEL_EXEC, &vmlinux_text);
+	map_kernel_chunk(pgd, __init_begin, __init_end, PAGE_KERNEL_EXEC,
+			 &vmlinux_init);
+	map_kernel_chunk(pgd, _data, _end, PAGE_KERNEL, &vmlinux_data);
 
-	/*
-	 * The fixmap falls in a separate pgd to the kernel, and doesn't live
-	 * in the carveout for the swapper_pg_dir. We can simply re-use the
-	 * existing dir for the fixmap.
-	 */
-	set_pgd(pgd_offset_raw(pgd, FIXADDR_START), *pgd_offset_k(FIXADDR_START));
+	if (!pgd_val(*pgd_offset_raw(pgd, FIXADDR_START))) {
+		/*
+		 * The fixmap falls in a separate pgd to the kernel, and doesn't
+		 * live in the carveout for the swapper_pg_dir. We can simply
+		 * re-use the existing dir for the fixmap.
+		 */
+		set_pgd(pgd_offset_raw(pgd, FIXADDR_START),
+			*pgd_offset_k(FIXADDR_START));
+	} else if (CONFIG_PGTABLE_LEVELS > 3) {
+		/*
+		 * The fixmap shares its top level pgd entry with the kernel
+		 * mapping. This can really only occur when we are running
+		 * with 16k/4 levels, so we can simply reuse the pud level
+		 * entry instead.
+		 */
+		BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
+		set_pud(pud_set_fixmap_offset(pgd, FIXADDR_START),
+			__pud(__pa(bm_pmd) | PUD_TYPE_TABLE));
+		pud_clear_fixmap();
+	} else {
+		BUG();
+	}
 
 	kasan_copy_shadow(pgd);
 }
@@ -600,14 +642,6 @@ void vmemmap_free(unsigned long start, unsigned long end)
 }
 #endif	/* CONFIG_SPARSEMEM_VMEMMAP */
 
-static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
-#if CONFIG_PGTABLE_LEVELS > 2
-static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss;
-#endif
-#if CONFIG_PGTABLE_LEVELS > 3
-static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss;
-#endif
-
 static inline pud_t * fixmap_pud(unsigned long addr)
 {
 	pgd_t *pgd = pgd_offset_k(addr);
@@ -639,8 +673,18 @@ void __init early_fixmap_init(void)
 	unsigned long addr = FIXADDR_START;
 
 	pgd = pgd_offset_k(addr);
-	pgd_populate(&init_mm, pgd, bm_pud);
-	pud = fixmap_pud(addr);
+	if (CONFIG_PGTABLE_LEVELS > 3 && !pgd_none(*pgd)) {
+		/*
+		 * We only end up here if the kernel mapping and the fixmap
+		 * share the top level pgd entry, which should only happen on
+		 * 16k/4 levels configurations.
+		 */
+		BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
+		pud = pud_offset_kimg(pgd, addr);
+	} else {
+		pgd_populate(&init_mm, pgd, bm_pud);
+		pud = fixmap_pud(addr);
+	}
 	pud_populate(&init_mm, pud, bm_pmd);
 	pmd = fixmap_pmd(addr);
 	pmd_populate_kernel(&init_mm, pmd, bm_pte);

From 368a063148f5d4e5b40e65b9954a9b98b1a7cc3c Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 16 Feb 2016 13:52:41 +0100
Subject: [PATCH 321/424] arm64: defer __va translation of initrd_start and
 initrd_end

Before deferring the assignment of memstart_addr in a subsequent patch, to
the moment where all memory has been discovered and possibly clipped based
on the size of the linear region and the presence of a mem= command line
parameter, we need to ensure that memstart_addr is not used to perform __va
translations before it is assigned.

One such use is in the generic early DT discovery of the initrd location,
which is recorded as a virtual address in the globals initrd_start and
initrd_end. So wire up the generic support to declare the initrd addresses,
and implement it without __va() translations, and perform the translation
after memstart_addr has been assigned.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit a89dea585371a9d5d85499db47c93f129be8e0c4)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/memory.h |  8 ++++++++
 arch/arm64/mm/init.c            | 13 +++++++++----
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 4388651d1f0d..18b7e77c7495 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -121,6 +121,14 @@
 #define IOREMAP_MAX_ORDER	(PMD_SHIFT)
 #endif
 
+#ifdef CONFIG_BLK_DEV_INITRD
+#define __early_init_dt_declare_initrd(__start, __end)			\
+	do {								\
+		initrd_start = (__start);				\
+		initrd_end = (__end);					\
+	} while (0)
+#endif
+
 #ifndef __ASSEMBLY__
 
 extern phys_addr_t		memstart_addr;
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index ac4d8159d6f3..92acbee2bb8b 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -59,8 +59,8 @@ static int __init early_initrd(char *p)
 	if (*endp == ',') {
 		size = memparse(endp + 1, NULL);
 
-		initrd_start = (unsigned long)__va(start);
-		initrd_end = (unsigned long)__va(start + size);
+		initrd_start = start;
+		initrd_end = start + size;
 	}
 	return 0;
 }
@@ -168,8 +168,13 @@ void __init arm64_memblock_init(void)
 	 */
 	memblock_reserve(__pa(_text), _end - _text);
 #ifdef CONFIG_BLK_DEV_INITRD
-	if (initrd_start)
-		memblock_reserve(__virt_to_phys(initrd_start), initrd_end - initrd_start);
+	if (initrd_start) {
+		memblock_reserve(initrd_start, initrd_end - initrd_start);
+
+		/* the generic initrd code expects virtual addresses */
+		initrd_start = __phys_to_virt(initrd_start);
+		initrd_end = __phys_to_virt(initrd_end);
+	}
 #endif
 
 	early_init_fdt_scan_reserved_mem();

From 72b991537db5f4c361b540cbf0059c7268d848c4 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 16 Feb 2016 13:52:42 +0100
Subject: [PATCH 322/424] arm64: allow kernel Image to be loaded anywhere in
 physical memory

This relaxes the kernel Image placement requirements, so that it
may be placed at any 2 MB aligned offset in physical memory.

This is accomplished by ignoring PHYS_OFFSET when installing
memblocks, and accounting for the apparent virtual offset of
the kernel Image. As a result, virtual address references
below PAGE_OFFSET are correctly mapped onto physical references
into the kernel Image regardless of where it sits in memory.

Special care needs to be taken for dealing with memory limits passed
via mem=, since the generic implementation clips memory top down, which
may clip the kernel image itself if it is loaded high up in memory. To
deal with this case, we simply add back the memory covering the kernel
image, which may result in more memory to be retained than was passed
as a mem= parameter.

Since mem= should not be considered a production feature, a panic notifier
handler is installed that dumps the memory limit at panic time if one was
set.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit a7f8de168ace487fa7b88cb154e413cf40e87fc6)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 Documentation/arm64/booting.txt         | 20 +++++---
 arch/arm64/include/asm/boot.h           |  6 +++
 arch/arm64/include/asm/kernel-pgtable.h | 12 +++++
 arch/arm64/include/asm/kvm_asm.h        | 17 +------
 arch/arm64/include/asm/memory.h         | 18 +++----
 arch/arm64/kernel/head.S                |  6 ++-
 arch/arm64/kernel/image.h               | 13 +++--
 arch/arm64/mm/init.c                    | 63 ++++++++++++++++++++++++-
 arch/arm64/mm/mmu.c                     |  3 ++
 9 files changed, 119 insertions(+), 39 deletions(-)

diff --git a/Documentation/arm64/booting.txt b/Documentation/arm64/booting.txt
index 701d39d3171a..56d6d8b796db 100644
--- a/Documentation/arm64/booting.txt
+++ b/Documentation/arm64/booting.txt
@@ -109,7 +109,13 @@ Header notes:
 			1 - 4K
 			2 - 16K
 			3 - 64K
-  Bits 3-63:	Reserved.
+  Bit 3:	Kernel physical placement
+			0 - 2MB aligned base should be as close as possible
+			    to the base of DRAM, since memory below it is not
+			    accessible via the linear mapping
+			1 - 2MB aligned base may be anywhere in physical
+			    memory
+  Bits 4-63:	Reserved.
 
 - When image_size is zero, a bootloader should attempt to keep as much
   memory as possible free for use by the kernel immediately after the
@@ -117,14 +123,14 @@ Header notes:
   depending on selected features, and is effectively unbound.
 
 The Image must be placed text_offset bytes from a 2MB aligned base
-address near the start of usable system RAM and called there. Memory
-below that base address is currently unusable by Linux, and therefore it
-is strongly recommended that this location is the start of system RAM.
-The region between the 2 MB aligned base address and the start of the
-image has no special significance to the kernel, and may be used for
-other purposes.
+address anywhere in usable system RAM and called there. The region
+between the 2 MB aligned base address and the start of the image has no
+special significance to the kernel, and may be used for other purposes.
 At least image_size bytes from the start of the image must be free for
 use by the kernel.
+NOTE: versions prior to v4.6 cannot make use of memory below the
+physical offset of the Image so it is recommended that the Image be
+placed as close as possible to the start of system RAM.
 
 Any memory described to the kernel (even that below the start of the
 image) which is not marked as reserved from the kernel (e.g., with a
diff --git a/arch/arm64/include/asm/boot.h b/arch/arm64/include/asm/boot.h
index 81151b67b26b..ebf2481889c3 100644
--- a/arch/arm64/include/asm/boot.h
+++ b/arch/arm64/include/asm/boot.h
@@ -11,4 +11,10 @@
 #define MIN_FDT_ALIGN		8
 #define MAX_FDT_SIZE		SZ_2M
 
+/*
+ * arm64 requires the kernel image to placed
+ * TEXT_OFFSET bytes beyond a 2 MB aligned base
+ */
+#define MIN_KIMG_ALIGN		SZ_2M
+
 #endif
diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
index a459714ee29e..5c6375d8528b 100644
--- a/arch/arm64/include/asm/kernel-pgtable.h
+++ b/arch/arm64/include/asm/kernel-pgtable.h
@@ -79,5 +79,17 @@
 #define SWAPPER_MM_MMUFLAGS	(PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS)
 #endif
 
+/*
+ * To make optimal use of block mappings when laying out the linear
+ * mapping, round down the base of physical memory to a size that can
+ * be mapped efficiently, i.e., either PUD_SIZE (4k granule) or PMD_SIZE
+ * (64k granule), or a multiple that can be mapped using contiguous bits
+ * in the page tables: 32 * PMD_SIZE (16k granule)
+ */
+#ifdef CONFIG_ARM64_64K_PAGES
+#define ARM64_MEMSTART_ALIGN	SZ_512M
+#else
+#define ARM64_MEMSTART_ALIGN	SZ_1G
+#endif
 
 #endif	/* __ASM_KERNEL_PGTABLE_H */
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index e95c39543629..419bc6661b5c 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -102,24 +102,9 @@
 #define KVM_ARM64_DEBUG_DIRTY_SHIFT	0
 #define KVM_ARM64_DEBUG_DIRTY		(1 << KVM_ARM64_DEBUG_DIRTY_SHIFT)
 
-#define kvm_ksym_ref(sym)		((void *)&sym + kvm_ksym_shift)
+#define kvm_ksym_ref(sym)		phys_to_virt((u64)&sym - kimage_voffset)
 
 #ifndef __ASSEMBLY__
-#if __GNUC__ > 4
-#define kvm_ksym_shift			(PAGE_OFFSET - KIMAGE_VADDR)
-#else
-/*
- * GCC versions 4.9 and older will fold the constant below into the addend of
- * the reference to 'sym' above if kvm_ksym_shift is declared static or if the
- * constant is used directly. However, since we use the small code model for
- * the core kernel, the reference to 'sym' will be emitted as a adrp/add pair,
- * with a +/- 4 GB range, resulting in linker relocation errors if the shift
- * is sufficiently large. So prevent the compiler from folding the shift into
- * the addend, by making the shift a variable with external linkage.
- */
-__weak u64 kvm_ksym_shift = PAGE_OFFSET - KIMAGE_VADDR;
-#endif
-
 struct kvm;
 struct kvm_vcpu;
 
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 18b7e77c7495..3239e4d78e0d 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -24,6 +24,7 @@
 #include <linux/compiler.h>
 #include <linux/const.h>
 #include <linux/types.h>
+#include <asm/bug.h>
 #include <asm/sizes.h>
 
 /*
@@ -88,10 +89,10 @@
 #define __virt_to_phys(x) ({						\
 	phys_addr_t __x = (phys_addr_t)(x);				\
 	__x >= PAGE_OFFSET ? (__x - PAGE_OFFSET + PHYS_OFFSET) :	\
-			     (__x - KIMAGE_VADDR + PHYS_OFFSET); })
+			     (__x - kimage_voffset); })
 
 #define __phys_to_virt(x)	((unsigned long)((x) - PHYS_OFFSET + PAGE_OFFSET))
-#define __phys_to_kimg(x)	((unsigned long)((x) - PHYS_OFFSET + KIMAGE_VADDR))
+#define __phys_to_kimg(x)	((unsigned long)((x) + kimage_voffset))
 
 /*
  * Convert a page to/from a physical address
@@ -133,15 +134,16 @@
 
 extern phys_addr_t		memstart_addr;
 /* PHYS_OFFSET - the physical address of the start of memory. */
-#define PHYS_OFFSET		({ memstart_addr; })
+#define PHYS_OFFSET		({ BUG_ON(memstart_addr & 1); memstart_addr; })
+
+/* the offset between the kernel virtual and physical mappings */
+extern u64			kimage_voffset;
 
 /*
- * The maximum physical address that the linear direct mapping
- * of system RAM can cover. (PAGE_OFFSET can be interpreted as
- * a 2's complement signed quantity and negated to derive the
- * maximum size of the linear mapping.)
+ * Allow all memory at the discovery stage. We will clip it later.
  */
-#define MAX_MEMBLOCK_ADDR	({ memstart_addr - PAGE_OFFSET - 1; })
+#define MIN_MEMBLOCK_ADDR	0
+#define MAX_MEMBLOCK_ADDR	U64_MAX
 
 /*
  * PFNs are used to describe any physical page; this means
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 04d38a058b19..05b98289093e 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -428,7 +428,11 @@ __mmap_switched:
 	and	x4, x4, #~(THREAD_SIZE - 1)
 	msr	sp_el0, x4			// Save thread_info
 	str_l	x21, __fdt_pointer, x5		// Save FDT pointer
-	str_l	x24, memstart_addr, x6		// Save PHYS_OFFSET
+
+	ldr	x4, =KIMAGE_VADDR		// Save the offset between
+	sub	x4, x4, x24			// the kernel virtual and
+	str_l	x4, kimage_voffset, x5		// physical mappings
+
 	mov	x29, #0
 #ifdef CONFIG_KASAN
 	bl	kasan_early_init
diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h
index 999633bd7294..c9c62cab25a4 100644
--- a/arch/arm64/kernel/image.h
+++ b/arch/arm64/kernel/image.h
@@ -42,15 +42,18 @@
 #endif
 
 #ifdef CONFIG_CPU_BIG_ENDIAN
-#define __HEAD_FLAG_BE	1
+#define __HEAD_FLAG_BE		1
 #else
-#define __HEAD_FLAG_BE	0
+#define __HEAD_FLAG_BE		0
 #endif
 
-#define __HEAD_FLAG_PAGE_SIZE ((PAGE_SHIFT - 10) / 2)
+#define __HEAD_FLAG_PAGE_SIZE	((PAGE_SHIFT - 10) / 2)
 
-#define __HEAD_FLAGS	((__HEAD_FLAG_BE << 0) |	\
-			 (__HEAD_FLAG_PAGE_SIZE << 1))
+#define __HEAD_FLAG_PHYS_BASE	1
+
+#define __HEAD_FLAGS		((__HEAD_FLAG_BE << 0) |	\
+				 (__HEAD_FLAG_PAGE_SIZE << 1) |	\
+				 (__HEAD_FLAG_PHYS_BASE << 3))
 
 /*
  * These will output as part of the Image header, which should be little-endian
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 92acbee2bb8b..2c7a3c2868e4 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -35,8 +35,10 @@
 #include <linux/efi.h>
 #include <linux/swiotlb.h>
 
+#include <asm/boot.h>
 #include <asm/fixmap.h>
 #include <asm/kasan.h>
+#include <asm/kernel-pgtable.h>
 #include <asm/memory.h>
 #include <asm/sections.h>
 #include <asm/setup.h>
@@ -46,7 +48,13 @@
 
 #include "mm.h"
 
-phys_addr_t memstart_addr __read_mostly = 0;
+/*
+ * We need to be able to catch inadvertent references to memstart_addr
+ * that occur (potentially in generic code) before arm64_memblock_init()
+ * executes, which assigns it its actual value. So use a default value
+ * that cannot be mistaken for a real physical address.
+ */
+phys_addr_t memstart_addr __read_mostly = ~0ULL;
 phys_addr_t arm64_dma_phys_limit __read_mostly;
 
 #ifdef CONFIG_BLK_DEV_INITRD
@@ -160,7 +168,33 @@ early_param("mem", early_mem);
 
 void __init arm64_memblock_init(void)
 {
-	memblock_enforce_memory_limit(memory_limit);
+	const s64 linear_region_size = -(s64)PAGE_OFFSET;
+
+	/*
+	 * Select a suitable value for the base of physical memory.
+	 */
+	memstart_addr = round_down(memblock_start_of_DRAM(),
+				   ARM64_MEMSTART_ALIGN);
+
+	/*
+	 * Remove the memory that we will not be able to cover with the
+	 * linear mapping. Take care not to clip the kernel which may be
+	 * high in memory.
+	 */
+	memblock_remove(max(memstart_addr + linear_region_size, __pa(_end)),
+			ULLONG_MAX);
+	if (memblock_end_of_DRAM() > linear_region_size)
+		memblock_remove(0, memblock_end_of_DRAM() - linear_region_size);
+
+	/*
+	 * Apply the memory limit if it was set. Since the kernel may be loaded
+	 * high up in memory, add back the kernel region that must be accessible
+	 * via the linear mapping.
+	 */
+	if (memory_limit != (phys_addr_t)ULLONG_MAX) {
+		memblock_enforce_memory_limit(memory_limit);
+		memblock_add(__pa(_text), (u64)(_end - _text));
+	}
 
 	/*
 	 * Register the kernel text, kernel data, initrd, and initial
@@ -386,3 +420,28 @@ static int __init keepinitrd_setup(char *__unused)
 
 __setup("keepinitrd", keepinitrd_setup);
 #endif
+
+/*
+ * Dump out memory limit information on panic.
+ */
+static int dump_mem_limit(struct notifier_block *self, unsigned long v, void *p)
+{
+	if (memory_limit != (phys_addr_t)ULLONG_MAX) {
+		pr_emerg("Memory Limit: %llu MB\n", memory_limit >> 20);
+	} else {
+		pr_emerg("Memory Limit: none\n");
+	}
+	return 0;
+}
+
+static struct notifier_block mem_limit_notifier = {
+	.notifier_call = dump_mem_limit,
+};
+
+static int __init register_mem_limit_dumper(void)
+{
+	atomic_notifier_chain_register(&panic_notifier_list,
+				       &mem_limit_notifier);
+	return 0;
+}
+__initcall(register_mem_limit_dumper);
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 895a8457259c..fb5c872fe3d6 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -46,6 +46,9 @@
 
 u64 idmap_t0sz = TCR_T0SZ(VA_BITS);
 
+u64 kimage_voffset __read_mostly;
+EXPORT_SYMBOL(kimage_voffset);
+
 /*
  * Empty_zero_page is a special page that is used for zero-initialized data
  * and COW.

From a67099df67dec4550a650aa3b871e2b0ecd20957 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Fri, 19 Feb 2016 14:28:58 +0000
Subject: [PATCH 323/424] arm64: User die() instead of panic() in
 do_page_fault()

The former gives better error reporting on unhandled permission faults
(introduced by the UAO patches).

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 70c8abc28762d04e36c92e07eee2ce6ab41049cb)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/mm/fault.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index a8eafeceb08a..44e56de23f79 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -235,10 +235,10 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
 
 	if (permission_fault(esr) && (addr < USER_DS)) {
 		if (get_fs() == KERNEL_DS)
-			panic("Accessing user space memory with fs=KERNEL_DS");
+			die("Accessing user space memory with fs=KERNEL_DS", regs, esr);
 
 		if (!search_exception_tables(regs->pc))
-			panic("Accessing user space memory outside uaccess.h routines");
+			die("Accessing user space memory outside uaccess.h routines", regs, esr);
 	}
 
 	/*

From bf7cb966b2f5ef5a456a25d84d2b64c79730a07a Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 22 Feb 2016 18:46:03 +0100
Subject: [PATCH 324/424] arm64: mm: only perform memstart_addr sanity check if
 DEBUG_VM

Checking whether memstart_addr has been assigned every time it is
referenced adds a branch instruction that may hurt performance if
the reference in question occurs on a hot path. So only perform the
check if CONFIG_DEBUG_VM=y.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
[catalin.marinas@arm.com: replaced #ifdef with VM_BUG_ON]
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>

(cherry picked from commit a92405f082d43267575444a6927085e4c8a69e4e)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/memory.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 3239e4d78e0d..460d09bf9442 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -132,9 +132,11 @@
 
 #ifndef __ASSEMBLY__
 
+#include <linux/mmdebug.h>
+
 extern phys_addr_t		memstart_addr;
 /* PHYS_OFFSET - the physical address of the start of memory. */
-#define PHYS_OFFSET		({ BUG_ON(memstart_addr & 1); memstart_addr; })
+#define PHYS_OFFSET		({ VM_BUG_ON(memstart_addr & 1); memstart_addr; })
 
 /* the offset between the kernel virtual and physical mappings */
 extern u64			kimage_voffset;

From d3bb0180b3ecbcff076ca7e41d4ad4fa0ee4c9d7 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 22 Feb 2016 18:46:04 +0100
Subject: [PATCH 325/424] arm64: mm: use bit ops rather than arithmetic in
 pa/va translations

Since PAGE_OFFSET is chosen such that it cuts the kernel VA space right
in half, and since the size of the kernel VA space itself is always a
power of 2, we can treat PAGE_OFFSET as a bitmask and replace the
additions/subtractions with 'or' and 'and-not' operations.

For the comparison against PAGE_OFFSET, a mov/cmp/branch sequence ends
up getting replaced with a single tbz instruction. For the additions and
subtractions, we save a mov instruction since the mask is folded into the
instruction's immediate field.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 8439e62a15614e8fcd43835d57b7245cd9870dc5)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/memory.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 460d09bf9442..eb798156cf56 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -88,10 +88,10 @@
  */
 #define __virt_to_phys(x) ({						\
 	phys_addr_t __x = (phys_addr_t)(x);				\
-	__x >= PAGE_OFFSET ? (__x - PAGE_OFFSET + PHYS_OFFSET) :	\
-			     (__x - kimage_voffset); })
+	__x & BIT(VA_BITS - 1) ? (__x & ~PAGE_OFFSET) + PHYS_OFFSET :	\
+				 (__x - kimage_voffset); })
 
-#define __phys_to_virt(x)	((unsigned long)((x) - PHYS_OFFSET + PAGE_OFFSET))
+#define __phys_to_virt(x)	((unsigned long)((x) - PHYS_OFFSET) | PAGE_OFFSET)
 #define __phys_to_kimg(x)	((unsigned long)((x) + kimage_voffset))
 
 /*
@@ -132,6 +132,7 @@
 
 #ifndef __ASSEMBLY__
 
+#include <linux/bitops.h>
 #include <linux/mmdebug.h>
 
 extern phys_addr_t		memstart_addr;

From 11e7d3ccfae5510815c72cec4066d5d5acbfa718 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 23 Feb 2016 08:56:45 +0100
Subject: [PATCH 326/424] arm64: move brk immediate argument definitions to
 separate header

Instead of reversing the header dependency between asm/bug.h and
asm/debug-monitors.h, split off the brk instruction immediate value
defines into a new header asm/brk-imm.h, and include it from both.

This solves the circular dependency issue that prevents BUG() from
being used in some header files, and keeps the definitions together.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit f98deee9a9f8c47d05a0f64d86440882dca772ff)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/brk-imm.h        | 25 +++++++++++++++++++++++++
 arch/arm64/include/asm/bug.h            |  2 +-
 arch/arm64/include/asm/debug-monitors.h | 14 +-------------
 3 files changed, 27 insertions(+), 14 deletions(-)
 create mode 100644 arch/arm64/include/asm/brk-imm.h

diff --git a/arch/arm64/include/asm/brk-imm.h b/arch/arm64/include/asm/brk-imm.h
new file mode 100644
index 000000000000..ed693c5bcec0
--- /dev/null
+++ b/arch/arm64/include/asm/brk-imm.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_BRK_IMM_H
+#define __ASM_BRK_IMM_H
+
+/*
+ * #imm16 values used for BRK instruction generation
+ * Allowed values for kgdb are 0x400 - 0x7ff
+ * 0x100: for triggering a fault on purpose (reserved)
+ * 0x400: for dynamic BRK instruction
+ * 0x401: for compile time BRK instruction
+ * 0x800: kernel-mode BUG() and WARN() traps
+ */
+#define FAULT_BRK_IMM			0x100
+#define KGDB_DYN_DBG_BRK_IMM		0x400
+#define KGDB_COMPILED_DBG_BRK_IMM	0x401
+#define BUG_BRK_IMM			0x800
+
+#endif
diff --git a/arch/arm64/include/asm/bug.h b/arch/arm64/include/asm/bug.h
index 679d49221998..561190d15881 100644
--- a/arch/arm64/include/asm/bug.h
+++ b/arch/arm64/include/asm/bug.h
@@ -18,7 +18,7 @@
 #ifndef _ARCH_ARM64_ASM_BUG_H
 #define _ARCH_ARM64_ASM_BUG_H
 
-#define BUG_BRK_IMM			0x800
+#include <asm/brk-imm.h>
 
 #ifdef CONFIG_GENERIC_BUG
 #define HAVE_ARCH_BUG
diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h
index e893a1fca9c2..2fcb9b7c876c 100644
--- a/arch/arm64/include/asm/debug-monitors.h
+++ b/arch/arm64/include/asm/debug-monitors.h
@@ -20,7 +20,7 @@
 
 #include <linux/errno.h>
 #include <linux/types.h>
-#include <asm/bug.h>
+#include <asm/brk-imm.h>
 #include <asm/esr.h>
 #include <asm/insn.h>
 #include <asm/ptrace.h>
@@ -47,18 +47,6 @@
  */
 #define BREAK_INSTR_SIZE		AARCH64_INSN_SIZE
 
-/*
- * #imm16 values used for BRK instruction generation
- * Allowed values for kgbd are 0x400 - 0x7ff
- * 0x100: for triggering a fault on purpose (reserved)
- * 0x400: for dynamic BRK instruction
- * 0x401: for compile time BRK instruction
- * 0x800: kernel-mode BUG() and WARN() traps
- */
-#define FAULT_BRK_IMM			0x100
-#define KGDB_DYN_DBG_BRK_IMM		0x400
-#define KGDB_COMPILED_DBG_BRK_IMM	0x401
-
 /*
  * BRK instruction encoding
  * The #imm16 value should be placed at bits[20:5] within BRK ins

From 6537906675622c231257664593ed3174b117b3ef Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 24 Nov 2015 12:37:35 +0100
Subject: [PATCH 327/424] arm64: add support for module PLTs

This adds support for emitting PLTs at module load time for relative
branches that are out of range. This is a prerequisite for KASLR, which
may place the kernel and the modules anywhere in the vmalloc area,
making it more likely that branch target offsets exceed the maximum
range of +/- 128 MB.

In this version, I removed the distinction between relocations against
.init executable sections and ordinary executable sections. The reason
is that it is hardly worth the trouble, given that .init.text usually
does not contain that many far branches, and this version now only
reserves PLT entry space for jump and call relocations against undefined
symbols (since symbols defined in the same module can be assumed to be
within +/- 128 MB)

For example, the mac80211.ko module (which is fairly sizable at ~400 KB)
built with -mcmodel=large gives the following relocation counts:

                    relocs    branches   unique     !local
  .text              3925       3347       518        219
  .init.text           11          8         7          1
  .exit.text            4          4         4          1
  .text.unlikely       81         67        36         17

('unique' means branches to unique type/symbol/addend combos, of which
!local is the subset referring to undefined symbols)

IOW, we are only emitting a single PLT entry for the .init sections, and
we are better off just adding it to the core PLT section instead.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit fd045f6cd98ec4953147b318418bd45e441e52a3)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/Kconfig              |   9 ++
 arch/arm64/Makefile             |   6 +-
 arch/arm64/include/asm/module.h |  11 ++
 arch/arm64/kernel/Makefile      |   1 +
 arch/arm64/kernel/module-plts.c | 201 ++++++++++++++++++++++++++++++++
 arch/arm64/kernel/module.c      |  22 ++++
 arch/arm64/kernel/module.lds    |   3 +
 7 files changed, 252 insertions(+), 1 deletion(-)
 create mode 100644 arch/arm64/kernel/module-plts.c
 create mode 100644 arch/arm64/kernel/module.lds

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 8cd8d06ece4a..22db20491733 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -365,6 +365,7 @@ config ARM64_ERRATUM_843419
 	bool "Cortex-A53: 843419: A load or store might access an incorrect address"
 	depends on MODULES
 	default y
+	select ARM64_MODULE_CMODEL_LARGE
 	help
 	  This option builds kernel modules using the large memory model in
 	  order to avoid the use of the ADRP instruction, which can cause
@@ -728,6 +729,14 @@ config ARM64_UAO
 	  regular load/store instructions if the cpu does not implement the
 	  feature.
 
+config ARM64_MODULE_CMODEL_LARGE
+	bool
+
+config ARM64_MODULE_PLTS
+	bool
+	select ARM64_MODULE_CMODEL_LARGE
+	select HAVE_MOD_ARCH_SPECIFIC
+
 endmenu
 
 menu "Boot options"
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 548a2939d7e6..71054a38decf 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -43,10 +43,14 @@ endif
 
 CHECKFLAGS	+= -D__aarch64__
 
-ifeq ($(CONFIG_ARM64_ERRATUM_843419), y)
+ifeq ($(CONFIG_ARM64_MODULE_CMODEL_LARGE), y)
 KBUILD_CFLAGS_MODULE	+= -mcmodel=large
 endif
 
+ifeq ($(CONFIG_ARM64_MODULE_PLTS),y)
+KBUILD_LDFLAGS_MODULE	+= -T $(srctree)/arch/arm64/kernel/module.lds
+endif
+
 # Default value
 head-y		:= arch/arm64/kernel/head.o
 
diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h
index e80e232b730e..8652fb613304 100644
--- a/arch/arm64/include/asm/module.h
+++ b/arch/arm64/include/asm/module.h
@@ -20,4 +20,15 @@
 
 #define MODULE_ARCH_VERMAGIC	"aarch64"
 
+#ifdef CONFIG_ARM64_MODULE_PLTS
+struct mod_arch_specific {
+	struct elf64_shdr	*plt;
+	int			plt_num_entries;
+	int			plt_max_entries;
+};
+#endif
+
+u64 module_emit_plt_entry(struct module *mod, const Elf64_Rela *rela,
+			  Elf64_Sym *sym);
+
 #endif /* __ASM_MODULE_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index c4e2f70c0aa0..8d971f9c6ed5 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -30,6 +30,7 @@ arm64-obj-$(CONFIG_COMPAT)		+= sys32.o kuser32.o signal32.o 	\
 					   ../../arm/kernel/opcodes.o
 arm64-obj-$(CONFIG_FUNCTION_TRACER)	+= ftrace.o entry-ftrace.o
 arm64-obj-$(CONFIG_MODULES)		+= arm64ksyms.o module.o
+arm64-obj-$(CONFIG_ARM64_MODULE_PLTS)	+= module-plts.o
 arm64-obj-$(CONFIG_PERF_EVENTS)		+= perf_regs.o perf_callchain.o
 arm64-obj-$(CONFIG_HW_PERF_EVENTS)	+= perf_event.o
 arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= hw_breakpoint.o
diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c
new file mode 100644
index 000000000000..1ce90d8450ae
--- /dev/null
+++ b/arch/arm64/kernel/module-plts.c
@@ -0,0 +1,201 @@
+/*
+ * Copyright (C) 2014-2016 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/elf.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sort.h>
+
+struct plt_entry {
+	/*
+	 * A program that conforms to the AArch64 Procedure Call Standard
+	 * (AAPCS64) must assume that a veneer that alters IP0 (x16) and/or
+	 * IP1 (x17) may be inserted at any branch instruction that is
+	 * exposed to a relocation that supports long branches. Since that
+	 * is exactly what we are dealing with here, we are free to use x16
+	 * as a scratch register in the PLT veneers.
+	 */
+	__le32	mov0;	/* movn	x16, #0x....			*/
+	__le32	mov1;	/* movk	x16, #0x...., lsl #16		*/
+	__le32	mov2;	/* movk	x16, #0x...., lsl #32		*/
+	__le32	br;	/* br	x16				*/
+};
+
+u64 module_emit_plt_entry(struct module *mod, const Elf64_Rela *rela,
+			  Elf64_Sym *sym)
+{
+	struct plt_entry *plt = (struct plt_entry *)mod->arch.plt->sh_addr;
+	int i = mod->arch.plt_num_entries;
+	u64 val = sym->st_value + rela->r_addend;
+
+	/*
+	 * We only emit PLT entries against undefined (SHN_UNDEF) symbols,
+	 * which are listed in the ELF symtab section, but without a type
+	 * or a size.
+	 * So, similar to how the module loader uses the Elf64_Sym::st_value
+	 * field to store the resolved addresses of undefined symbols, let's
+	 * borrow the Elf64_Sym::st_size field (whose value is never used by
+	 * the module loader, even for symbols that are defined) to record
+	 * the address of a symbol's associated PLT entry as we emit it for a
+	 * zero addend relocation (which is the only kind we have to deal with
+	 * in practice). This allows us to find duplicates without having to
+	 * go through the table every time.
+	 */
+	if (rela->r_addend == 0 && sym->st_size != 0) {
+		BUG_ON(sym->st_size < (u64)plt || sym->st_size >= (u64)&plt[i]);
+		return sym->st_size;
+	}
+
+	mod->arch.plt_num_entries++;
+	BUG_ON(mod->arch.plt_num_entries > mod->arch.plt_max_entries);
+
+	/*
+	 * MOVK/MOVN/MOVZ opcode:
+	 * +--------+------------+--------+-----------+-------------+---------+
+	 * | sf[31] | opc[30:29] | 100101 | hw[22:21] | imm16[20:5] | Rd[4:0] |
+	 * +--------+------------+--------+-----------+-------------+---------+
+	 *
+	 * Rd     := 0x10 (x16)
+	 * hw     := 0b00 (no shift), 0b01 (lsl #16), 0b10 (lsl #32)
+	 * opc    := 0b11 (MOVK), 0b00 (MOVN), 0b10 (MOVZ)
+	 * sf     := 1 (64-bit variant)
+	 */
+	plt[i] = (struct plt_entry){
+		cpu_to_le32(0x92800010 | (((~val      ) & 0xffff)) << 5),
+		cpu_to_le32(0xf2a00010 | ((( val >> 16) & 0xffff)) << 5),
+		cpu_to_le32(0xf2c00010 | ((( val >> 32) & 0xffff)) << 5),
+		cpu_to_le32(0xd61f0200)
+	};
+
+	if (rela->r_addend == 0)
+		sym->st_size = (u64)&plt[i];
+
+	return (u64)&plt[i];
+}
+
+#define cmp_3way(a,b)	((a) < (b) ? -1 : (a) > (b))
+
+static int cmp_rela(const void *a, const void *b)
+{
+	const Elf64_Rela *x = a, *y = b;
+	int i;
+
+	/* sort by type, symbol index and addend */
+	i = cmp_3way(ELF64_R_TYPE(x->r_info), ELF64_R_TYPE(y->r_info));
+	if (i == 0)
+		i = cmp_3way(ELF64_R_SYM(x->r_info), ELF64_R_SYM(y->r_info));
+	if (i == 0)
+		i = cmp_3way(x->r_addend, y->r_addend);
+	return i;
+}
+
+static bool duplicate_rel(const Elf64_Rela *rela, int num)
+{
+	/*
+	 * Entries are sorted by type, symbol index and addend. That means
+	 * that, if a duplicate entry exists, it must be in the preceding
+	 * slot.
+	 */
+	return num > 0 && cmp_rela(rela + num, rela + num - 1) == 0;
+}
+
+static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num)
+{
+	unsigned int ret = 0;
+	Elf64_Sym *s;
+	int i;
+
+	for (i = 0; i < num; i++) {
+		switch (ELF64_R_TYPE(rela[i].r_info)) {
+		case R_AARCH64_JUMP26:
+		case R_AARCH64_CALL26:
+			/*
+			 * We only have to consider branch targets that resolve
+			 * to undefined symbols. This is not simply a heuristic,
+			 * it is a fundamental limitation, since the PLT itself
+			 * is part of the module, and needs to be within 128 MB
+			 * as well, so modules can never grow beyond that limit.
+			 */
+			s = syms + ELF64_R_SYM(rela[i].r_info);
+			if (s->st_shndx != SHN_UNDEF)
+				break;
+
+			/*
+			 * Jump relocations with non-zero addends against
+			 * undefined symbols are supported by the ELF spec, but
+			 * do not occur in practice (e.g., 'jump n bytes past
+			 * the entry point of undefined function symbol f').
+			 * So we need to support them, but there is no need to
+			 * take them into consideration when trying to optimize
+			 * this code. So let's only check for duplicates when
+			 * the addend is zero: this allows us to record the PLT
+			 * entry address in the symbol table itself, rather than
+			 * having to search the list for duplicates each time we
+			 * emit one.
+			 */
+			if (rela[i].r_addend != 0 || !duplicate_rel(rela, i))
+				ret++;
+			break;
+		}
+	}
+	return ret;
+}
+
+int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
+			      char *secstrings, struct module *mod)
+{
+	unsigned long plt_max_entries = 0;
+	Elf64_Sym *syms = NULL;
+	int i;
+
+	/*
+	 * Find the empty .plt section so we can expand it to store the PLT
+	 * entries. Record the symtab address as well.
+	 */
+	for (i = 0; i < ehdr->e_shnum; i++) {
+		if (strcmp(".plt", secstrings + sechdrs[i].sh_name) == 0)
+			mod->arch.plt = sechdrs + i;
+		else if (sechdrs[i].sh_type == SHT_SYMTAB)
+			syms = (Elf64_Sym *)sechdrs[i].sh_addr;
+	}
+
+	if (!mod->arch.plt) {
+		pr_err("%s: module PLT section missing\n", mod->name);
+		return -ENOEXEC;
+	}
+	if (!syms) {
+		pr_err("%s: module symtab section missing\n", mod->name);
+		return -ENOEXEC;
+	}
+
+	for (i = 0; i < ehdr->e_shnum; i++) {
+		Elf64_Rela *rels = (void *)ehdr + sechdrs[i].sh_offset;
+		int numrels = sechdrs[i].sh_size / sizeof(Elf64_Rela);
+		Elf64_Shdr *dstsec = sechdrs + sechdrs[i].sh_info;
+
+		if (sechdrs[i].sh_type != SHT_RELA)
+			continue;
+
+		/* ignore relocations that operate on non-exec sections */
+		if (!(dstsec->sh_flags & SHF_EXECINSTR))
+			continue;
+
+		/* sort by type, symbol index and addend */
+		sort(rels, numrels, sizeof(Elf64_Rela), cmp_rela, NULL);
+
+		plt_max_entries += count_plts(syms, rels, numrels);
+	}
+
+	mod->arch.plt->sh_type = SHT_NOBITS;
+	mod->arch.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+	mod->arch.plt->sh_addralign = L1_CACHE_BYTES;
+	mod->arch.plt->sh_size = plt_max_entries * sizeof(struct plt_entry);
+	mod->arch.plt_num_entries = 0;
+	mod->arch.plt_max_entries = plt_max_entries;
+	return 0;
+}
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index 93e970231ca9..a9dde97f5ca5 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -38,6 +38,21 @@ void *module_alloc(unsigned long size)
 				GFP_KERNEL, PAGE_KERNEL_EXEC, 0,
 				NUMA_NO_NODE, __builtin_return_address(0));
 
+	if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) &&
+	    !IS_ENABLED(CONFIG_KASAN))
+		/*
+		 * KASAN can only deal with module allocations being served
+		 * from the reserved module region, since the remainder of
+		 * the vmalloc region is already backed by zero shadow pages,
+		 * and punching holes into it is non-trivial. Since the module
+		 * region is not randomized when KASAN is enabled, it is even
+		 * less likely that the module region gets exhausted, so we
+		 * can simply omit this fallback in that case.
+		 */
+		p = __vmalloc_node_range(size, MODULE_ALIGN, VMALLOC_START,
+				VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_EXEC, 0,
+				NUMA_NO_NODE, __builtin_return_address(0));
+
 	if (p && (kasan_module_alloc(p, size) < 0)) {
 		vfree(p);
 		return NULL;
@@ -361,6 +376,13 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 		case R_AARCH64_CALL26:
 			ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 26,
 					     AARCH64_INSN_IMM_26);
+
+			if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) &&
+			    ovf == -ERANGE) {
+				val = module_emit_plt_entry(me, &rel[i], sym);
+				ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2,
+						     26, AARCH64_INSN_IMM_26);
+			}
 			break;
 
 		default:
diff --git a/arch/arm64/kernel/module.lds b/arch/arm64/kernel/module.lds
new file mode 100644
index 000000000000..8949f6c6f729
--- /dev/null
+++ b/arch/arm64/kernel/module.lds
@@ -0,0 +1,3 @@
+SECTIONS {
+	.plt (NOLOAD) : { BYTE(0) }
+}

From 9bd7f88a1dd7b6c513b6f7cf39154e69d0cbf62e Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Sat, 26 Dec 2015 13:48:02 +0100
Subject: [PATCH 328/424] arm64: avoid R_AARCH64_ABS64 relocations for Image
 header fields

Unfortunately, the current way of using the linker to emit build time
constants into the Image header will no longer work once we switch to
the use of PIE executables. The reason is that such constants are emitted
into the binary using R_AARCH64_ABS64 relocations, which are resolved at
runtime, not at build time, and the places targeted by those relocations
will contain zeroes before that.

So refactor the endian swapping linker script constant generation code so
that it emits the upper and lower 32-bit words separately.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 6ad1fe5d9077a1ab40bf74b61994d2e770b00b14)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/assembler.h | 11 ++++++++++
 arch/arm64/kernel/head.S           |  6 +++---
 arch/arm64/kernel/image.h          | 32 ++++++++++++++++++------------
 3 files changed, 33 insertions(+), 16 deletions(-)

diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index bb7b72734c24..ba5aff6c830e 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -215,4 +215,15 @@ lr	.req	x30		// link register
 	.size	__pi_##x, . - x;	\
 	ENDPROC(x)
 
+	/*
+	 * Emit a 64-bit absolute little endian symbol reference in a way that
+	 * ensures that it will be resolved at build time, even when building a
+	 * PIE binary. This requires cooperation from the linker script, which
+	 * must emit the lo32/hi32 halves individually.
+	 */
+	.macro	le64sym, sym
+	.long	\sym\()_lo32
+	.long	\sym\()_hi32
+	.endm
+
 #endif	/* __ASM_ASSEMBLER_H */
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 05b98289093e..f076debf392d 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -83,9 +83,9 @@ efi_head:
 	b	stext				// branch to kernel start, magic
 	.long	0				// reserved
 #endif
-	.quad	_kernel_offset_le		// Image load offset from start of RAM, little-endian
-	.quad	_kernel_size_le			// Effective size of kernel image, little-endian
-	.quad	_kernel_flags_le		// Informative flags, little-endian
+	le64sym	_kernel_offset_le		// Image load offset from start of RAM, little-endian
+	le64sym	_kernel_size_le			// Effective size of kernel image, little-endian
+	le64sym	_kernel_flags_le		// Informative flags, little-endian
 	.quad	0				// reserved
 	.quad	0				// reserved
 	.quad	0				// reserved
diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h
index c9c62cab25a4..db1bf57948f1 100644
--- a/arch/arm64/kernel/image.h
+++ b/arch/arm64/kernel/image.h
@@ -26,21 +26,27 @@
  * There aren't any ELF relocations we can use to endian-swap values known only
  * at link time (e.g. the subtraction of two symbol addresses), so we must get
  * the linker to endian-swap certain values before emitting them.
+ *
+ * Note that, in order for this to work when building the ELF64 PIE executable
+ * (for KASLR), these values should not be referenced via R_AARCH64_ABS64
+ * relocations, since these are fixed up at runtime rather than at build time
+ * when PIE is in effect. So we need to split them up in 32-bit high and low
+ * words.
  */
 #ifdef CONFIG_CPU_BIG_ENDIAN
-#define DATA_LE64(data)					\
-	((((data) & 0x00000000000000ff) << 56) |	\
-	 (((data) & 0x000000000000ff00) << 40) |	\
-	 (((data) & 0x0000000000ff0000) << 24) |	\
-	 (((data) & 0x00000000ff000000) << 8)  |	\
-	 (((data) & 0x000000ff00000000) >> 8)  |	\
-	 (((data) & 0x0000ff0000000000) >> 24) |	\
-	 (((data) & 0x00ff000000000000) >> 40) |	\
-	 (((data) & 0xff00000000000000) >> 56))
+#define DATA_LE32(data)				\
+	((((data) & 0x000000ff) << 24) |	\
+	 (((data) & 0x0000ff00) << 8)  |	\
+	 (((data) & 0x00ff0000) >> 8)  |	\
+	 (((data) & 0xff000000) >> 24))
 #else
-#define DATA_LE64(data) ((data) & 0xffffffffffffffff)
+#define DATA_LE32(data) ((data) & 0xffffffff)
 #endif
 
+#define DEFINE_IMAGE_LE64(sym, data)				\
+	sym##_lo32 = DATA_LE32((data) & 0xffffffff);		\
+	sym##_hi32 = DATA_LE32((data) >> 32)
+
 #ifdef CONFIG_CPU_BIG_ENDIAN
 #define __HEAD_FLAG_BE		1
 #else
@@ -61,9 +67,9 @@
  * endian swapped in head.S, all are done here for consistency.
  */
 #define HEAD_SYMBOLS						\
-	_kernel_size_le		= DATA_LE64(_end - _text);	\
-	_kernel_offset_le	= DATA_LE64(TEXT_OFFSET);	\
-	_kernel_flags_le	= DATA_LE64(__HEAD_FLAGS);
+	DEFINE_IMAGE_LE64(_kernel_size_le, _end - _text);	\
+	DEFINE_IMAGE_LE64(_kernel_offset_le, TEXT_OFFSET);	\
+	DEFINE_IMAGE_LE64(_kernel_flags_le, __HEAD_FLAGS);
 
 #ifdef CONFIG_EFI
 

From 63f9fbe469f17deee50491bc293bb3d2843f3e4a Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Sat, 26 Dec 2015 12:46:40 +0100
Subject: [PATCH 329/424] arm64: avoid dynamic relocations in early boot code

Before implementing KASLR for arm64 by building a self-relocating PIE
executable, we have to ensure that values we use before the relocation
routine is executed are not subject to dynamic relocation themselves.
This applies not only to virtual addresses, but also to values that are
supplied by the linker at build time and relocated using R_AARCH64_ABS64
relocations.

So instead, use assemble time constants, or force the use of static
relocations by folding the constants into the instructions.

Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 2bf31a4a05f5b00f37d65ba029d36a0230286cb7)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/kernel/efi-entry.S |  2 +-
 arch/arm64/kernel/head.S      | 39 +++++++++++++++++++++++------------
 2 files changed, 27 insertions(+), 14 deletions(-)

diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S
index a773db92908b..f82036e02485 100644
--- a/arch/arm64/kernel/efi-entry.S
+++ b/arch/arm64/kernel/efi-entry.S
@@ -61,7 +61,7 @@ ENTRY(entry)
 	 */
 	mov	x20, x0		// DTB address
 	ldr	x0, [sp, #16]	// relocated _text address
-	ldr	x21, =stext_offset
+	movz	x21, #:abs_g0:stext_offset
 	add	x21, x0, x21
 
 	/*
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index f076debf392d..4cad8f9f2268 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -67,12 +67,11 @@
  * in the entry routines.
  */
 	__HEAD
-
+_head:
 	/*
 	 * DO NOT MODIFY. Image header expected by Linux boot-loaders.
 	 */
 #ifdef CONFIG_EFI
-efi_head:
 	/*
 	 * This add instruction has no meaningful effect except that
 	 * its opcode forms the magic "MZ" signature required by UEFI.
@@ -94,14 +93,14 @@ efi_head:
 	.byte	0x4d
 	.byte	0x64
 #ifdef CONFIG_EFI
-	.long	pe_header - efi_head		// Offset to the PE header.
+	.long	pe_header - _head		// Offset to the PE header.
 #else
 	.word	0				// reserved
 #endif
 
 #ifdef CONFIG_EFI
 	.globl	__efistub_stext_offset
-	.set	__efistub_stext_offset, stext - efi_head
+	.set	__efistub_stext_offset, stext - _head
 	.align 3
 pe_header:
 	.ascii	"PE"
@@ -124,7 +123,7 @@ optional_header:
 	.long	_end - stext			// SizeOfCode
 	.long	0				// SizeOfInitializedData
 	.long	0				// SizeOfUninitializedData
-	.long	__efistub_entry - efi_head	// AddressOfEntryPoint
+	.long	__efistub_entry - _head		// AddressOfEntryPoint
 	.long	__efistub_stext_offset		// BaseOfCode
 
 extra_header_fields:
@@ -139,7 +138,7 @@ extra_header_fields:
 	.short	0				// MinorSubsystemVersion
 	.long	0				// Win32VersionValue
 
-	.long	_end - efi_head			// SizeOfImage
+	.long	_end - _head			// SizeOfImage
 
 	// Everything before the kernel image is considered part of the header
 	.long	__efistub_stext_offset		// SizeOfHeaders
@@ -219,11 +218,13 @@ ENTRY(stext)
 	 * On return, the CPU will be ready for the MMU to be turned on and
 	 * the TCR will have been set.
 	 */
-	ldr	x27, =__mmap_switched		// address to jump to after
+	ldr	x27, 0f				// address to jump to after
 						// MMU has been enabled
 	adr_l	lr, __enable_mmu		// return (PIC) address
 	b	__cpu_setup			// initialise processor
 ENDPROC(stext)
+	.align	3
+0:	.quad	__mmap_switched - (_head - TEXT_OFFSET) + KIMAGE_VADDR
 
 /*
  * Preserve the arguments passed by the bootloader in x0 .. x3
@@ -391,7 +392,8 @@ __create_page_tables:
 	mov	x0, x26				// swapper_pg_dir
 	ldr	x5, =KIMAGE_VADDR
 	create_pgd_entry x0, x5, x3, x6
-	ldr	x6, =KERNEL_END			// __va(KERNEL_END)
+	ldr	w6, kernel_img_size
+	add	x6, x6, x5
 	mov	x3, x24				// phys offset
 	create_block_map x0, x7, x3, x5, x6
 
@@ -408,6 +410,9 @@ __create_page_tables:
 	mov	lr, x27
 	ret
 ENDPROC(__create_page_tables)
+
+kernel_img_size:
+	.long	_end - (_head - TEXT_OFFSET)
 	.ltorg
 
 /*
@@ -415,6 +420,10 @@ ENDPROC(__create_page_tables)
  */
 	.set	initial_sp, init_thread_union + THREAD_START_SP
 __mmap_switched:
+	adr_l	x8, vectors			// load VBAR_EL1 with virtual
+	msr	vbar_el1, x8			// vector table address
+	isb
+
 	// Clear BSS
 	adr_l	x0, __bss_start
 	mov	x1, xzr
@@ -610,13 +619,19 @@ ENTRY(secondary_startup)
 	adrp	x26, swapper_pg_dir
 	bl	__cpu_setup			// initialise processor
 
-	ldr	x21, =secondary_data
-	ldr	x27, =__secondary_switched	// address to jump to after enabling the MMU
+	ldr	x8, =KIMAGE_VADDR
+	ldr	w9, 0f
+	sub	x27, x8, w9, sxtw		// address to jump to after enabling the MMU
 	b	__enable_mmu
 ENDPROC(secondary_startup)
+0:	.long	(_text - TEXT_OFFSET) - __secondary_switched
 
 ENTRY(__secondary_switched)
-	ldr	x0, [x21]			// get secondary_data.stack
+	adr_l	x5, vectors
+	msr	vbar_el1, x5
+	isb
+
+	ldr_l	x0, secondary_data		// get secondary_data.stack
 	mov	sp, x0
 	and	x0, x0, #~(THREAD_SIZE - 1)
 	msr	sp_el0, x0			// save thread_info
@@ -641,8 +656,6 @@ __enable_mmu:
 	ubfx	x2, x1, #ID_AA64MMFR0_TGRAN_SHIFT, 4
 	cmp	x2, #ID_AA64MMFR0_TGRAN_SUPPORTED
 	b.ne	__no_granule_support
-	ldr	x5, =vectors
-	msr	vbar_el1, x5
 	msr	ttbr0_el1, x25			// load TTBR0
 	msr	ttbr1_el1, x26			// load TTBR1
 	isb

From 632fd2f00cb569f65adc7d2c67282b70a6af8634 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 11 Jan 2016 17:08:26 +0100
Subject: [PATCH 330/424] arm64: make asm/elf.h available to asm files

This reshuffles some code in asm/elf.h and puts a #ifndef __ASSEMBLY__
around its C definitions so that the CPP defines can be used in asm
source files as well.

Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 4a2e034e5cdadde4c712f79bdd57d1455c76a3db)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/elf.h | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
index faad6df49e5b..435f55952e1f 100644
--- a/arch/arm64/include/asm/elf.h
+++ b/arch/arm64/include/asm/elf.h
@@ -24,15 +24,6 @@
 #include <asm/ptrace.h>
 #include <asm/user.h>
 
-typedef unsigned long elf_greg_t;
-
-#define ELF_NGREG (sizeof(struct user_pt_regs) / sizeof(elf_greg_t))
-#define ELF_CORE_COPY_REGS(dest, regs)	\
-	*(struct user_pt_regs *)&(dest) = (regs)->user_regs;
-
-typedef elf_greg_t elf_gregset_t[ELF_NGREG];
-typedef struct user_fpsimd_state elf_fpregset_t;
-
 /*
  * AArch64 static relocation types.
  */
@@ -127,6 +118,17 @@ typedef struct user_fpsimd_state elf_fpregset_t;
  */
 #define ELF_ET_DYN_BASE	(2 * TASK_SIZE_64 / 3)
 
+#ifndef __ASSEMBLY__
+
+typedef unsigned long elf_greg_t;
+
+#define ELF_NGREG (sizeof(struct user_pt_regs) / sizeof(elf_greg_t))
+#define ELF_CORE_COPY_REGS(dest, regs)	\
+	*(struct user_pt_regs *)&(dest) = (regs)->user_regs;
+
+typedef elf_greg_t elf_gregset_t[ELF_NGREG];
+typedef struct user_fpsimd_state elf_fpregset_t;
+
 /*
  * When the program starts, a1 contains a pointer to a function to be
  * registered with atexit, as per the SVR4 ABI.  A value of 0 means we have no
@@ -186,4 +188,6 @@ extern int aarch32_setup_vectors_page(struct linux_binprm *bprm,
 
 #endif /* CONFIG_COMPAT */
 
+#endif /* !__ASSEMBLY__ */
+
 #endif

From 6ef77fd5962d90de7957ef9eafddd659c7375a4f Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Tue, 2 Feb 2016 15:53:59 +0000
Subject: [PATCH 331/424] arm64: futex.h: Add missing PAN toggling

futex.h's futex_atomic_cmpxchg_inatomic() does not use the
__futex_atomic_op() macro and needs its own PAN toggling. This was missed
when the feature was implemented.

Fixes: 338d4f49d6f ("arm64: kernel: Add support for Privileged Access Never")
Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit 811d61e384e24759372bb3f01772f3744b0a8327)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/futex.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h
index 007a69fc4f40..5f3ab8c1db55 100644
--- a/arch/arm64/include/asm/futex.h
+++ b/arch/arm64/include/asm/futex.h
@@ -121,6 +121,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 		return -EFAULT;
 
 	asm volatile("// futex_atomic_cmpxchg_inatomic\n"
+ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, CONFIG_ARM64_PAN)
 "	prfm	pstl1strm, %2\n"
 "1:	ldxr	%w1, %2\n"
 "	sub	%w3, %w1, %w4\n"
@@ -137,6 +138,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 "	.align	3\n"
 "	.quad	1b, 4b, 2b, 4b\n"
 "	.popsection\n"
+ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, CONFIG_ARM64_PAN)
 	: "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp)
 	: "r" (oldval), "r" (newval), "Ir" (-EFAULT)
 	: "memory");

From afc69bdc2ac79942a59f67296738ba8e85e784fb Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Sun, 10 Jan 2016 11:42:28 +0100
Subject: [PATCH 332/424] scripts/sortextable: add support for ET_DYN binaries

Add support to scripts/sortextable for handling relocatable (PIE)
executables, whose ELF type is ET_DYN, not ET_EXEC. Other than adding
support for the new type, no changes are needed.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 7b957b6e603623ef8b2e8222fa94b976df613fa2)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 scripts/sortextable.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/scripts/sortextable.c b/scripts/sortextable.c
index c2423d913b46..ecefa0a634f8 100644
--- a/scripts/sortextable.c
+++ b/scripts/sortextable.c
@@ -266,9 +266,9 @@ do_file(char const *const fname)
 		break;
 	}  /* end switch */
 	if (memcmp(ELFMAG, ehdr->e_ident, SELFMAG) != 0
-	||  r2(&ehdr->e_type) != ET_EXEC
+	||  (r2(&ehdr->e_type) != ET_EXEC && r2(&ehdr->e_type) != ET_DYN)
 	||  ehdr->e_ident[EI_VERSION] != EV_CURRENT) {
-		fprintf(stderr, "unrecognized ET_EXEC file %s\n", fname);
+		fprintf(stderr, "unrecognized ET_EXEC/ET_DYN file %s\n", fname);
 		fail_file();
 	}
 
@@ -304,7 +304,7 @@ do_file(char const *const fname)
 		if (r2(&ehdr->e_ehsize) != sizeof(Elf32_Ehdr)
 		||  r2(&ehdr->e_shentsize) != sizeof(Elf32_Shdr)) {
 			fprintf(stderr,
-				"unrecognized ET_EXEC file: %s\n", fname);
+				"unrecognized ET_EXEC/ET_DYN file: %s\n", fname);
 			fail_file();
 		}
 		do32(ehdr, fname, custom_sort);
@@ -314,7 +314,7 @@ do_file(char const *const fname)
 		if (r2(&ghdr->e_ehsize) != sizeof(Elf64_Ehdr)
 		||  r2(&ghdr->e_shentsize) != sizeof(Elf64_Shdr)) {
 			fprintf(stderr,
-				"unrecognized ET_EXEC file: %s\n", fname);
+				"unrecognized ET_EXEC/ET_DYN file: %s\n", fname);
 			fail_file();
 		}
 		do64(ghdr, fname, custom_sort);

From 5f1dcb6d02a6f6dd104539a19f7db8a189ba4bb8 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Fri, 1 Jan 2016 12:39:09 +0100
Subject: [PATCH 333/424] extable: add support for relative extables to search
 and sort routines

This adds support to the generic search_extable() and sort_extable()
implementations for dealing with exception table entries whose fields
contain relative offsets rather than absolute addresses.

Acked-by: Helge Deller <deller@gmx.de>
Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Acked-by: H. Peter Anvin <hpa@linux.intel.com>
Acked-by: Tony Luck <tony.luck@intel.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Acked-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit a272858a3c1ecd4a935ba23c66668f81214bd110)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 lib/extable.c | 50 +++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 41 insertions(+), 9 deletions(-)

diff --git a/lib/extable.c b/lib/extable.c
index 4cac81ec225e..0be02ad561e9 100644
--- a/lib/extable.c
+++ b/lib/extable.c
@@ -14,7 +14,37 @@
 #include <linux/sort.h>
 #include <asm/uaccess.h>
 
+#ifndef ARCH_HAS_RELATIVE_EXTABLE
+#define ex_to_insn(x)	((x)->insn)
+#else
+static inline unsigned long ex_to_insn(const struct exception_table_entry *x)
+{
+	return (unsigned long)&x->insn + x->insn;
+}
+#endif
+
 #ifndef ARCH_HAS_SORT_EXTABLE
+#ifndef ARCH_HAS_RELATIVE_EXTABLE
+#define swap_ex		NULL
+#else
+static void swap_ex(void *a, void *b, int size)
+{
+	struct exception_table_entry *x = a, *y = b, tmp;
+	int delta = b - a;
+
+	tmp = *x;
+	x->insn = y->insn + delta;
+	y->insn = tmp.insn - delta;
+
+#ifdef swap_ex_entry_fixup
+	swap_ex_entry_fixup(x, y, tmp, delta);
+#else
+	x->fixup = y->fixup + delta;
+	y->fixup = tmp.fixup - delta;
+#endif
+}
+#endif /* ARCH_HAS_RELATIVE_EXTABLE */
+
 /*
  * The exception table needs to be sorted so that the binary
  * search that we use to find entries in it works properly.
@@ -26,9 +56,9 @@ static int cmp_ex(const void *a, const void *b)
 	const struct exception_table_entry *x = a, *y = b;
 
 	/* avoid overflow */
-	if (x->insn > y->insn)
+	if (ex_to_insn(x) > ex_to_insn(y))
 		return 1;
-	if (x->insn < y->insn)
+	if (ex_to_insn(x) < ex_to_insn(y))
 		return -1;
 	return 0;
 }
@@ -37,7 +67,7 @@ void sort_extable(struct exception_table_entry *start,
 		  struct exception_table_entry *finish)
 {
 	sort(start, finish - start, sizeof(struct exception_table_entry),
-	     cmp_ex, NULL);
+	     cmp_ex, swap_ex);
 }
 
 #ifdef CONFIG_MODULES
@@ -48,13 +78,15 @@ void sort_extable(struct exception_table_entry *start,
 void trim_init_extable(struct module *m)
 {
 	/*trim the beginning*/
-	while (m->num_exentries && within_module_init(m->extable[0].insn, m)) {
+	while (m->num_exentries &&
+	       within_module_init(ex_to_insn(&m->extable[0]), m)) {
 		m->extable++;
 		m->num_exentries--;
 	}
 	/*trim the end*/
 	while (m->num_exentries &&
-		within_module_init(m->extable[m->num_exentries-1].insn, m))
+	       within_module_init(ex_to_insn(&m->extable[m->num_exentries - 1]),
+				  m))
 		m->num_exentries--;
 }
 #endif /* CONFIG_MODULES */
@@ -81,13 +113,13 @@ search_extable(const struct exception_table_entry *first,
 		 * careful, the distance between value and insn
 		 * can be larger than MAX_LONG:
 		 */
-		if (mid->insn < value)
+		if (ex_to_insn(mid) < value)
 			first = mid + 1;
-		else if (mid->insn > value)
+		else if (ex_to_insn(mid) > value)
 			last = mid - 1;
 		else
 			return mid;
-        }
-        return NULL;
+	}
+	return NULL;
 }
 #endif

From 525787eea48f8c6a1630e6dab07313896cfc6b8d Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Fri, 1 Jan 2016 15:02:12 +0100
Subject: [PATCH 334/424] arm64: switch to relative exception tables

Instead of using absolute addresses for both the exception location
and the fixup, use offsets relative to the exception table entry values.
Not only does this cut the size of the exception table in half, it is
also a prerequisite for KASLR, since absolute exception table entries
are subject to dynamic relocation, which is incompatible with the sorting
of the exception table that occurs at build time.

This patch also introduces the _ASM_EXTABLE preprocessor macro (which
exists on x86 as well) and its _asm_extable assembly counterpart, as
shorthands to emit exception table entries.

Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 6c94f27ac847ff8ef15b3da5b200574923bd6287)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/alternative.h    | 19 +++++-----------
 arch/arm64/include/asm/assembler.h      | 15 +++++++++----
 arch/arm64/include/asm/futex.h          | 12 ++++------
 arch/arm64/include/asm/uaccess.h        | 30 +++++++++++++------------
 arch/arm64/include/asm/word-at-a-time.h |  7 +++---
 arch/arm64/kernel/armv8_deprecated.c    |  7 ++----
 arch/arm64/mm/extable.c                 |  2 +-
 scripts/sortextable.c                   |  2 +-
 8 files changed, 43 insertions(+), 51 deletions(-)

diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h
index a9fc24ec1aa9..beccbdefa106 100644
--- a/arch/arm64/include/asm/alternative.h
+++ b/arch/arm64/include/asm/alternative.h
@@ -157,11 +157,8 @@ void apply_alternatives(void *start, size_t length);
 			add	\addr, \addr, \post_inc;
 		alternative_endif
 
-		.section __ex_table,"a";
-		.align	3;
-		.quad	8888b,\l;
-		.quad	8889b,\l;
-		.previous;
+		_asm_extable	8888b,\l;
+		_asm_extable	8889b,\l;
 	.endm
 
 	.macro uao_stp l, reg1, reg2, addr, post_inc
@@ -175,11 +172,8 @@ void apply_alternatives(void *start, size_t length);
 			add	\addr, \addr, \post_inc;
 		alternative_endif
 
-		.section __ex_table,"a";
-		.align	3;
-		.quad	8888b,\l;
-		.quad	8889b,\l;
-		.previous
+		_asm_extable	8888b,\l;
+		_asm_extable	8889b,\l;
 	.endm
 
 	.macro uao_user_alternative l, inst, alt_inst, reg, addr, post_inc
@@ -191,10 +185,7 @@ void apply_alternatives(void *start, size_t length);
 			add		\addr, \addr, \post_inc;
 		alternative_endif
 
-		.section __ex_table,"a";
-		.align	3;
-		.quad	8888b,\l;
-		.previous
+		_asm_extable	8888b,\l;
 	.endm
 #else
 	.macro uao_ldp l, reg1, reg2, addr, post_inc
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index ba5aff6c830e..70f7b9e04598 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -94,12 +94,19 @@
 	dmb	\opt
 	.endm
 
+/*
+ * Emit an entry into the exception table
+ */
+	.macro		_asm_extable, from, to
+	.pushsection	__ex_table, "a"
+	.align		3
+	.long		(\from - .), (\to - .)
+	.popsection
+	.endm
+
 #define USER(l, x...)				\
 9999:	x;					\
-	.section __ex_table,"a";		\
-	.align	3;				\
-	.quad	9999b,l;			\
-	.previous
+	_asm_extable	9999b, l
 
 /*
  * Register aliases.
diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h
index 5f3ab8c1db55..f2585cdd32c2 100644
--- a/arch/arm64/include/asm/futex.h
+++ b/arch/arm64/include/asm/futex.h
@@ -42,10 +42,8 @@
 "4:	mov	%w0, %w5\n"						\
 "	b	3b\n"							\
 "	.popsection\n"							\
-"	.pushsection __ex_table,\"a\"\n"				\
-"	.align	3\n"							\
-"	.quad	1b, 4b, 2b, 4b\n"					\
-"	.popsection\n"							\
+	_ASM_EXTABLE(1b, 4b)						\
+	_ASM_EXTABLE(2b, 4b)						\
 	ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN,		\
 		    CONFIG_ARM64_PAN)					\
 	: "=&r" (ret), "=&r" (oldval), "+Q" (*uaddr), "=&r" (tmp)	\
@@ -134,10 +132,8 @@ ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, CONFIG_ARM64_PAN)
 "4:	mov	%w0, %w6\n"
 "	b	3b\n"
 "	.popsection\n"
-"	.pushsection __ex_table,\"a\"\n"
-"	.align	3\n"
-"	.quad	1b, 4b, 2b, 4b\n"
-"	.popsection\n"
+	_ASM_EXTABLE(1b, 4b)
+	_ASM_EXTABLE(2b, 4b)
 ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, CONFIG_ARM64_PAN)
 	: "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp)
 	: "r" (oldval), "r" (newval), "Ir" (-EFAULT)
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index 16ba0d5c9740..0685d74572af 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -36,11 +36,11 @@
 #define VERIFY_WRITE 1
 
 /*
- * The exception table consists of pairs of addresses: the first is the
- * address of an instruction that is allowed to fault, and the second is
- * the address at which the program should continue.  No registers are
- * modified, so it is entirely up to the continuation code to figure out
- * what to do.
+ * The exception table consists of pairs of relative offsets: the first
+ * is the relative offset to an instruction that is allowed to fault,
+ * and the second is the relative offset at which the program should
+ * continue. No registers are modified, so it is entirely up to the
+ * continuation code to figure out what to do.
  *
  * All the routines below use bits of fixup code that are out of line
  * with the main instruction path.  This means when everything is well,
@@ -50,9 +50,11 @@
 
 struct exception_table_entry
 {
-	unsigned long insn, fixup;
+	int insn, fixup;
 };
 
+#define ARCH_HAS_RELATIVE_EXTABLE
+
 extern int fixup_exception(struct pt_regs *regs);
 
 #define KERNEL_DS	(-1UL)
@@ -115,6 +117,12 @@ static inline void set_fs(mm_segment_t fs)
 #define access_ok(type, addr, size)	__range_ok(addr, size)
 #define user_addr_max			get_fs
 
+#define _ASM_EXTABLE(from, to)						\
+	"	.pushsection	__ex_table, \"a\"\n"			\
+	"	.align		3\n"					\
+	"	.long		(" #from " - .), (" #to " - .)\n"	\
+	"	.popsection\n"
+
 /*
  * The "__xxx" versions of the user access functions do not verify the address
  * space - it must have been done previously with a separate "access_ok()"
@@ -134,10 +142,7 @@ static inline void set_fs(mm_segment_t fs)
 	"	mov	%1, #0\n"					\
 	"	b	2b\n"						\
 	"	.previous\n"						\
-	"	.section __ex_table,\"a\"\n"				\
-	"	.align	3\n"						\
-	"	.quad	1b, 3b\n"					\
-	"	.previous"						\
+	_ASM_EXTABLE(1b, 3b)						\
 	: "+r" (err), "=&r" (x)						\
 	: "r" (addr), "i" (-EFAULT))
 
@@ -206,10 +211,7 @@ do {									\
 	"3:	mov	%w0, %3\n"					\
 	"	b	2b\n"						\
 	"	.previous\n"						\
-	"	.section __ex_table,\"a\"\n"				\
-	"	.align	3\n"						\
-	"	.quad	1b, 3b\n"					\
-	"	.previous"						\
+	_ASM_EXTABLE(1b, 3b)						\
 	: "+r" (err)							\
 	: "r" (x), "r" (addr), "i" (-EFAULT))
 
diff --git a/arch/arm64/include/asm/word-at-a-time.h b/arch/arm64/include/asm/word-at-a-time.h
index aab5bf09e9d9..2b79b8a89457 100644
--- a/arch/arm64/include/asm/word-at-a-time.h
+++ b/arch/arm64/include/asm/word-at-a-time.h
@@ -16,6 +16,8 @@
 #ifndef __ASM_WORD_AT_A_TIME_H
 #define __ASM_WORD_AT_A_TIME_H
 
+#include <asm/uaccess.h>
+
 #ifndef __AARCH64EB__
 
 #include <linux/kernel.h>
@@ -81,10 +83,7 @@ static inline unsigned long load_unaligned_zeropad(const void *addr)
 #endif
 	"	b	2b\n"
 	"	.popsection\n"
-	"	.pushsection __ex_table,\"a\"\n"
-	"	.align	3\n"
-	"	.quad	1b, 3b\n"
-	"	.popsection"
+	_ASM_EXTABLE(1b, 3b)
 	: "=&r" (ret), "=&r" (offset)
 	: "r" (addr), "Q" (*(unsigned long *)addr));
 
diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
index 3e01207917b1..c37202c0c838 100644
--- a/arch/arm64/kernel/armv8_deprecated.c
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -297,11 +297,8 @@ static void __init register_insn_emulation_sysctl(struct ctl_table *table)
 	"4:	mov		%w0, %w5\n"			\
 	"	b		3b\n"				\
 	"	.popsection"					\
-	"	.pushsection	 __ex_table,\"a\"\n"		\
-	"	.align		3\n"				\
-	"	.quad		0b, 4b\n"			\
-	"	.quad		1b, 4b\n"			\
-	"	.popsection\n"					\
+	_ASM_EXTABLE(0b, 4b)					\
+	_ASM_EXTABLE(1b, 4b)					\
 	ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN,	\
 		CONFIG_ARM64_PAN)				\
 	: "=&r" (res), "+r" (data), "=&r" (temp)		\
diff --git a/arch/arm64/mm/extable.c b/arch/arm64/mm/extable.c
index 79444279ba8c..81acd4706878 100644
--- a/arch/arm64/mm/extable.c
+++ b/arch/arm64/mm/extable.c
@@ -11,7 +11,7 @@ int fixup_exception(struct pt_regs *regs)
 
 	fixup = search_exception_tables(instruction_pointer(regs));
 	if (fixup)
-		regs->pc = fixup->fixup;
+		regs->pc = (unsigned long)&fixup->fixup + fixup->fixup;
 
 	return fixup != NULL;
 }
diff --git a/scripts/sortextable.c b/scripts/sortextable.c
index ecefa0a634f8..19d83647846c 100644
--- a/scripts/sortextable.c
+++ b/scripts/sortextable.c
@@ -282,12 +282,12 @@ do_file(char const *const fname)
 	case EM_386:
 	case EM_X86_64:
 	case EM_S390:
+	case EM_AARCH64:
 		custom_sort = sort_relative_table;
 		break;
 	case EM_ARCOMPACT:
 	case EM_ARCV2:
 	case EM_ARM:
-	case EM_AARCH64:
 	case EM_MICROBLAZE:
 	case EM_MIPS:
 	case EM_XTENSA:

From 89328d41aa99df071dadb43c722cd88ffafc77e2 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 26 Jan 2016 09:13:44 +0100
Subject: [PATCH 335/424] arm64: add support for building vmlinux as a
 relocatable PIE binary

This implements CONFIG_RELOCATABLE, which links the final vmlinux
image with a dynamic relocation section, allowing the early boot code
to perform a relocation to a different virtual address at runtime.

This is a prerequisite for KASLR (CONFIG_RANDOMIZE_BASE).

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 1e48ef7fcc374051730381a2a05da77eb4eafdb0)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/Kconfig              | 11 +++++++++++
 arch/arm64/Makefile             |  4 ++++
 arch/arm64/include/asm/elf.h    |  2 ++
 arch/arm64/kernel/head.S        | 32 ++++++++++++++++++++++++++++++++
 arch/arm64/kernel/vmlinux.lds.S | 16 ++++++++++++++++
 5 files changed, 65 insertions(+)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 22db20491733..ac1475f559e6 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -737,6 +737,17 @@ config ARM64_MODULE_PLTS
 	select ARM64_MODULE_CMODEL_LARGE
 	select HAVE_MOD_ARCH_SPECIFIC
 
+config RELOCATABLE
+	bool
+	help
+	  This builds the kernel as a Position Independent Executable (PIE),
+	  which retains all relocation metadata required to relocate the
+	  kernel binary at runtime to a different virtual address than the
+	  address it was linked at.
+	  Since AArch64 uses the RELA relocation format, this requires a
+	  relocation pass at runtime even if the kernel is loaded at the
+	  same address it was linked at.
+
 endmenu
 
 menu "Boot options"
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 71054a38decf..304dcc3da06f 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -15,6 +15,10 @@ CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET)
 OBJCOPYFLAGS	:=-O binary -R .note -R .note.gnu.build-id -R .comment -S
 GZFLAGS		:=-9
 
+ifneq ($(CONFIG_RELOCATABLE),)
+LDFLAGS_vmlinux		+= -pie
+endif
+
 KBUILD_DEFCONFIG := defconfig
 
 # Check for binutils support for specific extensions
diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
index 435f55952e1f..24ed037f09fd 100644
--- a/arch/arm64/include/asm/elf.h
+++ b/arch/arm64/include/asm/elf.h
@@ -77,6 +77,8 @@
 #define R_AARCH64_MOVW_PREL_G2_NC	292
 #define R_AARCH64_MOVW_PREL_G3		293
 
+#define R_AARCH64_RELATIVE		1027
+
 /*
  * These are used to set parameters in the core dumps.
  */
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 4cad8f9f2268..4e69412a7323 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -29,6 +29,7 @@
 #include <asm/asm-offsets.h>
 #include <asm/cache.h>
 #include <asm/cputype.h>
+#include <asm/elf.h>
 #include <asm/kernel-pgtable.h>
 #include <asm/memory.h>
 #include <asm/pgtable-hwdef.h>
@@ -432,6 +433,37 @@ __mmap_switched:
 	bl	__pi_memset
 	dsb	ishst				// Make zero page visible to PTW
 
+#ifdef CONFIG_RELOCATABLE
+
+	/*
+	 * Iterate over each entry in the relocation table, and apply the
+	 * relocations in place.
+	 */
+	adr_l	x8, __dynsym_start		// start of symbol table
+	adr_l	x9, __reloc_start		// start of reloc table
+	adr_l	x10, __reloc_end		// end of reloc table
+
+0:	cmp	x9, x10
+	b.hs	2f
+	ldp	x11, x12, [x9], #24
+	ldr	x13, [x9, #-8]
+	cmp	w12, #R_AARCH64_RELATIVE
+	b.ne	1f
+	str	x13, [x11]
+	b	0b
+
+1:	cmp	w12, #R_AARCH64_ABS64
+	b.ne	0b
+	add	x12, x12, x12, lsl #1		// symtab offset: 24x top word
+	add	x12, x8, x12, lsr #(32 - 3)	// ... shifted into bottom word
+	ldr	x15, [x12, #8]			// Elf64_Sym::st_value
+	add	x15, x13, x15
+	str	x15, [x11]
+	b	0b
+
+2:
+#endif
+
 	adr_l	sp, initial_sp, x4
 	mov	x4, sp
 	and	x4, x4, #~(THREAD_SIZE - 1)
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 282e3e64a17e..e3f6cd740ea3 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -87,6 +87,7 @@ SECTIONS
 		EXIT_CALL
 		*(.discard)
 		*(.discard.*)
+		*(.interp .dynamic)
 	}
 
 	. = KIMAGE_VADDR + TEXT_OFFSET;
@@ -149,6 +150,21 @@ SECTIONS
 	.altinstr_replacement : {
 		*(.altinstr_replacement)
 	}
+	.rela : ALIGN(8) {
+		__reloc_start = .;
+		*(.rela .rela*)
+		__reloc_end = .;
+	}
+	.dynsym : ALIGN(8) {
+		__dynsym_start = .;
+		*(.dynsym)
+	}
+	.dynstr : {
+		*(.dynstr)
+	}
+	.hash : {
+		*(.hash)
+	}
 
 	. = ALIGN(PAGE_SIZE);
 	__init_end = .;

From d0a12e9199c75cad71361f746ac40e4612945a43 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 26 Jan 2016 14:12:01 +0100
Subject: [PATCH 336/424] arm64: add support for kernel ASLR

This adds support for KASLR is implemented, based on entropy provided by
the bootloader in the /chosen/kaslr-seed DT property. Depending on the size
of the address space (VA_BITS) and the page size, the entropy in the
virtual displacement is up to 13 bits (16k/2 levels) and up to 25 bits (all
4 levels), with the sidenote that displacements that result in the kernel
image straddling a 1GB/32MB/512MB alignment boundary (for 4KB/16KB/64KB
granule kernels, respectively) are not allowed, and will be rounded up to
an acceptable value.

If CONFIG_RANDOMIZE_MODULE_REGION_FULL is enabled, the module region is
randomized independently from the core kernel. This makes it less likely
that the location of core kernel data structures can be determined by an
adversary, but causes all function calls from modules into the core kernel
to be resolved via entries in the module PLTs.

If CONFIG_RANDOMIZE_MODULE_REGION_FULL is not enabled, the module region is
randomized by choosing a page aligned 128 MB region inside the interval
[_etext - 128 MB, _stext + 128 MB). This gives between 10 and 14 bits of
entropy (depending on page size), independently of the kernel randomization,
but still guarantees that modules are within the range of relative branch
and jump instructions (with the caveat that, since the module region is
shared with other uses of the vmalloc area, modules may need to be loaded
further away if the module region is exhausted)

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit f80fb3a3d50843a401dac4b566b3b131da8077a2)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/Kconfig              |  29 ++++++
 arch/arm64/include/asm/memory.h |   5 +-
 arch/arm64/include/asm/module.h |   6 ++
 arch/arm64/kernel/Makefile      |   1 +
 arch/arm64/kernel/head.S        |  59 +++++++++--
 arch/arm64/kernel/kaslr.c       | 173 ++++++++++++++++++++++++++++++++
 arch/arm64/kernel/module.c      |   3 +-
 arch/arm64/kernel/setup.c       |  29 ++++++
 arch/arm64/mm/kasan_init.c      |  17 +++-
 arch/arm64/mm/mmu.c             |  29 ++++--
 10 files changed, 329 insertions(+), 22 deletions(-)
 create mode 100644 arch/arm64/kernel/kaslr.c

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index ac1475f559e6..b311ac23c989 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -748,6 +748,35 @@ config RELOCATABLE
 	  relocation pass at runtime even if the kernel is loaded at the
 	  same address it was linked at.
 
+config RANDOMIZE_BASE
+	bool "Randomize the address of the kernel image"
+	select ARM64_MODULE_PLTS
+	select RELOCATABLE
+	help
+	  Randomizes the virtual address at which the kernel image is
+	  loaded, as a security feature that deters exploit attempts
+	  relying on knowledge of the location of kernel internals.
+
+	  It is the bootloader's job to provide entropy, by passing a
+	  random u64 value in /chosen/kaslr-seed at kernel entry.
+
+	  If unsure, say N.
+
+config RANDOMIZE_MODULE_REGION_FULL
+	bool "Randomize the module region independently from the core kernel"
+	depends on RANDOMIZE_BASE
+	default y
+	help
+	  Randomizes the location of the module region without considering the
+	  location of the core kernel. This way, it is impossible for modules
+	  to leak information about the location of core kernel data structures
+	  but it does imply that function calls between modules and the core
+	  kernel will need to be resolved via veneers in the module PLT.
+
+	  When this option is not set, the module region will be randomized over
+	  a limited range that contains the [_stext, _etext] interval of the
+	  core kernel, so branch relocations are always in range.
+
 endmenu
 
 menu "Boot options"
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index eb798156cf56..5f8667a99e41 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -53,7 +53,7 @@
 #define KIMAGE_VADDR		(MODULES_END)
 #define MODULES_END		(MODULES_VADDR + MODULES_VSIZE)
 #define MODULES_VADDR		(VA_START + KASAN_SHADOW_SIZE)
-#define MODULES_VSIZE		(SZ_64M)
+#define MODULES_VSIZE		(SZ_128M)
 #define PCI_IO_END		(PAGE_OFFSET - SZ_2M)
 #define PCI_IO_START		(PCI_IO_END - PCI_IO_SIZE)
 #define FIXADDR_TOP		(PCI_IO_START - SZ_2M)
@@ -139,6 +139,9 @@ extern phys_addr_t		memstart_addr;
 /* PHYS_OFFSET - the physical address of the start of memory. */
 #define PHYS_OFFSET		({ VM_BUG_ON(memstart_addr & 1); memstart_addr; })
 
+/* the virtual base of the kernel image (minus TEXT_OFFSET) */
+extern u64			kimage_vaddr;
+
 /* the offset between the kernel virtual and physical mappings */
 extern u64			kimage_voffset;
 
diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h
index 8652fb613304..e12af6754634 100644
--- a/arch/arm64/include/asm/module.h
+++ b/arch/arm64/include/asm/module.h
@@ -31,4 +31,10 @@ struct mod_arch_specific {
 u64 module_emit_plt_entry(struct module *mod, const Elf64_Rela *rela,
 			  Elf64_Sym *sym);
 
+#ifdef CONFIG_RANDOMIZE_BASE
+extern u64 module_alloc_base;
+#else
+#define module_alloc_base	((u64)_etext - MODULES_VSIZE)
+#endif
+
 #endif /* __ASM_MODULE_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 8d971f9c6ed5..49a2430b0786 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -43,6 +43,7 @@ arm64-obj-$(CONFIG_PCI)			+= pci.o
 arm64-obj-$(CONFIG_ARMV8_DEPRECATED)	+= armv8_deprecated.o
 arm64-obj-$(CONFIG_ACPI)		+= acpi.o
 arm64-obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL)	+= acpi_parking_protocol.o
+arm64-obj-$(CONFIG_RANDOMIZE_BASE)	+= kaslr.o
 
 obj-y					+= $(arm64-obj-y) vdso/
 obj-m					+= $(arm64-obj-m)
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 4e69412a7323..319f896c6e74 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -210,6 +210,7 @@ section_table:
 ENTRY(stext)
 	bl	preserve_boot_args
 	bl	el2_setup			// Drop to EL1, w20=cpu_boot_mode
+	mov	x23, xzr			// KASLR offset, defaults to 0
 	adrp	x24, __PHYS_OFFSET
 	bl	set_cpu_boot_mode_flag
 	bl	__create_page_tables		// x25=TTBR0, x26=TTBR1
@@ -313,7 +314,7 @@ ENDPROC(preserve_boot_args)
 __create_page_tables:
 	adrp	x25, idmap_pg_dir
 	adrp	x26, swapper_pg_dir
-	mov	x27, lr
+	mov	x28, lr
 
 	/*
 	 * Invalidate the idmap and swapper page tables to avoid potential
@@ -392,6 +393,7 @@ __create_page_tables:
 	 */
 	mov	x0, x26				// swapper_pg_dir
 	ldr	x5, =KIMAGE_VADDR
+	add	x5, x5, x23			// add KASLR displacement
 	create_pgd_entry x0, x5, x3, x6
 	ldr	w6, kernel_img_size
 	add	x6, x6, x5
@@ -408,8 +410,7 @@ __create_page_tables:
 	dmb	sy
 	bl	__inval_cache_range
 
-	mov	lr, x27
-	ret
+	ret	x28
 ENDPROC(__create_page_tables)
 
 kernel_img_size:
@@ -421,6 +422,7 @@ kernel_img_size:
  */
 	.set	initial_sp, init_thread_union + THREAD_START_SP
 __mmap_switched:
+	mov	x28, lr				// preserve LR
 	adr_l	x8, vectors			// load VBAR_EL1 with virtual
 	msr	vbar_el1, x8			// vector table address
 	isb
@@ -449,19 +451,26 @@ __mmap_switched:
 	ldr	x13, [x9, #-8]
 	cmp	w12, #R_AARCH64_RELATIVE
 	b.ne	1f
-	str	x13, [x11]
+	add	x13, x13, x23			// relocate
+	str	x13, [x11, x23]
 	b	0b
 
 1:	cmp	w12, #R_AARCH64_ABS64
 	b.ne	0b
 	add	x12, x12, x12, lsl #1		// symtab offset: 24x top word
 	add	x12, x8, x12, lsr #(32 - 3)	// ... shifted into bottom word
+	ldrsh	w14, [x12, #6]			// Elf64_Sym::st_shndx
 	ldr	x15, [x12, #8]			// Elf64_Sym::st_value
+	cmp	w14, #-0xf			// SHN_ABS (0xfff1) ?
+	add	x14, x15, x23			// relocate
+	csel	x15, x14, x15, ne
 	add	x15, x13, x15
-	str	x15, [x11]
+	str	x15, [x11, x23]
 	b	0b
 
-2:
+2:	adr_l	x8, kimage_vaddr		// make relocated kimage_vaddr
+	dc	cvac, x8			// value visible to secondaries
+	dsb	sy				// with MMU off
 #endif
 
 	adr_l	sp, initial_sp, x4
@@ -470,13 +479,23 @@ __mmap_switched:
 	msr	sp_el0, x4			// Save thread_info
 	str_l	x21, __fdt_pointer, x5		// Save FDT pointer
 
-	ldr	x4, =KIMAGE_VADDR		// Save the offset between
+	ldr_l	x4, kimage_vaddr		// Save the offset between
 	sub	x4, x4, x24			// the kernel virtual and
 	str_l	x4, kimage_voffset, x5		// physical mappings
 
 	mov	x29, #0
 #ifdef CONFIG_KASAN
 	bl	kasan_early_init
+#endif
+#ifdef CONFIG_RANDOMIZE_BASE
+	cbnz	x23, 0f				// already running randomized?
+	mov	x0, x21				// pass FDT address in x0
+	bl	kaslr_early_init		// parse FDT for KASLR options
+	cbz	x0, 0f				// KASLR disabled? just proceed
+	mov	x23, x0				// record KASLR offset
+	ret	x28				// we must enable KASLR, return
+						// to __enable_mmu()
+0:
 #endif
 	b	start_kernel
 ENDPROC(__mmap_switched)
@@ -486,6 +505,10 @@ ENDPROC(__mmap_switched)
  * hotplug and needs to have the same protections as the text region
  */
 	.section ".text","ax"
+
+ENTRY(kimage_vaddr)
+	.quad		_text - TEXT_OFFSET
+
 /*
  * If we're fortunate enough to boot at EL2, ensure that the world is
  * sane before dropping to EL1.
@@ -651,7 +674,7 @@ ENTRY(secondary_startup)
 	adrp	x26, swapper_pg_dir
 	bl	__cpu_setup			// initialise processor
 
-	ldr	x8, =KIMAGE_VADDR
+	ldr	x8, kimage_vaddr
 	ldr	w9, 0f
 	sub	x27, x8, w9, sxtw		// address to jump to after enabling the MMU
 	b	__enable_mmu
@@ -684,6 +707,7 @@ ENDPROC(__secondary_switched)
  */
 	.section	".idmap.text", "ax"
 __enable_mmu:
+	mrs	x18, sctlr_el1			// preserve old SCTLR_EL1 value
 	mrs	x1, ID_AA64MMFR0_EL1
 	ubfx	x2, x1, #ID_AA64MMFR0_TGRAN_SHIFT, 4
 	cmp	x2, #ID_AA64MMFR0_TGRAN_SUPPORTED
@@ -701,6 +725,25 @@ __enable_mmu:
 	ic	iallu
 	dsb	nsh
 	isb
+#ifdef CONFIG_RANDOMIZE_BASE
+	mov	x19, x0				// preserve new SCTLR_EL1 value
+	blr	x27
+
+	/*
+	 * If we return here, we have a KASLR displacement in x23 which we need
+	 * to take into account by discarding the current kernel mapping and
+	 * creating a new one.
+	 */
+	msr	sctlr_el1, x18			// disable the MMU
+	isb
+	bl	__create_page_tables		// recreate kernel mapping
+
+	msr	sctlr_el1, x19			// re-enable the MMU
+	isb
+	ic	ialluis				// flush instructions fetched
+	isb					// via old mapping
+	add	x27, x27, x23			// relocated __mmap_switched
+#endif
 	br	x27
 ENDPROC(__enable_mmu)
 
diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
new file mode 100644
index 000000000000..8b32a1f8f09f
--- /dev/null
+++ b/arch/arm64/kernel/kaslr.c
@@ -0,0 +1,173 @@
+/*
+ * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/crc32.h>
+#include <linux/init.h>
+#include <linux/libfdt.h>
+#include <linux/mm_types.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+
+#include <asm/fixmap.h>
+#include <asm/kernel-pgtable.h>
+#include <asm/memory.h>
+#include <asm/mmu.h>
+#include <asm/pgtable.h>
+#include <asm/sections.h>
+
+u64 __read_mostly module_alloc_base;
+
+static __init u64 get_kaslr_seed(void *fdt)
+{
+	int node, len;
+	u64 *prop;
+	u64 ret;
+
+	node = fdt_path_offset(fdt, "/chosen");
+	if (node < 0)
+		return 0;
+
+	prop = fdt_getprop_w(fdt, node, "kaslr-seed", &len);
+	if (!prop || len != sizeof(u64))
+		return 0;
+
+	ret = fdt64_to_cpu(*prop);
+	*prop = 0;
+	return ret;
+}
+
+static __init const u8 *get_cmdline(void *fdt)
+{
+	static __initconst const u8 default_cmdline[] = CONFIG_CMDLINE;
+
+	if (!IS_ENABLED(CONFIG_CMDLINE_FORCE)) {
+		int node;
+		const u8 *prop;
+
+		node = fdt_path_offset(fdt, "/chosen");
+		if (node < 0)
+			goto out;
+
+		prop = fdt_getprop(fdt, node, "bootargs", NULL);
+		if (!prop)
+			goto out;
+		return prop;
+	}
+out:
+	return default_cmdline;
+}
+
+extern void *__init __fixmap_remap_fdt(phys_addr_t dt_phys, int *size,
+				       pgprot_t prot);
+
+/*
+ * This routine will be executed with the kernel mapped at its default virtual
+ * address, and if it returns successfully, the kernel will be remapped, and
+ * start_kernel() will be executed from a randomized virtual offset. The
+ * relocation will result in all absolute references (e.g., static variables
+ * containing function pointers) to be reinitialized, and zero-initialized
+ * .bss variables will be reset to 0.
+ */
+u64 __init kaslr_early_init(u64 dt_phys)
+{
+	void *fdt;
+	u64 seed, offset, mask, module_range;
+	const u8 *cmdline, *str;
+	int size;
+
+	/*
+	 * Set a reasonable default for module_alloc_base in case
+	 * we end up running with module randomization disabled.
+	 */
+	module_alloc_base = (u64)_etext - MODULES_VSIZE;
+
+	/*
+	 * Try to map the FDT early. If this fails, we simply bail,
+	 * and proceed with KASLR disabled. We will make another
+	 * attempt at mapping the FDT in setup_machine()
+	 */
+	early_fixmap_init();
+	fdt = __fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL);
+	if (!fdt)
+		return 0;
+
+	/*
+	 * Retrieve (and wipe) the seed from the FDT
+	 */
+	seed = get_kaslr_seed(fdt);
+	if (!seed)
+		return 0;
+
+	/*
+	 * Check if 'nokaslr' appears on the command line, and
+	 * return 0 if that is the case.
+	 */
+	cmdline = get_cmdline(fdt);
+	str = strstr(cmdline, "nokaslr");
+	if (str == cmdline || (str > cmdline && *(str - 1) == ' '))
+		return 0;
+
+	/*
+	 * OK, so we are proceeding with KASLR enabled. Calculate a suitable
+	 * kernel image offset from the seed. Let's place the kernel in the
+	 * lower half of the VMALLOC area (VA_BITS - 2).
+	 * Even if we could randomize at page granularity for 16k and 64k pages,
+	 * let's always round to 2 MB so we don't interfere with the ability to
+	 * map using contiguous PTEs
+	 */
+	mask = ((1UL << (VA_BITS - 2)) - 1) & ~(SZ_2M - 1);
+	offset = seed & mask;
+
+	/*
+	 * The kernel Image should not extend across a 1GB/32MB/512MB alignment
+	 * boundary (for 4KB/16KB/64KB granule kernels, respectively). If this
+	 * happens, increase the KASLR offset by the size of the kernel image.
+	 */
+	if ((((u64)_text + offset) >> SWAPPER_TABLE_SHIFT) !=
+	    (((u64)_end + offset) >> SWAPPER_TABLE_SHIFT))
+		offset = (offset + (u64)(_end - _text)) & mask;
+
+	if (IS_ENABLED(CONFIG_KASAN))
+		/*
+		 * KASAN does not expect the module region to intersect the
+		 * vmalloc region, since shadow memory is allocated for each
+		 * module at load time, whereas the vmalloc region is shadowed
+		 * by KASAN zero pages. So keep modules out of the vmalloc
+		 * region if KASAN is enabled.
+		 */
+		return offset;
+
+	if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) {
+		/*
+		 * Randomize the module region independently from the core
+		 * kernel. This prevents modules from leaking any information
+		 * about the address of the kernel itself, but results in
+		 * branches between modules and the core kernel that are
+		 * resolved via PLTs. (Branches between modules will be
+		 * resolved normally.)
+		 */
+		module_range = VMALLOC_END - VMALLOC_START - MODULES_VSIZE;
+		module_alloc_base = VMALLOC_START;
+	} else {
+		/*
+		 * Randomize the module region by setting module_alloc_base to
+		 * a PAGE_SIZE multiple in the range [_etext - MODULES_VSIZE,
+		 * _stext) . This guarantees that the resulting region still
+		 * covers [_stext, _etext], and that all relative branches can
+		 * be resolved without veneers.
+		 */
+		module_range = MODULES_VSIZE - (u64)(_etext - _stext);
+		module_alloc_base = (u64)_etext + offset - MODULES_VSIZE;
+	}
+
+	/* use the lower 21 bits to randomize the base of the module region */
+	module_alloc_base += (module_range * (seed & ((1 << 21) - 1))) >> 21;
+	module_alloc_base &= PAGE_MASK;
+
+	return offset;
+}
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index a9dde97f5ca5..7f316982ce00 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -34,7 +34,8 @@ void *module_alloc(unsigned long size)
 {
 	void *p;
 
-	p = __vmalloc_node_range(size, MODULE_ALIGN, MODULES_VADDR, MODULES_END,
+	p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base,
+				module_alloc_base + MODULES_VSIZE,
 				GFP_KERNEL, PAGE_KERNEL_EXEC, 0,
 				NUMA_NO_NODE, __builtin_return_address(0));
 
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index cfed56f0ad26..42371f69def3 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -388,3 +388,32 @@ static int __init topology_init(void)
 	return 0;
 }
 subsys_initcall(topology_init);
+
+/*
+ * Dump out kernel offset information on panic.
+ */
+static int dump_kernel_offset(struct notifier_block *self, unsigned long v,
+			      void *p)
+{
+	u64 const kaslr_offset = kimage_vaddr - KIMAGE_VADDR;
+
+	if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_offset > 0) {
+		pr_emerg("Kernel Offset: 0x%llx from 0x%lx\n",
+			 kaslr_offset, KIMAGE_VADDR);
+	} else {
+		pr_emerg("Kernel Offset: disabled\n");
+	}
+	return 0;
+}
+
+static struct notifier_block kernel_offset_notifier = {
+	.notifier_call = dump_kernel_offset
+};
+
+static int __init register_kernel_offset_dumper(void)
+{
+	atomic_notifier_chain_register(&panic_notifier_list,
+				       &kernel_offset_notifier);
+	return 0;
+}
+__initcall(register_kernel_offset_dumper);
diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index 7f10cc91fa8a..56e19d150c21 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -129,12 +129,16 @@ static void __init clear_pgds(unsigned long start,
 void __init kasan_init(void)
 {
 	u64 kimg_shadow_start, kimg_shadow_end;
+	u64 mod_shadow_start, mod_shadow_end;
 	struct memblock_region *reg;
 	int i;
 
 	kimg_shadow_start = (u64)kasan_mem_to_shadow(_text);
 	kimg_shadow_end = (u64)kasan_mem_to_shadow(_end);
 
+	mod_shadow_start = (u64)kasan_mem_to_shadow((void *)MODULES_VADDR);
+	mod_shadow_end = (u64)kasan_mem_to_shadow((void *)MODULES_END);
+
 	/*
 	 * We are going to perform proper setup of shadow memory.
 	 * At first we should unmap early shadow (clear_pgds() call bellow).
@@ -158,13 +162,20 @@ void __init kasan_init(void)
 	 * with PMD table mappings at the edges of the shadow region for the
 	 * kernel image.
 	 */
-	if (ARM64_SWAPPER_USES_SECTION_MAPS)
+	if (ARM64_SWAPPER_USES_SECTION_MAPS) {
+		kimg_shadow_start = round_down(kimg_shadow_start,
+					       SWAPPER_BLOCK_SIZE);
 		kimg_shadow_end = round_up(kimg_shadow_end, SWAPPER_BLOCK_SIZE);
+	}
 
 	kasan_populate_zero_shadow((void *)KASAN_SHADOW_START,
-			kasan_mem_to_shadow((void *)MODULES_VADDR));
+				   (void *)mod_shadow_start);
 	kasan_populate_zero_shadow((void *)kimg_shadow_end,
-			kasan_mem_to_shadow((void *)PAGE_OFFSET));
+				   kasan_mem_to_shadow((void *)PAGE_OFFSET));
+
+	if (kimg_shadow_start > mod_shadow_end)
+		kasan_populate_zero_shadow((void *)mod_shadow_end,
+					   (void *)kimg_shadow_start);
 
 	for_each_memblock(memory, reg) {
 		void *start = (void *)__phys_to_virt(reg->base);
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index fb5c872fe3d6..ff0f5a46b552 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -676,7 +676,8 @@ void __init early_fixmap_init(void)
 	unsigned long addr = FIXADDR_START;
 
 	pgd = pgd_offset_k(addr);
-	if (CONFIG_PGTABLE_LEVELS > 3 && !pgd_none(*pgd)) {
+	if (CONFIG_PGTABLE_LEVELS > 3 &&
+	    !(pgd_none(*pgd) || pgd_page_paddr(*pgd) == __pa(bm_pud))) {
 		/*
 		 * We only end up here if the kernel mapping and the fixmap
 		 * share the top level pgd entry, which should only happen on
@@ -733,11 +734,10 @@ void __set_fixmap(enum fixed_addresses idx,
 	}
 }
 
-void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
+void *__init __fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot)
 {
 	const u64 dt_virt_base = __fix_to_virt(FIX_FDT);
-	pgprot_t prot = PAGE_KERNEL_RO;
-	int size, offset;
+	int offset;
 	void *dt_virt;
 
 	/*
@@ -776,16 +776,27 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
 	if (fdt_check_header(dt_virt) != 0)
 		return NULL;
 
-	size = fdt_totalsize(dt_virt);
-	if (size > MAX_FDT_SIZE)
+	*size = fdt_totalsize(dt_virt);
+	if (*size > MAX_FDT_SIZE)
 		return NULL;
 
-	if (offset + size > SWAPPER_BLOCK_SIZE)
+	if (offset + *size > SWAPPER_BLOCK_SIZE)
 		create_mapping_noalloc(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base,
-			       round_up(offset + size, SWAPPER_BLOCK_SIZE), prot);
+			       round_up(offset + *size, SWAPPER_BLOCK_SIZE), prot);
+
+	return dt_virt;
+}
+
+void *__init fixmap_remap_fdt(phys_addr_t dt_phys)
+{
+	void *dt_virt;
+	int size;
+
+	dt_virt = __fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL_RO);
+	if (!dt_virt)
+		return NULL;
 
 	memblock_reserve(dt_phys, size);
-
 	return dt_virt;
 }
 

From 98e23ea3a3dd23269a69282291f9bef53e262bc2 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Fri, 29 Jan 2016 11:59:03 +0100
Subject: [PATCH 337/424] arm64: kaslr: randomize the linear region

When KASLR is enabled (CONFIG_RANDOMIZE_BASE=y), and entropy has been
provided by the bootloader, randomize the placement of RAM inside the
linear region if sufficient space is available. For instance, on a 4KB
granule/3 levels kernel, the linear region is 256 GB in size, and we can
choose any 1 GB aligned offset that is far enough from the top of the
address space to fit the distance between the start of the lowest memblock
and the top of the highest memblock.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit c031a4213c11a5db475f528c182f7b3858df11db)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/kernel/kaslr.c |  4 ++++
 arch/arm64/mm/init.c      | 22 ++++++++++++++++++++--
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
index 8b32a1f8f09f..582983920054 100644
--- a/arch/arm64/kernel/kaslr.c
+++ b/arch/arm64/kernel/kaslr.c
@@ -21,6 +21,7 @@
 #include <asm/sections.h>
 
 u64 __read_mostly module_alloc_base;
+u16 __initdata memstart_offset_seed;
 
 static __init u64 get_kaslr_seed(void *fdt)
 {
@@ -123,6 +124,9 @@ u64 __init kaslr_early_init(u64 dt_phys)
 	mask = ((1UL << (VA_BITS - 2)) - 1) & ~(SZ_2M - 1);
 	offset = seed & mask;
 
+	/* use the top 16 bits to randomize the linear region */
+	memstart_offset_seed = seed >> 48;
+
 	/*
 	 * The kernel Image should not extend across a 1GB/32MB/512MB alignment
 	 * boundary (for 4KB/16KB/64KB granule kernels, respectively). If this
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 2c7a3c2868e4..58a6d3f7525c 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -196,6 +196,23 @@ void __init arm64_memblock_init(void)
 		memblock_add(__pa(_text), (u64)(_end - _text));
 	}
 
+	if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
+		extern u16 memstart_offset_seed;
+		u64 range = linear_region_size -
+			    (memblock_end_of_DRAM() - memblock_start_of_DRAM());
+
+		/*
+		 * If the size of the linear region exceeds, by a sufficient
+		 * margin, the size of the region that the available physical
+		 * memory spans, randomize the linear region as well.
+		 */
+		if (memstart_offset_seed > 0 && range >= ARM64_MEMSTART_ALIGN) {
+			range = range / ARM64_MEMSTART_ALIGN + 1;
+			memstart_addr -= ARM64_MEMSTART_ALIGN *
+					 ((range * memstart_offset_seed) >> 16);
+		}
+	}
+
 	/*
 	 * Register the kernel text, kernel data, initrd, and initial
 	 * pagetables with memblock.
@@ -365,12 +382,13 @@ void __init mem_init(void)
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 		  MLG(VMEMMAP_START,
 		      VMEMMAP_START + VMEMMAP_SIZE),
-		  MLM((unsigned long)virt_to_page(PAGE_OFFSET),
+		  MLM((unsigned long)phys_to_page(memblock_start_of_DRAM()),
 		      (unsigned long)virt_to_page(high_memory)),
 #endif
 		  MLK(FIXADDR_START, FIXADDR_TOP),
 		  MLM(PCI_IO_START, PCI_IO_END),
-		  MLM(PAGE_OFFSET, (unsigned long)high_memory));
+		  MLM(__phys_to_virt(memblock_start_of_DRAM()),
+		      (unsigned long)high_memory));
 
 #undef MLK
 #undef MLM

From ee6457583818600e4c9b7f3a09d358d6ae3727b8 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Sun, 10 Jan 2016 11:29:07 +0100
Subject: [PATCH 338/424] efi: stub: implement efi_get_random_bytes() based on
 EFI_RNG_PROTOCOL

This exposes the firmware's implementation of EFI_RNG_PROTOCOL via a new
function efi_get_random_bytes().

Reviewed-by: Matt Fleming <matt@codeblueprint.co.uk>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit e4fbf4767440472f9d23b0f25a2b905e1c63b6a8)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 drivers/firmware/efi/libstub/Makefile  |  3 ++-
 drivers/firmware/efi/libstub/efistub.h |  3 +++
 drivers/firmware/efi/libstub/random.c  | 35 ++++++++++++++++++++++++++
 include/linux/efi.h                    |  6 ++++-
 4 files changed, 45 insertions(+), 2 deletions(-)
 create mode 100644 drivers/firmware/efi/libstub/random.c

diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
index c0ddd1b8dca3..c4098748e1fe 100644
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile
@@ -34,7 +34,8 @@ $(obj)/lib-%.o: $(srctree)/lib/%.c FORCE
 lib-$(CONFIG_EFI_ARMSTUB)	+= arm-stub.o fdt.o string.o \
 				   $(patsubst %.c,lib-%.o,$(arm-deps))
 
-lib-$(CONFIG_ARM64)		+= arm64-stub.o
+lib-$(CONFIG_ARM)		+= arm32-stub.o
+lib-$(CONFIG_ARM64)		+= arm64-stub.o random.o
 CFLAGS_arm64-stub.o 		:= -DTEXT_OFFSET=$(TEXT_OFFSET)
 
 #
diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h
index 6b6548fda089..206b7252b9d1 100644
--- a/drivers/firmware/efi/libstub/efistub.h
+++ b/drivers/firmware/efi/libstub/efistub.h
@@ -43,4 +43,7 @@ void efi_get_virtmap(efi_memory_desc_t *memory_map, unsigned long map_size,
 		     unsigned long desc_size, efi_memory_desc_t *runtime_map,
 		     int *count);
 
+efi_status_t efi_get_random_bytes(efi_system_table_t *sys_table,
+				  unsigned long size, u8 *out);
+
 #endif
diff --git a/drivers/firmware/efi/libstub/random.c b/drivers/firmware/efi/libstub/random.c
new file mode 100644
index 000000000000..97941ee5954f
--- /dev/null
+++ b/drivers/firmware/efi/libstub/random.c
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2016 Linaro Ltd;  <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/efi.h>
+#include <asm/efi.h>
+
+#include "efistub.h"
+
+struct efi_rng_protocol {
+	efi_status_t (*get_info)(struct efi_rng_protocol *,
+				 unsigned long *, efi_guid_t *);
+	efi_status_t (*get_rng)(struct efi_rng_protocol *,
+				efi_guid_t *, unsigned long, u8 *out);
+};
+
+efi_status_t efi_get_random_bytes(efi_system_table_t *sys_table_arg,
+				  unsigned long size, u8 *out)
+{
+	efi_guid_t rng_proto = EFI_RNG_PROTOCOL_GUID;
+	efi_status_t status;
+	struct efi_rng_protocol *rng;
+
+	status = efi_call_early(locate_protocol, &rng_proto, NULL,
+				(void **)&rng);
+	if (status != EFI_SUCCESS)
+		return status;
+
+	return rng->get_rng(rng, NULL, size, out);
+}
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 47be3ad7d3e5..333d0ca6940f 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -299,7 +299,7 @@ typedef struct {
 	void *open_protocol_information;
 	void *protocols_per_handle;
 	void *locate_handle_buffer;
-	void *locate_protocol;
+	efi_status_t (*locate_protocol)(efi_guid_t *, void *, void **);
 	void *install_multiple_protocol_interfaces;
 	void *uninstall_multiple_protocol_interfaces;
 	void *calculate_crc32;
@@ -599,6 +599,10 @@ void efi_native_runtime_setup(void);
 #define EFI_PROPERTIES_TABLE_GUID \
     EFI_GUID(  0x880aaca3, 0x4adc, 0x4a04, 0x90, 0x79, 0xb7, 0x47, 0x34, 0x08, 0x25, 0xe5 )
 
+#define EFI_RNG_PROTOCOL_GUID \
+	EFI_GUID(0x3152bca5, 0xeade, 0x433d, \
+		 0x86, 0x2e, 0xc0, 0x1c, 0xdc, 0x29, 0x1f, 0x44)
+
 typedef struct {
 	efi_guid_t guid;
 	u64 table;

From 0f01a865b4feb17ff014717ed2745a845e0c0ee3 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 11 Jan 2016 10:43:16 +0100
Subject: [PATCH 339/424] efi: stub: add implementation of efi_random_alloc()

This implements efi_random_alloc(), which allocates a chunk of memory of
a certain size at a certain alignment, and uses the random_seed argument
it receives to randomize the address of the allocation.

This is implemented by iterating over the UEFI memory map, counting the
number of suitable slots (aligned offsets) within each region, and picking
a random number between 0 and 'number of slots - 1' to select the slot,
This should guarantee that each possible offset is chosen equally likely.

Suggested-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Matt Fleming <matt@codeblueprint.co.uk>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 2ddbfc81eac84a299cb4747a8764bc43f23e9008)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 drivers/firmware/efi/libstub/efistub.h |   4 +
 drivers/firmware/efi/libstub/random.c  | 100 +++++++++++++++++++++++++
 2 files changed, 104 insertions(+)

diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h
index 206b7252b9d1..5ed3d3f38166 100644
--- a/drivers/firmware/efi/libstub/efistub.h
+++ b/drivers/firmware/efi/libstub/efistub.h
@@ -46,4 +46,8 @@ void efi_get_virtmap(efi_memory_desc_t *memory_map, unsigned long map_size,
 efi_status_t efi_get_random_bytes(efi_system_table_t *sys_table,
 				  unsigned long size, u8 *out);
 
+efi_status_t efi_random_alloc(efi_system_table_t *sys_table_arg,
+			      unsigned long size, unsigned long align,
+			      unsigned long *addr, unsigned long random_seed);
+
 #endif
diff --git a/drivers/firmware/efi/libstub/random.c b/drivers/firmware/efi/libstub/random.c
index 97941ee5954f..53f6d3fe6d86 100644
--- a/drivers/firmware/efi/libstub/random.c
+++ b/drivers/firmware/efi/libstub/random.c
@@ -33,3 +33,103 @@ efi_status_t efi_get_random_bytes(efi_system_table_t *sys_table_arg,
 
 	return rng->get_rng(rng, NULL, size, out);
 }
+
+/*
+ * Return the number of slots covered by this entry, i.e., the number of
+ * addresses it covers that are suitably aligned and supply enough room
+ * for the allocation.
+ */
+static unsigned long get_entry_num_slots(efi_memory_desc_t *md,
+					 unsigned long size,
+					 unsigned long align)
+{
+	u64 start, end;
+
+	if (md->type != EFI_CONVENTIONAL_MEMORY)
+		return 0;
+
+	start = round_up(md->phys_addr, align);
+	end = round_down(md->phys_addr + md->num_pages * EFI_PAGE_SIZE - size,
+			 align);
+
+	if (start > end)
+		return 0;
+
+	return (end - start + 1) / align;
+}
+
+/*
+ * The UEFI memory descriptors have a virtual address field that is only used
+ * when installing the virtual mapping using SetVirtualAddressMap(). Since it
+ * is unused here, we can reuse it to keep track of each descriptor's slot
+ * count.
+ */
+#define MD_NUM_SLOTS(md)	((md)->virt_addr)
+
+efi_status_t efi_random_alloc(efi_system_table_t *sys_table_arg,
+			      unsigned long size,
+			      unsigned long align,
+			      unsigned long *addr,
+			      unsigned long random_seed)
+{
+	unsigned long map_size, desc_size, total_slots = 0, target_slot;
+	efi_status_t status;
+	efi_memory_desc_t *memory_map;
+	int map_offset;
+
+	status = efi_get_memory_map(sys_table_arg, &memory_map, &map_size,
+				    &desc_size, NULL, NULL);
+	if (status != EFI_SUCCESS)
+		return status;
+
+	if (align < EFI_ALLOC_ALIGN)
+		align = EFI_ALLOC_ALIGN;
+
+	/* count the suitable slots in each memory map entry */
+	for (map_offset = 0; map_offset < map_size; map_offset += desc_size) {
+		efi_memory_desc_t *md = (void *)memory_map + map_offset;
+		unsigned long slots;
+
+		slots = get_entry_num_slots(md, size, align);
+		MD_NUM_SLOTS(md) = slots;
+		total_slots += slots;
+	}
+
+	/* find a random number between 0 and total_slots */
+	target_slot = (total_slots * (u16)random_seed) >> 16;
+
+	/*
+	 * target_slot is now a value in the range [0, total_slots), and so
+	 * it corresponds with exactly one of the suitable slots we recorded
+	 * when iterating over the memory map the first time around.
+	 *
+	 * So iterate over the memory map again, subtracting the number of
+	 * slots of each entry at each iteration, until we have found the entry
+	 * that covers our chosen slot. Use the residual value of target_slot
+	 * to calculate the randomly chosen address, and allocate it directly
+	 * using EFI_ALLOCATE_ADDRESS.
+	 */
+	for (map_offset = 0; map_offset < map_size; map_offset += desc_size) {
+		efi_memory_desc_t *md = (void *)memory_map + map_offset;
+		efi_physical_addr_t target;
+		unsigned long pages;
+
+		if (target_slot >= MD_NUM_SLOTS(md)) {
+			target_slot -= MD_NUM_SLOTS(md);
+			continue;
+		}
+
+		target = round_up(md->phys_addr, align) + target_slot * align;
+		pages = round_up(size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE;
+
+		status = efi_call_early(allocate_pages, EFI_ALLOCATE_ADDRESS,
+					EFI_LOADER_DATA, pages, &target);
+		if (status == EFI_SUCCESS)
+			*addr = target;
+		break;
+	}
+
+	efi_call_early(free_pool, memory_map);
+
+	return status;
+}

From 4a9c1460b2b904c4a9b6438a14d10c56e0e9ab78 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 11 Jan 2016 11:47:49 +0100
Subject: [PATCH 340/424] efi: stub: use high allocation for converted command
 line

Before we can move the command line processing before the allocation
of the kernel, which is required for detecting the 'nokaslr' option
which controls that allocation, move the converted command line higher
up in memory, to prevent it from interfering with the kernel itself.

Since x86 needs the address to fit in 32 bits, use UINT_MAX as the upper
bound there. Otherwise, use ULONG_MAX (i.e., no limit)

Reviewed-by: Matt Fleming <matt@codeblueprint.co.uk>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 48fcb2d0216103d15306caa4814e2381104df6d8)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/x86/include/asm/efi.h                     | 2 ++
 drivers/firmware/efi/libstub/efi-stub-helper.c | 7 ++++++-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 0010c78c4998..08b1f2f6ea50 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -25,6 +25,8 @@
 #define EFI32_LOADER_SIGNATURE	"EL32"
 #define EFI64_LOADER_SIGNATURE	"EL64"
 
+#define MAX_CMDLINE_ADDRESS	UINT_MAX
+
 #ifdef CONFIG_X86_32
 
 
diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c
index f07d4a67fa76..29ed2f9b218c 100644
--- a/drivers/firmware/efi/libstub/efi-stub-helper.c
+++ b/drivers/firmware/efi/libstub/efi-stub-helper.c
@@ -649,6 +649,10 @@ static u8 *efi_utf16_to_utf8(u8 *dst, const u16 *src, int n)
 	return dst;
 }
 
+#ifndef MAX_CMDLINE_ADDRESS
+#define MAX_CMDLINE_ADDRESS	ULONG_MAX
+#endif
+
 /*
  * Convert the unicode UEFI command line to ASCII to pass to kernel.
  * Size of memory allocated return in *cmd_line_len.
@@ -684,7 +688,8 @@ char *efi_convert_cmdline(efi_system_table_t *sys_table_arg,
 
 	options_bytes++;	/* NUL termination */
 
-	status = efi_low_alloc(sys_table_arg, options_bytes, 0, &cmdline_addr);
+	status = efi_high_alloc(sys_table_arg, options_bytes, 0,
+				&cmdline_addr, MAX_CMDLINE_ADDRESS);
 	if (status != EFI_SUCCESS)
 		return NULL;
 

From e009472925ee90986397518ef6796e6f8d12e1da Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 26 Jan 2016 14:48:29 +0100
Subject: [PATCH 341/424] arm64: efi: invoke EFI_RNG_PROTOCOL to supply KASLR
 randomness

Since arm64 does not use a decompressor that supplies an execution
environment where it is feasible to some extent to provide a source of
randomness, the arm64 KASLR kernel depends on the bootloader to supply
some random bits in the /chosen/kaslr-seed DT property upon kernel entry.

On UEFI systems, we can use the EFI_RNG_PROTOCOL, if supplied, to obtain
some random bits. At the same time, use it to randomize the offset of the
kernel Image in physical memory.

Reviewed-by: Matt Fleming <matt@codeblueprint.co.uk>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 2b5fe07a78a09a32002642b8a823428ade611f16)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/Kconfig                        |  5 ++
 drivers/firmware/efi/libstub/arm-stub.c   | 40 +++++++----
 drivers/firmware/efi/libstub/arm64-stub.c | 84 ++++++++++++++++-------
 drivers/firmware/efi/libstub/fdt.c        | 14 ++++
 4 files changed, 105 insertions(+), 38 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index b311ac23c989..97583a1878db 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -760,6 +760,11 @@ config RANDOMIZE_BASE
 	  It is the bootloader's job to provide entropy, by passing a
 	  random u64 value in /chosen/kaslr-seed at kernel entry.
 
+	  When booting via the UEFI stub, it will invoke the firmware's
+	  EFI_RNG_PROTOCOL implementation (if available) to supply entropy
+	  to the kernel proper. In addition, it will randomise the physical
+	  location of the kernel Image as well.
+
 	  If unsure, say N.
 
 config RANDOMIZE_MODULE_REGION_FULL
diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/arm-stub.c
index 950c87f5d279..d5aa1d16154f 100644
--- a/drivers/firmware/efi/libstub/arm-stub.c
+++ b/drivers/firmware/efi/libstub/arm-stub.c
@@ -18,6 +18,8 @@
 
 #include "efistub.h"
 
+bool __nokaslr;
+
 static int efi_secureboot_enabled(efi_system_table_t *sys_table_arg)
 {
 	static efi_guid_t const var_guid = EFI_GLOBAL_VARIABLE_GUID;
@@ -207,14 +209,6 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table,
 		pr_efi_err(sys_table, "Failed to find DRAM base\n");
 		goto fail;
 	}
-	status = handle_kernel_image(sys_table, image_addr, &image_size,
-				     &reserve_addr,
-				     &reserve_size,
-				     dram_base, image);
-	if (status != EFI_SUCCESS) {
-		pr_efi_err(sys_table, "Failed to relocate kernel\n");
-		goto fail;
-	}
 
 	/*
 	 * Get the command line from EFI, using the LOADED_IMAGE
@@ -224,7 +218,28 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table,
 	cmdline_ptr = efi_convert_cmdline(sys_table, image, &cmdline_size);
 	if (!cmdline_ptr) {
 		pr_efi_err(sys_table, "getting command line via LOADED_IMAGE_PROTOCOL\n");
-		goto fail_free_image;
+		goto fail;
+	}
+
+	/* check whether 'nokaslr' was passed on the command line */
+	if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
+		static const u8 default_cmdline[] = CONFIG_CMDLINE;
+		const u8 *str, *cmdline = cmdline_ptr;
+
+		if (IS_ENABLED(CONFIG_CMDLINE_FORCE))
+			cmdline = default_cmdline;
+		str = strstr(cmdline, "nokaslr");
+		if (str == cmdline || (str > cmdline && *(str - 1) == ' '))
+			__nokaslr = true;
+	}
+
+	status = handle_kernel_image(sys_table, image_addr, &image_size,
+				     &reserve_addr,
+				     &reserve_size,
+				     dram_base, image);
+	if (status != EFI_SUCCESS) {
+		pr_efi_err(sys_table, "Failed to relocate kernel\n");
+		goto fail_free_cmdline;
 	}
 
 	status = efi_parse_options(cmdline_ptr);
@@ -244,7 +259,7 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table,
 
 		if (status != EFI_SUCCESS) {
 			pr_efi_err(sys_table, "Failed to load device tree!\n");
-			goto fail_free_cmdline;
+			goto fail_free_image;
 		}
 	}
 
@@ -286,12 +301,11 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table,
 	efi_free(sys_table, initrd_size, initrd_addr);
 	efi_free(sys_table, fdt_size, fdt_addr);
 
-fail_free_cmdline:
-	efi_free(sys_table, cmdline_size, (unsigned long)cmdline_ptr);
-
 fail_free_image:
 	efi_free(sys_table, image_size, *image_addr);
 	efi_free(sys_table, reserve_size, reserve_addr);
+fail_free_cmdline:
+	efi_free(sys_table, cmdline_size, (unsigned long)cmdline_ptr);
 fail:
 	return EFI_ERROR;
 }
diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c
index 78dfbd34b6bf..e0e6b74fef8f 100644
--- a/drivers/firmware/efi/libstub/arm64-stub.c
+++ b/drivers/firmware/efi/libstub/arm64-stub.c
@@ -13,6 +13,10 @@
 #include <asm/efi.h>
 #include <asm/sections.h>
 
+#include "efistub.h"
+
+extern bool __nokaslr;
+
 efi_status_t __init handle_kernel_image(efi_system_table_t *sys_table_arg,
 					unsigned long *image_addr,
 					unsigned long *image_size,
@@ -23,26 +27,52 @@ efi_status_t __init handle_kernel_image(efi_system_table_t *sys_table_arg,
 {
 	efi_status_t status;
 	unsigned long kernel_size, kernel_memsize = 0;
-	unsigned long nr_pages;
 	void *old_image_addr = (void *)*image_addr;
 	unsigned long preferred_offset;
+	u64 phys_seed = 0;
+
+	if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
+		if (!__nokaslr) {
+			status = efi_get_random_bytes(sys_table_arg,
+						      sizeof(phys_seed),
+						      (u8 *)&phys_seed);
+			if (status == EFI_NOT_FOUND) {
+				pr_efi(sys_table_arg, "EFI_RNG_PROTOCOL unavailable, no randomness supplied\n");
+			} else if (status != EFI_SUCCESS) {
+				pr_efi_err(sys_table_arg, "efi_get_random_bytes() failed\n");
+				return status;
+			}
+		} else {
+			pr_efi(sys_table_arg, "KASLR disabled on kernel command line\n");
+		}
+	}
 
 	/*
 	 * The preferred offset of the kernel Image is TEXT_OFFSET bytes beyond
 	 * a 2 MB aligned base, which itself may be lower than dram_base, as
 	 * long as the resulting offset equals or exceeds it.
 	 */
-	preferred_offset = round_down(dram_base, SZ_2M) + TEXT_OFFSET;
+	preferred_offset = round_down(dram_base, MIN_KIMG_ALIGN) + TEXT_OFFSET;
 	if (preferred_offset < dram_base)
-		preferred_offset += SZ_2M;
+		preferred_offset += MIN_KIMG_ALIGN;
 
-	/* Relocate the image, if required. */
 	kernel_size = _edata - _text;
-	if (*image_addr != preferred_offset) {
-		kernel_memsize = kernel_size + (_end - _edata);
+	kernel_memsize = kernel_size + (_end - _edata);
 
+	if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && phys_seed != 0) {
 		/*
-		 * First, try a straight allocation at the preferred offset.
+		 * If KASLR is enabled, and we have some randomness available,
+		 * locate the kernel at a randomized offset in physical memory.
+		 */
+		*reserve_size = kernel_memsize + TEXT_OFFSET;
+		status = efi_random_alloc(sys_table_arg, *reserve_size,
+					  MIN_KIMG_ALIGN, reserve_addr,
+					  phys_seed);
+
+		*image_addr = *reserve_addr + TEXT_OFFSET;
+	} else {
+		/*
+		 * Else, try a straight allocation at the preferred offset.
 		 * This will work around the issue where, if dram_base == 0x0,
 		 * efi_low_alloc() refuses to allocate at 0x0 (to prevent the
 		 * address of the allocation to be mistaken for a FAIL return
@@ -52,27 +82,31 @@ efi_status_t __init handle_kernel_image(efi_system_table_t *sys_table_arg,
 		 * Mustang), we can still place the kernel at the address
 		 * 'dram_base + TEXT_OFFSET'.
 		 */
-		*image_addr = *reserve_addr = preferred_offset;
-		nr_pages = round_up(kernel_memsize, EFI_ALLOC_ALIGN) /
-			   EFI_PAGE_SIZE;
-		status = efi_call_early(allocate_pages, EFI_ALLOCATE_ADDRESS,
-					EFI_LOADER_DATA, nr_pages,
-					(efi_physical_addr_t *)reserve_addr);
-		if (status != EFI_SUCCESS) {
-			kernel_memsize += TEXT_OFFSET;
-			status = efi_low_alloc(sys_table_arg, kernel_memsize,
-					       SZ_2M, reserve_addr);
+		if (*image_addr == preferred_offset)
+			return EFI_SUCCESS;
 
-			if (status != EFI_SUCCESS) {
-				pr_efi_err(sys_table_arg, "Failed to relocate kernel\n");
-				return status;
-			}
-			*image_addr = *reserve_addr + TEXT_OFFSET;
-		}
-		memcpy((void *)*image_addr, old_image_addr, kernel_size);
-		*reserve_size = kernel_memsize;
+		*image_addr = *reserve_addr = preferred_offset;
+		*reserve_size = round_up(kernel_memsize, EFI_ALLOC_ALIGN);
+
+		status = efi_call_early(allocate_pages, EFI_ALLOCATE_ADDRESS,
+					EFI_LOADER_DATA,
+					*reserve_size / EFI_PAGE_SIZE,
+					(efi_physical_addr_t *)reserve_addr);
 	}
 
+	if (status != EFI_SUCCESS) {
+		*reserve_size = kernel_memsize + TEXT_OFFSET;
+		status = efi_low_alloc(sys_table_arg, *reserve_size,
+				       MIN_KIMG_ALIGN, reserve_addr);
+
+		if (status != EFI_SUCCESS) {
+			pr_efi_err(sys_table_arg, "Failed to relocate kernel\n");
+			*reserve_size = 0;
+			return status;
+		}
+		*image_addr = *reserve_addr + TEXT_OFFSET;
+	}
+	memcpy((void *)*image_addr, old_image_addr, kernel_size);
 
 	return EFI_SUCCESS;
 }
diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c
index b62e2f5dcab3..b1c22cf18f7d 100644
--- a/drivers/firmware/efi/libstub/fdt.c
+++ b/drivers/firmware/efi/libstub/fdt.c
@@ -147,6 +147,20 @@ efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt,
 	if (status)
 		goto fdt_set_fail;
 
+	if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
+		efi_status_t efi_status;
+
+		efi_status = efi_get_random_bytes(sys_table, sizeof(fdt_val64),
+						  (u8 *)&fdt_val64);
+		if (efi_status == EFI_SUCCESS) {
+			status = fdt_setprop(fdt, node, "kaslr-seed",
+					     &fdt_val64, sizeof(fdt_val64));
+			if (status)
+				goto fdt_set_fail;
+		} else if (efi_status != EFI_NOT_FOUND) {
+			return efi_status;
+		}
+	}
 	return EFI_SUCCESS;
 
 fdt_set_fail:

From 885af350336f3e3256999234cd4948210df6c946 Mon Sep 17 00:00:00 2001
From: Yang Shi <yang.shi@linaro.org>
Date: Thu, 11 Feb 2016 13:53:10 -0800
Subject: [PATCH 342/424] arm64: make irq_stack_ptr more robust

Switching between stacks is only valid if we are tracing ourselves while on the
irq_stack, so it is only valid when in current and non-preemptible context,
otherwise is is just zeroed off.

Fixes: 132cd887b5c5 ("arm64: Modify stack trace and dump for use with irq_stack")
Acked-by: James Morse <james.morse@arm.com>
Tested-by: James Morse <james.morse@arm.com>
Signed-off-by: Yang Shi <yang.shi@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit a80a0eb70c358f8c7dda4bb62b2278dc6285217b)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/kernel/stacktrace.c | 13 ++++++-------
 arch/arm64/kernel/traps.c      | 11 ++++++++++-
 2 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index 4fad9787ab46..cfd46c227c8c 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -44,14 +44,13 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
 	unsigned long irq_stack_ptr;
 
 	/*
-	 * Use raw_smp_processor_id() to avoid false-positives from
-	 * CONFIG_DEBUG_PREEMPT. get_wchan() calls unwind_frame() on sleeping
-	 * task stacks, we can be pre-empted in this case, so
-	 * {raw_,}smp_processor_id() may give us the wrong value. Sleeping
-	 * tasks can't ever be on an interrupt stack, so regardless of cpu,
-	 * the checks will always fail.
+	 * Switching between stacks is valid when tracing current and in
+	 * non-preemptible context.
 	 */
-	irq_stack_ptr = IRQ_STACK_PTR(raw_smp_processor_id());
+	if (tsk == current && !preemptible())
+		irq_stack_ptr = IRQ_STACK_PTR(smp_processor_id());
+	else
+		irq_stack_ptr = 0;
 
 	low  = frame->sp;
 	/* irq stacks are not THREAD_SIZE aligned */
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index cbedd724f48e..c5392081b49b 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -146,9 +146,18 @@ static void dump_instr(const char *lvl, struct pt_regs *regs)
 static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
 {
 	struct stackframe frame;
-	unsigned long irq_stack_ptr = IRQ_STACK_PTR(smp_processor_id());
+	unsigned long irq_stack_ptr;
 	int skip;
 
+	/*
+	 * Switching between stacks is valid when tracing current and in
+	 * non-preemptible context.
+	 */
+	if (tsk == current && !preemptible())
+		irq_stack_ptr = IRQ_STACK_PTR(smp_processor_id());
+	else
+		irq_stack_ptr = 0;
+
 	pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk);
 
 	if (!tsk)

From 37829fdb8c27a5a506cb535db156c917a6e0061a Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Wed, 9 Mar 2016 15:22:55 +0000
Subject: [PATCH 343/424] arm64: hugetlb: partial revert of 66b3923a1a0f

Commit 66b3923a1a0f ("arm64: hugetlb: add support for PTE contiguous bit")
introduced support for huge pages using the contiguous bit in the PTE
as opposed to block mappings, which may be slightly unwieldy (512M) in
64k page configurations.

Unfortunately, this support has resulted in some late regressions when
running the libhugetlbfs test suite with 64k pages and CONFIG_DEBUG_VM
as a result of a BUG:

 | readback (2M: 64):	------------[ cut here ]------------
 | kernel BUG at fs/hugetlbfs/inode.c:446!
 | Internal error: Oops - BUG: 0 [#1] SMP
 | Modules linked in:
 | CPU: 7 PID: 1448 Comm: readback Not tainted 4.5.0-rc7 #148
 | Hardware name: linux,dummy-virt (DT)
 | task: fffffe0040964b00 ti: fffffe00c2668000 task.ti: fffffe00c2668000
 | PC is at remove_inode_hugepages+0x44c/0x480
 | LR is at remove_inode_hugepages+0x264/0x480

Rather than revert the entire patch, simply avoid advertising the
contiguous huge page sizes for now while people are actively working on
a fix. This patch can then be reverted once things have been sorted out.

Cc: David Woods <dwoods@ezchip.com>
Reported-by: Steve Capper <steve.capper@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
(cherry picked from commit ff7925848b50050732ac0401e0acf27e8b241d7b)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/mm/hugetlbpage.c | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 82d607c3614e..da30529bb1f6 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -306,10 +306,6 @@ static __init int setup_hugepagesz(char *opt)
 		hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
 	} else if (ps == PUD_SIZE) {
 		hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
-	} else if (ps == (PAGE_SIZE * CONT_PTES)) {
-		hugetlb_add_hstate(CONT_PTE_SHIFT);
-	} else if (ps == (PMD_SIZE * CONT_PMDS)) {
-		hugetlb_add_hstate((PMD_SHIFT + CONT_PMD_SHIFT) - PAGE_SHIFT);
 	} else {
 		pr_err("hugepagesz: Unsupported page size %lu K\n", ps >> 10);
 		return 0;
@@ -317,13 +313,3 @@ static __init int setup_hugepagesz(char *opt)
 	return 1;
 }
 __setup("hugepagesz=", setup_hugepagesz);
-
-#ifdef CONFIG_ARM64_64K_PAGES
-static __init int add_default_hugepagesz(void)
-{
-	if (size_to_hstate(CONT_PTES * PAGE_SIZE) == NULL)
-		hugetlb_add_hstate(CONT_PMD_SHIFT);
-	return 0;
-}
-arch_initcall(add_default_hugepagesz);
-#endif

From f2971e0e6c42c2b0197e43280ef6a48d8a46097e Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 15 Mar 2016 11:22:57 +0000
Subject: [PATCH 344/424] arm64: fix KASLR boot-time I-cache maintenance

Commit f80fb3a3d50843a4 ("arm64: add support for kernel ASLR") missed a
DSB necessary to complete I-cache maintenance in the primary boot path,
and hence stale instructions may still be present in the I-cache and may
be executed until the I-cache maintenance naturally completes.

Since commit 8ec41987436d566f ("arm64: mm: ensure patched kernel text is
fetched from PoU"), all CPUs invalidate their I-caches after their MMU
is enabled. Prior a CPU's MMU having been enabled, arbitrary lines may
have been fetched from the PoC into I-caches. We never patch text
expected to be executed with the MMU off. Thus, it is unnecessary to
perform broadcast I-cache maintenance in the primary boot path.

This patch reduces the scope of the I-cache maintenance to the local
CPU, and adds the missing DSB with similar scope, matching prior
maintenance in the primary boot path.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Ard Biesehvuel <ard.biesheuvel@linaro.org>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit b90b4a608ea2401cc491828f7a385edd2e236e37)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/kernel/head.S | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 319f896c6e74..a88a15447c3b 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -740,8 +740,9 @@ __enable_mmu:
 
 	msr	sctlr_el1, x19			// re-enable the MMU
 	isb
-	ic	ialluis				// flush instructions fetched
-	isb					// via old mapping
+	ic	iallu				// flush instructions fetched
+	dsb	nsh				// via old mapping
+	isb
 	add	x27, x27, x23			// relocated __mmap_switched
 #endif
 	br	x27

From 2426266ca6318722160645a720bd02bece8400c7 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 22 Mar 2016 10:11:45 +0000
Subject: [PATCH 345/424] arm64: consistently use p?d_set_huge

Commit 324420bf91f60582 ("arm64: add support for ioremap() block
mappings") added new p?d_set_huge functions which do the hard work to
generate and set a correct block entry.

These differ from open-coded huge page creation in the early page table
code by explicitly setting the P?D_TYPE_SECT bits (which are implicitly
retained by mk_sect_prot() for any valid prot), but are otherwise
identical (and cannot fail on arm64).

For simplicity and consistency, make use of these in the initial page
table creation code.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit c661cb1c537e2364bfdabb298fb934fd77445e98)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/mm/mmu.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index ff0f5a46b552..41421c724fb9 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -211,8 +211,7 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end,
 		if (((addr | next | phys) & ~SECTION_MASK) == 0 &&
 		      block_mappings_allowed(pgtable_alloc)) {
 			pmd_t old_pmd =*pmd;
-			set_pmd(pmd, __pmd(phys |
-					   pgprot_val(mk_sect_prot(prot))));
+			pmd_set_huge(pmd, phys, prot);
 			/*
 			 * Check for previous table entries created during
 			 * boot (__create_page_tables) and flush them.
@@ -272,8 +271,7 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
 		if (use_1G_block(addr, next, phys) &&
 		    block_mappings_allowed(pgtable_alloc)) {
 			pud_t old_pud = *pud;
-			set_pud(pud, __pud(phys |
-					   pgprot_val(mk_sect_prot(prot))));
+			pud_set_huge(pud, phys, prot);
 
 			/*
 			 * If we have an old value for a pud, it will

From 9ca29910090bc04686fbed05306131093da667f1 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Thu, 10 Mar 2016 18:41:16 +0000
Subject: [PATCH 346/424] arm64: kasan: Fix zero shadow mapping overriding
 kernel image shadow

With the 16KB and 64KB page size configurations, SWAPPER_BLOCK_SIZE is
PAGE_SIZE and ARM64_SWAPPER_USES_SECTION_MAPS is 0. Since
kimg_shadow_end is not page aligned (_end shifted by
KASAN_SHADOW_SCALE_SHIFT), the edges of previously mapped kernel image
shadow via vmemmap_populate() may be overridden by subsequent calls to
kasan_populate_zero_shadow(), leading to kernel panics like below:

------------------------------------------------------------------------------
Unable to handle kernel paging request at virtual address fffffc100135068c
pgd = fffffc8009ac0000
[fffffc100135068c] *pgd=00000009ffee0003, *pud=00000009ffee0003, *pmd=00000009ffee0003, *pte=00e0000081a00793
Internal error: Oops: 9600004f [#1] PREEMPT SMP
Modules linked in:
CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.5.0-rc4+ #1984
Hardware name: Juno (DT)
task: fffffe09001a0000 ti: fffffe0900200000 task.ti: fffffe0900200000
PC is at __memset+0x4c/0x200
LR is at kasan_unpoison_shadow+0x34/0x50
pc : [<fffffc800846f1cc>] lr : [<fffffc800821ff54>] pstate: 00000245
sp : fffffe0900203db0
x29: fffffe0900203db0 x28: 0000000000000000
x27: 0000000000000000 x26: 0000000000000000
x25: fffffc80099b69d0 x24: 0000000000000001
x23: 0000000000000000 x22: 0000000000002000
x21: dffffc8000000000 x20: 1fffff9001350a8c
x19: 0000000000002000 x18: 0000000000000008
x17: 0000000000000147 x16: ffffffffffffffff
x15: 79746972100e041d x14: ffffff0000000000
x13: ffff000000000000 x12: 0000000000000000
x11: 0101010101010101 x10: 1fffffc11c000000
x9 : 0000000000000000 x8 : fffffc100135068c
x7 : 0000000000000000 x6 : 000000000000003f
x5 : 0000000000000040 x4 : 0000000000000004
x3 : fffffc100134f651 x2 : 0000000000000400
x1 : 0000000000000000 x0 : fffffc100135068c

Process swapper/0 (pid: 1, stack limit = 0xfffffe0900200020)
Call trace:
[<fffffc800846f1cc>] __memset+0x4c/0x200
[<fffffc8008220044>] __asan_register_globals+0x5c/0xb0
[<fffffc8008a09d34>] _GLOBAL__sub_I_65535_1_sunrpc_cache_lookup+0x1c/0x28
[<fffffc8008f20d28>] kernel_init_freeable+0x104/0x274
[<fffffc80089e1948>] kernel_init+0x10/0xf8
[<fffffc8008093a00>] ret_from_fork+0x10/0x50
------------------------------------------------------------------------------

This patch aligns kimg_shadow_start and kimg_shadow_end to
SWAPPER_BLOCK_SIZE in all configurations.

Fixes: f9040773b7bb ("arm64: move kernel image to base of vmalloc area")
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
(cherry picked from commit 2776e0e8ef683a42fe3e9a5facf576b73579700e)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/mm/kasan_init.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index 56e19d150c21..206dd95ea292 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -158,15 +158,12 @@ void __init kasan_init(void)
 	 * vmemmap_populate() has populated the shadow region that covers the
 	 * kernel image with SWAPPER_BLOCK_SIZE mappings, so we have to round
 	 * the start and end addresses to SWAPPER_BLOCK_SIZE as well, to prevent
-	 * kasan_populate_zero_shadow() from replacing the PMD block mappings
-	 * with PMD table mappings at the edges of the shadow region for the
-	 * kernel image.
+	 * kasan_populate_zero_shadow() from replacing the page table entries
+	 * (PMD or PTE) at the edges of the shadow region for the kernel
+	 * image.
 	 */
-	if (ARM64_SWAPPER_USES_SECTION_MAPS) {
-		kimg_shadow_start = round_down(kimg_shadow_start,
-					       SWAPPER_BLOCK_SIZE);
-		kimg_shadow_end = round_up(kimg_shadow_end, SWAPPER_BLOCK_SIZE);
-	}
+	kimg_shadow_start = round_down(kimg_shadow_start, SWAPPER_BLOCK_SIZE);
+	kimg_shadow_end = round_up(kimg_shadow_end, SWAPPER_BLOCK_SIZE);
 
 	kasan_populate_zero_shadow((void *)KASAN_SHADOW_START,
 				   (void *)mod_shadow_start);

From 3d3fe7cf1fbc97ea285fd1ee3ed55b7527e4d1ae Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Wed, 2 Mar 2016 09:47:13 +0100
Subject: [PATCH 347/424] arm64: mm: check at build time that PAGE_OFFSET
 divides the VA space evenly

Commit 8439e62a1561 ("arm64: mm: use bit ops rather than arithmetic in
pa/va translations") changed the boundary check against PAGE_OFFSET from
an arithmetic comparison to a bit test. This means we now silently assume
that PAGE_OFFSET is a power of 2 that divides the kernel virtual address
space into two equal halves. So make that assumption explicit.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 6d2aa549de1fc998581d216de3853aa131aa4446)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/mm/init.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 58a6d3f7525c..19ccdb73c680 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -170,6 +170,13 @@ void __init arm64_memblock_init(void)
 {
 	const s64 linear_region_size = -(s64)PAGE_OFFSET;
 
+	/*
+	 * Ensure that the linear region takes up exactly half of the kernel
+	 * virtual address space. This way, we can distinguish a linear address
+	 * from a kernel/module/vmalloc address by testing a single bit.
+	 */
+	BUILD_BUG_ON(linear_region_size != BIT(VA_BITS - 1));
+
 	/*
 	 * Select a suitable value for the base of physical memory.
 	 */

From 27fa6e51b8ddfd0224dad957b85fd8097caa5978 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Thu, 25 Feb 2016 20:48:53 +0100
Subject: [PATCH 348/424] arm64: lse: deal with clobbered IP registers after
 branch via PLT

The LSE atomics implementation uses runtime patching to patch in calls
to out of line non-LSE atomics implementations on cores that lack hardware
support for LSE. To avoid paying the overhead cost of a function call even
if no call ends up being made, the bl instruction is kept invisible to the
compiler, and the out of line implementations preserve all registers, not
just the ones that they are required to preserve as per the AAPCS64.

However, commit fd045f6cd98e ("arm64: add support for module PLTs") added
support for routing branch instructions via veneers if the branch target
offset exceeds the range of the ordinary relative branch instructions.
Since this deals with jump and call instructions that are exposed to ELF
relocations, the PLT code uses x16 to hold the address of the branch target
when it performs an indirect branch-to-register, something which is
explicitly allowed by the AAPCS64 (and ordinary compiler generated code
does not expect register x16 or x17 to retain their values across a bl
instruction).

Since the lse runtime patched bl instructions don't adhere to the AAPCS64,
they don't deal with this clobbering of registers x16 and x17. So add them
to the clobber list of the asm() statements that perform the call
instructions, and drop x16 and x17 from the list of registers that are
callee saved in the out of line non-LSE implementations.

In addition, since we have given these functions two scratch registers,
they no longer need to stack/unstack temp registers.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
[will: factored clobber list into #define, updated Makefile comment]
Signed-off-by: Will Deacon <will.deacon@arm.com>

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 5be8b70af1ca78cefb8b756d157532360a5fd663)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/atomic_lse.h | 38 ++++++++++++++---------------
 arch/arm64/include/asm/lse.h        |  1 +
 arch/arm64/lib/Makefile             | 13 +++++-----
 3 files changed, 27 insertions(+), 25 deletions(-)

diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h
index 197e06afbf71..39c1d340fec5 100644
--- a/arch/arm64/include/asm/atomic_lse.h
+++ b/arch/arm64/include/asm/atomic_lse.h
@@ -36,7 +36,7 @@ static inline void atomic_andnot(int i, atomic_t *v)
 	"	stclr	%w[i], %[v]\n")
 	: [i] "+r" (w0), [v] "+Q" (v->counter)
 	: "r" (x1)
-	: "x30");
+	: __LL_SC_CLOBBERS);
 }
 
 static inline void atomic_or(int i, atomic_t *v)
@@ -48,7 +48,7 @@ static inline void atomic_or(int i, atomic_t *v)
 	"	stset	%w[i], %[v]\n")
 	: [i] "+r" (w0), [v] "+Q" (v->counter)
 	: "r" (x1)
-	: "x30");
+	: __LL_SC_CLOBBERS);
 }
 
 static inline void atomic_xor(int i, atomic_t *v)
@@ -60,7 +60,7 @@ static inline void atomic_xor(int i, atomic_t *v)
 	"	steor	%w[i], %[v]\n")
 	: [i] "+r" (w0), [v] "+Q" (v->counter)
 	: "r" (x1)
-	: "x30");
+	: __LL_SC_CLOBBERS);
 }
 
 static inline void atomic_add(int i, atomic_t *v)
@@ -72,7 +72,7 @@ static inline void atomic_add(int i, atomic_t *v)
 	"	stadd	%w[i], %[v]\n")
 	: [i] "+r" (w0), [v] "+Q" (v->counter)
 	: "r" (x1)
-	: "x30");
+	: __LL_SC_CLOBBERS);
 }
 
 #define ATOMIC_OP_ADD_RETURN(name, mb, cl...)				\
@@ -90,7 +90,7 @@ static inline int atomic_add_return##name(int i, atomic_t *v)		\
 	"	add	%w[i], %w[i], w30")				\
 	: [i] "+r" (w0), [v] "+Q" (v->counter)				\
 	: "r" (x1)							\
-	: "x30" , ##cl);						\
+	: __LL_SC_CLOBBERS, ##cl);					\
 									\
 	return w0;							\
 }
@@ -116,7 +116,7 @@ static inline void atomic_and(int i, atomic_t *v)
 	"	stclr	%w[i], %[v]")
 	: [i] "+r" (w0), [v] "+Q" (v->counter)
 	: "r" (x1)
-	: "x30");
+	: __LL_SC_CLOBBERS);
 }
 
 static inline void atomic_sub(int i, atomic_t *v)
@@ -133,7 +133,7 @@ static inline void atomic_sub(int i, atomic_t *v)
 	"	stadd	%w[i], %[v]")
 	: [i] "+r" (w0), [v] "+Q" (v->counter)
 	: "r" (x1)
-	: "x30");
+	: __LL_SC_CLOBBERS);
 }
 
 #define ATOMIC_OP_SUB_RETURN(name, mb, cl...)				\
@@ -153,7 +153,7 @@ static inline int atomic_sub_return##name(int i, atomic_t *v)		\
 	"	add	%w[i], %w[i], w30")				\
 	: [i] "+r" (w0), [v] "+Q" (v->counter)				\
 	: "r" (x1)							\
-	: "x30" , ##cl);						\
+	: __LL_SC_CLOBBERS , ##cl);					\
 									\
 	return w0;							\
 }
@@ -177,7 +177,7 @@ static inline void atomic64_andnot(long i, atomic64_t *v)
 	"	stclr	%[i], %[v]\n")
 	: [i] "+r" (x0), [v] "+Q" (v->counter)
 	: "r" (x1)
-	: "x30");
+	: __LL_SC_CLOBBERS);
 }
 
 static inline void atomic64_or(long i, atomic64_t *v)
@@ -189,7 +189,7 @@ static inline void atomic64_or(long i, atomic64_t *v)
 	"	stset	%[i], %[v]\n")
 	: [i] "+r" (x0), [v] "+Q" (v->counter)
 	: "r" (x1)
-	: "x30");
+	: __LL_SC_CLOBBERS);
 }
 
 static inline void atomic64_xor(long i, atomic64_t *v)
@@ -201,7 +201,7 @@ static inline void atomic64_xor(long i, atomic64_t *v)
 	"	steor	%[i], %[v]\n")
 	: [i] "+r" (x0), [v] "+Q" (v->counter)
 	: "r" (x1)
-	: "x30");
+	: __LL_SC_CLOBBERS);
 }
 
 static inline void atomic64_add(long i, atomic64_t *v)
@@ -213,7 +213,7 @@ static inline void atomic64_add(long i, atomic64_t *v)
 	"	stadd	%[i], %[v]\n")
 	: [i] "+r" (x0), [v] "+Q" (v->counter)
 	: "r" (x1)
-	: "x30");
+	: __LL_SC_CLOBBERS);
 }
 
 #define ATOMIC64_OP_ADD_RETURN(name, mb, cl...)				\
@@ -231,7 +231,7 @@ static inline long atomic64_add_return##name(long i, atomic64_t *v)	\
 	"	add	%[i], %[i], x30")				\
 	: [i] "+r" (x0), [v] "+Q" (v->counter)				\
 	: "r" (x1)							\
-	: "x30" , ##cl);						\
+	: __LL_SC_CLOBBERS, ##cl);					\
 									\
 	return x0;							\
 }
@@ -257,7 +257,7 @@ static inline void atomic64_and(long i, atomic64_t *v)
 	"	stclr	%[i], %[v]")
 	: [i] "+r" (x0), [v] "+Q" (v->counter)
 	: "r" (x1)
-	: "x30");
+	: __LL_SC_CLOBBERS);
 }
 
 static inline void atomic64_sub(long i, atomic64_t *v)
@@ -274,7 +274,7 @@ static inline void atomic64_sub(long i, atomic64_t *v)
 	"	stadd	%[i], %[v]")
 	: [i] "+r" (x0), [v] "+Q" (v->counter)
 	: "r" (x1)
-	: "x30");
+	: __LL_SC_CLOBBERS);
 }
 
 #define ATOMIC64_OP_SUB_RETURN(name, mb, cl...)				\
@@ -294,7 +294,7 @@ static inline long atomic64_sub_return##name(long i, atomic64_t *v)	\
 	"	add	%[i], %[i], x30")				\
 	: [i] "+r" (x0), [v] "+Q" (v->counter)				\
 	: "r" (x1)							\
-	: "x30" , ##cl);						\
+	: __LL_SC_CLOBBERS, ##cl);					\
 									\
 	return x0;							\
 }
@@ -330,7 +330,7 @@ static inline long atomic64_dec_if_positive(atomic64_t *v)
 	"2:")
 	: [ret] "+&r" (x0), [v] "+Q" (v->counter)
 	:
-	: "x30", "cc", "memory");
+	: __LL_SC_CLOBBERS, "cc", "memory");
 
 	return x0;
 }
@@ -359,7 +359,7 @@ static inline unsigned long __cmpxchg_case_##name(volatile void *ptr,	\
 	"	mov	%" #w "[ret], " #w "30")			\
 	: [ret] "+r" (x0), [v] "+Q" (*(unsigned long *)ptr)		\
 	: [old] "r" (x1), [new] "r" (x2)				\
-	: "x30" , ##cl);						\
+	: __LL_SC_CLOBBERS, ##cl);					\
 									\
 	return x0;							\
 }
@@ -416,7 +416,7 @@ static inline long __cmpxchg_double##name(unsigned long old1,		\
 	  [v] "+Q" (*(unsigned long *)ptr)				\
 	: [new1] "r" (x2), [new2] "r" (x3), [ptr] "r" (x4),		\
 	  [oldval1] "r" (oldval1), [oldval2] "r" (oldval2)		\
-	: "x30" , ##cl);						\
+	: __LL_SC_CLOBBERS, ##cl);					\
 									\
 	return x0;							\
 }
diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h
index 3de42d68611d..23acc00be32d 100644
--- a/arch/arm64/include/asm/lse.h
+++ b/arch/arm64/include/asm/lse.h
@@ -26,6 +26,7 @@ __asm__(".arch_extension	lse");
 
 /* Macro for constructing calls to out-of-line ll/sc atomics */
 #define __LL_SC_CALL(op)	"bl\t" __stringify(__LL_SC_PREFIX(op)) "\n"
+#define __LL_SC_CLOBBERS	"x16", "x17", "x30"
 
 /* In-line patching at runtime */
 #define ARM64_LSE_ATOMIC_INSN(llsc, lse)				\
diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile
index 1a811ecf71da..c86b7909ef31 100644
--- a/arch/arm64/lib/Makefile
+++ b/arch/arm64/lib/Makefile
@@ -4,15 +4,16 @@ lib-y		:= bitops.o clear_user.o delay.o copy_from_user.o	\
 		   memcmp.o strcmp.o strncmp.o strlen.o strnlen.o	\
 		   strchr.o strrchr.o
 
-# Tell the compiler to treat all general purpose registers as
-# callee-saved, which allows for efficient runtime patching of the bl
-# instruction in the caller with an atomic instruction when supported by
-# the CPU. Result and argument registers are handled correctly, based on
-# the function prototype.
+# Tell the compiler to treat all general purpose registers (with the
+# exception of the IP registers, which are already handled by the caller
+# in case of a PLT) as callee-saved, which allows for efficient runtime
+# patching of the bl instruction in the caller with an atomic instruction
+# when supported by the CPU. Result and argument registers are handled
+# correctly, based on the function prototype.
 lib-$(CONFIG_ARM64_LSE_ATOMICS) += atomic_ll_sc.o
 CFLAGS_atomic_ll_sc.o	:= -fcall-used-x0 -ffixed-x1 -ffixed-x2		\
 		   -ffixed-x3 -ffixed-x4 -ffixed-x5 -ffixed-x6		\
 		   -ffixed-x7 -fcall-saved-x8 -fcall-saved-x9		\
 		   -fcall-saved-x10 -fcall-saved-x11 -fcall-saved-x12	\
 		   -fcall-saved-x13 -fcall-saved-x14 -fcall-saved-x15	\
-		   -fcall-saved-x16 -fcall-saved-x17 -fcall-saved-x18
+		   -fcall-saved-x18

From b78c702db9fefface6f68cfade7a1afbadd829ab Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Fri, 26 Feb 2016 17:57:14 +0100
Subject: [PATCH 349/424] arm64: mm: treat memstart_addr as a signed quantity

Commit c031a4213c11 ("arm64: kaslr: randomize the linear region")
implements randomization of the linear region, by subtracting a random
multiple of PUD_SIZE from memstart_addr. This causes the virtual mapping
of system RAM to move upwards in the linear region, and at the same time
causes memstart_addr to assume a value which may be negative if the offset
of system RAM in the physical space is smaller than its offset relative to
PAGE_OFFSET in the virtual space.

Since memstart_addr is effectively an offset now, redefine its type as s64
so that expressions involving shifting or division preserve its sign.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
(cherry picked from commit 020d044f66874eba058ce8264fc550f3eca67879)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/include/asm/memory.h | 2 +-
 arch/arm64/mm/init.c            | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 5f8667a99e41..12f8a00fb3f1 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -135,7 +135,7 @@
 #include <linux/bitops.h>
 #include <linux/mmdebug.h>
 
-extern phys_addr_t		memstart_addr;
+extern s64			memstart_addr;
 /* PHYS_OFFSET - the physical address of the start of memory. */
 #define PHYS_OFFSET		({ VM_BUG_ON(memstart_addr & 1); memstart_addr; })
 
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 19ccdb73c680..9db46dfb6afb 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -54,7 +54,7 @@
  * executes, which assigns it its actual value. So use a default value
  * that cannot be mistaken for a real physical address.
  */
-phys_addr_t memstart_addr __read_mostly = ~0ULL;
+s64 memstart_addr __read_mostly = -1;
 phys_addr_t arm64_dma_phys_limit __read_mostly;
 
 #ifdef CONFIG_BLK_DEV_INITRD
@@ -188,7 +188,7 @@ void __init arm64_memblock_init(void)
 	 * linear mapping. Take care not to clip the kernel which may be
 	 * high in memory.
 	 */
-	memblock_remove(max(memstart_addr + linear_region_size, __pa(_end)),
+	memblock_remove(max_t(u64, memstart_addr + linear_region_size, __pa(_end)),
 			ULLONG_MAX);
 	if (memblock_end_of_DRAM() > linear_region_size)
 		memblock_remove(0, memblock_end_of_DRAM() - linear_region_size);

From 200d9e78dba04ae2a5ee4b847f389758db5152dd Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Thu, 10 Mar 2016 18:30:56 +0000
Subject: [PATCH 350/424] arm64: kasan: Use actual memory node when populating
 the kernel image shadow

With the 16KB or 64KB page configurations, the generic
vmemmap_populate() implementation warns on potential offnode
page_structs via vmemmap_verify() because the arm64 kasan_init() passes
NUMA_NO_NODE instead of the actual node for the kernel image memory.

Fixes: f9040773b7bb ("arm64: move kernel image to base of vmalloc area")
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Reported-by: James Morse <james.morse@arm.com>
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
(cherry picked from commit 2f76969f2eef051bdd63d38b08d78e790440b0ad)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/arm64/mm/kasan_init.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index 206dd95ea292..757009daa9ed 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -152,7 +152,8 @@ void __init kasan_init(void)
 
 	clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
 
-	vmemmap_populate(kimg_shadow_start, kimg_shadow_end, NUMA_NO_NODE);
+	vmemmap_populate(kimg_shadow_start, kimg_shadow_end,
+			 pfn_to_nid(virt_to_pfn(_text)));
 
 	/*
 	 * vmemmap_populate() has populated the shadow region that covers the

From 5dd612ebfad71f5463d89ff92d1bc307cd286b5d Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Wed, 23 Mar 2016 16:00:46 +0100
Subject: [PATCH 351/424] parisc: Use generic extable search and sort routines

Switch to the generic extable search and sort routines which were introduced
with commit a272858 from Ard Biesheuvel. This saves quite some memory in the
vmlinux binary with the 64bit kernel.

Signed-off-by: Helge Deller <deller@gmx.de>
(cherry picked from commit 0de798584bdedfdad19db21e3c7aec84f252f4f3)
Signed-off-by: Alex Shi <alex.shi@linaro.org>
---
 arch/parisc/Kconfig                | 1 +
 arch/parisc/include/asm/assembly.h | 2 +-
 arch/parisc/include/asm/uaccess.h  | 7 ++++---
 arch/parisc/mm/fault.c             | 9 ++-------
 scripts/sortextable.c              | 1 +
 5 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 729f89163bc3..d2256fa97ea0 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -11,6 +11,7 @@ config PARISC
 	select RTC_DRV_GENERIC
 	select INIT_ALL_POSSIBLE
 	select BUG
+	select BUILDTIME_EXTABLE_SORT
 	select HAVE_PERF_EVENTS
 	select GENERIC_ATOMIC64 if !64BIT
 	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
diff --git a/arch/parisc/include/asm/assembly.h b/arch/parisc/include/asm/assembly.h
index b3069fd83468..60e6f07b7e32 100644
--- a/arch/parisc/include/asm/assembly.h
+++ b/arch/parisc/include/asm/assembly.h
@@ -523,7 +523,7 @@
 	 */
 #define ASM_EXCEPTIONTABLE_ENTRY(fault_addr, except_addr)	\
 	.section __ex_table,"aw"			!	\
-	ASM_ULONG_INSN	fault_addr, except_addr		!	\
+	.word (fault_addr - .), (except_addr - .)	!	\
 	.previous
 
 
diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h
index 1960b87c1c8b..6f893d29f1b2 100644
--- a/arch/parisc/include/asm/uaccess.h
+++ b/arch/parisc/include/asm/uaccess.h
@@ -60,14 +60,15 @@ static inline long access_ok(int type, const void __user * addr,
  * use a 32bit (unsigned int) address here.
  */
 
+#define ARCH_HAS_RELATIVE_EXTABLE
 struct exception_table_entry {
-	unsigned long insn;	/* address of insn that is allowed to fault. */
-	unsigned long fixup;	/* fixup routine */
+	int insn;	/* relative address of insn that is allowed to fault. */
+	int fixup;	/* relative address of fixup routine */
 };
 
 #define ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr )\
 	".section __ex_table,\"aw\"\n"			   \
-	ASM_WORD_INSN #fault_addr ", " #except_addr "\n\t" \
+	".word (" #fault_addr " - .), (" #except_addr " - .)\n\t" \
 	".previous\n"
 
 /*
diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c
index f9064449908a..16dbe81c97c9 100644
--- a/arch/parisc/mm/fault.c
+++ b/arch/parisc/mm/fault.c
@@ -140,12 +140,6 @@ int fixup_exception(struct pt_regs *regs)
 {
 	const struct exception_table_entry *fix;
 
-	/* If we only stored 32bit addresses in the exception table we can drop
-	 * out if we faulted on a 64bit address. */
-	if ((sizeof(regs->iaoq[0]) > sizeof(fix->insn))
-		&& (regs->iaoq[0] >> 32))
-			return 0;
-
 	fix = search_exception_tables(regs->iaoq[0]);
 	if (fix) {
 		struct exception_data *d;
@@ -155,7 +149,8 @@ int fixup_exception(struct pt_regs *regs)
 		d->fault_space = regs->isr;
 		d->fault_addr = regs->ior;
 
-		regs->iaoq[0] = ((fix->fixup) & ~3);
+		regs->iaoq[0] = (unsigned long)&fix->fixup + fix->fixup;
+		regs->iaoq[0] &= ~3;
 		/*
 		 * NOTE: In some cases the faulting instruction
 		 * may be in the delay slot of a branch. We
diff --git a/scripts/sortextable.c b/scripts/sortextable.c
index 19d83647846c..a2c0d620ca80 100644
--- a/scripts/sortextable.c
+++ b/scripts/sortextable.c
@@ -283,6 +283,7 @@ do_file(char const *const fname)
 	case EM_X86_64:
 	case EM_S390:
 	case EM_AARCH64:
+	case EM_PARISC:
 		custom_sort = sort_relative_table;
 		break;
 	case EM_ARCOMPACT:

From c21fec188ab43661e5bf08d9d377ecafc7a8abf9 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sun, 10 Apr 2016 23:01:30 -0400
Subject: [PATCH 352/424] decnet: Do not build routes to devices without decnet
 private data.

[ Upstream commit a36a0d4008488fa545c74445d69eaf56377d5d4e ]

In particular, make sure we check for decnet private presence
for loopback devices.

Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/decnet/dn_route.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 607a14f20d88..b1dc096d22f8 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1034,10 +1034,13 @@ source_ok:
 	if (!fld.daddr) {
 		fld.daddr = fld.saddr;
 
-		err = -EADDRNOTAVAIL;
 		if (dev_out)
 			dev_put(dev_out);
+		err = -EINVAL;
 		dev_out = init_net.loopback_dev;
+		if (!dev_out->dn_ptr)
+			goto out;
+		err = -EADDRNOTAVAIL;
 		dev_hold(dev_out);
 		if (!fld.daddr) {
 			fld.daddr =
@@ -1110,6 +1113,8 @@ source_ok:
 		if (dev_out == NULL)
 			goto out;
 		dn_db = rcu_dereference_raw(dev_out->dn_ptr);
+		if (!dn_db)
+			goto e_inval;
 		/* Possible improvement - check all devices for local addr */
 		if (dn_dev_islocal(dev_out, fld.daddr)) {
 			dev_put(dev_out);
@@ -1151,6 +1156,8 @@ select_source:
 			dev_put(dev_out);
 		dev_out = init_net.loopback_dev;
 		dev_hold(dev_out);
+		if (!dev_out->dn_ptr)
+			goto e_inval;
 		fld.flowidn_oif = dev_out->ifindex;
 		if (res.fi)
 			dn_fib_info_put(res.fi);

From d0bfda58b414487f06ac0911674a9808f71d3ab1 Mon Sep 17 00:00:00 2001
From: Chris Friesen <chris.friesen@windriver.com>
Date: Fri, 8 Apr 2016 15:21:30 -0600
Subject: [PATCH 353/424] route: do not cache fib route info on local routes
 with oif

[ Upstream commit d6d5e999e5df67f8ec20b6be45e2229455ee3699 ]

For local routes that require a particular output interface we do not want
to cache the result.  Caching the result causes incorrect behaviour when
there are multiple source addresses on the interface.  The end result
being that if the intended recipient is waiting on that interface for the
packet he won't receive it because it will be delivered on the loopback
interface and the IP_PKTINFO ipi_ifindex will be set to the loopback
interface as well.

This can be tested by running a program such as "dhcp_release" which
attempts to inject a packet on a particular interface so that it is
received by another program on the same board.  The receiving process
should see an IP_PKTINFO ipi_ifndex value of the source interface
(e.g., eth1) instead of the loopback interface (e.g., lo).  The packet
will still appear on the loopback interface in tcpdump but the important
aspect is that the CMSG info is correct.

Sample dhcp_release command line:

   dhcp_release eth1 192.168.204.222 02:11:33:22:44:66

Signed-off-by: Allain Legacy <allain.legacy@windriver.com>
Signed off-by: Chris Friesen <chris.friesen@windriver.com>
Reviewed-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/route.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 02c62299d717..b050cf980a57 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2045,6 +2045,18 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
 		 */
 		if (fi && res->prefixlen < 4)
 			fi = NULL;
+	} else if ((type == RTN_LOCAL) && (orig_oif != 0) &&
+		   (orig_oif != dev_out->ifindex)) {
+		/* For local routes that require a particular output interface
+		 * we do not want to cache the result.  Caching the result
+		 * causes incorrect behaviour when there are multiple source
+		 * addresses on the interface, the end result being that if the
+		 * intended recipient is waiting on that interface for the
+		 * packet he won't receive it because it will be delivered on
+		 * the loopback interface and the IP_PKTINFO ipi_ifindex will
+		 * be set to the loopback interface as well.
+		 */
+		fi = NULL;
 	}
 
 	fnhe = NULL;

From a416c9483c719cc9160b4b9af09085dfe461383f Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Sun, 10 Apr 2016 12:52:28 +0200
Subject: [PATCH 354/424] packet: fix heap info leak in PACKET_DIAG_MCLIST
 sock_diag interface

[ Upstream commit 309cf37fe2a781279b7675d4bb7173198e532867 ]

Because we miss to wipe the remainder of i->addr[] in packet_mc_add(),
pdiag_put_mclist() leaks uninitialized heap bytes via the
PACKET_DIAG_MCLIST netlink attribute.

Fix this by explicitly memset(0)ing the remaining bytes in i->addr[].

Fixes: eea68e2f1a00 ("packet: Report socket mclist info via diag module")
Signed-off-by: Mathias Krause <minipli@googlemail.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Acked-by: Pavel Emelyanov <xemul@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/packet/af_packet.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index da1ae0e13cb5..9cc7b512b472 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -3436,6 +3436,7 @@ static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
 	i->ifindex = mreq->mr_ifindex;
 	i->alen = mreq->mr_alen;
 	memcpy(i->addr, mreq->mr_address, i->alen);
+	memset(i->addr + i->alen, 0, sizeof(i->addr) - i->alen);
 	i->count = 1;
 	i->next = po->mclist;
 	po->mclist = i;

From 5c136901ef17cd42d56b5c02135a0c67fb58424f Mon Sep 17 00:00:00 2001
From: Lars Persson <lars.persson@axis.com>
Date: Tue, 12 Apr 2016 08:45:52 +0200
Subject: [PATCH 355/424] net: sched: do not requeue a NULL skb

[ Upstream commit 3dcd493fbebfd631913df6e2773cc295d3bf7d22 ]

A failure in validate_xmit_skb_list() triggered an unconditional call
to dev_requeue_skb with skb=NULL. This slowly grows the queue
discipline's qlen count until all traffic through the queue stops.

We take the optimistic approach and continue running the queue after a
failure since it is unknown if later packets also will fail in the
validate path.

Fixes: 55a93b3ea780 ("qdisc: validate skb without holding lock")
Signed-off-by: Lars Persson <larper@axis.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/sched/sch_generic.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 16bc83b2842a..aa4725038f94 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -159,12 +159,15 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
 	if (validate)
 		skb = validate_xmit_skb_list(skb, dev);
 
-	if (skb) {
+	if (likely(skb)) {
 		HARD_TX_LOCK(dev, txq, smp_processor_id());
 		if (!netif_xmit_frozen_or_stopped(txq))
 			skb = dev_hard_start_xmit(skb, dev, txq, &ret);
 
 		HARD_TX_UNLOCK(dev, txq);
+	} else {
+		spin_lock(root_lock);
+		return qdisc_qlen(q);
 	}
 	spin_lock(root_lock);
 

From 8427d5547d0b63beb70d3858127942f828400ad2 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@fb.com>
Date: Tue, 12 Apr 2016 10:26:19 -0700
Subject: [PATCH 356/424] bpf/verifier: reject invalid LD_ABS | BPF_DW
 instruction

[ Upstream commit d82bccc69041a51f7b7b9b4a36db0772f4cdba21 ]

verifier must check for reserved size bits in instruction opcode and
reject BPF_LD | BPF_ABS | BPF_DW and BPF_LD | BPF_IND | BPF_DW instructions,
otherwise interpreter will WARN_RATELIMIT on them during execution.

Fixes: ddd872bc3098 ("bpf: verifier: add checks for BPF_ABS | BPF_IND instructions")
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/bpf/verifier.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 2e7f7ab739e4..c21cb146086c 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1348,6 +1348,7 @@ static int check_ld_abs(struct verifier_env *env, struct bpf_insn *insn)
 	}
 
 	if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
+	    BPF_SIZE(insn->code) == BPF_DW ||
 	    (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
 		verbose("BPF_LD_ABS uses reserved fields\n");
 		return -EINVAL;

From 1d794379798b33797d9afe4a477bfd89ce399184 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= <bjorn@mork.no>
Date: Tue, 12 Apr 2016 16:11:12 +0200
Subject: [PATCH 357/424] cdc_mbim: apply "NDP to end" quirk to all Huawei
 devices
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit c5b5343cfbc9f46af65033fa4f407d7b7d98371d ]

We now have a positive report of another Huawei device needing
this quirk: The ME906s-158 (12d1:15c1).  This is an m.2 form
factor modem with no obvious relationship to the E3372 (12d1:157d)
we already have a quirk entry for.  This is reason enough to
believe the quirk might be necessary for any number of current
and future Huawei devices.

Applying the quirk to all Huawei devices, since it is crucial
to any device affected by the firmware bug, while the impact
on non-affected devices is negligible.

The quirk can if necessary be disabled per-device by writing
N to /sys/class/net/<iface>/cdc_ncm/ndp_to_end

Reported-by: Andreas Fett <andreas.fett@secunet.com>
Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/usb/cdc_mbim.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c
index bdd83d95ec0a..96a5028621c8 100644
--- a/drivers/net/usb/cdc_mbim.c
+++ b/drivers/net/usb/cdc_mbim.c
@@ -617,8 +617,13 @@ static const struct usb_device_id mbim_devs[] = {
 	{ USB_VENDOR_AND_INTERFACE_INFO(0x0bdb, USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE),
 	  .driver_info = (unsigned long)&cdc_mbim_info,
 	},
-	/* Huawei E3372 fails unless NDP comes after the IP packets */
-	{ USB_DEVICE_AND_INTERFACE_INFO(0x12d1, 0x157d, USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE),
+
+	/* Some Huawei devices, ME906s-158 (12d1:15c1) and E3372
+	 * (12d1:157d), are known to fail unless the NDP is placed
+	 * after the IP packets.  Applying the quirk to all Huawei
+	 * devices is broader than necessary, but harmless.
+	 */
+	{ USB_VENDOR_AND_INTERFACE_INFO(0x12d1, USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE),
 	  .driver_info = (unsigned long)&cdc_mbim_info_ndp_to_end,
 	},
 	/* default entry */

From 79fdabe8706bd240c083a848f64081a4bc46d7af Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sat, 20 Feb 2016 00:29:30 +0100
Subject: [PATCH 358/424] net: use skb_postpush_rcsum instead of own
 implementations

[ Upstream commit 6b83d28a55a891a9d70fc61ccb1c138e47dcbe74 ]

Replace individual implementations with the recently introduced
skb_postpush_rcsum() helper.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Tom Herbert <tom@herbertland.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/core/skbuff.c              | 4 +---
 net/ipv6/reassembly.c          | 6 ++----
 net/openvswitch/actions.c      | 8 +++-----
 net/openvswitch/vport-netdev.c | 2 +-
 net/openvswitch/vport.h        | 7 -------
 5 files changed, 7 insertions(+), 20 deletions(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 8616d1147c93..78abe110c7a5 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4433,9 +4433,7 @@ int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
 		skb->mac_len += VLAN_HLEN;
 		__skb_pull(skb, offset);
 
-		if (skb->ip_summed == CHECKSUM_COMPLETE)
-			skb->csum = csum_add(skb->csum, csum_partial(skb->data
-					+ (2 * ETH_ALEN), VLAN_HLEN, 0));
+		skb_postpush_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN);
 	}
 	__vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci);
 	return 0;
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 45f5ae51de65..a234552a7e3d 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -496,10 +496,8 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
 	IP6CB(head)->flags |= IP6SKB_FRAGMENTED;
 
 	/* Yes, and fold redundant checksum back. 8) */
-	if (head->ip_summed == CHECKSUM_COMPLETE)
-		head->csum = csum_partial(skb_network_header(head),
-					  skb_network_header_len(head),
-					  head->csum);
+	skb_postpush_rcsum(head, skb_network_header(head),
+			   skb_network_header_len(head));
 
 	rcu_read_lock();
 	IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index c88d0f2d3e01..7aef0c8bd2c5 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -158,9 +158,7 @@ static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,
 	new_mpls_lse = (__be32 *)skb_mpls_header(skb);
 	*new_mpls_lse = mpls->mpls_lse;
 
-	if (skb->ip_summed == CHECKSUM_COMPLETE)
-		skb->csum = csum_add(skb->csum, csum_partial(new_mpls_lse,
-							     MPLS_HLEN, 0));
+	skb_postpush_rcsum(skb, new_mpls_lse, MPLS_HLEN);
 
 	hdr = eth_hdr(skb);
 	hdr->h_proto = mpls->mpls_ethertype;
@@ -280,7 +278,7 @@ static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *flow_key,
 	ether_addr_copy_masked(eth_hdr(skb)->h_dest, key->eth_dst,
 			       mask->eth_dst);
 
-	ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);
+	skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);
 
 	ether_addr_copy(flow_key->eth.src, eth_hdr(skb)->h_source);
 	ether_addr_copy(flow_key->eth.dst, eth_hdr(skb)->h_dest);
@@ -639,7 +637,7 @@ static int ovs_vport_output(struct net *net, struct sock *sk, struct sk_buff *sk
 	/* Reconstruct the MAC header.  */
 	skb_push(skb, data->l2_len);
 	memcpy(skb->data, &data->l2_data, data->l2_len);
-	ovs_skb_postpush_rcsum(skb, skb->data, data->l2_len);
+	skb_postpush_rcsum(skb, skb->data, data->l2_len);
 	skb_reset_mac_header(skb);
 
 	ovs_vport_send(vport, skb);
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index 6b0190b987ec..76fcaf1fd2a9 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -58,7 +58,7 @@ static void netdev_port_receive(struct sk_buff *skb)
 		return;
 
 	skb_push(skb, ETH_HLEN);
-	ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN);
+	skb_postpush_rcsum(skb, skb->data, ETH_HLEN);
 	ovs_vport_receive(vport, skb, skb_tunnel_info(skb));
 	return;
 error:
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index 8ea3a96980ac..6e2b62f9d595 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -184,13 +184,6 @@ static inline struct vport *vport_from_priv(void *priv)
 int ovs_vport_receive(struct vport *, struct sk_buff *,
 		      const struct ip_tunnel_info *);
 
-static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb,
-				      const void *start, unsigned int len)
-{
-	if (skb->ip_summed == CHECKSUM_COMPLETE)
-		skb->csum = csum_add(skb->csum, csum_partial(start, len, 0));
-}
-
 static inline const char *ovs_vport_name(struct vport *vport)
 {
 	return vport->dev->name;

From a66ce519a3e9aca63739da0d716555588df5cc48 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sat, 16 Apr 2016 02:27:58 +0200
Subject: [PATCH 359/424] vlan: pull on __vlan_insert_tag error path and fix
 csum correction

[ Upstream commit 9241e2df4fbc648a92ea0752918e05c26255649e ]

When __vlan_insert_tag() fails from skb_vlan_push() path due to the
skb_cow_head(), we need to undo the __skb_push() in the error path
as well that was done earlier to move skb->data pointer to mac header.

Moreover, I noticed that when in the non-error path the __skb_pull()
is done and the original offset to mac header was non-zero, we fixup
from a wrong skb->data offset in the checksum complete processing.

So the skb_postpush_rcsum() really needs to be done before __skb_pull()
where skb->data still points to the mac header start and thus operates
under the same conditions as in __vlan_insert_tag().

Fixes: 93515d53b133 ("net: move vlan pop/push functions into common code")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/core/skbuff.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 78abe110c7a5..9835d9a8a7a4 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4427,13 +4427,16 @@ int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
 		__skb_push(skb, offset);
 		err = __vlan_insert_tag(skb, skb->vlan_proto,
 					skb_vlan_tag_get(skb));
-		if (err)
+		if (err) {
+			__skb_pull(skb, offset);
 			return err;
+		}
+
 		skb->protocol = skb->vlan_proto;
 		skb->mac_len += VLAN_HLEN;
-		__skb_pull(skb, offset);
 
 		skb_postpush_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN);
+		__skb_pull(skb, offset);
 	}
 	__vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci);
 	return 0;

From b5c9a73c501e8aed86dd578309813c7818ca248c Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Wed, 20 Apr 2016 23:23:08 +0100
Subject: [PATCH 360/424] atl2: Disable unimplemented scatter/gather feature

[ Upstream commit f43bfaeddc79effbf3d0fcb53ca477cca66f3db8 ]

atl2 includes NETIF_F_SG in hw_features even though it has no support
for non-linear skbs.  This bug was originally harmless since the
driver does not claim to implement checksum offload and that used to
be a requirement for SG.

Now that SG and checksum offload are independent features, if you
explicitly enable SG *and* use one of the rare protocols that can use
SG without checkusm offload, this potentially leaks sensitive
information (before you notice that it just isn't working).  Therefore
this obscure bug has been designated CVE-2016-2117.

Reported-by: Justin Yackoski <jyackoski@crypto-nite.com>
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Fixes: ec5f06156423 ("net: Kill link between CSUM and SG features.")
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/atheros/atlx/atl2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/atheros/atlx/atl2.c b/drivers/net/ethernet/atheros/atlx/atl2.c
index 8f76f4558a88..2ff465848b65 100644
--- a/drivers/net/ethernet/atheros/atlx/atl2.c
+++ b/drivers/net/ethernet/atheros/atlx/atl2.c
@@ -1412,7 +1412,7 @@ static int atl2_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	err = -EIO;
 
-	netdev->hw_features = NETIF_F_SG | NETIF_F_HW_VLAN_CTAG_RX;
+	netdev->hw_features = NETIF_F_HW_VLAN_CTAG_RX;
 	netdev->features |= (NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX);
 
 	/* Init PHY as early as possible due to power saving issue  */

From 2a33f756ae1986a67d145f050bf4350e5ee17746 Mon Sep 17 00:00:00 2001
From: Simon Horman <simon.horman@netronome.com>
Date: Thu, 21 Apr 2016 11:49:15 +1000
Subject: [PATCH 361/424] openvswitch: use flow protocol when recalculating
 ipv6 checksums

[ Upstream commit b4f70527f052b0c00be4d7cac562baa75b212df5 ]

When using masked actions the ipv6_proto field of an action
to set IPv6 fields may be zero rather than the prevailing protocol
which will result in skipping checksum recalculation.

This patch resolves the problem by relying on the protocol
in the flow key rather than that in the set field action.

Fixes: 83d2b9ba1abc ("net: openvswitch: Support masked set actions.")
Cc: Jarno Rajahalme <jrajahalme@nicira.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/openvswitch/actions.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 7aef0c8bd2c5..7cb8184ac165 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -461,7 +461,7 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key,
 		mask_ipv6_addr(saddr, key->ipv6_src, mask->ipv6_src, masked);
 
 		if (unlikely(memcmp(saddr, masked, sizeof(masked)))) {
-			set_ipv6_addr(skb, key->ipv6_proto, saddr, masked,
+			set_ipv6_addr(skb, flow_key->ip.proto, saddr, masked,
 				      true);
 			memcpy(&flow_key->ipv6.addr.src, masked,
 			       sizeof(flow_key->ipv6.addr.src));
@@ -483,7 +483,7 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key,
 							     NULL, &flags)
 					       != NEXTHDR_ROUTING);
 
-			set_ipv6_addr(skb, key->ipv6_proto, daddr, masked,
+			set_ipv6_addr(skb, flow_key->ip.proto, daddr, masked,
 				      recalc_csum);
 			memcpy(&flow_key->ipv6.addr.dst, masked,
 			       sizeof(flow_key->ipv6.addr.dst));

From 3032b09874a4709f8529d620dd270e14a56eb61d Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@mellanox.com>
Date: Fri, 22 Apr 2016 00:33:03 +0300
Subject: [PATCH 362/424] net/mlx5e: Device's mtu field is u16 and not int

[ Upstream commit 046339eaab26804f52f6604877f5674f70815b26 ]

For set/query MTU port firmware commands the MTU field
is 16 bits, here I changed all the "int mtu" parameters
of the functions wrapping those firmware commands to be u16.

Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/infiniband/hw/mlx5/main.c                 |  4 ++--
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c |  4 ++--
 drivers/net/ethernet/mellanox/mlx5/core/port.c    | 10 +++++-----
 include/linux/mlx5/driver.h                       |  6 +++---
 4 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 721d63f5b461..fd17443aeacd 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -405,8 +405,8 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port,
 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
 	struct mlx5_core_dev *mdev = dev->mdev;
 	struct mlx5_hca_vport_context *rep;
-	int max_mtu;
-	int oper_mtu;
+	u16 max_mtu;
+	u16 oper_mtu;
 	int err;
 	u8 ib_link_width_oper;
 	u8 vl_hw_cap;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 1203d892e842..d6a7213649f6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1372,7 +1372,7 @@ static int mlx5e_set_dev_port_mtu(struct net_device *netdev)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 	struct mlx5_core_dev *mdev = priv->mdev;
-	int hw_mtu;
+	u16 hw_mtu;
 	int err;
 
 	err = mlx5_set_port_mtu(mdev, MLX5E_SW2HW_MTU(netdev->mtu), 1);
@@ -1896,7 +1896,7 @@ static int mlx5e_change_mtu(struct net_device *netdev, int new_mtu)
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 	struct mlx5_core_dev *mdev = priv->mdev;
 	bool was_opened;
-	int max_mtu;
+	u16 max_mtu;
 	int err = 0;
 
 	mlx5_query_port_max_mtu(mdev, &max_mtu, 1);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c
index a87e773e93f3..53a793bc2e3d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -246,8 +246,8 @@ int mlx5_query_port_admin_status(struct mlx5_core_dev *dev,
 }
 EXPORT_SYMBOL_GPL(mlx5_query_port_admin_status);
 
-static void mlx5_query_port_mtu(struct mlx5_core_dev *dev, int *admin_mtu,
-				int *max_mtu, int *oper_mtu, u8 port)
+static void mlx5_query_port_mtu(struct mlx5_core_dev *dev, u16 *admin_mtu,
+				u16 *max_mtu, u16 *oper_mtu, u8 port)
 {
 	u32 in[MLX5_ST_SZ_DW(pmtu_reg)];
 	u32 out[MLX5_ST_SZ_DW(pmtu_reg)];
@@ -267,7 +267,7 @@ static void mlx5_query_port_mtu(struct mlx5_core_dev *dev, int *admin_mtu,
 		*admin_mtu = MLX5_GET(pmtu_reg, out, admin_mtu);
 }
 
-int mlx5_set_port_mtu(struct mlx5_core_dev *dev, int mtu, u8 port)
+int mlx5_set_port_mtu(struct mlx5_core_dev *dev, u16 mtu, u8 port)
 {
 	u32 in[MLX5_ST_SZ_DW(pmtu_reg)];
 	u32 out[MLX5_ST_SZ_DW(pmtu_reg)];
@@ -282,14 +282,14 @@ int mlx5_set_port_mtu(struct mlx5_core_dev *dev, int mtu, u8 port)
 }
 EXPORT_SYMBOL_GPL(mlx5_set_port_mtu);
 
-void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, int *max_mtu,
+void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, u16 *max_mtu,
 			     u8 port)
 {
 	mlx5_query_port_mtu(dev, NULL, max_mtu, NULL, port);
 }
 EXPORT_SYMBOL_GPL(mlx5_query_port_max_mtu);
 
-void mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, int *oper_mtu,
+void mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, u16 *oper_mtu,
 			      u8 port)
 {
 	mlx5_query_port_mtu(dev, NULL, NULL, oper_mtu, port);
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index af3efd9157f0..412aa988c6ad 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -792,9 +792,9 @@ int mlx5_set_port_admin_status(struct mlx5_core_dev *dev,
 int mlx5_query_port_admin_status(struct mlx5_core_dev *dev,
 				 enum mlx5_port_status *status);
 
-int mlx5_set_port_mtu(struct mlx5_core_dev *dev, int mtu, u8 port);
-void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, int *max_mtu, u8 port);
-void mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, int *oper_mtu,
+int mlx5_set_port_mtu(struct mlx5_core_dev *dev, u16 mtu, u8 port);
+void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, u16 *max_mtu, u8 port);
+void mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, u16 *oper_mtu,
 			      u8 port);
 
 int mlx5_query_port_vl_hw_cap(struct mlx5_core_dev *dev,

From da465bd9391efca3649ab34c3575a2b27ac384e3 Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@mellanox.com>
Date: Fri, 22 Apr 2016 00:33:04 +0300
Subject: [PATCH 363/424] net/mlx5e: Fix minimum MTU

[ Upstream commit d8edd2469ace550db707798180d1c84d81f93bca ]

Minimum MTU that can be set in Connectx4 device is 68.

This fixes the case where a user wants to set invalid MTU,
the driver will fail to satisfy this request and the interface
will stay down.

It is better to report an error and continue working with old
mtu.

Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index d6a7213649f6..cbd17e25beeb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1891,22 +1891,27 @@ static int mlx5e_set_features(struct net_device *netdev,
 	return err;
 }
 
+#define MXL5_HW_MIN_MTU 64
+#define MXL5E_MIN_MTU (MXL5_HW_MIN_MTU + ETH_FCS_LEN)
+
 static int mlx5e_change_mtu(struct net_device *netdev, int new_mtu)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 	struct mlx5_core_dev *mdev = priv->mdev;
 	bool was_opened;
 	u16 max_mtu;
+	u16 min_mtu;
 	int err = 0;
 
 	mlx5_query_port_max_mtu(mdev, &max_mtu, 1);
 
 	max_mtu = MLX5E_HW2SW_MTU(max_mtu);
+	min_mtu = MLX5E_HW2SW_MTU(MXL5E_MIN_MTU);
 
-	if (new_mtu > max_mtu) {
+	if (new_mtu > max_mtu || new_mtu < min_mtu) {
 		netdev_err(netdev,
-			   "%s: Bad MTU (%d) > (%d) Max\n",
-			   __func__, new_mtu, max_mtu);
+			   "%s: Bad MTU (%d), valid range is: [%d..%d]\n",
+			   __func__, new_mtu, min_mtu, max_mtu);
 		return -EINVAL;
 	}
 

From 0633185047d91174ee2cbc8f1bd59e75e3c45ccd Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Thu, 21 Apr 2016 22:23:31 +0200
Subject: [PATCH 364/424] ipv4/fib: don't warn when primary address is missing
 if in_dev is dead

[ Upstream commit 391a20333b8393ef2e13014e6e59d192c5594471 ]

After commit fbd40ea0180a ("ipv4: Don't do expensive useless work
during inetdev destroy.") when deleting an interface,
fib_del_ifaddr() can be executed without any primary address
present on the dead interface.

The above is safe, but triggers some "bug: prim == NULL" warnings.

This commit avoids warning if the in_dev is dead

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/fib_frontend.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 8a9246deccfe..63566ec54794 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -904,7 +904,11 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
 	if (ifa->ifa_flags & IFA_F_SECONDARY) {
 		prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
 		if (!prim) {
-			pr_warn("%s: bug: prim == NULL\n", __func__);
+			/* if the device has been deleted, we don't perform
+			 * address promotion
+			 */
+			if (!in_dev->dead)
+				pr_warn("%s: bug: prim == NULL\n", __func__);
 			return;
 		}
 		if (iprim && iprim != prim) {

From 828255b591768b4e5a762df21a0133065ddcb44b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 23 Apr 2016 11:35:46 -0700
Subject: [PATCH 365/424] net/mlx4_en: fix spurious timestamping callbacks

[ Upstream commit fc96256c906362e845d848d0f6a6354450059e81 ]

When multiple skb are TX-completed in a row, we might incorrectly keep
a timestamp of a prior skb and cause extra work.

Fixes: ec693d47010e8 ("net/mlx4_en: Add HW timestamping (TS) support")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Willem de Bruijn <willemb@google.com>
Reviewed-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/mellanox/mlx4/en_tx.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 4421bf5463f6..e4019a803a9c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -400,7 +400,6 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev,
 	u32 packets = 0;
 	u32 bytes = 0;
 	int factor = priv->cqe_factor;
-	u64 timestamp = 0;
 	int done = 0;
 	int budget = priv->tx_work_limit;
 	u32 last_nr_txbb;
@@ -440,9 +439,12 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev,
 		new_index = be16_to_cpu(cqe->wqe_index) & size_mask;
 
 		do {
+			u64 timestamp = 0;
+
 			txbbs_skipped += last_nr_txbb;
 			ring_index = (ring_index + last_nr_txbb) & size_mask;
-			if (ring->tx_info[ring_index].ts_requested)
+
+			if (unlikely(ring->tx_info[ring_index].ts_requested))
 				timestamp = mlx4_en_get_cqe_ts(cqe);
 
 			/* free next descriptor */

From 608d2c3c7a046c222cae2e857cf648a9f89e772b Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh@google.com>
Date: Tue, 26 Apr 2016 22:26:26 +0200
Subject: [PATCH 366/424] bpf: fix double-fdput in
 replace_map_fd_with_map_ptr()

[ Upstream commit 8358b02bf67d3a5d8a825070e1aa73f25fb2e4c7 ]

When bpf(BPF_PROG_LOAD, ...) was invoked with a BPF program whose bytecode
references a non-map file descriptor as a map file descriptor, the error
handling code called fdput() twice instead of once (in __bpf_map_get() and
in replace_map_fd_with_map_ptr()). If the file descriptor table of the
current task is shared, this causes f_count to be decremented too much,
allowing the struct file to be freed while it is still in use
(use-after-free). This can be exploited to gain root privileges by an
unprivileged user.

This bug was introduced in
commit 0246e64d9a5f ("bpf: handle pseudo BPF_LD_IMM64 insn"), but is only
exploitable since
commit 1be7f75d1668 ("bpf: enable non-root eBPF programs") because
previously, CAP_SYS_ADMIN was required to reach the vulnerable code.

(posted publicly according to request by maintainer)

Signed-off-by: Jann Horn <jannh@google.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/bpf/verifier.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index c21cb146086c..e3798cf7f49d 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2004,7 +2004,6 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env)
 			if (IS_ERR(map)) {
 				verbose("fd %d is not pointing to valid bpf_map\n",
 					insn->imm);
-				fdput(f);
 				return PTR_ERR(map);
 			}
 

From 3899251bdb9c2b31fc73d4cc132f52d3710101de Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@fb.com>
Date: Wed, 27 Apr 2016 18:56:20 -0700
Subject: [PATCH 367/424] bpf: fix refcnt overflow

[ Upstream commit 92117d8443bc5afacc8d5ba82e541946310f106e ]

On a system with >32Gbyte of phyiscal memory and infinite RLIMIT_MEMLOCK,
the malicious application may overflow 32-bit bpf program refcnt.
It's also possible to overflow map refcnt on 1Tb system.
Impose 32k hard limit which means that the same bpf program or
map cannot be shared by more than 32k processes.

Fixes: 1be7f75d1668 ("bpf: enable non-root eBPF programs")
Reported-by: Jann Horn <jannh@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/bpf.h   |  3 ++-
 kernel/bpf/inode.c    |  7 ++++---
 kernel/bpf/syscall.c  | 24 ++++++++++++++++++++----
 kernel/bpf/verifier.c | 11 +++++++----
 4 files changed, 33 insertions(+), 12 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 83d1926c61e4..67bc2da5d233 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -165,12 +165,13 @@ void bpf_register_prog_type(struct bpf_prog_type_list *tl);
 void bpf_register_map_type(struct bpf_map_type_list *tl);
 
 struct bpf_prog *bpf_prog_get(u32 ufd);
+struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog);
 void bpf_prog_put(struct bpf_prog *prog);
 void bpf_prog_put_rcu(struct bpf_prog *prog);
 
 struct bpf_map *bpf_map_get_with_uref(u32 ufd);
 struct bpf_map *__bpf_map_get(struct fd f);
-void bpf_map_inc(struct bpf_map *map, bool uref);
+struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref);
 void bpf_map_put_with_uref(struct bpf_map *map);
 void bpf_map_put(struct bpf_map *map);
 
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index 5a8a797d50b7..d1a7646f79c5 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -31,10 +31,10 @@ static void *bpf_any_get(void *raw, enum bpf_type type)
 {
 	switch (type) {
 	case BPF_TYPE_PROG:
-		atomic_inc(&((struct bpf_prog *)raw)->aux->refcnt);
+		raw = bpf_prog_inc(raw);
 		break;
 	case BPF_TYPE_MAP:
-		bpf_map_inc(raw, true);
+		raw = bpf_map_inc(raw, true);
 		break;
 	default:
 		WARN_ON_ONCE(1);
@@ -277,7 +277,8 @@ static void *bpf_obj_do_get(const struct filename *pathname,
 		goto out;
 
 	raw = bpf_any_get(inode->i_private, *type);
-	touch_atime(&path);
+	if (!IS_ERR(raw))
+		touch_atime(&path);
 
 	path_put(&path);
 	return raw;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 3b39550d8485..4e32cc94edd9 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -181,11 +181,18 @@ struct bpf_map *__bpf_map_get(struct fd f)
 	return f.file->private_data;
 }
 
-void bpf_map_inc(struct bpf_map *map, bool uref)
+/* prog's and map's refcnt limit */
+#define BPF_MAX_REFCNT 32768
+
+struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref)
 {
-	atomic_inc(&map->refcnt);
+	if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) {
+		atomic_dec(&map->refcnt);
+		return ERR_PTR(-EBUSY);
+	}
 	if (uref)
 		atomic_inc(&map->usercnt);
+	return map;
 }
 
 struct bpf_map *bpf_map_get_with_uref(u32 ufd)
@@ -197,7 +204,7 @@ struct bpf_map *bpf_map_get_with_uref(u32 ufd)
 	if (IS_ERR(map))
 		return map;
 
-	bpf_map_inc(map, true);
+	map = bpf_map_inc(map, true);
 	fdput(f);
 
 	return map;
@@ -580,6 +587,15 @@ static struct bpf_prog *__bpf_prog_get(struct fd f)
 	return f.file->private_data;
 }
 
+struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
+{
+	if (atomic_inc_return(&prog->aux->refcnt) > BPF_MAX_REFCNT) {
+		atomic_dec(&prog->aux->refcnt);
+		return ERR_PTR(-EBUSY);
+	}
+	return prog;
+}
+
 /* called by sockets/tracing/seccomp before attaching program to an event
  * pairs with bpf_prog_put()
  */
@@ -592,7 +608,7 @@ struct bpf_prog *bpf_prog_get(u32 ufd)
 	if (IS_ERR(prog))
 		return prog;
 
-	atomic_inc(&prog->aux->refcnt);
+	prog = bpf_prog_inc(prog);
 	fdput(f);
 
 	return prog;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index e3798cf7f49d..5a615c188001 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2023,15 +2023,18 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env)
 				return -E2BIG;
 			}
 
-			/* remember this map */
-			env->used_maps[env->used_map_cnt++] = map;
-
 			/* hold the map. If the program is rejected by verifier,
 			 * the map will be released by release_maps() or it
 			 * will be used by the valid program until it's unloaded
 			 * and all maps are released in free_bpf_prog_info()
 			 */
-			bpf_map_inc(map, false);
+			map = bpf_map_inc(map, false);
+			if (IS_ERR(map)) {
+				fdput(f);
+				return PTR_ERR(map);
+			}
+			env->used_maps[env->used_map_cnt++] = map;
+
 			fdput(f);
 next_insn:
 			insn++;

From bb10156f572f06f3b6cadd378e5a0ab3ed8da991 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@fb.com>
Date: Wed, 27 Apr 2016 18:56:21 -0700
Subject: [PATCH 368/424] bpf: fix check_map_func_compatibility logic

[ Upstream commit 6aff67c85c9e5a4bc99e5211c1bac547936626ca ]

The commit 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU conuter")
introduced clever way to check bpf_helper<->map_type compatibility.
Later on commit a43eec304259 ("bpf: introduce bpf_perf_event_output() helper") adjusted
the logic and inadvertently broke it.
Get rid of the clever bool compare and go back to two-way check
from map and from helper perspective.

Fixes: a43eec304259 ("bpf: introduce bpf_perf_event_output() helper")
Reported-by: Jann Horn <jannh@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/bpf/verifier.c | 53 ++++++++++++++++++++++++++-----------------
 1 file changed, 32 insertions(+), 21 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 5a615c188001..2cbfba78d3db 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -239,15 +239,6 @@ static const char * const reg_type_str[] = {
 	[CONST_IMM]		= "imm",
 };
 
-static const struct {
-	int map_type;
-	int func_id;
-} func_limit[] = {
-	{BPF_MAP_TYPE_PROG_ARRAY, BPF_FUNC_tail_call},
-	{BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_read},
-	{BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_output},
-};
-
 static void print_verifier_state(struct verifier_env *env)
 {
 	enum bpf_reg_type t;
@@ -898,24 +889,44 @@ static int check_func_arg(struct verifier_env *env, u32 regno,
 
 static int check_map_func_compatibility(struct bpf_map *map, int func_id)
 {
-	bool bool_map, bool_func;
-	int i;
-
 	if (!map)
 		return 0;
 
-	for (i = 0; i < ARRAY_SIZE(func_limit); i++) {
-		bool_map = (map->map_type == func_limit[i].map_type);
-		bool_func = (func_id == func_limit[i].func_id);
-		/* only when map & func pair match it can continue.
-		 * don't allow any other map type to be passed into
-		 * the special func;
-		 */
-		if (bool_func && bool_map != bool_func)
-			return -EINVAL;
+	/* We need a two way check, first is from map perspective ... */
+	switch (map->map_type) {
+	case BPF_MAP_TYPE_PROG_ARRAY:
+		if (func_id != BPF_FUNC_tail_call)
+			goto error;
+		break;
+	case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
+		if (func_id != BPF_FUNC_perf_event_read &&
+		    func_id != BPF_FUNC_perf_event_output)
+			goto error;
+		break;
+	default:
+		break;
+	}
+
+	/* ... and second from the function itself. */
+	switch (func_id) {
+	case BPF_FUNC_tail_call:
+		if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
+			goto error;
+		break;
+	case BPF_FUNC_perf_event_read:
+	case BPF_FUNC_perf_event_output:
+		if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
+			goto error;
+		break;
+	default:
+		break;
 	}
 
 	return 0;
+error:
+	verbose("cannot pass map_type %d into func %d\n",
+		map->map_type, func_id);
+	return -EINVAL;
 }
 
 static int check_call(struct verifier_env *env, int func_id)

From 85256f78bff1c4a24ad8edac63726e8ca5ccedc1 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@fb.com>
Date: Wed, 27 Apr 2016 18:56:22 -0700
Subject: [PATCH 369/424] samples/bpf: fix trace_output example

[ Upstream commit 569cc39d39385a74b23145496bca2df5ac8b2fb8 ]

llvm cannot always recognize memset as builtin function and optimize
it away, so just delete it. It was a leftover from testing
of bpf_perf_event_output() with large data structures.

Fixes: 39111695b1b8 ("samples: bpf: add bpf_perf_event_output example")
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 samples/bpf/trace_output_kern.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/samples/bpf/trace_output_kern.c b/samples/bpf/trace_output_kern.c
index 8d8d1ec429eb..9b96f4fb8cea 100644
--- a/samples/bpf/trace_output_kern.c
+++ b/samples/bpf/trace_output_kern.c
@@ -18,7 +18,6 @@ int bpf_prog1(struct pt_regs *ctx)
 		u64 cookie;
 	} data;
 
-	memset(&data, 0, sizeof(data));
 	data.pid = bpf_get_current_pid_tgid();
 	data.cookie = 0x12345678;
 

From 390d4b3e0d0f1f57ec5d8a0cf3e0d93444563e9c Mon Sep 17 00:00:00 2001
From: Tim Bingham <tbingham@akamai.com>
Date: Fri, 29 Apr 2016 13:30:23 -0400
Subject: [PATCH 370/424] net: Implement net_dbg_ratelimited() for
 CONFIG_DYNAMIC_DEBUG case

[ Upstream commit 2c94b53738549d81dc7464a32117d1f5112c64d3 ]

Prior to commit d92cff89a0c8 ("net_dbg_ratelimited: turn into no-op
when !DEBUG") the implementation of net_dbg_ratelimited() was buggy
for both the DEBUG and CONFIG_DYNAMIC_DEBUG cases.

The bug was that net_ratelimit() was being called and, despite
returning true, nothing was being printed to the console. This
resulted in messages like the following -

"net_ratelimit: %d callbacks suppressed"

with no other output nearby.

After commit d92cff89a0c8 ("net_dbg_ratelimited: turn into no-op when
!DEBUG") the bug is fixed for the DEBUG case. However, there's no
output at all for CONFIG_DYNAMIC_DEBUG case.

This patch restores debug output (if enabled) for the
CONFIG_DYNAMIC_DEBUG case.

Add a definition of net_dbg_ratelimited() for the CONFIG_DYNAMIC_DEBUG
case. The implementation takes care to check that dynamic debugging is
enabled before calling net_ratelimit().

Fixes: d92cff89a0c8 ("net_dbg_ratelimited: turn into no-op when !DEBUG")
Signed-off-by: Tim Bingham <tbingham@akamai.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/net.h | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/include/linux/net.h b/include/linux/net.h
index 0b4ac7da583a..25ef630f1bd6 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -245,7 +245,15 @@ do {								\
 	net_ratelimited_function(pr_warn, fmt, ##__VA_ARGS__)
 #define net_info_ratelimited(fmt, ...)				\
 	net_ratelimited_function(pr_info, fmt, ##__VA_ARGS__)
-#if defined(DEBUG)
+#if defined(CONFIG_DYNAMIC_DEBUG)
+#define net_dbg_ratelimited(fmt, ...)					\
+do {									\
+	DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt);			\
+	if (unlikely(descriptor.flags & _DPRINTK_FLAGS_PRINT) &&	\
+	    net_ratelimit())						\
+		__dynamic_pr_debug(&descriptor, fmt, ##__VA_ARGS__);	\
+} while (0)
+#elif defined(DEBUG)
 #define net_dbg_ratelimited(fmt, ...)				\
 	net_ratelimited_function(pr_debug, fmt, ##__VA_ARGS__)
 #else

From c985780791efec375865c0fbd21794d631c68fce Mon Sep 17 00:00:00 2001
From: Jiri Benc <jbenc@redhat.com>
Date: Fri, 29 Apr 2016 23:31:32 +0200
Subject: [PATCH 371/424] gre: do not pull header in ICMP error processing

[ Upstream commit b7f8fe251e4609e2a437bd2c2dea01e61db6849c ]

iptunnel_pull_header expects that IP header was already pulled; with this
expectation, it pulls the tunnel header. This is not true in gre_err.
Furthermore, ipv4_update_pmtu and ipv4_redirect expect that skb->data points
to the IP header.

We cannot pull the tunnel header in this path. It's just a matter of not
calling iptunnel_pull_header - we don't need any of its effects.

Fixes: bda7bb463436 ("gre: Allow multiple protocol listener for gre protocol.")
Signed-off-by: Jiri Benc <jbenc@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/ip_gre.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 614521437e30..7dc962b89fa1 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -180,6 +180,7 @@ static __be16 tnl_flags_to_gre_flags(__be16 tflags)
 	return flags;
 }
 
+/* Fills in tpi and returns header length to be pulled. */
 static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
 			    bool *csum_err)
 {
@@ -239,7 +240,7 @@ static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
 				return -EINVAL;
 		}
 	}
-	return iptunnel_pull_header(skb, hdr_len, tpi->proto);
+	return hdr_len;
 }
 
 static void ipgre_err(struct sk_buff *skb, u32 info,
@@ -342,7 +343,7 @@ static void gre_err(struct sk_buff *skb, u32 info)
 	struct tnl_ptk_info tpi;
 	bool csum_err = false;
 
-	if (parse_gre_header(skb, &tpi, &csum_err)) {
+	if (parse_gre_header(skb, &tpi, &csum_err) < 0) {
 		if (!csum_err)		/* ignore csum errors. */
 			return;
 	}
@@ -420,6 +421,7 @@ static int gre_rcv(struct sk_buff *skb)
 {
 	struct tnl_ptk_info tpi;
 	bool csum_err = false;
+	int hdr_len;
 
 #ifdef CONFIG_NET_IPGRE_BROADCAST
 	if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
@@ -429,7 +431,10 @@ static int gre_rcv(struct sk_buff *skb)
 	}
 #endif
 
-	if (parse_gre_header(skb, &tpi, &csum_err) < 0)
+	hdr_len = parse_gre_header(skb, &tpi, &csum_err);
+	if (hdr_len < 0)
+		goto drop;
+	if (iptunnel_pull_header(skb, hdr_len, tpi.proto) < 0)
 		goto drop;
 
 	if (ipgre_rcv(skb, &tpi) == PACKET_RCVD)

From 1188e1403a772d9698c06bcc53c44783536ff09e Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Thu, 25 Feb 2016 14:55:00 -0800
Subject: [PATCH 372/424] net_sched: introduce qdisc_replace() helper

[ Upstream commit 86a7996cc8a078793670d82ed97d5a99bb4e8496 ]

Remove nearly duplicated code and prepare for the following patch.

Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/sch_generic.h | 17 +++++++++++++++++
 net/sched/sch_cbq.c       |  7 +------
 net/sched/sch_drr.c       |  6 +-----
 net/sched/sch_dsmark.c    |  8 +-------
 net/sched/sch_hfsc.c      |  6 +-----
 net/sched/sch_htb.c       |  9 +--------
 net/sched/sch_multiq.c    |  8 +-------
 net/sched/sch_netem.c     | 10 +---------
 net/sched/sch_prio.c      |  8 +-------
 net/sched/sch_qfq.c       |  6 +-----
 net/sched/sch_red.c       |  7 +------
 net/sched/sch_sfb.c       |  7 +------
 net/sched/sch_tbf.c       |  8 +-------
 13 files changed, 29 insertions(+), 78 deletions(-)

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index b2a8e6338576..4dba2663eaed 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -698,6 +698,23 @@ static inline void qdisc_reset_queue(struct Qdisc *sch)
 	sch->qstats.backlog = 0;
 }
 
+static inline struct Qdisc *qdisc_replace(struct Qdisc *sch, struct Qdisc *new,
+					  struct Qdisc **pold)
+{
+	struct Qdisc *old;
+
+	sch_tree_lock(sch);
+	old = *pold;
+	*pold = new;
+	if (old != NULL) {
+		qdisc_tree_decrease_qlen(old, old->q.qlen);
+		qdisc_reset(old);
+	}
+	sch_tree_unlock(sch);
+
+	return old;
+}
+
 static inline unsigned int __qdisc_queue_drop(struct Qdisc *sch,
 					      struct sk_buff_head *list)
 {
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index c538d9e4a8f6..7f8474cdce32 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1624,13 +1624,8 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
 			new->reshape_fail = cbq_reshape_fail;
 #endif
 	}
-	sch_tree_lock(sch);
-	*old = cl->q;
-	cl->q = new;
-	qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
-	qdisc_reset(*old);
-	sch_tree_unlock(sch);
 
+	*old = qdisc_replace(sch, new, &cl->q);
 	return 0;
 }
 
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index f26bdea875c1..c76cdd423b6f 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -226,11 +226,7 @@ static int drr_graft_class(struct Qdisc *sch, unsigned long arg,
 			new = &noop_qdisc;
 	}
 
-	sch_tree_lock(sch);
-	drr_purge_queue(cl);
-	*old = cl->qdisc;
-	cl->qdisc = new;
-	sch_tree_unlock(sch);
+	*old = qdisc_replace(sch, new, &cl->qdisc);
 	return 0;
 }
 
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index f357f34d02d2..cfddb1c635c3 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -73,13 +73,7 @@ static int dsmark_graft(struct Qdisc *sch, unsigned long arg,
 			new = &noop_qdisc;
 	}
 
-	sch_tree_lock(sch);
-	*old = p->q;
-	p->q = new;
-	qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
-	qdisc_reset(*old);
-	sch_tree_unlock(sch);
-
+	*old = qdisc_replace(sch, new, &p->q);
 	return 0;
 }
 
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index b7ebe2c87586..089f3b667d36 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1215,11 +1215,7 @@ hfsc_graft_class(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
 			new = &noop_qdisc;
 	}
 
-	sch_tree_lock(sch);
-	hfsc_purge_queue(sch, cl);
-	*old = cl->qdisc;
-	cl->qdisc = new;
-	sch_tree_unlock(sch);
+	*old = qdisc_replace(sch, new, &cl->qdisc);
 	return 0;
 }
 
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 15ccd7f8fb2a..0efbcf358cd0 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1163,14 +1163,7 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
 				     cl->common.classid)) == NULL)
 		return -ENOBUFS;
 
-	sch_tree_lock(sch);
-	*old = cl->un.leaf.q;
-	cl->un.leaf.q = new;
-	if (*old != NULL) {
-		qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
-		qdisc_reset(*old);
-	}
-	sch_tree_unlock(sch);
+	*old = qdisc_replace(sch, new, &cl->un.leaf.q);
 	return 0;
 }
 
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 4e904ca0af9d..a0103a138563 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -303,13 +303,7 @@ static int multiq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
 	if (new == NULL)
 		new = &noop_qdisc;
 
-	sch_tree_lock(sch);
-	*old = q->queues[band];
-	q->queues[band] = new;
-	qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
-	qdisc_reset(*old);
-	sch_tree_unlock(sch);
-
+	*old = qdisc_replace(sch, new, &q->queues[band]);
 	return 0;
 }
 
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 5abd1d9de989..0a6ddaf7f561 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -1037,15 +1037,7 @@ static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
 
-	sch_tree_lock(sch);
-	*old = q->qdisc;
-	q->qdisc = new;
-	if (*old) {
-		qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
-		qdisc_reset(*old);
-	}
-	sch_tree_unlock(sch);
-
+	*old = qdisc_replace(sch, new, &q->qdisc);
 	return 0;
 }
 
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index ba6487f2741f..1b4aaec64a24 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -268,13 +268,7 @@ static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
 	if (new == NULL)
 		new = &noop_qdisc;
 
-	sch_tree_lock(sch);
-	*old = q->queues[band];
-	q->queues[band] = new;
-	qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
-	qdisc_reset(*old);
-	sch_tree_unlock(sch);
-
+	*old = qdisc_replace(sch, new, &q->queues[band]);
 	return 0;
 }
 
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 3dc3a6e56052..b5c52caf2e73 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -617,11 +617,7 @@ static int qfq_graft_class(struct Qdisc *sch, unsigned long arg,
 			new = &noop_qdisc;
 	}
 
-	sch_tree_lock(sch);
-	qfq_purge_queue(cl);
-	*old = cl->qdisc;
-	cl->qdisc = new;
-	sch_tree_unlock(sch);
+	*old = qdisc_replace(sch, new, &cl->qdisc);
 	return 0;
 }
 
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 6c0534cc7758..d5abcee454d8 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -313,12 +313,7 @@ static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
 	if (new == NULL)
 		new = &noop_qdisc;
 
-	sch_tree_lock(sch);
-	*old = q->qdisc;
-	q->qdisc = new;
-	qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
-	qdisc_reset(*old);
-	sch_tree_unlock(sch);
+	*old = qdisc_replace(sch, new, &q->qdisc);
 	return 0;
 }
 
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index 5bbb6332ec57..0e74e55fda15 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -606,12 +606,7 @@ static int sfb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
 	if (new == NULL)
 		new = &noop_qdisc;
 
-	sch_tree_lock(sch);
-	*old = q->qdisc;
-	q->qdisc = new;
-	qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
-	qdisc_reset(*old);
-	sch_tree_unlock(sch);
+	*old = qdisc_replace(sch, new, &q->qdisc);
 	return 0;
 }
 
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index a4afde14e865..56a1aef3495f 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -502,13 +502,7 @@ static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
 	if (new == NULL)
 		new = &noop_qdisc;
 
-	sch_tree_lock(sch);
-	*old = q->qdisc;
-	q->qdisc = new;
-	qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
-	qdisc_reset(*old);
-	sch_tree_unlock(sch);
-
+	*old = qdisc_replace(sch, new, &q->qdisc);
 	return 0;
 }
 

From ca375cf34a7186f5b8817082d2b594dcd8cedc8b Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Thu, 25 Feb 2016 14:55:01 -0800
Subject: [PATCH 373/424] net_sched: update hierarchical backlog too

[ Upstream commit 2ccccf5fb43ff62b2b96cc58d95fc0b3596516e4 ]

When the bottom qdisc decides to, for example, drop some packet,
it calls qdisc_tree_decrease_qlen() to update the queue length
for all its ancestors, we need to update the backlog too to
keep the stats on root qdisc accurate.

Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/codel.h       |  4 ++++
 include/net/sch_generic.h |  5 +++--
 net/sched/sch_api.c       |  8 +++++---
 net/sched/sch_cbq.c       |  5 +++--
 net/sched/sch_choke.c     |  6 ++++--
 net/sched/sch_codel.c     | 10 ++++++----
 net/sched/sch_drr.c       |  3 ++-
 net/sched/sch_fq.c        |  4 +++-
 net/sched/sch_fq_codel.c  | 17 ++++++++++++-----
 net/sched/sch_hfsc.c      |  3 ++-
 net/sched/sch_hhf.c       | 10 +++++++---
 net/sched/sch_htb.c       | 10 ++++++----
 net/sched/sch_multiq.c    |  8 +++++---
 net/sched/sch_netem.c     |  3 ++-
 net/sched/sch_pie.c       |  5 +++--
 net/sched/sch_prio.c      |  7 ++++---
 net/sched/sch_qfq.c       |  3 ++-
 net/sched/sch_red.c       |  3 ++-
 net/sched/sch_sfb.c       |  3 ++-
 net/sched/sch_sfq.c       | 16 +++++++++-------
 net/sched/sch_tbf.c       |  7 +++++--
 21 files changed, 91 insertions(+), 49 deletions(-)

diff --git a/include/net/codel.h b/include/net/codel.h
index 267e70210061..d168aca115cc 100644
--- a/include/net/codel.h
+++ b/include/net/codel.h
@@ -162,12 +162,14 @@ struct codel_vars {
  * struct codel_stats - contains codel shared variables and stats
  * @maxpacket:	largest packet we've seen so far
  * @drop_count:	temp count of dropped packets in dequeue()
+ * @drop_len:	bytes of dropped packets in dequeue()
  * ecn_mark:	number of packets we ECN marked instead of dropping
  * ce_mark:	number of packets CE marked because sojourn time was above ce_threshold
  */
 struct codel_stats {
 	u32		maxpacket;
 	u32		drop_count;
+	u32		drop_len;
 	u32		ecn_mark;
 	u32		ce_mark;
 };
@@ -308,6 +310,7 @@ static struct sk_buff *codel_dequeue(struct Qdisc *sch,
 								  vars->rec_inv_sqrt);
 					goto end;
 				}
+				stats->drop_len += qdisc_pkt_len(skb);
 				qdisc_drop(skb, sch);
 				stats->drop_count++;
 				skb = dequeue_func(vars, sch);
@@ -330,6 +333,7 @@ static struct sk_buff *codel_dequeue(struct Qdisc *sch,
 		if (params->ecn && INET_ECN_set_ce(skb)) {
 			stats->ecn_mark++;
 		} else {
+			stats->drop_len += qdisc_pkt_len(skb);
 			qdisc_drop(skb, sch);
 			stats->drop_count++;
 
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 4dba2663eaed..86df0835f6b5 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -396,7 +396,8 @@ struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
 			      struct Qdisc *qdisc);
 void qdisc_reset(struct Qdisc *qdisc);
 void qdisc_destroy(struct Qdisc *qdisc);
-void qdisc_tree_decrease_qlen(struct Qdisc *qdisc, unsigned int n);
+void qdisc_tree_reduce_backlog(struct Qdisc *qdisc, unsigned int n,
+			       unsigned int len);
 struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
 			  const struct Qdisc_ops *ops);
 struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
@@ -707,7 +708,7 @@ static inline struct Qdisc *qdisc_replace(struct Qdisc *sch, struct Qdisc *new,
 	old = *pold;
 	*pold = new;
 	if (old != NULL) {
-		qdisc_tree_decrease_qlen(old, old->q.qlen);
+		qdisc_tree_reduce_backlog(old, old->q.qlen, old->qstats.backlog);
 		qdisc_reset(old);
 	}
 	sch_tree_unlock(sch);
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index af1acf009866..95b560f0b253 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -744,14 +744,15 @@ static u32 qdisc_alloc_handle(struct net_device *dev)
 	return 0;
 }
 
-void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
+void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
+			       unsigned int len)
 {
 	const struct Qdisc_class_ops *cops;
 	unsigned long cl;
 	u32 parentid;
 	int drops;
 
-	if (n == 0)
+	if (n == 0 && len == 0)
 		return;
 	drops = max_t(int, n, 0);
 	rcu_read_lock();
@@ -774,11 +775,12 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
 			cops->put(sch, cl);
 		}
 		sch->q.qlen -= n;
+		sch->qstats.backlog -= len;
 		__qdisc_qstats_drop(sch, drops);
 	}
 	rcu_read_unlock();
 }
-EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
+EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
 
 static void notify_and_destroy(struct net *net, struct sk_buff *skb,
 			       struct nlmsghdr *n, u32 clid,
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 7f8474cdce32..baafddf229ce 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1909,7 +1909,7 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg)
 {
 	struct cbq_sched_data *q = qdisc_priv(sch);
 	struct cbq_class *cl = (struct cbq_class *)arg;
-	unsigned int qlen;
+	unsigned int qlen, backlog;
 
 	if (cl->filters || cl->children || cl == &q->link)
 		return -EBUSY;
@@ -1917,8 +1917,9 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg)
 	sch_tree_lock(sch);
 
 	qlen = cl->q->q.qlen;
+	backlog = cl->q->qstats.backlog;
 	qdisc_reset(cl->q);
-	qdisc_tree_decrease_qlen(cl->q, qlen);
+	qdisc_tree_reduce_backlog(cl->q, qlen, backlog);
 
 	if (cl->next_alive)
 		cbq_deactivate_class(cl);
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index 5ffb8b8337c7..0a08c860eee4 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -128,8 +128,8 @@ static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx)
 		choke_zap_tail_holes(q);
 
 	qdisc_qstats_backlog_dec(sch, skb);
+	qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb));
 	qdisc_drop(skb, sch);
-	qdisc_tree_decrease_qlen(sch, 1);
 	--sch->q.qlen;
 }
 
@@ -456,6 +456,7 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt)
 		old = q->tab;
 		if (old) {
 			unsigned int oqlen = sch->q.qlen, tail = 0;
+			unsigned dropped = 0;
 
 			while (q->head != q->tail) {
 				struct sk_buff *skb = q->tab[q->head];
@@ -467,11 +468,12 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt)
 					ntab[tail++] = skb;
 					continue;
 				}
+				dropped += qdisc_pkt_len(skb);
 				qdisc_qstats_backlog_dec(sch, skb);
 				--sch->q.qlen;
 				qdisc_drop(skb, sch);
 			}
-			qdisc_tree_decrease_qlen(sch, oqlen - sch->q.qlen);
+			qdisc_tree_reduce_backlog(sch, oqlen - sch->q.qlen, dropped);
 			q->head = 0;
 			q->tail = tail;
 		}
diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
index 535007d5f0b5..9b7e2980ee5c 100644
--- a/net/sched/sch_codel.c
+++ b/net/sched/sch_codel.c
@@ -79,12 +79,13 @@ static struct sk_buff *codel_qdisc_dequeue(struct Qdisc *sch)
 
 	skb = codel_dequeue(sch, &q->params, &q->vars, &q->stats, dequeue);
 
-	/* We cant call qdisc_tree_decrease_qlen() if our qlen is 0,
+	/* We cant call qdisc_tree_reduce_backlog() if our qlen is 0,
 	 * or HTB crashes. Defer it for next round.
 	 */
 	if (q->stats.drop_count && sch->q.qlen) {
-		qdisc_tree_decrease_qlen(sch, q->stats.drop_count);
+		qdisc_tree_reduce_backlog(sch, q->stats.drop_count, q->stats.drop_len);
 		q->stats.drop_count = 0;
+		q->stats.drop_len = 0;
 	}
 	if (skb)
 		qdisc_bstats_update(sch, skb);
@@ -116,7 +117,7 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct codel_sched_data *q = qdisc_priv(sch);
 	struct nlattr *tb[TCA_CODEL_MAX + 1];
-	unsigned int qlen;
+	unsigned int qlen, dropped = 0;
 	int err;
 
 	if (!opt)
@@ -156,10 +157,11 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt)
 	while (sch->q.qlen > sch->limit) {
 		struct sk_buff *skb = __skb_dequeue(&sch->q);
 
+		dropped += qdisc_pkt_len(skb);
 		qdisc_qstats_backlog_dec(sch, skb);
 		qdisc_drop(skb, sch);
 	}
-	qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen);
+	qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);
 
 	sch_tree_unlock(sch);
 	return 0;
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index c76cdd423b6f..d6e3ad43cecb 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -53,9 +53,10 @@ static struct drr_class *drr_find_class(struct Qdisc *sch, u32 classid)
 static void drr_purge_queue(struct drr_class *cl)
 {
 	unsigned int len = cl->qdisc->q.qlen;
+	unsigned int backlog = cl->qdisc->qstats.backlog;
 
 	qdisc_reset(cl->qdisc);
-	qdisc_tree_decrease_qlen(cl->qdisc, len);
+	qdisc_tree_reduce_backlog(cl->qdisc, len, backlog);
 }
 
 static const struct nla_policy drr_policy[TCA_DRR_MAX + 1] = {
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 109b2322778f..3c6a47d66a04 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -662,6 +662,7 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
 	struct fq_sched_data *q = qdisc_priv(sch);
 	struct nlattr *tb[TCA_FQ_MAX + 1];
 	int err, drop_count = 0;
+	unsigned drop_len = 0;
 	u32 fq_log;
 
 	if (!opt)
@@ -736,10 +737,11 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
 
 		if (!skb)
 			break;
+		drop_len += qdisc_pkt_len(skb);
 		kfree_skb(skb);
 		drop_count++;
 	}
-	qdisc_tree_decrease_qlen(sch, drop_count);
+	qdisc_tree_reduce_backlog(sch, drop_count, drop_len);
 
 	sch_tree_unlock(sch);
 	return err;
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 4c834e93dafb..d3fc8f9dd3d4 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -175,7 +175,7 @@ static unsigned int fq_codel_qdisc_drop(struct Qdisc *sch)
 static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
 	struct fq_codel_sched_data *q = qdisc_priv(sch);
-	unsigned int idx;
+	unsigned int idx, prev_backlog;
 	struct fq_codel_flow *flow;
 	int uninitialized_var(ret);
 
@@ -203,6 +203,7 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	if (++sch->q.qlen <= sch->limit)
 		return NET_XMIT_SUCCESS;
 
+	prev_backlog = sch->qstats.backlog;
 	q->drop_overlimit++;
 	/* Return Congestion Notification only if we dropped a packet
 	 * from this flow.
@@ -211,7 +212,7 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		return NET_XMIT_CN;
 
 	/* As we dropped a packet, better let upper stack know this */
-	qdisc_tree_decrease_qlen(sch, 1);
+	qdisc_tree_reduce_backlog(sch, 1, prev_backlog - sch->qstats.backlog);
 	return NET_XMIT_SUCCESS;
 }
 
@@ -241,6 +242,7 @@ static struct sk_buff *fq_codel_dequeue(struct Qdisc *sch)
 	struct fq_codel_flow *flow;
 	struct list_head *head;
 	u32 prev_drop_count, prev_ecn_mark;
+	unsigned int prev_backlog;
 
 begin:
 	head = &q->new_flows;
@@ -259,6 +261,7 @@ begin:
 
 	prev_drop_count = q->cstats.drop_count;
 	prev_ecn_mark = q->cstats.ecn_mark;
+	prev_backlog = sch->qstats.backlog;
 
 	skb = codel_dequeue(sch, &q->cparams, &flow->cvars, &q->cstats,
 			    dequeue);
@@ -276,12 +279,14 @@ begin:
 	}
 	qdisc_bstats_update(sch, skb);
 	flow->deficit -= qdisc_pkt_len(skb);
-	/* We cant call qdisc_tree_decrease_qlen() if our qlen is 0,
+	/* We cant call qdisc_tree_reduce_backlog() if our qlen is 0,
 	 * or HTB crashes. Defer it for next round.
 	 */
 	if (q->cstats.drop_count && sch->q.qlen) {
-		qdisc_tree_decrease_qlen(sch, q->cstats.drop_count);
+		qdisc_tree_reduce_backlog(sch, q->cstats.drop_count,
+					  q->cstats.drop_len);
 		q->cstats.drop_count = 0;
+		q->cstats.drop_len = 0;
 	}
 	return skb;
 }
@@ -372,11 +377,13 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt)
 	while (sch->q.qlen > sch->limit) {
 		struct sk_buff *skb = fq_codel_dequeue(sch);
 
+		q->cstats.drop_len += qdisc_pkt_len(skb);
 		kfree_skb(skb);
 		q->cstats.drop_count++;
 	}
-	qdisc_tree_decrease_qlen(sch, q->cstats.drop_count);
+	qdisc_tree_reduce_backlog(sch, q->cstats.drop_count, q->cstats.drop_len);
 	q->cstats.drop_count = 0;
+	q->cstats.drop_len = 0;
 
 	sch_tree_unlock(sch);
 	return 0;
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 089f3b667d36..d783d7cc3348 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -895,9 +895,10 @@ static void
 hfsc_purge_queue(struct Qdisc *sch, struct hfsc_class *cl)
 {
 	unsigned int len = cl->qdisc->q.qlen;
+	unsigned int backlog = cl->qdisc->qstats.backlog;
 
 	qdisc_reset(cl->qdisc);
-	qdisc_tree_decrease_qlen(cl->qdisc, len);
+	qdisc_tree_reduce_backlog(cl->qdisc, len, backlog);
 }
 
 static void
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index 86b04e31e60b..13d6f83ec491 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -382,6 +382,7 @@ static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	struct hhf_sched_data *q = qdisc_priv(sch);
 	enum wdrr_bucket_idx idx;
 	struct wdrr_bucket *bucket;
+	unsigned int prev_backlog;
 
 	idx = hhf_classify(skb, sch);
 
@@ -409,6 +410,7 @@ static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	if (++sch->q.qlen <= sch->limit)
 		return NET_XMIT_SUCCESS;
 
+	prev_backlog = sch->qstats.backlog;
 	q->drop_overlimit++;
 	/* Return Congestion Notification only if we dropped a packet from this
 	 * bucket.
@@ -417,7 +419,7 @@ static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		return NET_XMIT_CN;
 
 	/* As we dropped a packet, better let upper stack know this. */
-	qdisc_tree_decrease_qlen(sch, 1);
+	qdisc_tree_reduce_backlog(sch, 1, prev_backlog - sch->qstats.backlog);
 	return NET_XMIT_SUCCESS;
 }
 
@@ -527,7 +529,7 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct hhf_sched_data *q = qdisc_priv(sch);
 	struct nlattr *tb[TCA_HHF_MAX + 1];
-	unsigned int qlen;
+	unsigned int qlen, prev_backlog;
 	int err;
 	u64 non_hh_quantum;
 	u32 new_quantum = q->quantum;
@@ -577,12 +579,14 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt)
 	}
 
 	qlen = sch->q.qlen;
+	prev_backlog = sch->qstats.backlog;
 	while (sch->q.qlen > sch->limit) {
 		struct sk_buff *skb = hhf_dequeue(sch);
 
 		kfree_skb(skb);
 	}
-	qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen);
+	qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen,
+				  prev_backlog - sch->qstats.backlog);
 
 	sch_tree_unlock(sch);
 	return 0;
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 0efbcf358cd0..846a7f98cef9 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1265,7 +1265,6 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
 {
 	struct htb_sched *q = qdisc_priv(sch);
 	struct htb_class *cl = (struct htb_class *)arg;
-	unsigned int qlen;
 	struct Qdisc *new_q = NULL;
 	int last_child = 0;
 
@@ -1285,9 +1284,11 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
 	sch_tree_lock(sch);
 
 	if (!cl->level) {
-		qlen = cl->un.leaf.q->q.qlen;
+		unsigned int qlen = cl->un.leaf.q->q.qlen;
+		unsigned int backlog = cl->un.leaf.q->qstats.backlog;
+
 		qdisc_reset(cl->un.leaf.q);
-		qdisc_tree_decrease_qlen(cl->un.leaf.q, qlen);
+		qdisc_tree_reduce_backlog(cl->un.leaf.q, qlen, backlog);
 	}
 
 	/* delete from hash and active; remainder in destroy_class */
@@ -1421,10 +1422,11 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 		sch_tree_lock(sch);
 		if (parent && !parent->level) {
 			unsigned int qlen = parent->un.leaf.q->q.qlen;
+			unsigned int backlog = parent->un.leaf.q->qstats.backlog;
 
 			/* turn parent into inner node */
 			qdisc_reset(parent->un.leaf.q);
-			qdisc_tree_decrease_qlen(parent->un.leaf.q, qlen);
+			qdisc_tree_reduce_backlog(parent->un.leaf.q, qlen, backlog);
 			qdisc_destroy(parent->un.leaf.q);
 			if (parent->prio_activity)
 				htb_deactivate(q, parent);
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index a0103a138563..bcdd54bb101c 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -218,7 +218,8 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt)
 		if (q->queues[i] != &noop_qdisc) {
 			struct Qdisc *child = q->queues[i];
 			q->queues[i] = &noop_qdisc;
-			qdisc_tree_decrease_qlen(child, child->q.qlen);
+			qdisc_tree_reduce_backlog(child, child->q.qlen,
+						  child->qstats.backlog);
 			qdisc_destroy(child);
 		}
 	}
@@ -238,8 +239,9 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt)
 				q->queues[i] = child;
 
 				if (old != &noop_qdisc) {
-					qdisc_tree_decrease_qlen(old,
-								 old->q.qlen);
+					qdisc_tree_reduce_backlog(old,
+								  old->q.qlen,
+								  old->qstats.backlog);
 					qdisc_destroy(old);
 				}
 				sch_tree_unlock(sch);
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 0a6ddaf7f561..9640bb39a5d2 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -598,7 +598,8 @@ deliver:
 				if (unlikely(err != NET_XMIT_SUCCESS)) {
 					if (net_xmit_drop_count(err)) {
 						qdisc_qstats_drop(sch);
-						qdisc_tree_decrease_qlen(sch, 1);
+						qdisc_tree_reduce_backlog(sch, 1,
+									  qdisc_pkt_len(skb));
 					}
 				}
 				goto tfifo_dequeue;
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index b783a446d884..71ae3b9629f9 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -183,7 +183,7 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct pie_sched_data *q = qdisc_priv(sch);
 	struct nlattr *tb[TCA_PIE_MAX + 1];
-	unsigned int qlen;
+	unsigned int qlen, dropped = 0;
 	int err;
 
 	if (!opt)
@@ -232,10 +232,11 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt)
 	while (sch->q.qlen > sch->limit) {
 		struct sk_buff *skb = __skb_dequeue(&sch->q);
 
+		dropped += qdisc_pkt_len(skb);
 		qdisc_qstats_backlog_dec(sch, skb);
 		qdisc_drop(skb, sch);
 	}
-	qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen);
+	qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);
 
 	sch_tree_unlock(sch);
 	return 0;
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 1b4aaec64a24..fee1b15506b2 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -191,7 +191,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
 		struct Qdisc *child = q->queues[i];
 		q->queues[i] = &noop_qdisc;
 		if (child != &noop_qdisc) {
-			qdisc_tree_decrease_qlen(child, child->q.qlen);
+			qdisc_tree_reduce_backlog(child, child->q.qlen, child->qstats.backlog);
 			qdisc_destroy(child);
 		}
 	}
@@ -210,8 +210,9 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
 				q->queues[i] = child;
 
 				if (old != &noop_qdisc) {
-					qdisc_tree_decrease_qlen(old,
-								 old->q.qlen);
+					qdisc_tree_reduce_backlog(old,
+								  old->q.qlen,
+								  old->qstats.backlog);
 					qdisc_destroy(old);
 				}
 				sch_tree_unlock(sch);
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index b5c52caf2e73..8d2d8d953432 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -220,9 +220,10 @@ static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid)
 static void qfq_purge_queue(struct qfq_class *cl)
 {
 	unsigned int len = cl->qdisc->q.qlen;
+	unsigned int backlog = cl->qdisc->qstats.backlog;
 
 	qdisc_reset(cl->qdisc);
-	qdisc_tree_decrease_qlen(cl->qdisc, len);
+	qdisc_tree_reduce_backlog(cl->qdisc, len, backlog);
 }
 
 static const struct nla_policy qfq_policy[TCA_QFQ_MAX + 1] = {
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index d5abcee454d8..8c0508c0e287 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -210,7 +210,8 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
 	q->flags = ctl->flags;
 	q->limit = ctl->limit;
 	if (child) {
-		qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
+		qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
+					  q->qdisc->qstats.backlog);
 		qdisc_destroy(q->qdisc);
 		q->qdisc = child;
 	}
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index 0e74e55fda15..c69611640fa5 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -510,7 +510,8 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt)
 
 	sch_tree_lock(sch);
 
-	qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
+	qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
+				  q->qdisc->qstats.backlog);
 	qdisc_destroy(q->qdisc);
 	q->qdisc = child;
 
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 3abab534eb5c..498f0a2cb47f 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -346,7 +346,7 @@ static int
 sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
-	unsigned int hash;
+	unsigned int hash, dropped;
 	sfq_index x, qlen;
 	struct sfq_slot *slot;
 	int uninitialized_var(ret);
@@ -461,7 +461,7 @@ enqueue:
 		return NET_XMIT_SUCCESS;
 
 	qlen = slot->qlen;
-	sfq_drop(sch);
+	dropped = sfq_drop(sch);
 	/* Return Congestion Notification only if we dropped a packet
 	 * from this flow.
 	 */
@@ -469,7 +469,7 @@ enqueue:
 		return NET_XMIT_CN;
 
 	/* As we dropped a packet, better let upper stack know this */
-	qdisc_tree_decrease_qlen(sch, 1);
+	qdisc_tree_reduce_backlog(sch, 1, dropped);
 	return NET_XMIT_SUCCESS;
 }
 
@@ -537,6 +537,7 @@ static void sfq_rehash(struct Qdisc *sch)
 	struct sfq_slot *slot;
 	struct sk_buff_head list;
 	int dropped = 0;
+	unsigned int drop_len = 0;
 
 	__skb_queue_head_init(&list);
 
@@ -565,6 +566,7 @@ static void sfq_rehash(struct Qdisc *sch)
 			if (x >= SFQ_MAX_FLOWS) {
 drop:
 				qdisc_qstats_backlog_dec(sch, skb);
+				drop_len += qdisc_pkt_len(skb);
 				kfree_skb(skb);
 				dropped++;
 				continue;
@@ -594,7 +596,7 @@ drop:
 		}
 	}
 	sch->q.qlen -= dropped;
-	qdisc_tree_decrease_qlen(sch, dropped);
+	qdisc_tree_reduce_backlog(sch, dropped, drop_len);
 }
 
 static void sfq_perturbation(unsigned long arg)
@@ -618,7 +620,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
 	struct sfq_sched_data *q = qdisc_priv(sch);
 	struct tc_sfq_qopt *ctl = nla_data(opt);
 	struct tc_sfq_qopt_v1 *ctl_v1 = NULL;
-	unsigned int qlen;
+	unsigned int qlen, dropped = 0;
 	struct red_parms *p = NULL;
 
 	if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
@@ -667,8 +669,8 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
 
 	qlen = sch->q.qlen;
 	while (sch->q.qlen > q->limit)
-		sfq_drop(sch);
-	qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen);
+		dropped += sfq_drop(sch);
+	qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);
 
 	del_timer(&q->perturb_timer);
 	if (q->perturb_period) {
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 56a1aef3495f..c2fbde742f37 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -160,6 +160,7 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch)
 	struct tbf_sched_data *q = qdisc_priv(sch);
 	struct sk_buff *segs, *nskb;
 	netdev_features_t features = netif_skb_features(skb);
+	unsigned int len = 0, prev_len = qdisc_pkt_len(skb);
 	int ret, nb;
 
 	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
@@ -172,6 +173,7 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch)
 		nskb = segs->next;
 		segs->next = NULL;
 		qdisc_skb_cb(segs)->pkt_len = segs->len;
+		len += segs->len;
 		ret = qdisc_enqueue(segs, q->qdisc);
 		if (ret != NET_XMIT_SUCCESS) {
 			if (net_xmit_drop_count(ret))
@@ -183,7 +185,7 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch)
 	}
 	sch->q.qlen += nb;
 	if (nb > 1)
-		qdisc_tree_decrease_qlen(sch, 1 - nb);
+		qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len);
 	consume_skb(skb);
 	return nb > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP;
 }
@@ -399,7 +401,8 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
 
 	sch_tree_lock(sch);
 	if (child) {
-		qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
+		qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
+					  q->qdisc->qstats.backlog);
 		qdisc_destroy(q->qdisc);
 		q->qdisc = child;
 	}

From 67b014f957cfb29f02a71834facf9f3d7ef4b3fd Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Thu, 25 Feb 2016 14:55:02 -0800
Subject: [PATCH 374/424] sch_htb: update backlog as well

[ Upstream commit 431e3a8e36a05a37126f34b41aa3a5a6456af04e ]

We saw qlen!=0 but backlog==0 on our production machine:

qdisc htb 1: dev eth0 root refcnt 2 r2q 10 default 1 direct_packets_stat 0 ver 3.17
 Sent 172680457356 bytes 222469449 pkt (dropped 0, overlimits 123575834 requeues 0)
 backlog 0b 72p requeues 0

The problem is we only count qlen for HTB qdisc but not backlog.
We need to update backlog too when we update qlen, so that we
can at least know the average packet length.

Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/sched/sch_htb.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 846a7f98cef9..87b02ed3d5f2 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -600,6 +600,7 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		htb_activate(q, cl);
 	}
 
+	qdisc_qstats_backlog_inc(sch, skb);
 	sch->q.qlen++;
 	return NET_XMIT_SUCCESS;
 }
@@ -889,6 +890,7 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
 ok:
 		qdisc_bstats_update(sch, skb);
 		qdisc_unthrottled(sch);
+		qdisc_qstats_backlog_dec(sch, skb);
 		sch->q.qlen--;
 		return skb;
 	}
@@ -955,6 +957,7 @@ static unsigned int htb_drop(struct Qdisc *sch)
 			unsigned int len;
 			if (cl->un.leaf.q->ops->drop &&
 			    (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
+				sch->qstats.backlog -= len;
 				sch->q.qlen--;
 				if (!cl->un.leaf.q->q.qlen)
 					htb_deactivate(q, cl);
@@ -984,12 +987,12 @@ static void htb_reset(struct Qdisc *sch)
 			}
 			cl->prio_activity = 0;
 			cl->cmode = HTB_CAN_SEND;
-
 		}
 	}
 	qdisc_watchdog_cancel(&q->watchdog);
 	__skb_queue_purge(&q->direct_queue);
 	sch->q.qlen = 0;
+	sch->qstats.backlog = 0;
 	memset(q->hlevel, 0, sizeof(q->hlevel));
 	memset(q->row_mask, 0, sizeof(q->row_mask));
 	for (i = 0; i < TC_HTB_NUMPRIO; i++)

From 5ecc98e1b8de36e2ac351d174a5f5d92b5085d15 Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Thu, 25 Feb 2016 14:55:03 -0800
Subject: [PATCH 375/424] sch_dsmark: update backlog as well

[ Upstream commit bdf17661f63a79c3cb4209b970b1cc39e34f7543 ]

Similarly, we need to update backlog too when we update qlen.

Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/sched/sch_dsmark.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index cfddb1c635c3..d0dff0cd8186 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -258,6 +258,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		return err;
 	}
 
+	qdisc_qstats_backlog_inc(sch, skb);
 	sch->q.qlen++;
 
 	return NET_XMIT_SUCCESS;
@@ -280,6 +281,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
 		return NULL;
 
 	qdisc_bstats_update(sch, skb);
+	qdisc_qstats_backlog_dec(sch, skb);
 	sch->q.qlen--;
 
 	index = skb->tc_index & (p->indices - 1);
@@ -395,6 +397,7 @@ static void dsmark_reset(struct Qdisc *sch)
 
 	pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
 	qdisc_reset(p->q);
+	sch->qstats.backlog = 0;
 	sch->q.qlen = 0;
 }
 

From 71a783bce6dd0a39937322de683cf4bda2f3a1be Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Mon, 2 May 2016 12:20:15 -0400
Subject: [PATCH 376/424] netem: Segment GSO packets on enqueue

[ Upstream commit 6071bd1aa13ed9e41824bafad845b7b7f4df5cfd ]

This was recently reported to me, and reproduced on the latest net kernel,
when attempting to run netperf from a host that had a netem qdisc attached
to the egress interface:

[  788.073771] ---------------------[ cut here ]---------------------------
[  788.096716] WARNING: at net/core/dev.c:2253 skb_warn_bad_offload+0xcd/0xda()
[  788.129521] bnx2: caps=(0x00000001801949b3, 0x0000000000000000) len=2962
data_len=0 gso_size=1448 gso_type=1 ip_summed=3
[  788.182150] Modules linked in: sch_netem kvm_amd kvm crc32_pclmul ipmi_ssif
ghash_clmulni_intel sp5100_tco amd64_edac_mod aesni_intel lrw gf128mul
glue_helper ablk_helper edac_mce_amd cryptd pcspkr sg edac_core hpilo ipmi_si
i2c_piix4 k10temp fam15h_power hpwdt ipmi_msghandler shpchp acpi_power_meter
pcc_cpufreq nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c
sd_mod crc_t10dif crct10dif_generic mgag200 syscopyarea sysfillrect sysimgblt
i2c_algo_bit drm_kms_helper ahci ata_generic pata_acpi ttm libahci
crct10dif_pclmul pata_atiixp tg3 libata crct10dif_common drm crc32c_intel ptp
serio_raw bnx2 r8169 hpsa pps_core i2c_core mii dm_mirror dm_region_hash dm_log
dm_mod
[  788.465294] CPU: 16 PID: 0 Comm: swapper/16 Tainted: G        W
------------   3.10.0-327.el7.x86_64 #1
[  788.511521] Hardware name: HP ProLiant DL385p Gen8, BIOS A28 12/17/2012
[  788.542260]  ffff880437c036b8 f7afc56532a53db9 ffff880437c03670
ffffffff816351f1
[  788.576332]  ffff880437c036a8 ffffffff8107b200 ffff880633e74200
ffff880231674000
[  788.611943]  0000000000000001 0000000000000003 0000000000000000
ffff880437c03710
[  788.647241] Call Trace:
[  788.658817]  <IRQ>  [<ffffffff816351f1>] dump_stack+0x19/0x1b
[  788.686193]  [<ffffffff8107b200>] warn_slowpath_common+0x70/0xb0
[  788.713803]  [<ffffffff8107b29c>] warn_slowpath_fmt+0x5c/0x80
[  788.741314]  [<ffffffff812f92f3>] ? ___ratelimit+0x93/0x100
[  788.767018]  [<ffffffff81637f49>] skb_warn_bad_offload+0xcd/0xda
[  788.796117]  [<ffffffff8152950c>] skb_checksum_help+0x17c/0x190
[  788.823392]  [<ffffffffa01463a1>] netem_enqueue+0x741/0x7c0 [sch_netem]
[  788.854487]  [<ffffffff8152cb58>] dev_queue_xmit+0x2a8/0x570
[  788.880870]  [<ffffffff8156ae1d>] ip_finish_output+0x53d/0x7d0
...

The problem occurs because netem is not prepared to handle GSO packets (as it
uses skb_checksum_help in its enqueue path, which cannot manipulate these
frames).

The solution I think is to simply segment the skb in a simmilar fashion to the
way we do in __dev_queue_xmit (via validate_xmit_skb), with some minor changes.
When we decide to corrupt an skb, if the frame is GSO, we segment it, corrupt
the first segment, and enqueue the remaining ones.

tested successfully by myself on the latest net kernel, to which this applies

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
CC: Jamal Hadi Salim <jhs@mojatatu.com>
CC: "David S. Miller" <davem@davemloft.net>
CC: netem@lists.linux-foundation.org
CC: eric.dumazet@gmail.com
CC: stephen@networkplumber.org
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/sched/sch_netem.c | 61 +++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 59 insertions(+), 2 deletions(-)

diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 9640bb39a5d2..4befe97a9034 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -395,6 +395,25 @@ static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
 	sch->q.qlen++;
 }
 
+/* netem can't properly corrupt a megapacket (like we get from GSO), so instead
+ * when we statistically choose to corrupt one, we instead segment it, returning
+ * the first packet to be corrupted, and re-enqueue the remaining frames
+ */
+static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch)
+{
+	struct sk_buff *segs;
+	netdev_features_t features = netif_skb_features(skb);
+
+	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
+
+	if (IS_ERR_OR_NULL(segs)) {
+		qdisc_reshape_fail(skb, sch);
+		return NULL;
+	}
+	consume_skb(skb);
+	return segs;
+}
+
 /*
  * Insert one skb into qdisc.
  * Note: parent depends on return value to account for queue length.
@@ -407,7 +426,11 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	/* We don't fill cb now as skb_unshare() may invalidate it */
 	struct netem_skb_cb *cb;
 	struct sk_buff *skb2;
+	struct sk_buff *segs = NULL;
+	unsigned int len = 0, last_len, prev_len = qdisc_pkt_len(skb);
+	int nb = 0;
 	int count = 1;
+	int rc = NET_XMIT_SUCCESS;
 
 	/* Random duplication */
 	if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
@@ -453,10 +476,23 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	 * do it now in software before we mangle it.
 	 */
 	if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
+		if (skb_is_gso(skb)) {
+			segs = netem_segment(skb, sch);
+			if (!segs)
+				return NET_XMIT_DROP;
+		} else {
+			segs = skb;
+		}
+
+		skb = segs;
+		segs = segs->next;
+
 		if (!(skb = skb_unshare(skb, GFP_ATOMIC)) ||
 		    (skb->ip_summed == CHECKSUM_PARTIAL &&
-		     skb_checksum_help(skb)))
-			return qdisc_drop(skb, sch);
+		     skb_checksum_help(skb))) {
+			rc = qdisc_drop(skb, sch);
+			goto finish_segs;
+		}
 
 		skb->data[prandom_u32() % skb_headlen(skb)] ^=
 			1<<(prandom_u32() % 8);
@@ -516,6 +552,27 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		sch->qstats.requeues++;
 	}
 
+finish_segs:
+	if (segs) {
+		while (segs) {
+			skb2 = segs->next;
+			segs->next = NULL;
+			qdisc_skb_cb(segs)->pkt_len = segs->len;
+			last_len = segs->len;
+			rc = qdisc_enqueue(segs, sch);
+			if (rc != NET_XMIT_SUCCESS) {
+				if (net_xmit_drop_count(rc))
+					qdisc_qstats_drop(sch);
+			} else {
+				nb++;
+				len += last_len;
+			}
+			segs = skb2;
+		}
+		sch->q.qlen += nb;
+		if (nb > 1)
+			qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len);
+	}
 	return NET_XMIT_SUCCESS;
 }
 

From f27e1ed8d971a6649c0da8e8a8517fea56ad71f9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Tue, 3 May 2016 16:38:53 +0200
Subject: [PATCH 377/424] net: fec: only clear a queue's work bit if the queue
 was emptied
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit 1c021bb717a70aaeaa4b25c91f43c2aeddd922de ]

In the receive path a queue's work bit was cleared unconditionally even
if fec_enet_rx_queue only read out a part of the available packets from
the hardware. This resulted in not reading any packets in the next napi
turn and so packets were delayed or lost.

The obvious fix is to only clear a queue's bit when the queue was
emptied.

Fixes: 4d494cdc92b3 ("net: fec: change data structure to support multiqueue")
Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Lucas Stach <l.stach@pengutronix.de>
Tested-by: Fugang Duan <fugang.duan@nxp.com>
Acked-by: Fugang Duan <fugang.duan@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/freescale/fec_main.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index b2a32209ffbf..f6147ffc7fbc 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -1557,9 +1557,15 @@ fec_enet_rx(struct net_device *ndev, int budget)
 	struct fec_enet_private *fep = netdev_priv(ndev);
 
 	for_each_set_bit(queue_id, &fep->work_rx, FEC_ENET_MAX_RX_QS) {
-		clear_bit(queue_id, &fep->work_rx);
-		pkt_received += fec_enet_rx_queue(ndev,
+		int ret;
+
+		ret = fec_enet_rx_queue(ndev,
 					budget - pkt_received, queue_id);
+
+		if (ret < budget - pkt_received)
+			clear_bit(queue_id, &fep->work_rx);
+
+		pkt_received += ret;
 	}
 	return pkt_received;
 }

From 52f307b18b1f070f0442fc98515575616b21fa20 Mon Sep 17 00:00:00 2001
From: Kangjie Lu <kangjielu@gmail.com>
Date: Tue, 3 May 2016 16:35:05 -0400
Subject: [PATCH 378/424] net: fix infoleak in llc
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit b8670c09f37bdf2847cc44f36511a53afc6161fd ]

The stack object “info” has a total size of 12 bytes. Its last byte
is padding which is not initialized and leaked via “put_cmsg”.

Signed-off-by: Kangjie Lu <kjlu@gatech.edu>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/llc/af_llc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 8dab4e569571..bb8edb9ef506 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -626,6 +626,7 @@ static void llc_cmsg_rcv(struct msghdr *msg, struct sk_buff *skb)
 	if (llc->cmsg_flags & LLC_CMSG_PKTINFO) {
 		struct llc_pktinfo info;
 
+		memset(&info, 0, sizeof(info));
 		info.lpi_ifindex = llc_sk(skb->sk)->dev->ifindex;
 		llc_pdu_decode_dsap(skb, &info.lpi_sap);
 		llc_pdu_decode_da(skb, info.lpi_mac);

From e0c0313681aaa0c4514c6794635aba82691d2154 Mon Sep 17 00:00:00 2001
From: Kangjie Lu <kangjielu@gmail.com>
Date: Tue, 3 May 2016 16:46:24 -0400
Subject: [PATCH 379/424] net: fix infoleak in rtnetlink
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit 5f8e44741f9f216e33736ea4ec65ca9ac03036e6 ]

The stack object “map” has a total size of 32 bytes. Its last 4
bytes are padding generated by compiler. These padding bytes are
not initialized and sent out via “nla_put”.

Signed-off-by: Kangjie Lu <kjlu@gatech.edu>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/core/rtnetlink.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index ca966f7de351..87b91ffbdec3 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1175,14 +1175,16 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
 
 static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
 {
-	struct rtnl_link_ifmap map = {
-		.mem_start   = dev->mem_start,
-		.mem_end     = dev->mem_end,
-		.base_addr   = dev->base_addr,
-		.irq         = dev->irq,
-		.dma         = dev->dma,
-		.port        = dev->if_port,
-	};
+	struct rtnl_link_ifmap map;
+
+	memset(&map, 0, sizeof(map));
+	map.mem_start   = dev->mem_start;
+	map.mem_end     = dev->mem_end;
+	map.base_addr   = dev->base_addr;
+	map.irq         = dev->irq;
+	map.dma         = dev->dma;
+	map.port        = dev->if_port;
+
 	if (nla_put(skb, IFLA_MAP, sizeof(map), &map))
 		return -EMSGSIZE;
 

From bcf3e33e962d83837a03ccc489d834e0e9d95d58 Mon Sep 17 00:00:00 2001
From: Daniel Jurgens <danielj@mellanox.com>
Date: Wed, 4 May 2016 15:00:33 +0300
Subject: [PATCH 380/424] net/mlx4_en: Fix endianness bug in IPV6 csum
 calculation

[ Upstream commit 82d69203df634b4dfa765c94f60ce9482bcc44d6 ]

Use htons instead of unconditionally byte swapping nexthdr.  On a little
endian systems shifting the byte is correct behavior, but it results in
incorrect csums on big endian architectures.

Fixes: f8c6455bb04b ('net/mlx4_en: Extend checksum offloading by CHECKSUM COMPLETE')
Signed-off-by: Daniel Jurgens <danielj@mellanox.com>
Reviewed-by: Carol Soto <clsoto@us.ibm.com>
Tested-by: Carol Soto <clsoto@us.ibm.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/mellanox/mlx4/en_rx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index e7a5000aa12c..bbff8ec6713e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -704,7 +704,7 @@ static int get_fixed_ipv6_csum(__wsum hw_checksum, struct sk_buff *skb,
 
 	if (ipv6h->nexthdr == IPPROTO_FRAGMENT || ipv6h->nexthdr == IPPROTO_HOPOPTS)
 		return -1;
-	hw_checksum = csum_add(hw_checksum, (__force __wsum)(ipv6h->nexthdr << 8));
+	hw_checksum = csum_add(hw_checksum, (__force __wsum)htons(ipv6h->nexthdr));
 
 	csum_pseudo_hdr = csum_partial(&ipv6h->saddr,
 				       sizeof(ipv6h->saddr) + sizeof(ipv6h->daddr), 0);

From 67779d20fb540b719ca0781200b79831d3498fa4 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@docker.com>
Date: Wed, 4 May 2016 14:21:53 +0100
Subject: [PATCH 381/424] VSOCK: do not disconnect socket when peer has
 shutdown SEND only

[ Upstream commit dedc58e067d8c379a15a8a183c5db318201295bb ]

The peer may be expecting a reply having sent a request and then done a
shutdown(SHUT_WR), so tearing down the whole socket at this point seems
wrong and breaks for me with a client which does a SHUT_WR.

Looking at other socket family's stream_recvmsg callbacks doing a shutdown
here does not seem to be the norm and removing it does not seem to have
had any adverse effects that I can see.

I'm using Stefan's RFC virtio transport patches, I'm unsure of the impact
on the vmci transport.

Signed-off-by: Ian Campbell <ian.campbell@docker.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Stefan Hajnoczi <stefanha@redhat.com>
Cc: Claudio Imbrenda <imbrenda@linux.vnet.ibm.com>
Cc: Andy King <acking@vmware.com>
Cc: Dmitry Torokhov <dtor@vmware.com>
Cc: Jorgen Hansen <jhansen@vmware.com>
Cc: Adit Ranadive <aditr@vmware.com>
Cc: netdev@vger.kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/vmw_vsock/af_vsock.c | 21 +--------------------
 1 file changed, 1 insertion(+), 20 deletions(-)

diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 7fd1220fbfa0..9b5bd6d142dc 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1794,27 +1794,8 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 	else if (sk->sk_shutdown & RCV_SHUTDOWN)
 		err = 0;
 
-	if (copied > 0) {
-		/* We only do these additional bookkeeping/notification steps
-		 * if we actually copied something out of the queue pair
-		 * instead of just peeking ahead.
-		 */
-
-		if (!(flags & MSG_PEEK)) {
-			/* If the other side has shutdown for sending and there
-			 * is nothing more to read, then modify the socket
-			 * state.
-			 */
-			if (vsk->peer_shutdown & SEND_SHUTDOWN) {
-				if (vsock_stream_has_data(vsk) <= 0) {
-					sk->sk_state = SS_UNCONNECTED;
-					sock_set_flag(sk, SOCK_DONE);
-					sk->sk_state_change(sk);
-				}
-			}
-		}
+	if (copied > 0)
 		err = copied;
-	}
 
 out_wait:
 	finish_wait(sk_sleep(sk), &wait);

From 97c2160da468f71ba998f2c6c82ed33cdfbc7245 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Date: Wed, 4 May 2016 16:18:45 +0200
Subject: [PATCH 382/424] net: bridge: fix old ioctl unlocked net device walk

[ Upstream commit 31ca0458a61a502adb7ed192bf9716c6d05791a5 ]

get_bridge_ifindices() is used from the old "deviceless" bridge ioctl
calls which aren't called with rtnl held. The comment above says that it is
called with rtnl but that is not really the case.
Here's a sample output from a test ASSERT_RTNL() which I put in
get_bridge_ifindices and executed "brctl show":
[  957.422726] RTNL: assertion failed at net/bridge//br_ioctl.c (30)
[  957.422925] CPU: 0 PID: 1862 Comm: brctl Tainted: G        W  O
4.6.0-rc4+ #157
[  957.423009] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
BIOS 1.8.1-20150318_183358- 04/01/2014
[  957.423009]  0000000000000000 ffff880058adfdf0 ffffffff8138dec5
0000000000000400
[  957.423009]  ffffffff81ce8380 ffff880058adfe58 ffffffffa05ead32
0000000000000001
[  957.423009]  00007ffec1a444b0 0000000000000400 ffff880053c19130
0000000000008940
[  957.423009] Call Trace:
[  957.423009]  [<ffffffff8138dec5>] dump_stack+0x85/0xc0
[  957.423009]  [<ffffffffa05ead32>]
br_ioctl_deviceless_stub+0x212/0x2e0 [bridge]
[  957.423009]  [<ffffffff81515beb>] sock_ioctl+0x22b/0x290
[  957.423009]  [<ffffffff8126ba75>] do_vfs_ioctl+0x95/0x700
[  957.423009]  [<ffffffff8126c159>] SyS_ioctl+0x79/0x90
[  957.423009]  [<ffffffff8163a4c0>] entry_SYSCALL_64_fastpath+0x23/0xc1

Since it only reads bridge ifindices, we can use rcu to safely walk the net
device list. Also remove the wrong rtnl comment above.

Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/bridge/br_ioctl.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
index 263b4de4de57..60a3dbfca8a1 100644
--- a/net/bridge/br_ioctl.c
+++ b/net/bridge/br_ioctl.c
@@ -21,18 +21,19 @@
 #include <asm/uaccess.h>
 #include "br_private.h"
 
-/* called with RTNL */
 static int get_bridge_ifindices(struct net *net, int *indices, int num)
 {
 	struct net_device *dev;
 	int i = 0;
 
-	for_each_netdev(net, dev) {
+	rcu_read_lock();
+	for_each_netdev_rcu(net, dev) {
 		if (i >= num)
 			break;
 		if (dev->priv_flags & IFF_EBRIDGE)
 			indices[i++] = dev->ifindex;
 	}
+	rcu_read_unlock();
 
 	return i;
 }

From 5895701216ef9e4f89734433dcf64c85fc3d64a8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Linus=20L=C3=BCssing?= <linus.luessing@c0d3.blue>
Date: Wed, 4 May 2016 17:25:02 +0200
Subject: [PATCH 383/424] bridge: fix igmp / mld query parsing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit 856ce5d083e14571d051301fe3c65b32b8cbe321 ]

With the newly introduced helper functions the skb pulling is hidden
in the checksumming function - and undone before returning to the
caller.

The IGMP and MLD query parsing functions in the bridge still
assumed that the skb is pointing to the beginning of the IGMP/MLD
message while it is now kept at the beginning of the IPv4/6 header.

If there is a querier somewhere else, then this either causes
the multicast snooping to stay disabled even though it could be
enabled. Or, if we have the querier enabled too, then this can
create unnecessary IGMP / MLD query messages on the link.

Fixing this by taking the offset between IP and IGMP/MLD header into
account, too.

Fixes: 9afd85c9e455 ("net: Export IGMP/MLD message validation code")
Reported-by: Simon Wunderlich <sw@simonwunderlich.de>
Signed-off-by: Linus Lüssing <linus.luessing@c0d3.blue>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/bridge/br_multicast.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 03661d97463c..ea9893743a0f 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1270,6 +1270,7 @@ static int br_ip4_multicast_query(struct net_bridge *br,
 	struct br_ip saddr;
 	unsigned long max_delay;
 	unsigned long now = jiffies;
+	unsigned int offset = skb_transport_offset(skb);
 	__be32 group;
 	int err = 0;
 
@@ -1280,14 +1281,14 @@ static int br_ip4_multicast_query(struct net_bridge *br,
 
 	group = ih->group;
 
-	if (skb->len == sizeof(*ih)) {
+	if (skb->len == offset + sizeof(*ih)) {
 		max_delay = ih->code * (HZ / IGMP_TIMER_SCALE);
 
 		if (!max_delay) {
 			max_delay = 10 * HZ;
 			group = 0;
 		}
-	} else if (skb->len >= sizeof(*ih3)) {
+	} else if (skb->len >= offset + sizeof(*ih3)) {
 		ih3 = igmpv3_query_hdr(skb);
 		if (ih3->nsrcs)
 			goto out;
@@ -1348,6 +1349,7 @@ static int br_ip6_multicast_query(struct net_bridge *br,
 	struct br_ip saddr;
 	unsigned long max_delay;
 	unsigned long now = jiffies;
+	unsigned int offset = skb_transport_offset(skb);
 	const struct in6_addr *group = NULL;
 	bool is_general_query;
 	int err = 0;
@@ -1357,8 +1359,8 @@ static int br_ip6_multicast_query(struct net_bridge *br,
 	    (port && port->state == BR_STATE_DISABLED))
 		goto out;
 
-	if (skb->len == sizeof(*mld)) {
-		if (!pskb_may_pull(skb, sizeof(*mld))) {
+	if (skb->len == offset + sizeof(*mld)) {
+		if (!pskb_may_pull(skb, offset + sizeof(*mld))) {
 			err = -EINVAL;
 			goto out;
 		}
@@ -1367,7 +1369,7 @@ static int br_ip6_multicast_query(struct net_bridge *br,
 		if (max_delay)
 			group = &mld->mld_mca;
 	} else {
-		if (!pskb_may_pull(skb, sizeof(*mld2q))) {
+		if (!pskb_may_pull(skb, offset + sizeof(*mld2q))) {
 			err = -EINVAL;
 			goto out;
 		}

From 1575c095e444c927f0ebcdeb179c460c8c3b7f1f Mon Sep 17 00:00:00 2001
From: Mikko Rapeli <mikko.rapeli@iki.fi>
Date: Sun, 24 Apr 2016 17:45:00 +0200
Subject: [PATCH 384/424] uapi glibc compat: fix compile errors when glibc
 net/if.h included before linux/if.h MIME-Version: 1.0
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit 4a91cb61bb995e5571098188092e296192309c77 ]

glibc's net/if.h contains copies of definitions from linux/if.h and these
conflict and cause build failures if both files are included by application
source code. Changes in uapi headers, which fixed header file dependencies to
include linux/if.h when it was needed, e.g. commit 1ffad83d, made the
net/if.h and linux/if.h incompatibilities visible as build failures for
userspace applications like iproute2 and xtables-addons.

This patch fixes compile errors when glibc net/if.h is included before
linux/if.h:

./linux/if.h:99:21: error: redeclaration of enumerator ‘IFF_NOARP’
./linux/if.h:98:23: error: redeclaration of enumerator ‘IFF_RUNNING’
./linux/if.h:97:26: error: redeclaration of enumerator ‘IFF_NOTRAILERS’
./linux/if.h:96:27: error: redeclaration of enumerator ‘IFF_POINTOPOINT’
./linux/if.h:95:24: error: redeclaration of enumerator ‘IFF_LOOPBACK’
./linux/if.h:94:21: error: redeclaration of enumerator ‘IFF_DEBUG’
./linux/if.h:93:25: error: redeclaration of enumerator ‘IFF_BROADCAST’
./linux/if.h:92:19: error: redeclaration of enumerator ‘IFF_UP’
./linux/if.h:252:8: error: redefinition of ‘struct ifconf’
./linux/if.h:203:8: error: redefinition of ‘struct ifreq’
./linux/if.h:169:8: error: redefinition of ‘struct ifmap’
./linux/if.h:107:23: error: redeclaration of enumerator ‘IFF_DYNAMIC’
./linux/if.h:106:25: error: redeclaration of enumerator ‘IFF_AUTOMEDIA’
./linux/if.h:105:23: error: redeclaration of enumerator ‘IFF_PORTSEL’
./linux/if.h:104:25: error: redeclaration of enumerator ‘IFF_MULTICAST’
./linux/if.h:103:21: error: redeclaration of enumerator ‘IFF_SLAVE’
./linux/if.h:102:22: error: redeclaration of enumerator ‘IFF_MASTER’
./linux/if.h:101:24: error: redeclaration of enumerator ‘IFF_ALLMULTI’
./linux/if.h:100:23: error: redeclaration of enumerator ‘IFF_PROMISC’

The cases where linux/if.h is included before net/if.h need a similar fix in
the glibc side, or the order of include files can be changed userspace
code as a workaround.

This change was tested in x86 userspace on Debian unstable with
scripts/headers_compile_test.sh:

$ make headers_install && \
  cd usr/include && ../../scripts/headers_compile_test.sh -l -k
...
cc -Wall -c -nostdinc -I /usr/lib/gcc/i586-linux-gnu/5/include -I /usr/lib/gcc/i586-linux-gnu/5/include-fixed -I . -I /home/mcfrisk/src/linux-2.6/usr/headers_compile_test_include.2uX2zH -I /home/mcfrisk/src/linux-2.6/usr/headers_compile_test_include.2uX2zH/i586-linux-gnu -o /dev/null ./linux/if.h_libc_before_kernel.h
PASSED libc before kernel test: ./linux/if.h

Reported-by: Jan Engelhardt <jengelh@inai.de>
Reported-by: Josh Boyer <jwboyer@fedoraproject.org>
Reported-by: Stephen Hemminger <shemming@brocade.com>
Reported-by: Waldemar Brodkorb <mail@waldemar-brodkorb.de>
Cc: Gabriel Laskar <gabriel@lse.epita.fr>
Signed-off-by: Mikko Rapeli <mikko.rapeli@iki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/uapi/linux/if.h          | 28 ++++++++++++++++++++
 include/uapi/linux/libc-compat.h | 44 ++++++++++++++++++++++++++++++++
 2 files changed, 72 insertions(+)

diff --git a/include/uapi/linux/if.h b/include/uapi/linux/if.h
index 9cf2394f0bcf..752f5dc040a5 100644
--- a/include/uapi/linux/if.h
+++ b/include/uapi/linux/if.h
@@ -19,14 +19,20 @@
 #ifndef _LINUX_IF_H
 #define _LINUX_IF_H
 
+#include <linux/libc-compat.h>          /* for compatibility with glibc */
 #include <linux/types.h>		/* for "__kernel_caddr_t" et al	*/
 #include <linux/socket.h>		/* for "struct sockaddr" et al	*/
 #include <linux/compiler.h>		/* for "__user" et al           */
 
+#if __UAPI_DEF_IF_IFNAMSIZ
 #define	IFNAMSIZ	16
+#endif /* __UAPI_DEF_IF_IFNAMSIZ */
 #define	IFALIASZ	256
 #include <linux/hdlc/ioctl.h>
 
+/* For glibc compatibility. An empty enum does not compile. */
+#if __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO != 0 && \
+    __UAPI_DEF_IF_NET_DEVICE_FLAGS != 0
 /**
  * enum net_device_flags - &struct net_device flags
  *
@@ -68,6 +74,8 @@
  * @IFF_ECHO: echo sent packets. Volatile.
  */
 enum net_device_flags {
+/* for compatibility with glibc net/if.h */
+#if __UAPI_DEF_IF_NET_DEVICE_FLAGS
 	IFF_UP				= 1<<0,  /* sysfs */
 	IFF_BROADCAST			= 1<<1,  /* volatile */
 	IFF_DEBUG			= 1<<2,  /* sysfs */
@@ -84,11 +92,17 @@ enum net_device_flags {
 	IFF_PORTSEL			= 1<<13, /* sysfs */
 	IFF_AUTOMEDIA			= 1<<14, /* sysfs */
 	IFF_DYNAMIC			= 1<<15, /* sysfs */
+#endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS */
+#if __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO
 	IFF_LOWER_UP			= 1<<16, /* volatile */
 	IFF_DORMANT			= 1<<17, /* volatile */
 	IFF_ECHO			= 1<<18, /* volatile */
+#endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO */
 };
+#endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO != 0 && __UAPI_DEF_IF_NET_DEVICE_FLAGS != 0 */
 
+/* for compatibility with glibc net/if.h */
+#if __UAPI_DEF_IF_NET_DEVICE_FLAGS
 #define IFF_UP				IFF_UP
 #define IFF_BROADCAST			IFF_BROADCAST
 #define IFF_DEBUG			IFF_DEBUG
@@ -105,9 +119,13 @@ enum net_device_flags {
 #define IFF_PORTSEL			IFF_PORTSEL
 #define IFF_AUTOMEDIA			IFF_AUTOMEDIA
 #define IFF_DYNAMIC			IFF_DYNAMIC
+#endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS */
+
+#if __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO
 #define IFF_LOWER_UP			IFF_LOWER_UP
 #define IFF_DORMANT			IFF_DORMANT
 #define IFF_ECHO			IFF_ECHO
+#endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO */
 
 #define IFF_VOLATILE	(IFF_LOOPBACK|IFF_POINTOPOINT|IFF_BROADCAST|IFF_ECHO|\
 		IFF_MASTER|IFF_SLAVE|IFF_RUNNING|IFF_LOWER_UP|IFF_DORMANT)
@@ -166,6 +184,8 @@ enum {
  *	being very small might be worth keeping for clean configuration.
  */
 
+/* for compatibility with glibc net/if.h */
+#if __UAPI_DEF_IF_IFMAP
 struct ifmap {
 	unsigned long mem_start;
 	unsigned long mem_end;
@@ -175,6 +195,7 @@ struct ifmap {
 	unsigned char port;
 	/* 3 bytes spare */
 };
+#endif /* __UAPI_DEF_IF_IFMAP */
 
 struct if_settings {
 	unsigned int type;	/* Type of physical device or protocol */
@@ -200,6 +221,8 @@ struct if_settings {
  * remainder may be interface specific.
  */
 
+/* for compatibility with glibc net/if.h */
+#if __UAPI_DEF_IF_IFREQ
 struct ifreq {
 #define IFHWADDRLEN	6
 	union
@@ -223,6 +246,7 @@ struct ifreq {
 		struct	if_settings ifru_settings;
 	} ifr_ifru;
 };
+#endif /* __UAPI_DEF_IF_IFREQ */
 
 #define ifr_name	ifr_ifrn.ifrn_name	/* interface name 	*/
 #define ifr_hwaddr	ifr_ifru.ifru_hwaddr	/* MAC address 		*/
@@ -249,6 +273,8 @@ struct ifreq {
  * must know all networks accessible).
  */
 
+/* for compatibility with glibc net/if.h */
+#if __UAPI_DEF_IF_IFCONF
 struct ifconf  {
 	int	ifc_len;			/* size of buffer	*/
 	union {
@@ -256,6 +282,8 @@ struct ifconf  {
 		struct ifreq __user *ifcu_req;
 	} ifc_ifcu;
 };
+#endif /* __UAPI_DEF_IF_IFCONF */
+
 #define	ifc_buf	ifc_ifcu.ifcu_buf		/* buffer address	*/
 #define	ifc_req	ifc_ifcu.ifcu_req		/* array of structures	*/
 
diff --git a/include/uapi/linux/libc-compat.h b/include/uapi/linux/libc-compat.h
index 7d024ceb075d..d5e38c73377c 100644
--- a/include/uapi/linux/libc-compat.h
+++ b/include/uapi/linux/libc-compat.h
@@ -51,6 +51,40 @@
 /* We have included glibc headers... */
 #if defined(__GLIBC__)
 
+/* Coordinate with glibc net/if.h header. */
+#if defined(_NET_IF_H)
+
+/* GLIBC headers included first so don't define anything
+ * that would already be defined. */
+
+#define __UAPI_DEF_IF_IFCONF 0
+#define __UAPI_DEF_IF_IFMAP 0
+#define __UAPI_DEF_IF_IFNAMSIZ 0
+#define __UAPI_DEF_IF_IFREQ 0
+/* Everything up to IFF_DYNAMIC, matches net/if.h until glibc 2.23 */
+#define __UAPI_DEF_IF_NET_DEVICE_FLAGS 0
+/* For the future if glibc adds IFF_LOWER_UP, IFF_DORMANT and IFF_ECHO */
+#ifndef __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO
+#define __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO 1
+#endif /* __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO */
+
+#else /* _NET_IF_H */
+
+/* Linux headers included first, and we must define everything
+ * we need. The expectation is that glibc will check the
+ * __UAPI_DEF_* defines and adjust appropriately. */
+
+#define __UAPI_DEF_IF_IFCONF 1
+#define __UAPI_DEF_IF_IFMAP 1
+#define __UAPI_DEF_IF_IFNAMSIZ 1
+#define __UAPI_DEF_IF_IFREQ 1
+/* Everything up to IFF_DYNAMIC, matches net/if.h until glibc 2.23 */
+#define __UAPI_DEF_IF_NET_DEVICE_FLAGS 1
+/* For the future if glibc adds IFF_LOWER_UP, IFF_DORMANT and IFF_ECHO */
+#define __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO 1
+
+#endif /* _NET_IF_H */
+
 /* Coordinate with glibc netinet/in.h header. */
 #if defined(_NETINET_IN_H)
 
@@ -117,6 +151,16 @@
  * that we need. */
 #else /* !defined(__GLIBC__) */
 
+/* Definitions for if.h */
+#define __UAPI_DEF_IF_IFCONF 1
+#define __UAPI_DEF_IF_IFMAP 1
+#define __UAPI_DEF_IF_IFNAMSIZ 1
+#define __UAPI_DEF_IF_IFREQ 1
+/* Everything up to IFF_DYNAMIC, matches net/if.h until glibc 2.23 */
+#define __UAPI_DEF_IF_NET_DEVICE_FLAGS 1
+/* For the future if glibc adds IFF_LOWER_UP, IFF_DORMANT and IFF_ECHO */
+#define __UAPI_DEF_IF_NET_DEVICE_FLAGS_LOWER_UP_DORMANT_ECHO 1
+
 /* Definitions for in.h */
 #define __UAPI_DEF_IN_ADDR		1
 #define __UAPI_DEF_IN_IPPROTO		1

From 83857201758ead21e19e36d9ab5b2f87be03dfe2 Mon Sep 17 00:00:00 2001
From: Kangjie Lu <kangjielu@gmail.com>
Date: Sun, 8 May 2016 12:10:14 -0400
Subject: [PATCH 385/424] net: fix a kernel infoleak in x25 module

[ Upstream commit 79e48650320e6fba48369fccf13fd045315b19b8 ]

Stack object "dte_facilities" is allocated in x25_rx_call_request(),
which is supposed to be initialized in x25_negotiate_facilities.
However, 5 fields (8 bytes in total) are not initialized. This
object is then copied to userland via copy_to_user, thus infoleak
occurs.

Signed-off-by: Kangjie Lu <kjlu@gatech.edu>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/x25/x25_facilities.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c
index 7ecd04c21360..997ff7b2509b 100644
--- a/net/x25/x25_facilities.c
+++ b/net/x25/x25_facilities.c
@@ -277,6 +277,7 @@ int x25_negotiate_facilities(struct sk_buff *skb, struct sock *sk,
 
 	memset(&theirs, 0, sizeof(theirs));
 	memcpy(new, ours, sizeof(*new));
+	memset(dte, 0, sizeof(*dte));
 
 	len = x25_parse_facilities(skb, &theirs, dte, &x25->vc_facil_mask);
 	if (len < 0)

From a7ddb047796d072a3f19c55aac1d9bfe8cb4b15c Mon Sep 17 00:00:00 2001
From: "xypron.glpk@gmx.de" <xypron.glpk@gmx.de>
Date: Mon, 9 May 2016 00:46:18 +0200
Subject: [PATCH 386/424] net: thunderx: avoid exposing kernel stack

[ Upstream commit 161de2caf68c549c266e571ffba8e2163886fb10 ]

Reserved fields should be set to zero to avoid exposing
bits from the kernel stack.

Signed-off-by: Heinrich Schuchardt <xypron.glpk@gmx.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/cavium/thunder/nicvf_queues.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
index 206b6a71a545..d1c217eaf417 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
@@ -550,6 +550,7 @@ static void nicvf_rcv_queue_config(struct nicvf *nic, struct queue_set *qs,
 		nicvf_config_vlan_stripping(nic, nic->netdev->features);
 
 	/* Enable Receive queue */
+	memset(&rq_cfg, 0, sizeof(struct rq_cfg));
 	rq_cfg.ena = 1;
 	rq_cfg.tcp_ena = 0;
 	nicvf_queue_reg_write(nic, NIC_QSET_RQ_0_7_CFG, qidx, *(u64 *)&rq_cfg);
@@ -582,6 +583,7 @@ void nicvf_cmp_queue_config(struct nicvf *nic, struct queue_set *qs,
 			      qidx, (u64)(cq->dmem.phys_base));
 
 	/* Enable Completion queue */
+	memset(&cq_cfg, 0, sizeof(struct cq_cfg));
 	cq_cfg.ena = 1;
 	cq_cfg.reset = 0;
 	cq_cfg.caching = 0;
@@ -630,6 +632,7 @@ static void nicvf_snd_queue_config(struct nicvf *nic, struct queue_set *qs,
 			      qidx, (u64)(sq->dmem.phys_base));
 
 	/* Enable send queue  & set queue size */
+	memset(&sq_cfg, 0, sizeof(struct sq_cfg));
 	sq_cfg.ena = 1;
 	sq_cfg.reset = 0;
 	sq_cfg.ldwb = 0;
@@ -666,6 +669,7 @@ static void nicvf_rbdr_config(struct nicvf *nic, struct queue_set *qs,
 
 	/* Enable RBDR  & set queue size */
 	/* Buffer size should be in multiples of 128 bytes */
+	memset(&rbdr_cfg, 0, sizeof(struct rbdr_cfg));
 	rbdr_cfg.ena = 1;
 	rbdr_cfg.reset = 0;
 	rbdr_cfg.ldwb = 0;

From 2cddc95adf3b1be879e6540187bb5aae24dd2689 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 9 May 2016 20:55:16 -0700
Subject: [PATCH 387/424] tcp: refresh skb timestamp at retransmit time

[ Upstream commit 10a81980fc47e64ffac26a073139813d3f697b64 ]

In the very unlikely case __tcp_retransmit_skb() can not use the cloning
done in tcp_transmit_skb(), we need to refresh skb_mstamp before doing
the copy and transmit, otherwise TCP TS val will be an exact copy of
original transmit.

Fixes: 7faee5c0d514 ("tcp: remove TCP_SKB_CB(skb)->when")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/tcp_output.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 9bfc39ff2285..7c9883ab56e5 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2625,8 +2625,10 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	 */
 	if (unlikely((NET_IP_ALIGN && ((unsigned long)skb->data & 3)) ||
 		     skb_headroom(skb) >= 0xFFFF)) {
-		struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER,
-						   GFP_ATOMIC);
+		struct sk_buff *nskb;
+
+		skb_mstamp_get(&skb->skb_mstamp);
+		nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC);
 		err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
 			     -ENOBUFS;
 	} else {

From 1c76c5d5ffbbaed1cb1829c1eb9b97648b4979fd Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Fri, 13 May 2016 18:33:41 +0200
Subject: [PATCH 388/424] net/route: enforce hoplimit max value

[ Upstream commit 626abd59e51d4d8c6367e03aae252a8aa759ac78 ]

Currently, when creating or updating a route, no check is performed
in both ipv4 and ipv6 code to the hoplimit value.

The caller can i.e. set hoplimit to 256, and when such route will
 be used, packets will be sent with hoplimit/ttl equal to 0.

This commit adds checks for the RTAX_HOPLIMIT value, in both ipv4
ipv6 route code, substituting any value greater than 255 with 255.

This is consistent with what is currently done for ADVMSS and MTU
in the ipv4 code.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/fib_semantics.c | 2 ++
 net/ipv6/route.c         | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index d97268e8ff10..2b68418c7198 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -975,6 +975,8 @@ fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg)
 			val = 65535 - 40;
 		if (type == RTAX_MTU && val > 65535 - 15)
 			val = 65535 - 15;
+		if (type == RTAX_HOPLIMIT && val > 255)
+			val = 255;
 		if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
 			return -EINVAL;
 		fi->fib_metrics[type - 1] = val;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 3f164d3aaee2..5af2cca0a46d 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1727,6 +1727,8 @@ static int ip6_convert_metrics(struct mx6_config *mxc,
 		} else {
 			val = nla_get_u32(nla);
 		}
+		if (type == RTAX_HOPLIMIT && val > 255)
+			val = 255;
 		if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
 			goto err;
 

From 3cbabd4e83f63d5271c8ff74c059f7cbe85c4c1d Mon Sep 17 00:00:00 2001
From: Junxiao Bi <junxiao.bi@oracle.com>
Date: Thu, 12 May 2016 15:42:15 -0700
Subject: [PATCH 389/424] ocfs2: revert using ocfs2_acl_chmod to avoid inode
 cluster lock hang

commit 5ee0fbd50fdf1c1329de8bee35ea9d7c6a81a2e0 upstream.

Commit 743b5f1434f5 ("ocfs2: take inode lock in ocfs2_iop_set/get_acl()")
introduced this issue.  ocfs2_setattr called by chmod command holds
cluster wide inode lock when calling posix_acl_chmod.  This latter
function in turn calls ocfs2_iop_get_acl and ocfs2_iop_set_acl.  These
two are also called directly from vfs layer for getfacl/setfacl commands
and therefore acquire the cluster wide inode lock.  If a remote
conversion request comes after the first inode lock in ocfs2_setattr,
OCFS2_LOCK_BLOCKED will be set.  And this will cause the second call to
inode lock from the ocfs2_iop_get_acl() to block indefinetly.

The deleted version of ocfs2_acl_chmod() calls __posix_acl_chmod() which
does not call back into the filesystem.  Therefore, we restore
ocfs2_acl_chmod(), modify it slightly for locking as needed, and use that
instead.

Fixes: 743b5f1434f5 ("ocfs2: take inode lock in ocfs2_iop_set/get_acl()")
Signed-off-by: Tariq Saeed <tariq.x.saeed@oracle.com>
Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Mark Fasheh <mfasheh@suse.de>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Joseph Qi <joseph.qi@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ocfs2/acl.c  | 24 ++++++++++++++++++++++++
 fs/ocfs2/acl.h  |  1 +
 fs/ocfs2/file.c |  4 ++--
 3 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index 0cdf497c91ef..749d3bc41232 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -322,3 +322,27 @@ struct posix_acl *ocfs2_iop_get_acl(struct inode *inode, int type)
 	brelse(di_bh);
 	return acl;
 }
+
+int ocfs2_acl_chmod(struct inode *inode, struct buffer_head *bh)
+{
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct posix_acl *acl;
+	int ret;
+
+	if (S_ISLNK(inode->i_mode))
+		return -EOPNOTSUPP;
+
+	if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
+		return 0;
+
+	acl = ocfs2_get_acl_nolock(inode, ACL_TYPE_ACCESS, bh);
+	if (IS_ERR(acl) || !acl)
+		return PTR_ERR(acl);
+	ret = __posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode);
+	if (ret)
+		return ret;
+	ret = ocfs2_set_acl(NULL, inode, NULL, ACL_TYPE_ACCESS,
+			    acl, NULL, NULL);
+	posix_acl_release(acl);
+	return ret;
+}
diff --git a/fs/ocfs2/acl.h b/fs/ocfs2/acl.h
index 3fce68d08625..035e5878db06 100644
--- a/fs/ocfs2/acl.h
+++ b/fs/ocfs2/acl.h
@@ -35,5 +35,6 @@ int ocfs2_set_acl(handle_t *handle,
 			 struct posix_acl *acl,
 			 struct ocfs2_alloc_context *meta_ac,
 			 struct ocfs2_alloc_context *data_ac);
+extern int ocfs2_acl_chmod(struct inode *, struct buffer_head *);
 
 #endif /* OCFS2_ACL_H */
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 0e5b4515f92e..77d30cbd944d 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1268,20 +1268,20 @@ bail_unlock_rw:
 	if (size_change)
 		ocfs2_rw_unlock(inode, 1);
 bail:
-	brelse(bh);
 
 	/* Release quota pointers in case we acquired them */
 	for (qtype = 0; qtype < OCFS2_MAXQUOTAS; qtype++)
 		dqput(transfer_to[qtype]);
 
 	if (!status && attr->ia_valid & ATTR_MODE) {
-		status = posix_acl_chmod(inode, inode->i_mode);
+		status = ocfs2_acl_chmod(inode, bh);
 		if (status < 0)
 			mlog_errno(status);
 	}
 	if (inode_locked)
 		ocfs2_inode_unlock(inode, 1);
 
+	brelse(bh);
 	return status;
 }
 

From dc3e6de00b00bb7b716d2653650fdd112e981578 Mon Sep 17 00:00:00 2001
From: Junxiao Bi <junxiao.bi@oracle.com>
Date: Thu, 12 May 2016 15:42:18 -0700
Subject: [PATCH 390/424] ocfs2: fix posix_acl_create deadlock

commit c25a1e0671fbca7b2c0d0757d533bd2650d6dc0c upstream.

Commit 702e5bc68ad2 ("ocfs2: use generic posix ACL infrastructure")
refactored code to use posix_acl_create.  The problem with this function
is that it is not mindful of the cluster wide inode lock making it
unsuitable for use with ocfs2 inode creation with ACLs.  For example,
when used in ocfs2_mknod, this function can cause deadlock as follows.
The parent dir inode lock is taken when calling posix_acl_create ->
get_acl -> ocfs2_iop_get_acl which takes the inode lock again.  This can
cause deadlock if there is a blocked remote lock request waiting for the
lock to be downconverted.  And same deadlock happened in ocfs2_reflink.
This fix is to revert back using ocfs2_init_acl.

Fixes: 702e5bc68ad2 ("ocfs2: use generic posix ACL infrastructure")
Signed-off-by: Tariq Saeed <tariq.x.saeed@oracle.com>
Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Mark Fasheh <mfasheh@suse.de>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Joseph Qi <joseph.qi@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ocfs2/acl.c          | 63 +++++++++++++++++++++++++++++++++++++++++
 fs/ocfs2/acl.h          |  4 +++
 fs/ocfs2/namei.c        | 23 ++-------------
 fs/ocfs2/refcounttree.c | 17 ++---------
 fs/ocfs2/xattr.c        | 14 ++++-----
 fs/ocfs2/xattr.h        |  4 +--
 6 files changed, 77 insertions(+), 48 deletions(-)

diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index 749d3bc41232..2162434728c0 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -346,3 +346,66 @@ int ocfs2_acl_chmod(struct inode *inode, struct buffer_head *bh)
 	posix_acl_release(acl);
 	return ret;
 }
+
+/*
+ * Initialize the ACLs of a new inode. If parent directory has default ACL,
+ * then clone to new inode. Called from ocfs2_mknod.
+ */
+int ocfs2_init_acl(handle_t *handle,
+		   struct inode *inode,
+		   struct inode *dir,
+		   struct buffer_head *di_bh,
+		   struct buffer_head *dir_bh,
+		   struct ocfs2_alloc_context *meta_ac,
+		   struct ocfs2_alloc_context *data_ac)
+{
+	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+	struct posix_acl *acl = NULL;
+	int ret = 0, ret2;
+	umode_t mode;
+
+	if (!S_ISLNK(inode->i_mode)) {
+		if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) {
+			acl = ocfs2_get_acl_nolock(dir, ACL_TYPE_DEFAULT,
+						   dir_bh);
+			if (IS_ERR(acl))
+				return PTR_ERR(acl);
+		}
+		if (!acl) {
+			mode = inode->i_mode & ~current_umask();
+			ret = ocfs2_acl_set_mode(inode, di_bh, handle, mode);
+			if (ret) {
+				mlog_errno(ret);
+				goto cleanup;
+			}
+		}
+	}
+	if ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) && acl) {
+		if (S_ISDIR(inode->i_mode)) {
+			ret = ocfs2_set_acl(handle, inode, di_bh,
+					    ACL_TYPE_DEFAULT, acl,
+					    meta_ac, data_ac);
+			if (ret)
+				goto cleanup;
+		}
+		mode = inode->i_mode;
+		ret = __posix_acl_create(&acl, GFP_NOFS, &mode);
+		if (ret < 0)
+			return ret;
+
+		ret2 = ocfs2_acl_set_mode(inode, di_bh, handle, mode);
+		if (ret2) {
+			mlog_errno(ret2);
+			ret = ret2;
+			goto cleanup;
+		}
+		if (ret > 0) {
+			ret = ocfs2_set_acl(handle, inode,
+					    di_bh, ACL_TYPE_ACCESS,
+					    acl, meta_ac, data_ac);
+		}
+	}
+cleanup:
+	posix_acl_release(acl);
+	return ret;
+}
diff --git a/fs/ocfs2/acl.h b/fs/ocfs2/acl.h
index 035e5878db06..2783a75b3999 100644
--- a/fs/ocfs2/acl.h
+++ b/fs/ocfs2/acl.h
@@ -36,5 +36,9 @@ int ocfs2_set_acl(handle_t *handle,
 			 struct ocfs2_alloc_context *meta_ac,
 			 struct ocfs2_alloc_context *data_ac);
 extern int ocfs2_acl_chmod(struct inode *, struct buffer_head *);
+extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *,
+			  struct buffer_head *, struct buffer_head *,
+			  struct ocfs2_alloc_context *,
+			  struct ocfs2_alloc_context *);
 
 #endif /* OCFS2_ACL_H */
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 3123408da935..62af9554541d 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -259,7 +259,6 @@ static int ocfs2_mknod(struct inode *dir,
 	struct ocfs2_dir_lookup_result lookup = { NULL, };
 	sigset_t oldset;
 	int did_block_signals = 0;
-	struct posix_acl *default_acl = NULL, *acl = NULL;
 	struct ocfs2_dentry_lock *dl = NULL;
 
 	trace_ocfs2_mknod(dir, dentry, dentry->d_name.len, dentry->d_name.name,
@@ -367,12 +366,6 @@ static int ocfs2_mknod(struct inode *dir,
 		goto leave;
 	}
 
-	status = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl);
-	if (status) {
-		mlog_errno(status);
-		goto leave;
-	}
-
 	handle = ocfs2_start_trans(osb, ocfs2_mknod_credits(osb->sb,
 							    S_ISDIR(mode),
 							    xattr_credits));
@@ -421,16 +414,8 @@ static int ocfs2_mknod(struct inode *dir,
 		inc_nlink(dir);
 	}
 
-	if (default_acl) {
-		status = ocfs2_set_acl(handle, inode, new_fe_bh,
-				       ACL_TYPE_DEFAULT, default_acl,
-				       meta_ac, data_ac);
-	}
-	if (!status && acl) {
-		status = ocfs2_set_acl(handle, inode, new_fe_bh,
-				       ACL_TYPE_ACCESS, acl,
-				       meta_ac, data_ac);
-	}
+	status = ocfs2_init_acl(handle, inode, dir, new_fe_bh, parent_fe_bh,
+			 meta_ac, data_ac);
 
 	if (status < 0) {
 		mlog_errno(status);
@@ -472,10 +457,6 @@ static int ocfs2_mknod(struct inode *dir,
 	d_instantiate(dentry, inode);
 	status = 0;
 leave:
-	if (default_acl)
-		posix_acl_release(default_acl);
-	if (acl)
-		posix_acl_release(acl);
 	if (status < 0 && did_quota_inode)
 		dquot_free_inode(inode);
 	if (handle)
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 252119860e6c..6a0c55d7dff0 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4248,20 +4248,12 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
 	struct inode *inode = d_inode(old_dentry);
 	struct buffer_head *old_bh = NULL;
 	struct inode *new_orphan_inode = NULL;
-	struct posix_acl *default_acl, *acl;
-	umode_t mode;
 
 	if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb)))
 		return -EOPNOTSUPP;
 
-	mode = inode->i_mode;
-	error = posix_acl_create(dir, &mode, &default_acl, &acl);
-	if (error) {
-		mlog_errno(error);
-		return error;
-	}
 
-	error = ocfs2_create_inode_in_orphan(dir, mode,
+	error = ocfs2_create_inode_in_orphan(dir, inode->i_mode,
 					     &new_orphan_inode);
 	if (error) {
 		mlog_errno(error);
@@ -4300,16 +4292,11 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
 	/* If the security isn't preserved, we need to re-initialize them. */
 	if (!preserve) {
 		error = ocfs2_init_security_and_acl(dir, new_orphan_inode,
-						    &new_dentry->d_name,
-						    default_acl, acl);
+						    &new_dentry->d_name);
 		if (error)
 			mlog_errno(error);
 	}
 out:
-	if (default_acl)
-		posix_acl_release(default_acl);
-	if (acl)
-		posix_acl_release(acl);
 	if (!error) {
 		error = ocfs2_mv_orphaned_inode_to_new(dir, new_orphan_inode,
 						       new_dentry);
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index e9164f09841b..877830b05e12 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -7197,12 +7197,10 @@ out:
  */
 int ocfs2_init_security_and_acl(struct inode *dir,
 				struct inode *inode,
-				const struct qstr *qstr,
-				struct posix_acl *default_acl,
-				struct posix_acl *acl)
+				const struct qstr *qstr)
 {
-	struct buffer_head *dir_bh = NULL;
 	int ret = 0;
+	struct buffer_head *dir_bh = NULL;
 
 	ret = ocfs2_init_security_get(inode, dir, qstr, NULL);
 	if (ret) {
@@ -7215,11 +7213,9 @@ int ocfs2_init_security_and_acl(struct inode *dir,
 		mlog_errno(ret);
 		goto leave;
 	}
-
-	if (!ret && default_acl)
-		ret = ocfs2_iop_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
-	if (!ret && acl)
-		ret = ocfs2_iop_set_acl(inode, acl, ACL_TYPE_ACCESS);
+	ret = ocfs2_init_acl(NULL, inode, dir, NULL, dir_bh, NULL, NULL);
+	if (ret)
+		mlog_errno(ret);
 
 	ocfs2_inode_unlock(dir, 0);
 	brelse(dir_bh);
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h
index f10d5b93c366..1633cc15ea1f 100644
--- a/fs/ocfs2/xattr.h
+++ b/fs/ocfs2/xattr.h
@@ -94,7 +94,5 @@ int ocfs2_reflink_xattrs(struct inode *old_inode,
 			 bool preserve_security);
 int ocfs2_init_security_and_acl(struct inode *dir,
 				struct inode *inode,
-				const struct qstr *qstr,
-				struct posix_acl *default_acl,
-				struct posix_acl *acl);
+				const struct qstr *qstr);
 #endif /* OCFS2_XATTR_H */

From 1d77f0a51cb6517d72146c6cb81a997b060b68b2 Mon Sep 17 00:00:00 2001
From: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Date: Mon, 9 May 2016 16:28:49 -0700
Subject: [PATCH 391/424] zsmalloc: fix zs_can_compact() integer overflow

commit 44f43e99fe70833058482d183e99fdfd11220996 upstream.

zs_can_compact() has two race conditions in its core calculation:

unsigned long obj_wasted = zs_stat_get(class, OBJ_ALLOCATED) -
				zs_stat_get(class, OBJ_USED);

1) classes are not locked, so the numbers of allocated and used
   objects can change by the concurrent ops happening on other CPUs
2) shrinker invokes it from preemptible context

Depending on the circumstances, thus, OBJ_ALLOCATED can become
less than OBJ_USED, which can result in either very high or
negative `total_scan' value calculated later in do_shrink_slab().

do_shrink_slab() has some logic to prevent those cases:

 vmscan: shrink_slab: zs_shrinker_scan+0x0/0x28 [zsmalloc] negative objects to delete nr=-62
 vmscan: shrink_slab: zs_shrinker_scan+0x0/0x28 [zsmalloc] negative objects to delete nr=-62
 vmscan: shrink_slab: zs_shrinker_scan+0x0/0x28 [zsmalloc] negative objects to delete nr=-64
 vmscan: shrink_slab: zs_shrinker_scan+0x0/0x28 [zsmalloc] negative objects to delete nr=-62
 vmscan: shrink_slab: zs_shrinker_scan+0x0/0x28 [zsmalloc] negative objects to delete nr=-62
 vmscan: shrink_slab: zs_shrinker_scan+0x0/0x28 [zsmalloc] negative objects to delete nr=-62

However, due to the way `total_scan' is calculated, not every
shrinker->count_objects() overflow can be spotted and handled.
To demonstrate the latter, I added some debugging code to do_shrink_slab()
(x86_64) and the results were:

 vmscan: OVERFLOW: shrinker->count_objects() == -1 [18446744073709551615]
 vmscan: but total_scan > 0: 92679974445502
 vmscan: resulting total_scan: 92679974445502
[..]
 vmscan: OVERFLOW: shrinker->count_objects() == -1 [18446744073709551615]
 vmscan: but total_scan > 0: 22634041808232578
 vmscan: resulting total_scan: 22634041808232578

Even though shrinker->count_objects() has returned an overflowed value,
the resulting `total_scan' is positive, and, what is more worrisome, it
is insanely huge. This value is getting used later on in
shrinker->scan_objects() loop:

        while (total_scan >= batch_size ||
               total_scan >= freeable) {
                unsigned long ret;
                unsigned long nr_to_scan = min(batch_size, total_scan);

                shrinkctl->nr_to_scan = nr_to_scan;
                ret = shrinker->scan_objects(shrinker, shrinkctl);
                if (ret == SHRINK_STOP)
                        break;
                freed += ret;

                count_vm_events(SLABS_SCANNED, nr_to_scan);
                total_scan -= nr_to_scan;

                cond_resched();
        }

`total_scan >= batch_size' is true for a very-very long time and
'total_scan >= freeable' is also true for quite some time, because
`freeable < 0' and `total_scan' is large enough, for example,
22634041808232578. The only break condition, in the given scheme of
things, is shrinker->scan_objects() == SHRINK_STOP test, which is a
bit too weak to rely on, especially in heavy zsmalloc-usage scenarios.

To fix the issue, take a pool stat snapshot and use it instead of
racy zs_stat_get() calls.

Link: http://lkml.kernel.org/r/20160509140052.3389-1-sergey.senozhatsky@gmail.com
Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Minchan Kim <minchan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/zsmalloc.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index fc083996e40a..c1ea19478119 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -1732,10 +1732,13 @@ static struct page *isolate_source_page(struct size_class *class)
 static unsigned long zs_can_compact(struct size_class *class)
 {
 	unsigned long obj_wasted;
+	unsigned long obj_allocated = zs_stat_get(class, OBJ_ALLOCATED);
+	unsigned long obj_used = zs_stat_get(class, OBJ_USED);
 
-	obj_wasted = zs_stat_get(class, OBJ_ALLOCATED) -
-		zs_stat_get(class, OBJ_USED);
+	if (obj_allocated <= obj_used)
+		return 0;
 
+	obj_wasted = obj_allocated - obj_used;
 	obj_wasted /= get_maxobj_per_zspage(class->size,
 			class->pages_per_zspage);
 

From ce1bc448bac01edfccdc26d8318cfd39aa09e6e0 Mon Sep 17 00:00:00 2001
From: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Date: Fri, 15 Apr 2016 16:38:40 +0200
Subject: [PATCH 392/424] s390/mm: fix asce_bits handling with dynamic
 pagetable levels

commit 723cacbd9dc79582e562c123a0bacf8bfc69e72a upstream.

There is a race with multi-threaded applications between context switch and
pagetable upgrade. In switch_mm() a new user_asce is built from mm->pgd and
mm->context.asce_bits, w/o holding any locks. A concurrent mmap with a
pagetable upgrade on another thread in crst_table_upgrade() could already
have set new asce_bits, but not yet the new mm->pgd. This would result in a
corrupt user_asce in switch_mm(), and eventually in a kernel panic from a
translation exception.

Fix this by storing the complete asce instead of just the asce_bits, which
can then be read atomically from switch_mm(), so that it either sees the
old value or the new value, but no mixture. Both cases are OK. Having the
old value would result in a page fault on access to the higher level memory,
but the fault handler would see the new mm->pgd, if it was a valid access
after the mmap on the other thread has completed. So as worst-case scenario
we would have a page fault loop for the racing thread until the next time
slice.

Also remove dead code and simplify the upgrade/downgrade path, there are no
upgrades from 2 levels, and only downgrades from 3 levels for compat tasks.
There are also no concurrent upgrades, because the mmap_sem is held with
down_write() in do_mmap, so the flush and table checks during upgrade can
be removed.

Reported-by: Michael Munday <munday@ca.ibm.com>
Reviewed-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/s390/include/asm/mmu.h         |  2 +-
 arch/s390/include/asm/mmu_context.h | 28 ++++++++--
 arch/s390/include/asm/pgalloc.h     |  4 +-
 arch/s390/include/asm/processor.h   |  2 +-
 arch/s390/include/asm/tlbflush.h    |  9 +--
 arch/s390/mm/init.c                 |  3 +-
 arch/s390/mm/mmap.c                 |  6 +-
 arch/s390/mm/pgtable.c              | 85 ++++++++++-------------------
 8 files changed, 62 insertions(+), 77 deletions(-)

diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index d29ad9545b41..081b2ad99d73 100644
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -11,7 +11,7 @@ typedef struct {
 	spinlock_t list_lock;
 	struct list_head pgtable_list;
 	struct list_head gmap_list;
-	unsigned long asce_bits;
+	unsigned long asce;
 	unsigned long asce_limit;
 	unsigned long vdso_base;
 	/* The mmu context allocates 4K page tables. */
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index e485817f7b1a..22877c9440ea 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -26,12 +26,28 @@ static inline int init_new_context(struct task_struct *tsk,
 	mm->context.has_pgste = 0;
 	mm->context.use_skey = 0;
 #endif
-	if (mm->context.asce_limit == 0) {
+	switch (mm->context.asce_limit) {
+	case 1UL << 42:
+		/*
+		 * forked 3-level task, fall through to set new asce with new
+		 * mm->pgd
+		 */
+	case 0:
 		/* context created by exec, set asce limit to 4TB */
-		mm->context.asce_bits = _ASCE_TABLE_LENGTH |
-			_ASCE_USER_BITS | _ASCE_TYPE_REGION3;
 		mm->context.asce_limit = STACK_TOP_MAX;
-	} else if (mm->context.asce_limit == (1UL << 31)) {
+		mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
+				   _ASCE_USER_BITS | _ASCE_TYPE_REGION3;
+		break;
+	case 1UL << 53:
+		/* forked 4-level task, set new asce with new mm->pgd */
+		mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
+				   _ASCE_USER_BITS | _ASCE_TYPE_REGION2;
+		break;
+	case 1UL << 31:
+		/* forked 2-level compat task, set new asce with new mm->pgd */
+		mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
+				   _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT;
+		/* pgd_alloc() did not increase mm->nr_pmds */
 		mm_inc_nr_pmds(mm);
 	}
 	crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
@@ -42,7 +58,7 @@ static inline int init_new_context(struct task_struct *tsk,
 
 static inline void set_user_asce(struct mm_struct *mm)
 {
-	S390_lowcore.user_asce = mm->context.asce_bits | __pa(mm->pgd);
+	S390_lowcore.user_asce = mm->context.asce;
 	if (current->thread.mm_segment.ar4)
 		__ctl_load(S390_lowcore.user_asce, 7, 7);
 	set_cpu_flag(CIF_ASCE);
@@ -71,7 +87,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 {
 	int cpu = smp_processor_id();
 
-	S390_lowcore.user_asce = next->context.asce_bits | __pa(next->pgd);
+	S390_lowcore.user_asce = next->context.asce;
 	if (prev == next)
 		return;
 	if (MACHINE_HAS_TLB_LC)
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index d7cc79fb6191..5991cdcb5b40 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -56,8 +56,8 @@ static inline unsigned long pgd_entry_type(struct mm_struct *mm)
 	return _REGION2_ENTRY_EMPTY;
 }
 
-int crst_table_upgrade(struct mm_struct *, unsigned long limit);
-void crst_table_downgrade(struct mm_struct *, unsigned long limit);
+int crst_table_upgrade(struct mm_struct *);
+void crst_table_downgrade(struct mm_struct *);
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
 {
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index b16c3d0a1b9f..c1ea67db8404 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -163,7 +163,7 @@ extern __vector128 init_task_fpu_regs[__NUM_VXRS];
 	regs->psw.mask	= PSW_USER_BITS | PSW_MASK_BA;			\
 	regs->psw.addr	= new_psw | PSW_ADDR_AMODE;			\
 	regs->gprs[15]	= new_stackp;					\
-	crst_table_downgrade(current->mm, 1UL << 31);			\
+	crst_table_downgrade(current->mm);				\
 	execve_tail();							\
 } while (0)
 
diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h
index ca148f7c3eaa..a2e6ef32e054 100644
--- a/arch/s390/include/asm/tlbflush.h
+++ b/arch/s390/include/asm/tlbflush.h
@@ -110,8 +110,7 @@ static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce)
 static inline void __tlb_flush_kernel(void)
 {
 	if (MACHINE_HAS_IDTE)
-		__tlb_flush_idte((unsigned long) init_mm.pgd |
-				 init_mm.context.asce_bits);
+		__tlb_flush_idte(init_mm.context.asce);
 	else
 		__tlb_flush_global();
 }
@@ -133,8 +132,7 @@ static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce)
 static inline void __tlb_flush_kernel(void)
 {
 	if (MACHINE_HAS_TLB_LC)
-		__tlb_flush_idte_local((unsigned long) init_mm.pgd |
-				       init_mm.context.asce_bits);
+		__tlb_flush_idte_local(init_mm.context.asce);
 	else
 		__tlb_flush_local();
 }
@@ -148,8 +146,7 @@ static inline void __tlb_flush_mm(struct mm_struct * mm)
 	 * only ran on the local cpu.
 	 */
 	if (MACHINE_HAS_IDTE && list_empty(&mm->context.gmap_list))
-		__tlb_flush_asce(mm, (unsigned long) mm->pgd |
-				 mm->context.asce_bits);
+		__tlb_flush_asce(mm, mm->context.asce);
 	else
 		__tlb_flush_full(mm);
 }
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index c722400c7697..feff9caf89b5 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -89,7 +89,8 @@ void __init paging_init(void)
 		asce_bits = _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
 		pgd_type = _REGION3_ENTRY_EMPTY;
 	}
-	S390_lowcore.kernel_asce = (__pa(init_mm.pgd) & PAGE_MASK) | asce_bits;
+	init_mm.context.asce = (__pa(init_mm.pgd) & PAGE_MASK) | asce_bits;
+	S390_lowcore.kernel_asce = init_mm.context.asce;
 	clear_table((unsigned long *) init_mm.pgd, pgd_type,
 		    sizeof(unsigned long)*2048);
 	vmem_map_init();
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index ea01477b4aa6..f2b6b1d9c804 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -174,7 +174,7 @@ int s390_mmap_check(unsigned long addr, unsigned long len, unsigned long flags)
 	if (!(flags & MAP_FIXED))
 		addr = 0;
 	if ((addr + len) >= TASK_SIZE)
-		return crst_table_upgrade(current->mm, 1UL << 53);
+		return crst_table_upgrade(current->mm);
 	return 0;
 }
 
@@ -191,7 +191,7 @@ s390_get_unmapped_area(struct file *filp, unsigned long addr,
 		return area;
 	if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < (1UL << 53)) {
 		/* Upgrade the page table to 4 levels and retry. */
-		rc = crst_table_upgrade(mm, 1UL << 53);
+		rc = crst_table_upgrade(mm);
 		if (rc)
 			return (unsigned long) rc;
 		area = arch_get_unmapped_area(filp, addr, len, pgoff, flags);
@@ -213,7 +213,7 @@ s390_get_unmapped_area_topdown(struct file *filp, const unsigned long addr,
 		return area;
 	if (area == -ENOMEM && !is_compat_task() && TASK_SIZE < (1UL << 53)) {
 		/* Upgrade the page table to 4 levels and retry. */
-		rc = crst_table_upgrade(mm, 1UL << 53);
+		rc = crst_table_upgrade(mm);
 		if (rc)
 			return (unsigned long) rc;
 		area = arch_get_unmapped_area_topdown(filp, addr, len,
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 54ef3bc01b43..471a370a527b 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -49,81 +49,52 @@ static void __crst_table_upgrade(void *arg)
 	__tlb_flush_local();
 }
 
-int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
+int crst_table_upgrade(struct mm_struct *mm)
 {
 	unsigned long *table, *pgd;
-	unsigned long entry;
-	int flush;
 
-	BUG_ON(limit > (1UL << 53));
-	flush = 0;
-repeat:
+	/* upgrade should only happen from 3 to 4 levels */
+	BUG_ON(mm->context.asce_limit != (1UL << 42));
+
 	table = crst_table_alloc(mm);
 	if (!table)
 		return -ENOMEM;
+
 	spin_lock_bh(&mm->page_table_lock);
-	if (mm->context.asce_limit < limit) {
-		pgd = (unsigned long *) mm->pgd;
-		if (mm->context.asce_limit <= (1UL << 31)) {
-			entry = _REGION3_ENTRY_EMPTY;
-			mm->context.asce_limit = 1UL << 42;
-			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
-						_ASCE_USER_BITS |
-						_ASCE_TYPE_REGION3;
-		} else {
-			entry = _REGION2_ENTRY_EMPTY;
-			mm->context.asce_limit = 1UL << 53;
-			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
-						_ASCE_USER_BITS |
-						_ASCE_TYPE_REGION2;
-		}
-		crst_table_init(table, entry);
-		pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
-		mm->pgd = (pgd_t *) table;
-		mm->task_size = mm->context.asce_limit;
-		table = NULL;
-		flush = 1;
-	}
+	pgd = (unsigned long *) mm->pgd;
+	crst_table_init(table, _REGION2_ENTRY_EMPTY);
+	pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
+	mm->pgd = (pgd_t *) table;
+	mm->context.asce_limit = 1UL << 53;
+	mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
+			   _ASCE_USER_BITS | _ASCE_TYPE_REGION2;
+	mm->task_size = mm->context.asce_limit;
 	spin_unlock_bh(&mm->page_table_lock);
-	if (table)
-		crst_table_free(mm, table);
-	if (mm->context.asce_limit < limit)
-		goto repeat;
-	if (flush)
-		on_each_cpu(__crst_table_upgrade, mm, 0);
+
+	on_each_cpu(__crst_table_upgrade, mm, 0);
 	return 0;
 }
 
-void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
+void crst_table_downgrade(struct mm_struct *mm)
 {
 	pgd_t *pgd;
 
+	/* downgrade should only happen from 3 to 2 levels (compat only) */
+	BUG_ON(mm->context.asce_limit != (1UL << 42));
+
 	if (current->active_mm == mm) {
 		clear_user_asce();
 		__tlb_flush_mm(mm);
 	}
-	while (mm->context.asce_limit > limit) {
-		pgd = mm->pgd;
-		switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) {
-		case _REGION_ENTRY_TYPE_R2:
-			mm->context.asce_limit = 1UL << 42;
-			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
-						_ASCE_USER_BITS |
-						_ASCE_TYPE_REGION3;
-			break;
-		case _REGION_ENTRY_TYPE_R3:
-			mm->context.asce_limit = 1UL << 31;
-			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
-						_ASCE_USER_BITS |
-						_ASCE_TYPE_SEGMENT;
-			break;
-		default:
-			BUG();
-		}
-		mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
-		mm->task_size = mm->context.asce_limit;
-		crst_table_free(mm, (unsigned long *) pgd);
-	}
+
+	pgd = mm->pgd;
+	mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
+	mm->context.asce_limit = 1UL << 31;
+	mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
+			   _ASCE_USER_BITS | _ASCE_TYPE_SEGMENT;
+	mm->task_size = mm->context.asce_limit;
+	crst_table_free(mm, (unsigned long *) pgd);
+
 	if (current->active_mm == mm)
 		set_user_asce(mm);
 }

From 6f1519425705094323912e55d8d6e174775b9caf Mon Sep 17 00:00:00 2001
From: Tadeusz Struk <tadeusz.struk@intel.com>
Date: Mon, 25 Apr 2016 07:32:19 -0700
Subject: [PATCH 393/424] crypto: qat - fix invalid pf2vf_resp_wq logic

commit 9e209fcfb804da262e38e5cd2e680c47a41f0f95 upstream.

The pf2vf_resp_wq is a global so it has to be created at init
and destroyed at exit, instead of per device.

Tested-by: Suresh Marikkannu <sureshx.marikkannu@intel.com>
Signed-off-by: Tadeusz Struk <tadeusz.struk@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../crypto/qat/qat_common/adf_common_drv.h    |  2 ++
 drivers/crypto/qat/qat_common/adf_ctl_drv.c   |  6 +++++
 drivers/crypto/qat/qat_common/adf_sriov.c     | 26 ++++++++++++-------
 3 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/drivers/crypto/qat/qat_common/adf_common_drv.h b/drivers/crypto/qat/qat_common/adf_common_drv.h
index 3f76bd495bcb..b9178d0a3093 100644
--- a/drivers/crypto/qat/qat_common/adf_common_drv.h
+++ b/drivers/crypto/qat/qat_common/adf_common_drv.h
@@ -145,6 +145,8 @@ int adf_enable_aer(struct adf_accel_dev *accel_dev, struct pci_driver *adf);
 void adf_disable_aer(struct adf_accel_dev *accel_dev);
 int adf_init_aer(void);
 void adf_exit_aer(void);
+int adf_init_pf_wq(void);
+void adf_exit_pf_wq(void);
 int adf_init_admin_comms(struct adf_accel_dev *accel_dev);
 void adf_exit_admin_comms(struct adf_accel_dev *accel_dev);
 int adf_send_admin_init(struct adf_accel_dev *accel_dev);
diff --git a/drivers/crypto/qat/qat_common/adf_ctl_drv.c b/drivers/crypto/qat/qat_common/adf_ctl_drv.c
index 473d36d91644..e7480f373532 100644
--- a/drivers/crypto/qat/qat_common/adf_ctl_drv.c
+++ b/drivers/crypto/qat/qat_common/adf_ctl_drv.c
@@ -469,12 +469,17 @@ static int __init adf_register_ctl_device_driver(void)
 	if (adf_init_aer())
 		goto err_aer;
 
+	if (adf_init_pf_wq())
+		goto err_pf_wq;
+
 	if (qat_crypto_register())
 		goto err_crypto_register;
 
 	return 0;
 
 err_crypto_register:
+	adf_exit_pf_wq();
+err_pf_wq:
 	adf_exit_aer();
 err_aer:
 	adf_chr_drv_destroy();
@@ -487,6 +492,7 @@ static void __exit adf_unregister_ctl_device_driver(void)
 {
 	adf_chr_drv_destroy();
 	adf_exit_aer();
+	adf_exit_pf_wq();
 	qat_crypto_unregister();
 	adf_clean_vf_map(false);
 	mutex_destroy(&adf_ctl_lock);
diff --git a/drivers/crypto/qat/qat_common/adf_sriov.c b/drivers/crypto/qat/qat_common/adf_sriov.c
index 1117a8b58280..38a0415e767d 100644
--- a/drivers/crypto/qat/qat_common/adf_sriov.c
+++ b/drivers/crypto/qat/qat_common/adf_sriov.c
@@ -119,11 +119,6 @@ static int adf_enable_sriov(struct adf_accel_dev *accel_dev)
 	int i;
 	u32 reg;
 
-	/* Workqueue for PF2VF responses */
-	pf2vf_resp_wq = create_workqueue("qat_pf2vf_resp_wq");
-	if (!pf2vf_resp_wq)
-		return -ENOMEM;
-
 	for (i = 0, vf_info = accel_dev->pf.vf_info; i < totalvfs;
 	     i++, vf_info++) {
 		/* This ptr will be populated when VFs will be created */
@@ -216,11 +211,6 @@ void adf_disable_sriov(struct adf_accel_dev *accel_dev)
 
 	kfree(accel_dev->pf.vf_info);
 	accel_dev->pf.vf_info = NULL;
-
-	if (pf2vf_resp_wq) {
-		destroy_workqueue(pf2vf_resp_wq);
-		pf2vf_resp_wq = NULL;
-	}
 }
 EXPORT_SYMBOL_GPL(adf_disable_sriov);
 
@@ -304,3 +294,19 @@ int adf_sriov_configure(struct pci_dev *pdev, int numvfs)
 	return numvfs;
 }
 EXPORT_SYMBOL_GPL(adf_sriov_configure);
+
+int __init adf_init_pf_wq(void)
+{
+	/* Workqueue for PF2VF responses */
+	pf2vf_resp_wq = create_workqueue("qat_pf2vf_resp_wq");
+
+	return !pf2vf_resp_wq ? -ENOMEM : 0;
+}
+
+void adf_exit_pf_wq(void)
+{
+	if (pf2vf_resp_wq) {
+		destroy_workqueue(pf2vf_resp_wq);
+		pf2vf_resp_wq = NULL;
+	}
+}

From 3cbc5f6ed2722a65b75de22fe2206471f093b6c3 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 4 May 2016 17:52:56 +0800
Subject: [PATCH 394/424] crypto: hash - Fix page length clamping in hash walk

commit 13f4bb78cf6a312bbdec367ba3da044b09bf0e29 upstream.

The crypto hash walk code is broken when supplied with an offset
greater than or equal to PAGE_SIZE.  This patch fixes it by adjusting
walk->pg and walk->offset when this happens.

Reported-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 crypto/ahash.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/crypto/ahash.c b/crypto/ahash.c
index d19b52324cf5..dac1c24e9c3e 100644
--- a/crypto/ahash.c
+++ b/crypto/ahash.c
@@ -69,8 +69,9 @@ static int hash_walk_new_entry(struct crypto_hash_walk *walk)
 	struct scatterlist *sg;
 
 	sg = walk->sg;
-	walk->pg = sg_page(sg);
 	walk->offset = sg->offset;
+	walk->pg = sg_page(walk->sg) + (walk->offset >> PAGE_SHIFT);
+	walk->offset = offset_in_page(walk->offset);
 	walk->entrylen = sg->length;
 
 	if (walk->entrylen > walk->total)

From 7c712000ba78d26f46d16fcc8ac3a9d827ddb242 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 5 May 2016 16:42:49 +0800
Subject: [PATCH 395/424] crypto: testmgr - Use kmalloc memory for RSA input

commit df27b26f04ed388ff4cc2b5d8cfdb5d97678816f upstream.

As akcipher uses an SG interface, you must not use vmalloc memory
as input for it.  This patch fixes testmgr to copy the vmalloc
test vectors to kmalloc memory before running the test.

This patch also removes a superfluous sg_virt call in do_test_rsa.

Reported-by: Anatoly Pugachev <matorola@gmail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 crypto/testmgr.c | 27 ++++++++++++++++++++++-----
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index ae8c57fd8bc7..d4944318ca1f 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -1849,6 +1849,7 @@ static int alg_test_drbg(const struct alg_test_desc *desc, const char *driver,
 static int do_test_rsa(struct crypto_akcipher *tfm,
 		       struct akcipher_testvec *vecs)
 {
+	char *xbuf[XBUFSIZE];
 	struct akcipher_request *req;
 	void *outbuf_enc = NULL;
 	void *outbuf_dec = NULL;
@@ -1857,9 +1858,12 @@ static int do_test_rsa(struct crypto_akcipher *tfm,
 	int err = -ENOMEM;
 	struct scatterlist src, dst, src_tab[2];
 
+	if (testmgr_alloc_buf(xbuf))
+		return err;
+
 	req = akcipher_request_alloc(tfm, GFP_KERNEL);
 	if (!req)
-		return err;
+		goto free_xbuf;
 
 	init_completion(&result.completion);
 
@@ -1877,9 +1881,14 @@ static int do_test_rsa(struct crypto_akcipher *tfm,
 	if (!outbuf_enc)
 		goto free_req;
 
+	if (WARN_ON(vecs->m_size > PAGE_SIZE))
+		goto free_all;
+
+	memcpy(xbuf[0], vecs->m, vecs->m_size);
+
 	sg_init_table(src_tab, 2);
-	sg_set_buf(&src_tab[0], vecs->m, 8);
-	sg_set_buf(&src_tab[1], vecs->m + 8, vecs->m_size - 8);
+	sg_set_buf(&src_tab[0], xbuf[0], 8);
+	sg_set_buf(&src_tab[1], xbuf[0] + 8, vecs->m_size - 8);
 	sg_init_one(&dst, outbuf_enc, out_len_max);
 	akcipher_request_set_crypt(req, src_tab, &dst, vecs->m_size,
 				   out_len_max);
@@ -1898,7 +1907,7 @@ static int do_test_rsa(struct crypto_akcipher *tfm,
 		goto free_all;
 	}
 	/* verify that encrypted message is equal to expected */
-	if (memcmp(vecs->c, sg_virt(req->dst), vecs->c_size)) {
+	if (memcmp(vecs->c, outbuf_enc, vecs->c_size)) {
 		pr_err("alg: rsa: encrypt test failed. Invalid output\n");
 		err = -EINVAL;
 		goto free_all;
@@ -1913,7 +1922,13 @@ static int do_test_rsa(struct crypto_akcipher *tfm,
 		err = -ENOMEM;
 		goto free_all;
 	}
-	sg_init_one(&src, vecs->c, vecs->c_size);
+
+	if (WARN_ON(vecs->c_size > PAGE_SIZE))
+		goto free_all;
+
+	memcpy(xbuf[0], vecs->c, vecs->c_size);
+
+	sg_init_one(&src, xbuf[0], vecs->c_size);
 	sg_init_one(&dst, outbuf_dec, out_len_max);
 	init_completion(&result.completion);
 	akcipher_request_set_crypt(req, &src, &dst, vecs->c_size, out_len_max);
@@ -1940,6 +1955,8 @@ free_all:
 	kfree(outbuf_enc);
 free_req:
 	akcipher_request_free(req);
+free_xbuf:
+	testmgr_free_buf(xbuf);
 	return err;
 }
 

From 2a5db188f3151be7d10c01c29deb6867b8bb2925 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 29 Apr 2016 11:20:15 +0200
Subject: [PATCH 396/424] ALSA: usb-audio: Quirk for yet another Phoenix Audio
 devices (v2)

commit 2d2c038a9999f423e820d89db2b5d7774b67ba49 upstream.

Phoenix Audio MT202pcs (1de7:0114) and MT202exe (1de7:0013) need the
same workaround as TMX320 for avoiding the firmware bug.  It fixes the
frequent error about the sample rate inquiries and the slow device
probe as consequence.

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=117321
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/usb/quirks.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index 001fb4dc0722..93c076574def 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -1139,7 +1139,9 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip)
 	case USB_ID(0x047F, 0xAA05): /* Plantronics DA45 */
 	case USB_ID(0x04D8, 0xFEEA): /* Benchmark DAC1 Pre */
 	case USB_ID(0x074D, 0x3553): /* Outlaw RR2150 (Micronas UAC3553B) */
+	case USB_ID(0x1de7, 0x0013): /* Phoenix Audio MT202exe */
 	case USB_ID(0x1de7, 0x0014): /* Phoenix Audio TMX320 */
+	case USB_ID(0x1de7, 0x0114): /* Phoenix Audio MT202pcs */
 	case USB_ID(0x21B4, 0x0081): /* AudioQuest DragonFly */
 		return true;
 	}

From 7e8b58b0fa1c7f2da95307ab3fdf5704ca3f11db Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 11 May 2016 17:48:00 +0200
Subject: [PATCH 397/424] ALSA: usb-audio: Yet another Phoneix Audio device
 quirk

commit 84add303ef950b8d85f54bc2248c2bc73467c329 upstream.

Phoenix Audio has yet another device with another id (even a different
vendor id, 0556:0014) that requires the same quirk for the sample
rate.

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=110221
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/usb/quirks.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index 93c076574def..db11ecf0b74d 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -1138,6 +1138,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip)
 	case USB_ID(0x047F, 0x0415): /* Plantronics BT-300 */
 	case USB_ID(0x047F, 0xAA05): /* Plantronics DA45 */
 	case USB_ID(0x04D8, 0xFEEA): /* Benchmark DAC1 Pre */
+	case USB_ID(0x0556, 0x0014): /* Phoenix Audio TMX320VC */
 	case USB_ID(0x074D, 0x3553): /* Outlaw RR2150 (Micronas UAC3553B) */
 	case USB_ID(0x1de7, 0x0013): /* Phoenix Audio MT202exe */
 	case USB_ID(0x1de7, 0x0014): /* Phoenix Audio TMX320 */

From 28c56446f563af16707e7945ce88f143f8fd6285 Mon Sep 17 00:00:00 2001
From: Yura Pakhuchiy <pakhuchiy@gmail.com>
Date: Sat, 7 May 2016 23:53:36 +0700
Subject: [PATCH 398/424] ALSA: hda - Fix subwoofer pin on ASUS N751 and N551

commit 3231e2053eaeee70bdfb216a78a30f11e88e2243 upstream.

Subwoofer does not work out of the box on ASUS N751/N551 laptops. This
patch fixes it. Patch tested on N751 laptop. N551 part is not tested,
but according to [1] and [2] this laptop requires similar changes, so I
included them in the patch.

1. https://github.com/honsiorovskyi/asus-n551-hda-fix
2. https://bugs.launchpad.net/ubuntu/+source/alsa-tools/+bug/1405691

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=117781
Signed-off-by: Yura Pakhuchiy <pakhuchiy@gmail.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/hda/patch_realtek.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index ac4490a96863..0b5fe30f626d 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -6426,6 +6426,7 @@ enum {
 	ALC668_FIXUP_DELL_DISABLE_AAMIX,
 	ALC668_FIXUP_DELL_XPS13,
 	ALC662_FIXUP_ASUS_Nx50,
+	ALC668_FIXUP_ASUS_Nx51,
 };
 
 static const struct hda_fixup alc662_fixups[] = {
@@ -6672,6 +6673,15 @@ static const struct hda_fixup alc662_fixups[] = {
 		.chained = true,
 		.chain_id = ALC662_FIXUP_BASS_1A
 	},
+	[ALC668_FIXUP_ASUS_Nx51] = {
+		.type = HDA_FIXUP_PINS,
+		.v.pins = (const struct hda_pintbl[]) {
+			{0x1a, 0x90170151}, /* bass speaker */
+			{}
+		},
+		.chained = true,
+		.chain_id = ALC662_FIXUP_BASS_CHMAP,
+	},
 };
 
 static const struct snd_pci_quirk alc662_fixup_tbl[] = {
@@ -6699,6 +6709,8 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x1043, 0x129d, "Asus N750", ALC662_FIXUP_ASUS_Nx50),
 	SND_PCI_QUIRK(0x1043, 0x1477, "ASUS N56VZ", ALC662_FIXUP_BASS_MODE4_CHMAP),
 	SND_PCI_QUIRK(0x1043, 0x15a7, "ASUS UX51VZH", ALC662_FIXUP_BASS_16),
+	SND_PCI_QUIRK(0x1043, 0x177d, "ASUS N551", ALC668_FIXUP_ASUS_Nx51),
+	SND_PCI_QUIRK(0x1043, 0x17bd, "ASUS N751", ALC668_FIXUP_ASUS_Nx51),
 	SND_PCI_QUIRK(0x1043, 0x1b73, "ASUS N55SF", ALC662_FIXUP_BASS_16),
 	SND_PCI_QUIRK(0x1043, 0x1bf3, "ASUS N76VZ", ALC662_FIXUP_BASS_MODE4_CHMAP),
 	SND_PCI_QUIRK(0x1043, 0x8469, "ASUS mobo", ALC662_FIXUP_NO_JACK_DETECT),

From 28ff35e454b4e2688eb0456b0981d6003a19d122 Mon Sep 17 00:00:00 2001
From: Kaho Ng <ngkaho1234@gmail.com>
Date: Mon, 9 May 2016 00:27:49 +0800
Subject: [PATCH 399/424] ALSA: hda - Fix white noise on Asus UX501VW headset

commit 2da2dc9ead232f25601404335cca13c0f722d41b upstream.

For reducing the noise from the headset output on ASUS UX501VW,
call the existing fixup, alc_fixup_headset_mode_alc668(), additionally.

Thread: https://bbs.archlinux.org/viewtopic.php?id=209554

Signed-off-by: Kaho Ng <ngkaho1234@gmail.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/hda/patch_realtek.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 0b5fe30f626d..4918ffa5ba68 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -6704,6 +6704,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x1028, 0x0698, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x069f, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x103c, 0x1632, "HP RP5800", ALC662_FIXUP_HP_RP5800),
+	SND_PCI_QUIRK(0x1043, 0x1080, "Asus UX501VW", ALC668_FIXUP_HEADSET_MODE),
 	SND_PCI_QUIRK(0x1043, 0x11cd, "Asus N550", ALC662_FIXUP_ASUS_Nx50),
 	SND_PCI_QUIRK(0x1043, 0x13df, "Asus N550JX", ALC662_FIXUP_BASS_1A),
 	SND_PCI_QUIRK(0x1043, 0x129d, "Asus N750", ALC662_FIXUP_ASUS_Nx50),

From 90204cbb4890422facac52ed197b169eb1f26c9f Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 10 May 2016 10:24:02 +0200
Subject: [PATCH 400/424] ALSA: hda - Fix broken reconfig

commit addacd801e1638f41d659cb53b9b73fc14322cb1 upstream.

The HD-audio reconfig function got broken in the recent kernels,
typically resulting in a failure like:
  snd_hda_intel 0000:00:1b.0: control 3:0:0:Playback Channel Map:0 is already present

This is because of the code restructuring to move the PCM and control
instantiation into the codec drive probe, by the commit [bcd96557bd0a:
ALSA: hda - Build PCMs and controls at codec driver probe].  Although
the commit above removed the calls of snd_hda_codec_build_pcms() and
*_build_controls() at the controller driver probe, the similar calls
in the reconfig were still left forgotten.  This caused the
conflicting and duplicated PCMs and controls.

The fix is trivial: just remove these superfluous calls from
reconfig_codec().

Fixes: bcd96557bd0a ('ALSA: hda - Build PCMs and controls at codec driver probe')
Reported-by: Jochen Henneberg <jh@henneberg-systemdesign.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/hda/hda_sysfs.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/sound/pci/hda/hda_sysfs.c b/sound/pci/hda/hda_sysfs.c
index 64e0d1d81ca5..9739fce9e032 100644
--- a/sound/pci/hda/hda_sysfs.c
+++ b/sound/pci/hda/hda_sysfs.c
@@ -139,14 +139,6 @@ static int reconfig_codec(struct hda_codec *codec)
 		goto error;
 	}
 	err = snd_hda_codec_configure(codec);
-	if (err < 0)
-		goto error;
-	/* rebuild PCMs */
-	err = snd_hda_codec_build_pcms(codec);
-	if (err < 0)
-		goto error;
-	/* rebuild mixers */
-	err = snd_hda_codec_build_controls(codec);
 	if (err < 0)
 		goto error;
 	err = snd_card_register(codec->card);

From ca100af95931ebf2610b47ff2c311fe2c5e43495 Mon Sep 17 00:00:00 2001
From: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Date: Tue, 26 Apr 2016 10:08:26 +0300
Subject: [PATCH 401/424] spi: pxa2xx: Do not detect number of enabled chip
 selects on Intel SPT

commit 66ec246eb9982e7eb8e15e1fc55f543230310dd0 upstream.

Certain Intel Sunrisepoint PCH variants report zero chip selects in SPI
capabilities register even they have one per port. Detection in
pxa2xx_spi_probe() sets master->num_chipselect to 0 leading to -EINVAL
from spi_register_master() where chip select count is validated.

Fix this by not using SPI capabilities register on Sunrisepoint. They don't
have more than one chip select so use the default value 1 instead of
detection.

Fixes: 8b136baa5892 ("spi: pxa2xx: Detect number of enabled Intel LPSS SPI chip select signals")
Signed-off-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/spi/spi-pxa2xx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index b25dc71b0ea9..73c8ea0b1360 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -111,7 +111,7 @@ static const struct lpss_config lpss_platforms[] = {
 		.reg_general = -1,
 		.reg_ssp = 0x20,
 		.reg_cs_ctrl = 0x24,
-		.reg_capabilities = 0xfc,
+		.reg_capabilities = -1,
 		.rx_threshold = 1,
 		.tx_threshold_lo = 32,
 		.tx_threshold_hi = 56,

From 444189274bd802692e3e9334e23c1d9348ea87f6 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben.hutchings@codethink.co.uk>
Date: Tue, 12 Apr 2016 12:56:25 +0100
Subject: [PATCH 402/424] spi: spi-ti-qspi: Fix FLEN and WLEN settings if
 bits_per_word is overridden

commit ea1b60fb085839a9544cb3a0069992991beabb7f upstream.

Each transfer can specify 8, 16 or 32 bits per word independently of
the default for the device being addressed.  However, currently we
calculate the number of words in the frame assuming that the word size
is the device default.

If multiple transfers in the same message have differing
bits_per_word, we bitwise-or the different values in the WLEN register
field.

Fix both of these.  Also rename 'frame_length' to 'frame_len_words' to
make clear that it's not a byte count like spi_message::frame_length.

Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/spi/spi-ti-qspi.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/drivers/spi/spi-ti-qspi.c b/drivers/spi/spi-ti-qspi.c
index 64318fcfacf2..2137f2112804 100644
--- a/drivers/spi/spi-ti-qspi.c
+++ b/drivers/spi/spi-ti-qspi.c
@@ -94,6 +94,7 @@ struct ti_qspi {
 #define QSPI_FLEN(n)			((n - 1) << 0)
 #define QSPI_WLEN_MAX_BITS		128
 #define QSPI_WLEN_MAX_BYTES		16
+#define QSPI_WLEN_MASK			QSPI_WLEN(QSPI_WLEN_MAX_BITS)
 
 /* STATUS REGISTER */
 #define BUSY				0x01
@@ -373,7 +374,7 @@ static int ti_qspi_start_transfer_one(struct spi_master *master,
 	struct spi_device *spi = m->spi;
 	struct spi_transfer *t;
 	int status = 0, ret;
-	int frame_length;
+	unsigned int frame_len_words;
 
 	/* setup device control reg */
 	qspi->dc = 0;
@@ -385,21 +386,23 @@ static int ti_qspi_start_transfer_one(struct spi_master *master,
 	if (spi->mode & SPI_CS_HIGH)
 		qspi->dc |= QSPI_CSPOL(spi->chip_select);
 
-	frame_length = (m->frame_length << 3) / spi->bits_per_word;
-
-	frame_length = clamp(frame_length, 0, QSPI_FRAME);
+	frame_len_words = 0;
+	list_for_each_entry(t, &m->transfers, transfer_list)
+		frame_len_words += t->len / (t->bits_per_word >> 3);
+	frame_len_words = min_t(unsigned int, frame_len_words, QSPI_FRAME);
 
 	/* setup command reg */
 	qspi->cmd = 0;
 	qspi->cmd |= QSPI_EN_CS(spi->chip_select);
-	qspi->cmd |= QSPI_FLEN(frame_length);
+	qspi->cmd |= QSPI_FLEN(frame_len_words);
 
 	ti_qspi_write(qspi, qspi->dc, QSPI_SPI_DC_REG);
 
 	mutex_lock(&qspi->list_lock);
 
 	list_for_each_entry(t, &m->transfers, transfer_list) {
-		qspi->cmd |= QSPI_WLEN(t->bits_per_word);
+		qspi->cmd = ((qspi->cmd & ~QSPI_WLEN_MASK) |
+			     QSPI_WLEN(t->bits_per_word));
 
 		ret = qspi_transfer_msg(qspi, t);
 		if (ret) {

From 1441dcbdcbe529fd7f0db6fd95b4bc9d867e04bb Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben.hutchings@codethink.co.uk>
Date: Tue, 12 Apr 2016 12:58:14 +0100
Subject: [PATCH 403/424] spi: spi-ti-qspi: Handle truncated frames properly

commit 1ff7760ff66b98ef244bf0e5e2bd5310651205ad upstream.

We clamp frame_len_words to a maximum of 4096, but do not actually
limit the number of words written or read through the DATA registers
or the length added to spi_message::actual_length.  This results in
silent data corruption for commands longer than this maximum.

Recalculate the length of each transfer, taking frame_len_words into
account.  Use this length in qspi_{read,write}_msg(), and to increment
spi_message::actual_length.

Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/spi/spi-ti-qspi.c | 32 ++++++++++++++++++++------------
 1 file changed, 20 insertions(+), 12 deletions(-)

diff --git a/drivers/spi/spi-ti-qspi.c b/drivers/spi/spi-ti-qspi.c
index 2137f2112804..5044c6198332 100644
--- a/drivers/spi/spi-ti-qspi.c
+++ b/drivers/spi/spi-ti-qspi.c
@@ -225,16 +225,16 @@ static inline int ti_qspi_poll_wc(struct ti_qspi *qspi)
 	return  -ETIMEDOUT;
 }
 
-static int qspi_write_msg(struct ti_qspi *qspi, struct spi_transfer *t)
+static int qspi_write_msg(struct ti_qspi *qspi, struct spi_transfer *t,
+			  int count)
 {
-	int wlen, count, xfer_len;
+	int wlen, xfer_len;
 	unsigned int cmd;
 	const u8 *txbuf;
 	u32 data;
 
 	txbuf = t->tx_buf;
 	cmd = qspi->cmd | QSPI_WR_SNGL;
-	count = t->len;
 	wlen = t->bits_per_word >> 3;	/* in bytes */
 	xfer_len = wlen;
 
@@ -294,9 +294,10 @@ static int qspi_write_msg(struct ti_qspi *qspi, struct spi_transfer *t)
 	return 0;
 }
 
-static int qspi_read_msg(struct ti_qspi *qspi, struct spi_transfer *t)
+static int qspi_read_msg(struct ti_qspi *qspi, struct spi_transfer *t,
+			 int count)
 {
-	int wlen, count;
+	int wlen;
 	unsigned int cmd;
 	u8 *rxbuf;
 
@@ -313,7 +314,6 @@ static int qspi_read_msg(struct ti_qspi *qspi, struct spi_transfer *t)
 		cmd |= QSPI_RD_SNGL;
 		break;
 	}
-	count = t->len;
 	wlen = t->bits_per_word >> 3;	/* in bytes */
 
 	while (count) {
@@ -344,12 +344,13 @@ static int qspi_read_msg(struct ti_qspi *qspi, struct spi_transfer *t)
 	return 0;
 }
 
-static int qspi_transfer_msg(struct ti_qspi *qspi, struct spi_transfer *t)
+static int qspi_transfer_msg(struct ti_qspi *qspi, struct spi_transfer *t,
+			     int count)
 {
 	int ret;
 
 	if (t->tx_buf) {
-		ret = qspi_write_msg(qspi, t);
+		ret = qspi_write_msg(qspi, t, count);
 		if (ret) {
 			dev_dbg(qspi->dev, "Error while writing\n");
 			return ret;
@@ -357,7 +358,7 @@ static int qspi_transfer_msg(struct ti_qspi *qspi, struct spi_transfer *t)
 	}
 
 	if (t->rx_buf) {
-		ret = qspi_read_msg(qspi, t);
+		ret = qspi_read_msg(qspi, t, count);
 		if (ret) {
 			dev_dbg(qspi->dev, "Error while reading\n");
 			return ret;
@@ -374,7 +375,8 @@ static int ti_qspi_start_transfer_one(struct spi_master *master,
 	struct spi_device *spi = m->spi;
 	struct spi_transfer *t;
 	int status = 0, ret;
-	unsigned int frame_len_words;
+	unsigned int frame_len_words, transfer_len_words;
+	int wlen;
 
 	/* setup device control reg */
 	qspi->dc = 0;
@@ -404,14 +406,20 @@ static int ti_qspi_start_transfer_one(struct spi_master *master,
 		qspi->cmd = ((qspi->cmd & ~QSPI_WLEN_MASK) |
 			     QSPI_WLEN(t->bits_per_word));
 
-		ret = qspi_transfer_msg(qspi, t);
+		wlen = t->bits_per_word >> 3;
+		transfer_len_words = min(t->len / wlen, frame_len_words);
+
+		ret = qspi_transfer_msg(qspi, t, transfer_len_words * wlen);
 		if (ret) {
 			dev_dbg(qspi->dev, "transfer message failed\n");
 			mutex_unlock(&qspi->list_lock);
 			return -EINVAL;
 		}
 
-		m->actual_length += t->len;
+		m->actual_length += transfer_len_words * wlen;
+		frame_len_words -= transfer_len_words;
+		if (frame_len_words == 0)
+			break;
 	}
 
 	mutex_unlock(&qspi->list_lock);

From 1fba064f789e053be80f1ee38c70abde71f584e2 Mon Sep 17 00:00:00 2001
From: Ludovic Desroches <ludovic.desroches@atmel.com>
Date: Tue, 19 Apr 2016 16:03:45 +0200
Subject: [PATCH 404/424] pinctrl: at91-pio4: fix pull-up/down logic

commit 5305a7b7e860bb40ab226bc7d58019416073948a upstream.

The default configuration of a pin is often with a value in the
pull-up/down field at chip reset. So, even if the internal logic of the
controller prevents writing a configuration with pull-up and pull-down at
the same time, we must ensure explicitly this condition before writing the
register.

This was leading to a pull-down condition not taken into account for
instance.

Signed-off-by: Ludovic Desroches <ludovic.desroches@atmel.com>
Fixes: 776180848b57 ("pinctrl: introduce driver for Atmel PIO4 controller")
Acked-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Acked-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/pinctrl/pinctrl-at91-pio4.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/pinctrl/pinctrl-at91-pio4.c b/drivers/pinctrl/pinctrl-at91-pio4.c
index 33edd07d9149..b3235fd2950c 100644
--- a/drivers/pinctrl/pinctrl-at91-pio4.c
+++ b/drivers/pinctrl/pinctrl-at91-pio4.c
@@ -717,9 +717,11 @@ static int atmel_conf_pin_config_group_set(struct pinctrl_dev *pctldev,
 			break;
 		case PIN_CONFIG_BIAS_PULL_UP:
 			conf |= ATMEL_PIO_PUEN_MASK;
+			conf &= (~ATMEL_PIO_PDEN_MASK);
 			break;
 		case PIN_CONFIG_BIAS_PULL_DOWN:
 			conf |= ATMEL_PIO_PDEN_MASK;
+			conf &= (~ATMEL_PIO_PUEN_MASK);
 			break;
 		case PIN_CONFIG_DRIVE_OPEN_DRAIN:
 			if (arg == 0)

From 4e7e3f54cae7a6ea567c09d18c35bcc468b564c3 Mon Sep 17 00:00:00 2001
From: Jack Pham <jackp@codeaurora.org>
Date: Thu, 14 Apr 2016 23:37:26 -0700
Subject: [PATCH 405/424] regmap: spmi: Fix regmap_spmi_ext_read in multi-byte
 case

commit dec8e8f6e6504aa3496c0f7cc10c756bb0e10f44 upstream.

Specifically for the case of reads that use the Extended Register
Read Long command, a multi-byte read operation is broken up into
8-byte chunks.  However the call to spmi_ext_register_readl() is
incorrectly passing 'val_size', which if greater than 8 will
always fail.  The argument should instead be 'len'.

Fixes: c9afbb05a9ff ("regmap: spmi: support base and extended register spaces")
Signed-off-by: Jack Pham <jackp@codeaurora.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/regmap/regmap-spmi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/base/regmap/regmap-spmi.c b/drivers/base/regmap/regmap-spmi.c
index 7e58f6560399..4a36e415e938 100644
--- a/drivers/base/regmap/regmap-spmi.c
+++ b/drivers/base/regmap/regmap-spmi.c
@@ -142,7 +142,7 @@ static int regmap_spmi_ext_read(void *context,
 	while (val_size) {
 		len = min_t(size_t, val_size, 8);
 
-		err = spmi_ext_register_readl(context, addr, val, val_size);
+		err = spmi_ext_register_readl(context, addr, val, len);
 		if (err)
 			goto err_out;
 

From e54c41be42cd77cf11817bc50f91933cda3903d9 Mon Sep 17 00:00:00 2001
From: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Date: Tue, 10 May 2016 16:18:33 +0300
Subject: [PATCH 406/424] perf/core: Disable the event on a truncated AUX
 record

commit 9f448cd3cbcec8995935e60b27802ae56aac8cc0 upstream.

When the PMU driver reports a truncated AUX record, it effectively means
that there is no more usable room in the event's AUX buffer (even though
there may still be some room, so that perf_aux_output_begin() doesn't take
action). At this point the consumer still has to be woken up and the event
has to be disabled, otherwise the event will just keep spinning between
perf_aux_output_begin() and perf_aux_output_end() until its context gets
unscheduled.

Again, for cpu-wide events this means never, so once in this condition,
they will be forever losing data.

Fix this by disabling the event and waking up the consumer in case of a
truncated AUX record.

Reported-by: Markus Metzger <markus.t.metzger@intel.com>
Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: vince@deater.net
Link: http://lkml.kernel.org/r/1462886313-13660-3-git-send-email-alexander.shishkin@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/events/ring_buffer.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index adfdc0536117..014b69528194 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -347,6 +347,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size,
 			 bool truncated)
 {
 	struct ring_buffer *rb = handle->rb;
+	bool wakeup = truncated;
 	unsigned long aux_head;
 	u64 flags = 0;
 
@@ -375,9 +376,16 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size,
 	aux_head = rb->user_page->aux_head = local_read(&rb->aux_head);
 
 	if (aux_head - local_read(&rb->aux_wakeup) >= rb->aux_watermark) {
-		perf_output_wakeup(handle);
+		wakeup = true;
 		local_add(rb->aux_watermark, &rb->aux_wakeup);
 	}
+
+	if (wakeup) {
+		if (truncated)
+			handle->event->pending_disable = 1;
+		perf_output_wakeup(handle);
+	}
+
 	handle->event = NULL;
 
 	local_set(&rb->aux_nest, 0);

From b0dac61d2454b392dbdb4ad565f9dc3dc76fce96 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Wed, 11 May 2016 01:16:37 +0200
Subject: [PATCH 407/424] vfs: add vfs_select_inode() helper

commit 54d5ca871e72f2bb172ec9323497f01cd5091ec7 upstream.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/open.c              | 12 ++++--------
 include/linux/dcache.h | 12 ++++++++++++
 2 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/fs/open.c b/fs/open.c
index 6a24f988d253..157b9940dd73 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -840,16 +840,12 @@ EXPORT_SYMBOL(file_path);
 int vfs_open(const struct path *path, struct file *file,
 	     const struct cred *cred)
 {
-	struct dentry *dentry = path->dentry;
-	struct inode *inode = dentry->d_inode;
+	struct inode *inode = vfs_select_inode(path->dentry, file->f_flags);
+
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
 
 	file->f_path = *path;
-	if (dentry->d_flags & DCACHE_OP_SELECT_INODE) {
-		inode = dentry->d_op->d_select_inode(dentry, file->f_flags);
-		if (IS_ERR(inode))
-			return PTR_ERR(inode);
-	}
-
 	return do_dentry_open(file, inode, NULL, cred);
 }
 
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index f513dd855cb2..d81746d3b2da 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -592,4 +592,16 @@ static inline struct dentry *d_real(struct dentry *dentry)
 		return dentry;
 }
 
+static inline struct inode *vfs_select_inode(struct dentry *dentry,
+					     unsigned open_flags)
+{
+	struct inode *inode = d_inode(dentry);
+
+	if (inode && unlikely(dentry->d_flags & DCACHE_OP_SELECT_INODE))
+		inode = dentry->d_op->d_select_inode(dentry, open_flags);
+
+	return inode;
+}
+
+
 #endif	/* __LINUX_DCACHE_H */

From 8e5bb3c5417fd98c8966807dc07cc1b687da97c4 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Wed, 11 May 2016 01:16:37 +0200
Subject: [PATCH 408/424] vfs: rename: check backing inode being equal

commit 9409e22acdfc9153f88d9b1ed2bd2a5b34d2d3ca upstream.

If a file is renamed to a hardlink of itself POSIX specifies that rename(2)
should do nothing and return success.

This condition is checked in vfs_rename().  However it won't detect hard
links on overlayfs where these are given separate inodes on the overlayfs
layer.

Overlayfs itself detects this condition and returns success without doing
anything, but then vfs_rename() will proceed as if this was a successful
rename (detach_mounts(), d_move()).

The correct thing to do is to detect this condition before even calling
into overlayfs.  This patch does this by calling vfs_select_inode() to get
the underlying inodes.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/namei.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/fs/namei.c b/fs/namei.c
index d8ee4da93650..0202aebb9813 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -4195,7 +4195,11 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	bool new_is_dir = false;
 	unsigned max_links = new_dir->i_sb->s_max_links;
 
-	if (source == target)
+	/*
+	 * Check source == target.
+	 * On overlayfs need to look at underlying inodes.
+	 */
+	if (vfs_select_inode(old_dentry, 0) == vfs_select_inode(new_dentry, 0))
 		return 0;
 
 	error = may_delete(old_dir, old_dentry, is_dir);

From b66cb8c5137da3dda2f32b2bcc69d3937f6d4899 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Wed, 11 May 2016 11:00:02 +0200
Subject: [PATCH 409/424] ARM: dts: at91: sam9x5: Fix the memory range assigned
 to the PMC

commit aab0a4c83ceb344d2327194bf354820e50607af6 upstream.

The memory range assigned to the PMC (Power Management Controller) was
not including the PMC_PCR register which are used to control peripheral
clocks.

This was working fine thanks to the page granularity of ioremap(), but
started to fail when we switched to syscon/regmap, because regmap is
making sure that all accesses are falling into the reserved range.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Reported-by: Richard Genoud <richard.genoud@gmail.com>
Tested-by: Richard Genoud <richard.genoud@gmail.com>
Fixes: 863a81c3be1d ("clk: at91: make use of syscon to share PMC registers in several drivers")
Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/boot/dts/at91sam9x5.dtsi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/at91sam9x5.dtsi b/arch/arm/boot/dts/at91sam9x5.dtsi
index 0827d594b1f0..cd0cd5fd09a3 100644
--- a/arch/arm/boot/dts/at91sam9x5.dtsi
+++ b/arch/arm/boot/dts/at91sam9x5.dtsi
@@ -106,7 +106,7 @@
 
 			pmc: pmc@fffffc00 {
 				compatible = "atmel,at91sam9x5-pmc", "syscon";
-				reg = <0xfffffc00 0x100>;
+				reg = <0xfffffc00 0x200>;
 				interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>;
 				interrupt-controller;
 				#address-cells = <1>;

From cf73d8ad76e4555a45ee399887b7c0361354d10f Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpeng.li@hotmail.com>
Date: Wed, 11 May 2016 17:55:18 +0800
Subject: [PATCH 410/424] workqueue: fix rebind bound workers warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit f7c17d26f43d5cc1b7a6b896cd2fa24a079739b9 upstream.

------------[ cut here ]------------
WARNING: CPU: 0 PID: 16 at kernel/workqueue.c:4559 rebind_workers+0x1c0/0x1d0
Modules linked in:
CPU: 0 PID: 16 Comm: cpuhp/0 Not tainted 4.6.0-rc4+ #31
Hardware name: IBM IBM System x3550 M4 Server -[7914IUW]-/00Y8603, BIOS -[D7E128FUS-1.40]- 07/23/2013
 0000000000000000 ffff881037babb58 ffffffff8139d885 0000000000000010
 0000000000000000 0000000000000000 0000000000000000 ffff881037babba8
 ffffffff8108505d ffff881037ba0000 000011cf3e7d6e60 0000000000000046
Call Trace:
 dump_stack+0x89/0xd4
 __warn+0xfd/0x120
 warn_slowpath_null+0x1d/0x20
 rebind_workers+0x1c0/0x1d0
 workqueue_cpu_up_callback+0xf5/0x1d0
 notifier_call_chain+0x64/0x90
 ? trace_hardirqs_on_caller+0xf2/0x220
 ? notify_prepare+0x80/0x80
 __raw_notifier_call_chain+0xe/0x10
 __cpu_notify+0x35/0x50
 notify_down_prepare+0x5e/0x80
 ? notify_prepare+0x80/0x80
 cpuhp_invoke_callback+0x73/0x330
 ? __schedule+0x33e/0x8a0
 cpuhp_down_callbacks+0x51/0xc0
 cpuhp_thread_fun+0xc1/0xf0
 smpboot_thread_fn+0x159/0x2a0
 ? smpboot_create_threads+0x80/0x80
 kthread+0xef/0x110
 ? wait_for_completion+0xf0/0x120
 ? schedule_tail+0x35/0xf0
 ret_from_fork+0x22/0x50
 ? __init_kthread_worker+0x70/0x70
---[ end trace eb12ae47d2382d8f ]---
notify_down_prepare: attempt to take down CPU 0 failed

This bug can be reproduced by below config w/ nohz_full= all cpus:

CONFIG_BOOTPARAM_HOTPLUG_CPU0=y
CONFIG_DEBUG_HOTPLUG_CPU0=y
CONFIG_NO_HZ_FULL=y

As Thomas pointed out:

| If a down prepare callback fails, then DOWN_FAILED is invoked for all
| callbacks which have successfully executed DOWN_PREPARE.
|
| But, workqueue has actually two notifiers. One which handles
| UP/DOWN_FAILED/ONLINE and one which handles DOWN_PREPARE.
|
| Now look at the priorities of those callbacks:
|
| CPU_PRI_WORKQUEUE_UP        = 5
| CPU_PRI_WORKQUEUE_DOWN      = -5
|
| So the call order on DOWN_PREPARE is:
|
| CB 1
| CB ...
| CB workqueue_up() -> Ignores DOWN_PREPARE
| CB ...
| CB X ---> Fails
|
| So we call up to CB X with DOWN_FAILED
|
| CB 1
| CB ...
| CB workqueue_up() -> Handles DOWN_FAILED
| CB ...
| CB X-1
|
| So the problem is that the workqueue stuff handles DOWN_FAILED in the up
| callback, while it should do it in the down callback. Which is not a good idea
| either because it wants to be called early on rollback...
|
| Brilliant stuff, isn't it? The hotplug rework will solve this problem because
| the callbacks become symetric, but for the existing mess, we need some
| workaround in the workqueue code.

The boot CPU handles housekeeping duty(unbound timers, workqueues,
timekeeping, ...) on behalf of full dynticks CPUs. It must remain
online when nohz full is enabled. There is a priority set to every
notifier_blocks:

workqueue_cpu_up > tick_nohz_cpu_down > workqueue_cpu_down

So tick_nohz_cpu_down callback failed when down prepare cpu 0, and
notifier_blocks behind tick_nohz_cpu_down will not be called any
more, which leads to workers are actually not unbound. Then hotplug
state machine will fallback to undo and online cpu 0 again. Workers
will be rebound unconditionally even if they are not unbound and
trigger the warning in this progress.

This patch fix it by catching !DISASSOCIATED to avoid rebind bound
workers.

Cc: Tejun Heo <tj@kernel.org>
Cc: Lai Jiangshan <jiangshanlai@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Suggested-by: Lai Jiangshan <jiangshanlai@gmail.com>
Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/workqueue.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 0ec05948a97b..2c2f971f3e75 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -4457,6 +4457,17 @@ static void rebind_workers(struct worker_pool *pool)
 						  pool->attrs->cpumask) < 0);
 
 	spin_lock_irq(&pool->lock);
+
+	/*
+	 * XXX: CPU hotplug notifiers are weird and can call DOWN_FAILED
+	 * w/o preceding DOWN_PREPARE.  Work around it.  CPU hotplug is
+	 * being reworked and this can go away in time.
+	 */
+	if (!(pool->flags & POOL_DISASSOCIATED)) {
+		spin_unlock_irq(&pool->lock);
+		return;
+	}
+
 	pool->flags &= ~POOL_DISASSOCIATED;
 
 	for_each_pool_worker(worker, pool) {

From fc2d8c98f754993f0c66d373672418a503e6e299 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Date: Mon, 28 Mar 2016 13:09:56 +0900
Subject: [PATCH 411/424] regulator: s2mps11: Fix invalid selector mask and
 voltages for buck9

commit 3b672623079bb3e5685b8549e514f2dfaa564406 upstream.

The buck9 regulator of S2MPS11 PMIC had incorrect vsel_mask (0xff
instead of 0x1f) thus reading entire register as buck9's voltage. This
effectively caused regulator core to interpret values as higher voltages
than they were and then to set real voltage much lower than intended.

The buck9 provides power to other regulators, including LDO13
and LDO19 which supply the MMC2 (SD card). On Odroid XU3/XU4 the lower
voltage caused SD card detection errors on Odroid XU3/XU4:
	mmc1: card never left busy state
	mmc1: error -110 whilst initialising SD card

During driver probe the regulator core was checking whether initial
voltage matches the constraints. With incorrect vsel_mask of 0xff and
default value of 0x50, the core interpreted this as 5 V which is outside
of constraints (3-3.775 V). Then the regulator core was adjusting the
voltage to match the constraints. With incorrect vsel_mask this new
voltage mapped to a vere low voltage in the driver.

Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Reviewed-by: Javier Martinez Canillas <javier@osg.samsung.com>
Tested-by: Javier Martinez Canillas <javier@osg.samsung.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/regulator/s2mps11.c         | 28 ++++++++++++++++++++++------
 include/linux/mfd/samsung/s2mps11.h |  2 ++
 2 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/drivers/regulator/s2mps11.c b/drivers/regulator/s2mps11.c
index 72fc3c32db49..b6d831b84e1d 100644
--- a/drivers/regulator/s2mps11.c
+++ b/drivers/regulator/s2mps11.c
@@ -305,7 +305,7 @@ static struct regulator_ops s2mps11_buck_ops = {
 	.enable_mask	= S2MPS11_ENABLE_MASK			\
 }
 
-#define regulator_desc_s2mps11_buck6_10(num, min, step) {	\
+#define regulator_desc_s2mps11_buck67810(num, min, step) {	\
 	.name		= "BUCK"#num,				\
 	.id		= S2MPS11_BUCK##num,			\
 	.ops		= &s2mps11_buck_ops,			\
@@ -321,6 +321,22 @@ static struct regulator_ops s2mps11_buck_ops = {
 	.enable_mask	= S2MPS11_ENABLE_MASK			\
 }
 
+#define regulator_desc_s2mps11_buck9 {				\
+	.name		= "BUCK9",				\
+	.id		= S2MPS11_BUCK9,			\
+	.ops		= &s2mps11_buck_ops,			\
+	.type		= REGULATOR_VOLTAGE,			\
+	.owner		= THIS_MODULE,				\
+	.min_uV		= MIN_3000_MV,				\
+	.uV_step	= STEP_25_MV,				\
+	.n_voltages	= S2MPS11_BUCK9_N_VOLTAGES,		\
+	.ramp_delay	= S2MPS11_RAMP_DELAY,			\
+	.vsel_reg	= S2MPS11_REG_B9CTRL2,			\
+	.vsel_mask	= S2MPS11_BUCK9_VSEL_MASK,		\
+	.enable_reg	= S2MPS11_REG_B9CTRL1,			\
+	.enable_mask	= S2MPS11_ENABLE_MASK			\
+}
+
 static const struct regulator_desc s2mps11_regulators[] = {
 	regulator_desc_s2mps11_ldo(1, STEP_25_MV),
 	regulator_desc_s2mps11_ldo(2, STEP_50_MV),
@@ -365,11 +381,11 @@ static const struct regulator_desc s2mps11_regulators[] = {
 	regulator_desc_s2mps11_buck1_4(3),
 	regulator_desc_s2mps11_buck1_4(4),
 	regulator_desc_s2mps11_buck5,
-	regulator_desc_s2mps11_buck6_10(6, MIN_600_MV, STEP_6_25_MV),
-	regulator_desc_s2mps11_buck6_10(7, MIN_600_MV, STEP_6_25_MV),
-	regulator_desc_s2mps11_buck6_10(8, MIN_600_MV, STEP_6_25_MV),
-	regulator_desc_s2mps11_buck6_10(9, MIN_3000_MV, STEP_25_MV),
-	regulator_desc_s2mps11_buck6_10(10, MIN_750_MV, STEP_12_5_MV),
+	regulator_desc_s2mps11_buck67810(6, MIN_600_MV, STEP_6_25_MV),
+	regulator_desc_s2mps11_buck67810(7, MIN_600_MV, STEP_6_25_MV),
+	regulator_desc_s2mps11_buck67810(8, MIN_600_MV, STEP_6_25_MV),
+	regulator_desc_s2mps11_buck9,
+	regulator_desc_s2mps11_buck67810(10, MIN_750_MV, STEP_12_5_MV),
 };
 
 static struct regulator_ops s2mps14_reg_ops;
diff --git a/include/linux/mfd/samsung/s2mps11.h b/include/linux/mfd/samsung/s2mps11.h
index b288965e8101..2c14eeca46f0 100644
--- a/include/linux/mfd/samsung/s2mps11.h
+++ b/include/linux/mfd/samsung/s2mps11.h
@@ -173,10 +173,12 @@ enum s2mps11_regulators {
 
 #define S2MPS11_LDO_VSEL_MASK	0x3F
 #define S2MPS11_BUCK_VSEL_MASK	0xFF
+#define S2MPS11_BUCK9_VSEL_MASK	0x1F
 #define S2MPS11_ENABLE_MASK	(0x03 << S2MPS11_ENABLE_SHIFT)
 #define S2MPS11_ENABLE_SHIFT	0x06
 #define S2MPS11_LDO_N_VOLTAGES	(S2MPS11_LDO_VSEL_MASK + 1)
 #define S2MPS11_BUCK_N_VOLTAGES (S2MPS11_BUCK_VSEL_MASK + 1)
+#define S2MPS11_BUCK9_N_VOLTAGES (S2MPS11_BUCK9_VSEL_MASK + 1)
 #define S2MPS11_RAMP_DELAY	25000		/* uV/us */
 
 #define S2MPS11_CTRL1_PWRHOLD_MASK	BIT(4)

From b657027854182f708d74424998fe95624f7c69c4 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Wed, 27 Apr 2016 15:59:27 +0200
Subject: [PATCH 412/424] regulator: axp20x: Fix axp22x ldo_io voltage ranges

commit a2262e5a12e05389ab4c7fc5cf60016b041dd8dc upstream.

The minium voltage of 1800mV is a copy and paste error from the axp20x
regulator info. The correct minimum voltage for the ldo_io regulators
on the axp22x is 700mV.

Fixes: 1b82b4e4f954 ("regulator: axp20x: Add support for AXP22X regulators")
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Acked-by: Chen-Yu Tsai <wens@csie.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/regulator/axp20x-regulator.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/regulator/axp20x-regulator.c b/drivers/regulator/axp20x-regulator.c
index f2e1a39ce0f3..5cf4a97e0304 100644
--- a/drivers/regulator/axp20x-regulator.c
+++ b/drivers/regulator/axp20x-regulator.c
@@ -221,10 +221,10 @@ static const struct regulator_desc axp22x_regulators[] = {
 		 AXP22X_ELDO2_V_OUT, 0x1f, AXP22X_PWR_OUT_CTRL2, BIT(1)),
 	AXP_DESC(AXP22X, ELDO3, "eldo3", "eldoin", 700, 3300, 100,
 		 AXP22X_ELDO3_V_OUT, 0x1f, AXP22X_PWR_OUT_CTRL2, BIT(2)),
-	AXP_DESC_IO(AXP22X, LDO_IO0, "ldo_io0", "ips", 1800, 3300, 100,
+	AXP_DESC_IO(AXP22X, LDO_IO0, "ldo_io0", "ips", 700, 3300, 100,
 		    AXP22X_LDO_IO0_V_OUT, 0x1f, AXP20X_GPIO0_CTRL, 0x07,
 		    AXP22X_IO_ENABLED, AXP22X_IO_DISABLED),
-	AXP_DESC_IO(AXP22X, LDO_IO1, "ldo_io1", "ips", 1800, 3300, 100,
+	AXP_DESC_IO(AXP22X, LDO_IO1, "ldo_io1", "ips", 700, 3300, 100,
 		    AXP22X_LDO_IO1_V_OUT, 0x1f, AXP20X_GPIO1_CTRL, 0x07,
 		    AXP22X_IO_ENABLED, AXP22X_IO_DISABLED),
 	AXP_DESC_FIXED(AXP22X, RTC_LDO, "rtc_ldo", "ips", 3000),

From 4549fc712830400a3af34292ffdaa025f7e0c36e Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 27 Apr 2016 01:11:55 -0400
Subject: [PATCH 413/424] atomic_open(): fix the handling of create_error

commit 10c64cea04d3c75c306b3f990586ffb343b63287 upstream.

* if we have a hashed negative dentry and either CREAT|EXCL on
r/o filesystem, or CREAT|TRUNC on r/o filesystem, or CREAT|EXCL
with failing may_o_create(), we should fail with EROFS or the
error may_o_create() has returned, but not ENOENT.  Which is what
the current code ends up returning.

* if we have CREAT|TRUNC hitting a regular file on a read-only
filesystem, we can't fail with EROFS here.  At the very least,
not until we'd done follow_managed() - we might have a writable
file (or a device, for that matter) bound on top of that one.
Moreover, the code downstream will see that O_TRUNC and attempt
to grab the write access (*after* following possible mount), so
if we really should fail with EROFS, it will happen.  No need
to do that inside atomic_open().

The real logics is much simpler than what the current code is
trying to do - if we decided to go for simple lookup, ended
up with a negative dentry *and* had create_error set, fail with
create_error.  No matter whether we'd got that negative dentry
from lookup_real() or had found it in dcache.

Acked-by: Miklos Szeredi <mszeredi@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/namei.c | 20 ++++----------------
 1 file changed, 4 insertions(+), 16 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index 0202aebb9813..209ca7737cb2 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2906,22 +2906,10 @@ no_open:
 		dentry = lookup_real(dir, dentry, nd->flags);
 		if (IS_ERR(dentry))
 			return PTR_ERR(dentry);
-
-		if (create_error) {
-			int open_flag = op->open_flag;
-
-			error = create_error;
-			if ((open_flag & O_EXCL)) {
-				if (!dentry->d_inode)
-					goto out;
-			} else if (!dentry->d_inode) {
-				goto out;
-			} else if ((open_flag & O_TRUNC) &&
-				   d_is_reg(dentry)) {
-				goto out;
-			}
-			/* will fail later, go on to get the right error */
-		}
+	}
+	if (create_error && !dentry->d_inode) {
+		error = create_error;
+		goto out;
 	}
 looked_up:
 	path->dentry = dentry;

From 4c127a3e21daa77e437828728d40084aa428415c Mon Sep 17 00:00:00 2001
From: Johannes Thumshirn <jthumshirn@suse.de>
Date: Wed, 27 Apr 2016 10:48:52 +0200
Subject: [PATCH 414/424] qla1280: Don't allocate 512kb of host tags

commit 2bcbc81421c511ef117cadcf0bee9c4340e68db0 upstream.

The qla1280 driver sets the scsi_host_template's can_queue field to 0xfffff
which results in an allocation failure when allocating the block layer tags
for the driver's queues. This was introduced with the change for host wide
tags in commit 64d513ac31b - "scsi: use host wide tags by default".

Reduce can_queue to MAX_OUTSTANDING_COMMANDS (512) to solve the allocation
error.

Signed-off-by: Johannes Thumshirn <jthumshirn@suse.de>
Fixes: 64d513ac31b - "scsi: use host wide tags by default"
Cc: Laura Abbott <labbott@redhat.com>
Cc: Michael Reed <mdr@sgi.com>
Reviewed-by: Laurence Oberman <loberman@redhat.com>
Reviewed-by: Lee Duncan <lduncan@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: James Bottomley <jejb@linux.vnet.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/qla1280.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/qla1280.c b/drivers/scsi/qla1280.c
index 5d0ec42a9317..634254a52301 100644
--- a/drivers/scsi/qla1280.c
+++ b/drivers/scsi/qla1280.c
@@ -4214,7 +4214,7 @@ static struct scsi_host_template qla1280_driver_template = {
 	.eh_bus_reset_handler	= qla1280_eh_bus_reset,
 	.eh_host_reset_handler	= qla1280_eh_adapter_reset,
 	.bios_param		= qla1280_biosparam,
-	.can_queue		= 0xfffff,
+	.can_queue		= MAX_OUTSTANDING_COMMANDS,
 	.this_id		= -1,
 	.sg_tablesize		= SG_ALL,
 	.use_clustering		= ENABLE_CLUSTERING,

From 35eb30c213ab718d1c66d44bcfda524184356250 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Wed, 11 May 2016 15:09:36 -0400
Subject: [PATCH 415/424] tools lib traceevent: Do not reassign parg after
 collapse_tree()

commit 106b816cb46ebd87408b4ed99a2e16203114daa6 upstream.

At the end of process_filter(), collapse_tree() was changed to update
the parg parameter, but the reassignment after the call wasn't removed.

What happens is that the "current_op" gets modified and freed and parg
is assigned to the new allocated argument. But after the call to
collapse_tree(), parg is assigned again to the just freed "current_op",
and this causes the tool to crash.

The current_op variable must also be assigned to NULL in case of error,
otherwise it will cause it to be free()ed twice.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Fixes: 42d6194d133c ("tools lib traceevent: Refactor process_filter()")
Link: http://lkml.kernel.org/r/20160511150936.678c18a1@gandalf.local.home
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/lib/traceevent/parse-filter.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c
index 0144b3d1bb77..88cccea3ca99 100644
--- a/tools/lib/traceevent/parse-filter.c
+++ b/tools/lib/traceevent/parse-filter.c
@@ -1164,11 +1164,11 @@ process_filter(struct event_format *event, struct filter_arg **parg,
 		current_op = current_exp;
 
 	ret = collapse_tree(current_op, parg, error_str);
+	/* collapse_tree() may free current_op, and updates parg accordingly */
+	current_op = NULL;
 	if (ret < 0)
 		goto fail;
 
-	*parg = current_op;
-
 	free(token);
 	return 0;
 

From 007796c01f0b293c68585397211af2b390bf126d Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 5 May 2016 16:25:35 -0400
Subject: [PATCH 416/424] get_rock_ridge_filename(): handle malformed NM
 entries

commit 99d825822eade8d827a1817357cbf3f889a552d6 upstream.

Payloads of NM entries are not supposed to contain NUL.  When we run
into such, only the part prior to the first NUL goes into the
concatenation (i.e. the directory entry name being encoded by a bunch
of NM entries).  We do stop when the amount collected so far + the
claimed amount in the current NM entry exceed 254.  So far, so good,
but what we return as the total length is the sum of *claimed*
sizes, not the actual amount collected.  And that can grow pretty
large - not unlimited, since you'd need to put CE entries in
between to be able to get more than the maximum that could be
contained in one isofs directory entry / continuation chunk and
we are stop once we'd encountered 32 CEs, but you can get about 8Kb
easily.  And that's what will be passed to readdir callback as the
name length.  8Kb __copy_to_user() from a buffer allocated by
__get_free_page()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/isofs/rock.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
index 735d7522a3a9..204659a5f6db 100644
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -203,6 +203,8 @@ int get_rock_ridge_filename(struct iso_directory_record *de,
 	int retnamlen = 0;
 	int truncate = 0;
 	int ret = 0;
+	char *p;
+	int len;
 
 	if (!ISOFS_SB(inode->i_sb)->s_rock)
 		return 0;
@@ -267,12 +269,17 @@ repeat:
 					rr->u.NM.flags);
 				break;
 			}
-			if ((strlen(retname) + rr->len - 5) >= 254) {
+			len = rr->len - 5;
+			if (retnamlen + len >= 254) {
 				truncate = 1;
 				break;
 			}
-			strncat(retname, rr->u.NM.name, rr->len - 5);
-			retnamlen += rr->len - 5;
+			p = memchr(rr->u.NM.name, '\0', len);
+			if (unlikely(p))
+				len = p - rr->u.NM.name;
+			memcpy(retname + retnamlen, rr->u.NM.name, len);
+			retnamlen += len;
+			retname[retnamlen] = '\0';
 			break;
 		case SIG('R', 'E'):
 			kfree(rs.buffer);

From 1abbf8044aa09aaa2c0810942a853bbf404f4198 Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Mon, 9 May 2016 09:31:47 -0700
Subject: [PATCH 417/424] Input: max8997-haptic - fix NULL pointer dereference

commit 6ae645d5fa385f3787bf1723639cd907fe5865e7 upstream.

NULL pointer derefence happens when booting with DTB because the
platform data for haptic device is not set in supplied data from parent
MFD device.

The MFD device creates only platform data (from Device Tree) for itself,
not for haptic child.

Unable to handle kernel NULL pointer dereference at virtual address 0000009c
pgd = c0004000
	[0000009c] *pgd=00000000
	Internal error: Oops: 5 [#1] PREEMPT SMP ARM
	(max8997_haptic_probe) from [<c03f9cec>] (platform_drv_probe+0x4c/0xb0)
	(platform_drv_probe) from [<c03f8440>] (driver_probe_device+0x214/0x2c0)
	(driver_probe_device) from [<c03f8598>] (__driver_attach+0xac/0xb0)
	(__driver_attach) from [<c03f67ac>] (bus_for_each_dev+0x68/0x9c)
	(bus_for_each_dev) from [<c03f7a38>] (bus_add_driver+0x1a0/0x218)
	(bus_add_driver) from [<c03f8db0>] (driver_register+0x78/0xf8)
	(driver_register) from [<c0101774>] (do_one_initcall+0x90/0x1d8)
	(do_one_initcall) from [<c0a00dbc>] (kernel_init_freeable+0x15c/0x1fc)
	(kernel_init_freeable) from [<c06bb5b4>] (kernel_init+0x8/0x114)
	(kernel_init) from [<c0107938>] (ret_from_fork+0x14/0x3c)

Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Fixes: 104594b01ce7 ("Input: add driver support for MAX8997-haptic")
[k.kozlowski: Write commit message, add CC-stable]
Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/input/misc/max8997_haptic.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/input/misc/max8997_haptic.c b/drivers/input/misc/max8997_haptic.c
index a806ba3818f7..8d6326d7e7be 100644
--- a/drivers/input/misc/max8997_haptic.c
+++ b/drivers/input/misc/max8997_haptic.c
@@ -255,12 +255,14 @@ static int max8997_haptic_probe(struct platform_device *pdev)
 	struct max8997_dev *iodev = dev_get_drvdata(pdev->dev.parent);
 	const struct max8997_platform_data *pdata =
 					dev_get_platdata(iodev->dev);
-	const struct max8997_haptic_platform_data *haptic_pdata =
-					pdata->haptic_pdata;
+	const struct max8997_haptic_platform_data *haptic_pdata = NULL;
 	struct max8997_haptic *chip;
 	struct input_dev *input_dev;
 	int error;
 
+	if (pdata)
+		haptic_pdata = pdata->haptic_pdata;
+
 	if (!haptic_pdata) {
 		dev_err(&pdev->dev, "no haptic platform data\n");
 		return -EINVAL;

From 9df2dc6cf4adb711545f48001b34f35fd3bb79ef Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Date: Wed, 11 May 2016 13:09:34 -0300
Subject: [PATCH 418/424] Revert "[media] videobuf2-v4l2: Verify planes array
 in buffer dequeueing"

commit 93f0750dcdaed083d6209b01e952e98ca730db66 upstream.

This patch causes a Kernel panic when called on a DVB driver.

This was also reported by David R <david@unsolicited.net>:

May  7 14:47:35 server kernel: [  501.247123] BUG: unable to handle kernel NULL pointer dereference at 0000000000000004
May  7 14:47:35 server kernel: [  501.247239] IP: [<ffffffffa0222c71>] __verify_planes_array.isra.3+0x1/0x80 [videobuf2_v4l2]
May  7 14:47:35 server kernel: [  501.247354] PGD cae6f067 PUD ca99c067 PMD 0
May  7 14:47:35 server kernel: [  501.247426] Oops: 0000 [#1] SMP
May  7 14:47:35 server kernel: [  501.247482] Modules linked in: xfs tun xt_connmark xt_TCPMSS xt_tcpmss xt_owner xt_REDIRECT nf_nat_redirect xt_nat ipt_MASQUERADE nf_nat_masquerade_ipv4 ts_kmp ts_bm xt_string ipt_REJECT nf_reject_ipv4 xt_recent xt_conntrack xt_multiport xt_pkttype xt_tcpudp xt_mark nf_log_ipv4 nf_log_common xt_LOG xt_limit iptable_mangle iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_filter ip_tables ip6table_filter ip6_tables x_tables pppoe pppox dm_crypt ts2020 regmap_i2c ds3000 cx88_dvb dvb_pll cx88_vp3054_i2c mt352 videobuf2_dvb cx8800 cx8802 cx88xx pl2303 tveeprom videobuf2_dma_sg ppdev videobuf2_memops videobuf2_v4l2 videobuf2_core dvb_usb_digitv snd_hda_codec_via snd_hda_codec_hdmi snd_hda_codec_generic radeon dvb_usb snd_hda_intel amd64_edac_mod serio_raw snd_hda_codec edac_core fbcon k10temp bitblit softcursor snd_hda_core font snd_pcm_oss i2c_piix4 snd_mixer_oss tileblit drm_kms_helper syscopyarea snd_pcm snd_seq_dummy sysfillrect snd_seq_oss sysimgblt fb_sys_fops ttm snd_seq_midi r8169 snd_rawmidi drm snd_seq_midi_event e1000e snd_seq snd_seq_device snd_timer snd ptp pps_core i2c_algo_bit soundcore parport_pc ohci_pci shpchp tpm_tis tpm nfsd auth_rpcgss oid_registry hwmon_vid exportfs nfs_acl mii nfs bonding lockd grace lp sunrpc parport
May  7 14:47:35 server kernel: [  501.249564] CPU: 1 PID: 6889 Comm: vb2-cx88[0] Not tainted 4.5.3 #3
May  7 14:47:35 server kernel: [  501.249644] Hardware name: System manufacturer System Product Name/M4A785TD-V EVO, BIOS 0211    07/08/2009
May  7 14:47:35 server kernel: [  501.249767] task: ffff8800aebf3600 ti: ffff8801e07a0000 task.ti: ffff8801e07a0000
May  7 14:47:35 server kernel: [  501.249861] RIP: 0010:[<ffffffffa0222c71>]  [<ffffffffa0222c71>] __verify_planes_array.isra.3+0x1/0x80 [videobuf2_v4l2]
May  7 14:47:35 server kernel: [  501.250002] RSP: 0018:ffff8801e07a3de8  EFLAGS: 00010086
May  7 14:47:35 server kernel: [  501.250071] RAX: 0000000000000283 RBX: ffff880210dc5000 RCX: 0000000000000283
May  7 14:47:35 server kernel: [  501.250161] RDX: ffffffffa0222cf0 RSI: 0000000000000000 RDI: ffff880210dc5014
May  7 14:47:35 server kernel: [  501.250251] RBP: ffff8801e07a3df8 R08: ffff8801e07a0000 R09: 0000000000000000
May  7 14:47:35 server kernel: [  501.250348] R10: 0000000000000000 R11: 0000000000000001 R12: ffff8800cda2a9d8
May  7 14:47:35 server kernel: [  501.250438] R13: ffff880210dc51b8 R14: 0000000000000000 R15: ffff8800cda2a828
May  7 14:47:35 server kernel: [  501.250528] FS:  00007f5b77fff700(0000) GS:ffff88021fc40000(0000) knlGS:00000000adaffb40
May  7 14:47:35 server kernel: [  501.250631] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
May  7 14:47:35 server kernel: [  501.250704] CR2: 0000000000000004 CR3: 00000000ca19d000 CR4: 00000000000006e0
May  7 14:47:35 server kernel: [  501.250794] Stack:
May  7 14:47:35 server kernel: [  501.250822]  ffff8801e07a3df8 ffffffffa0222cfd ffff8801e07a3e70 ffffffffa0236beb
May  7 14:47:35 server kernel: [  501.250937]  0000000000000283 ffff8801e07a3e94 0000000000000000 0000000000000000
May  7 14:47:35 server kernel: [  501.251051]  ffff8800aebf3600 ffffffff8108d8e0 ffff8801e07a3e38 ffff8801e07a3e38
May  7 14:47:35 server kernel: [  501.251165] Call Trace:
May  7 14:47:35 server kernel: [  501.251200]  [<ffffffffa0222cfd>] ? __verify_planes_array_core+0xd/0x10 [videobuf2_v4l2]
May  7 14:47:35 server kernel: [  501.251306]  [<ffffffffa0236beb>] vb2_core_dqbuf+0x2eb/0x4c0 [videobuf2_core]
May  7 14:47:35 server kernel: [  501.251398]  [<ffffffff8108d8e0>] ? prepare_to_wait_event+0x100/0x100
May  7 14:47:35 server kernel: [  501.251482]  [<ffffffffa023855b>] vb2_thread+0x1cb/0x220 [videobuf2_core]
May  7 14:47:35 server kernel: [  501.251569]  [<ffffffffa0238390>] ? vb2_core_qbuf+0x230/0x230 [videobuf2_core]
May  7 14:47:35 server kernel: [  501.251662]  [<ffffffffa0238390>] ? vb2_core_qbuf+0x230/0x230 [videobuf2_core]
May  7 14:47:35 server kernel: [  501.255982]  [<ffffffff8106f984>] kthread+0xc4/0xe0
May  7 14:47:35 server kernel: [  501.260292]  [<ffffffff8106f8c0>] ? kthread_park+0x50/0x50
May  7 14:47:35 server kernel: [  501.264615]  [<ffffffff81697a5f>] ret_from_fork+0x3f/0x70
May  7 14:47:35 server kernel: [  501.268962]  [<ffffffff8106f8c0>] ? kthread_park+0x50/0x50
May  7 14:47:35 server kernel: [  501.273216] Code: 0d 01 74 16 48 8b 46 28 48 8b 56 30 48 89 87 d0 01 00 00 48 89 97 d8 01 00 00 5d c3 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00 55 <8b> 46 04 48 89 e5 8d 50 f7 31 c0 83 fa 01 76 02 5d c3 48 83 7e
May  7 14:47:35 server kernel: [  501.282146] RIP  [<ffffffffa0222c71>] __verify_planes_array.isra.3+0x1/0x80 [videobuf2_v4l2]
May  7 14:47:35 server kernel: [  501.286391]  RSP <ffff8801e07a3de8>
May  7 14:47:35 server kernel: [  501.290619] CR2: 0000000000000004
May  7 14:47:35 server kernel: [  501.294786] ---[ end trace b2b354153ccad110 ]---

This reverts commit 2c1f6951a8a82e6de0d82b1158b5e493fc6c54ab.

Cc: Sakari Ailus <sakari.ailus@linux.intel.com>
Cc: Hans Verkuil <hans.verkuil@cisco.com>
Fixes: 2c1f6951a8a8 ("[media] videobuf2-v4l2: Verify planes array in buffer dequeueing")
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/v4l2-core/videobuf2-v4l2.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/media/v4l2-core/videobuf2-v4l2.c b/drivers/media/v4l2-core/videobuf2-v4l2.c
index 6c441be8f893..502984c724ff 100644
--- a/drivers/media/v4l2-core/videobuf2-v4l2.c
+++ b/drivers/media/v4l2-core/videobuf2-v4l2.c
@@ -67,11 +67,6 @@ static int __verify_planes_array(struct vb2_buffer *vb, const struct v4l2_buffer
 	return 0;
 }
 
-static int __verify_planes_array_core(struct vb2_buffer *vb, const void *pb)
-{
-	return __verify_planes_array(vb, pb);
-}
-
 /**
  * __verify_length() - Verify that the bytesused value for each plane fits in
  * the plane length and that the data offset doesn't exceed the bytesused value.
@@ -437,7 +432,6 @@ static int __fill_vb2_buffer(struct vb2_buffer *vb,
 }
 
 static const struct vb2_buf_ops v4l2_buf_ops = {
-	.verify_planes_array	= __verify_planes_array_core,
 	.fill_user_buffer	= __fill_v4l2_buffer,
 	.fill_vb2_buffer	= __fill_vb2_buffer,
 	.set_timestamp		= __set_timestamp,

From 472f52f5639238f569696082e0effbfb2171ad1a Mon Sep 17 00:00:00 2001
From: Lucas Stach <dev@lynxeye.de>
Date: Thu, 5 May 2016 10:16:44 -0400
Subject: [PATCH 419/424] drm/radeon: fix PLL sharing on DCE6.1 (v2)

commit e3c00d87845ab375f90fa6e10a5e72a3a5778cd3 upstream.

On DCE6.1 PPLL2 is exclusively available to UNIPHYA, so it should not
be taken into consideration when looking for an already enabled PLL
to be shared with other outputs.

This fixes the broken VGA port (TRAVIS DP->VGA bridge) on my Richland
based laptop, where the internal display is connected to UNIPHYA through
a TRAVIS DP->LVDS bridge.

Bug:
https://bugs.freedesktop.org/show_bug.cgi?id=78987

v2: agd: add check in radeon_get_shared_nondp_ppll as well, drop
    extra parameter.

Signed-off-by: Lucas Stach <dev@lynxeye.de>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/radeon/atombios_crtc.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c
index dac78ad24b31..79bab6fd76bb 100644
--- a/drivers/gpu/drm/radeon/atombios_crtc.c
+++ b/drivers/gpu/drm/radeon/atombios_crtc.c
@@ -1739,6 +1739,7 @@ static u32 radeon_get_pll_use_mask(struct drm_crtc *crtc)
 static int radeon_get_shared_dp_ppll(struct drm_crtc *crtc)
 {
 	struct drm_device *dev = crtc->dev;
+	struct radeon_device *rdev = dev->dev_private;
 	struct drm_crtc *test_crtc;
 	struct radeon_crtc *test_radeon_crtc;
 
@@ -1748,6 +1749,10 @@ static int radeon_get_shared_dp_ppll(struct drm_crtc *crtc)
 		test_radeon_crtc = to_radeon_crtc(test_crtc);
 		if (test_radeon_crtc->encoder &&
 		    ENCODER_MODE_IS_DP(atombios_get_encoder_mode(test_radeon_crtc->encoder))) {
+			/* PPLL2 is exclusive to UNIPHYA on DCE61 */
+			if (ASIC_IS_DCE61(rdev) && !ASIC_IS_DCE8(rdev) &&
+			    test_radeon_crtc->pll_id == ATOM_PPLL2)
+				continue;
 			/* for DP use the same PLL for all */
 			if (test_radeon_crtc->pll_id != ATOM_PPLL_INVALID)
 				return test_radeon_crtc->pll_id;
@@ -1769,6 +1774,7 @@ static int radeon_get_shared_nondp_ppll(struct drm_crtc *crtc)
 {
 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
 	struct drm_device *dev = crtc->dev;
+	struct radeon_device *rdev = dev->dev_private;
 	struct drm_crtc *test_crtc;
 	struct radeon_crtc *test_radeon_crtc;
 	u32 adjusted_clock, test_adjusted_clock;
@@ -1784,6 +1790,10 @@ static int radeon_get_shared_nondp_ppll(struct drm_crtc *crtc)
 		test_radeon_crtc = to_radeon_crtc(test_crtc);
 		if (test_radeon_crtc->encoder &&
 		    !ENCODER_MODE_IS_DP(atombios_get_encoder_mode(test_radeon_crtc->encoder))) {
+			/* PPLL2 is exclusive to UNIPHYA on DCE61 */
+			if (ASIC_IS_DCE61(rdev) && !ASIC_IS_DCE8(rdev) &&
+			    test_radeon_crtc->pll_id == ATOM_PPLL2)
+				continue;
 			/* check if we are already driving this connector with another crtc */
 			if (test_radeon_crtc->connector == radeon_crtc->connector) {
 				/* if we are, return that pll */

From bf12e894e6b4ae0181af83ce5f6bb5e05c744660 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Tue, 3 May 2016 10:33:01 +0200
Subject: [PATCH 420/424] drm/i915: Bail out of pipe config compute loop on LPT

commit 2700818ac9f935d8590715eecd7e8cadbca552b6 upstream.

LPT is pch, so might run into the fdi bandwidth constraint (especially
since it has only 2 lanes). But right now we just force pipe_bpp back
to 24, resulting in a nice loop (which we bail out with a loud
WARN_ON). Fix this.

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
References: https://bugs.freedesktop.org/show_bug.cgi?id=93477
Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
Tested-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/1462264381-7573-1-git-send-email-daniel.vetter@ffwll.ch
(cherry picked from commit f58a1acc7e4a1f37d26124ce4c875c647fbcc61f)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/i915/intel_crt.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c
index 6a2c76e367a5..97d1ed20418b 100644
--- a/drivers/gpu/drm/i915/intel_crt.c
+++ b/drivers/gpu/drm/i915/intel_crt.c
@@ -248,8 +248,14 @@ static bool intel_crt_compute_config(struct intel_encoder *encoder,
 		pipe_config->has_pch_encoder = true;
 
 	/* LPT FDI RX only supports 8bpc. */
-	if (HAS_PCH_LPT(dev))
+	if (HAS_PCH_LPT(dev)) {
+		if (pipe_config->bw_constrained && pipe_config->pipe_bpp < 24) {
+			DRM_DEBUG_KMS("LPT only supports 24bpp\n");
+			return false;
+		}
+
 		pipe_config->pipe_bpp = 24;
+	}
 
 	/* FDI must always be 2.7 GHz */
 	if (HAS_DDI(dev)) {

From bafa4fbc2b4ac51045fe7fb3de94bcd5560a56c7 Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Tue, 3 May 2016 15:54:19 +0300
Subject: [PATCH 421/424] drm/i915/bdw: Add missing delay during L3 SQC credit
 programming
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit d6a862fe8c48229ba342648bcd535b2404724603 upstream.

BSpec requires us to wait ~100 clocks before re-enabling clock gating,
so make sure we do this.

CC: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1462280061-1457-2-git-send-email-imre.deak@intel.com
(cherry picked from commit 48e5d68d28f00c0cadac5a830980ff3222781abb)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/i915/intel_pm.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index f091ad12d694..0a68d2ec89dc 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -6620,6 +6620,12 @@ static void broadwell_init_clock_gating(struct drm_device *dev)
 	misccpctl = I915_READ(GEN7_MISCCPCTL);
 	I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
 	I915_WRITE(GEN8_L3SQCREG1, BDW_WA_L3SQCREG1_DEFAULT);
+	/*
+	 * Wait at least 100 clocks before re-enabling clock gating. See
+	 * the definition of L3SQCREG1 in BSpec.
+	 */
+	POSTING_READ(GEN8_L3SQCREG1);
+	udelay(1);
 	I915_WRITE(GEN7_MISCCPCTL, misccpctl);
 
 	/*

From 62b68367b74b2456ee68deafab047067a6acae67 Mon Sep 17 00:00:00 2001
From: Arindam Nath <arindam.nath@amd.com>
Date: Wed, 4 May 2016 23:39:59 +0530
Subject: [PATCH 422/424] drm/radeon: fix DP link training issue with second 4K
 monitor

commit 1a738347df2ee4977459a8776fe2c62196bdcb1b upstream.

There is an issue observed when we hotplug a second DP
4K monitor to the system. Sometimes, the link training
fails for the second monitor after HPD interrupt
generation.

The issue happens when some queued or deferred transactions
are already present on the AUX channel when we initiate
a new transcation to (say) get DPCD or during link training.

We set AUX_IGNORE_HPD_DISCON bit in the AUX_CONTROL
register so that we can ignore any such deferred
transactions when a new AUX transaction is initiated.

Signed-off-by: Arindam Nath <arindam.nath@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/radeon/radeon_dp_auxch.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/radeon_dp_auxch.c b/drivers/gpu/drm/radeon/radeon_dp_auxch.c
index 3b0c229d7dcd..db64e0062689 100644
--- a/drivers/gpu/drm/radeon/radeon_dp_auxch.c
+++ b/drivers/gpu/drm/radeon/radeon_dp_auxch.c
@@ -105,7 +105,7 @@ radeon_dp_aux_transfer_native(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg
 
 	tmp &= AUX_HPD_SEL(0x7);
 	tmp |= AUX_HPD_SEL(chan->rec.hpd);
-	tmp |= AUX_EN | AUX_LS_READ_EN;
+	tmp |= AUX_EN | AUX_LS_READ_EN | AUX_HPD_DISCON(0x1);
 
 	WREG32(AUX_CONTROL + aux_offset[instance], tmp);
 

From 6ff8315a4df67bfad96cffc406f91ceb6df70cde Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sat, 14 May 2016 11:11:44 -0700
Subject: [PATCH 423/424] nf_conntrack: avoid kernel pointer value leak in slab
 name

commit 31b0b385f69d8d5491a4bca288e25e63f1d945d0 upstream.

The slab name ends up being visible in the directory structure under
/sys, and even if you don't have access rights to the file you can see
the filenames.

Just use a 64-bit counter instead of the pointer to the 'net' structure
to generate a unique name.

This code will go away in 4.7 when the conntrack code moves to a single
kmemcache, but this is the backportable simple solution to avoiding
leaking kernel pointers to user space.

Fixes: 5b3501faa874 ("netfilter: nf_conntrack: per netns nf_conntrack_cachep")
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/netfilter/nf_conntrack_core.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 3cb3cb831591..86a3c6f0c871 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1757,6 +1757,7 @@ void nf_conntrack_init_end(void)
 
 int nf_conntrack_init_net(struct net *net)
 {
+	static atomic64_t unique_id;
 	int ret = -ENOMEM;
 	int cpu;
 
@@ -1779,7 +1780,8 @@ int nf_conntrack_init_net(struct net *net)
 	if (!net->ct.stat)
 		goto err_pcpu_lists;
 
-	net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%p", net);
+	net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%llu",
+				(u64)atomic64_inc_return(&unique_id));
 	if (!net->ct.slabname)
 		goto err_slabname;
 

From 544ec5b08d007f184ab97abdbed87e613c8c0b83 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 18 May 2016 17:08:36 -0700
Subject: [PATCH 424/424] Linux 4.4.11

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 5b5f462f834c..aad86274b61b 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 4
-SUBLEVEL = 10
+SUBLEVEL = 11
 EXTRAVERSION =
 NAME = Blurry Fish Butt