From df127725783f45e0f7681f7def2ca259ee9ef4ae Mon Sep 17 00:00:00 2001
From: Michal Nazarewicz <mina86@mina86.com>
Date: Fri, 15 Jan 2016 16:57:58 -0800
Subject: [PATCH 01/74] include/linux/kernel.h: change abs() macro so it uses
 consistent return type

commit 8f57e4d930d48217268315898212518d4d3e0773 upstream.

Rewrite abs() so that its return type does not depend on the
architecture and no unexpected type conversion happen inside of it.  The
only conversion is from unsigned to signed type.  char is left as a
return type but treated as a signed type regradless of it's actual
signedness.

With the old version, int arguments were promoted to long and depending
on architecture a long argument might result in s64 or long return type
(which may or may not be the same).

This came after some back and forth with Nicolas.  The current macro has
different return type (for the same input type) depending on
architecture which might be midly iritating.

An alternative version would promote to int like so:

	#define abs(x)	__abs_choose_expr(x, long long,			\
			__abs_choose_expr(x, long,			\
			__builtin_choose_expr(				\
				sizeof(x) <= sizeof(int),		\
				({ int __x = (x); __x<0?-__x:__x; }),	\
				((void)0))))

I have no preference but imagine Linus might.  :] Nicolas argument against
is that promoting to int causes iconsistent behaviour:

	int main(void) {
		unsigned short a = 0, b = 1, c = a - b;
		unsigned short d = abs(a - b);
		unsigned short e = abs(c);
		printf("%u %u\n", d, e);  // prints: 1 65535
	}

Then again, no sane person expects consistent behaviour from C integer
arithmetic.  ;)

Note:

  __builtin_types_compatible_p(unsigned char, char) is always false, and
  __builtin_types_compatible_p(signed char, char) is also always false.

Signed-off-by: Michal Nazarewicz <mina86@mina86.com>
Reviewed-by: Nicolas Pitre <nico@linaro.org>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Wey-Yi Guy <wey-yi.w.guy@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/iio/industrialio-core.c          |  9 +++---
 drivers/net/wireless/iwlwifi/dvm/calib.c |  2 +-
 include/linux/kernel.h                   | 36 ++++++++++++------------
 3 files changed, 23 insertions(+), 24 deletions(-)

diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c
index 7ede941e9301..131b434af994 100644
--- a/drivers/iio/industrialio-core.c
+++ b/drivers/iio/industrialio-core.c
@@ -433,16 +433,15 @@ ssize_t iio_format_value(char *buf, unsigned int type, int size, int *vals)
 		scale_db = true;
 	case IIO_VAL_INT_PLUS_MICRO:
 		if (vals[1] < 0)
-			return sprintf(buf, "-%ld.%06u%s\n", abs(vals[0]),
-					-vals[1],
-				scale_db ? " dB" : "");
+			return sprintf(buf, "-%d.%06u%s\n", abs(vals[0]),
+				       -vals[1], scale_db ? " dB" : "");
 		else
 			return sprintf(buf, "%d.%06u%s\n", vals[0], vals[1],
 				scale_db ? " dB" : "");
 	case IIO_VAL_INT_PLUS_NANO:
 		if (vals[1] < 0)
-			return sprintf(buf, "-%ld.%09u\n", abs(vals[0]),
-					-vals[1]);
+			return sprintf(buf, "-%d.%09u\n", abs(vals[0]),
+				       -vals[1]);
 		else
 			return sprintf(buf, "%d.%09u\n", vals[0], vals[1]);
 	case IIO_VAL_FRACTIONAL:
diff --git a/drivers/net/wireless/iwlwifi/dvm/calib.c b/drivers/net/wireless/iwlwifi/dvm/calib.c
index 20e6aa910700..c148085742a0 100644
--- a/drivers/net/wireless/iwlwifi/dvm/calib.c
+++ b/drivers/net/wireless/iwlwifi/dvm/calib.c
@@ -901,7 +901,7 @@ static void iwlagn_gain_computation(struct iwl_priv *priv,
 		/* bound gain by 2 bits value max, 3rd bit is sign */
 		data->delta_gain_code[i] =
 			min(abs(delta_g),
-			(long) CHAIN_NOISE_MAX_DELTA_GAIN_CODE);
+			(s32) CHAIN_NOISE_MAX_DELTA_GAIN_CODE);
 
 		if (delta_g < 0)
 			/*
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 924853d33a13..e571e592e53a 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -202,26 +202,26 @@ extern int _cond_resched(void);
 
 /**
  * abs - return absolute value of an argument
- * @x: the value.  If it is unsigned type, it is converted to signed type first
- *   (s64, long or int depending on its size).
+ * @x: the value.  If it is unsigned type, it is converted to signed type first.
+ *     char is treated as if it was signed (regardless of whether it really is)
+ *     but the macro's return type is preserved as char.
  *
- * Return: an absolute value of x.  If x is 64-bit, macro's return type is s64,
- *   otherwise it is signed long.
+ * Return: an absolute value of x.
  */
-#define abs(x) __builtin_choose_expr(sizeof(x) == sizeof(s64), ({	\
-		s64 __x = (x);						\
-		(__x < 0) ? -__x : __x;					\
-	}), ({								\
-		long ret;						\
-		if (sizeof(x) == sizeof(long)) {			\
-			long __x = (x);					\
-			ret = (__x < 0) ? -__x : __x;			\
-		} else {						\
-			int __x = (x);					\
-			ret = (__x < 0) ? -__x : __x;			\
-		}							\
-		ret;							\
-	}))
+#define abs(x)	__abs_choose_expr(x, long long,				\
+		__abs_choose_expr(x, long,				\
+		__abs_choose_expr(x, int,				\
+		__abs_choose_expr(x, short,				\
+		__abs_choose_expr(x, char,				\
+		__builtin_choose_expr(					\
+			__builtin_types_compatible_p(typeof(x), char),	\
+			(char)({ signed char __x = (x); __x<0?-__x:__x; }), \
+			((void)0)))))))
+
+#define __abs_choose_expr(x, type, other) __builtin_choose_expr(	\
+	__builtin_types_compatible_p(typeof(x),   signed type) ||	\
+	__builtin_types_compatible_p(typeof(x), unsigned type),		\
+	({ signed type __x = (x); __x < 0 ? -__x : __x; }), other)
 
 /**
  * reciprocal_scale - "scale" a value into range [0, ep_ro)

From 29bd03596e9511fa2f1a199d3a7603da28b32899 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Sat, 24 Sep 2016 23:29:51 +0200
Subject: [PATCH 02/74] Fix build warning in kernel/cpuset.c

>           2 ../kernel/cpuset.c:2101:11: warning: initialization from incompatible pointer type [-Wincompatible-pointer-types]
>           1 ../kernel/cpuset.c:2101:2: warning: initialization from incompatible pointer type
>           1 ../kernel/cpuset.c:2101:2: warning: (near initialization for 'cpuset_cgrp_subsys.fork')

This got introduced by 06ec7a1d7646 ("cpuset: make sure new tasks
conform to the current config of the cpuset"). In the upstream
kernel, the function prototype was changed as of b53202e63089
("cgroup: kill cgrp_ss_priv[CGROUP_CANFORK_COUNT] and friends").

That patch is not suitable for stable kernels, and fortunately
the warning seems harmless as the prototypes only differ in the
second argument that is unused. Adding that argument gets rid
of the warning:

Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/cpuset.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index e120bd983ad0..b9279a2844d8 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2079,7 +2079,7 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css)
  * which could have been changed by cpuset just after it inherits the
  * state from the parent and before it sits on the cgroup's task list.
  */
-void cpuset_fork(struct task_struct *task)
+void cpuset_fork(struct task_struct *task, void *priv)
 {
 	if (task_css_is_root(task, cpuset_cgrp_id))
 		return;

From daef25aa900236a25d1cadc272beefc86713ca9f Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@suse.com>
Date: Tue, 2 Aug 2016 14:05:33 -0700
Subject: [PATCH 03/74] reiserfs: fix "new_insert_key may be used uninitialized
 ..."

commit 0a11b9aae49adf1f952427ef1a1d9e793dd6ffb6 upstream.

new_insert_key only makes any sense when it's associated with a
new_insert_ptr, which is initialized to NULL and changed to a
buffer_head when we also initialize new_insert_key.  We can key off of
that to avoid the uninitialized warning.

Link: http://lkml.kernel.org/r/5eca5ffb-2155-8df2-b4a2-f162f105efed@suse.com
Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Jan Kara <jack@suse.cz>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/reiserfs/ibalance.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/reiserfs/ibalance.c b/fs/reiserfs/ibalance.c
index b751eea32e20..5db6f45b3fed 100644
--- a/fs/reiserfs/ibalance.c
+++ b/fs/reiserfs/ibalance.c
@@ -1153,8 +1153,9 @@ int balance_internal(struct tree_balance *tb,
 				       insert_ptr);
 	}
 
-	memcpy(new_insert_key_addr, &new_insert_key, KEY_SIZE);
 	insert_ptr[0] = new_insert_ptr;
+	if (new_insert_ptr)
+		memcpy(new_insert_key_addr, &new_insert_key, KEY_SIZE);
 
 	return order;
 }

From 6b8076b8a76bc7f3d51e51b9fb5b35853cc74138 Mon Sep 17 00:00:00 2001
From: David Forster <dforster@brocade.com>
Date: Wed, 3 Aug 2016 15:13:01 +0100
Subject: [PATCH 04/74] ipv4: panic in leaf_walk_rcu due to stale node pointer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit 94d9f1c5906b20053efe375b6d66610bca4b8b64 ]

Panic occurs when issuing "cat /proc/net/route" whilst
populating FIB with > 1M routes.

Use of cached node pointer in fib_route_get_idx is unsafe.

 BUG: unable to handle kernel paging request at ffffc90001630024
 IP: [<ffffffff814cf6a0>] leaf_walk_rcu+0x10/0xe0
 PGD 11b08d067 PUD 11b08e067 PMD dac4b067 PTE 0
 Oops: 0000 [#1] SMP
 Modules linked in: nfsd auth_rpcgss oid_registry nfs_acl nfs lockd grace fscac
 snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep virti
 acpi_cpufreq button parport_pc ppdev lp parport autofs4 ext4 crc16 mbcache jbd
tio_ring virtio floppy uhci_hcd ehci_hcd usbcore usb_common libata scsi_mod
 CPU: 1 PID: 785 Comm: cat Not tainted 4.2.0-rc8+ #4
 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007
 task: ffff8800da1c0bc0 ti: ffff88011a05c000 task.ti: ffff88011a05c000
 RIP: 0010:[<ffffffff814cf6a0>]  [<ffffffff814cf6a0>] leaf_walk_rcu+0x10/0xe0
 RSP: 0018:ffff88011a05fda0  EFLAGS: 00010202
 RAX: ffff8800d8a40c00 RBX: ffff8800da4af940 RCX: ffff88011a05ff20
 RDX: ffffc90001630020 RSI: 0000000001013531 RDI: ffff8800da4af950
 RBP: 0000000000000000 R08: ffff8800da1f9a00 R09: 0000000000000000
 R10: ffff8800db45b7e4 R11: 0000000000000246 R12: ffff8800da4af950
 R13: ffff8800d97a74c0 R14: 0000000000000000 R15: ffff8800d97a7480
 FS:  00007fd3970e0700(0000) GS:ffff88011fd00000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
 CR2: ffffc90001630024 CR3: 000000011a7e4000 CR4: 00000000000006e0
 Stack:
  ffffffff814d00d3 0000000000000000 ffff88011a05ff20 ffff8800da1f9a00
  ffffffff811dd8b9 0000000000000800 0000000000020000 00007fd396f35000
  ffffffff811f8714 0000000000003431 ffffffff8138dce0 0000000000000f80
 Call Trace:
  [<ffffffff814d00d3>] ? fib_route_seq_start+0x93/0xc0
  [<ffffffff811dd8b9>] ? seq_read+0x149/0x380
  [<ffffffff811f8714>] ? fsnotify+0x3b4/0x500
  [<ffffffff8138dce0>] ? process_echoes+0x70/0x70
  [<ffffffff8121cfa7>] ? proc_reg_read+0x47/0x70
  [<ffffffff811bb823>] ? __vfs_read+0x23/0xd0
  [<ffffffff811bbd42>] ? rw_verify_area+0x52/0xf0
  [<ffffffff811bbe61>] ? vfs_read+0x81/0x120
  [<ffffffff811bcbc2>] ? SyS_read+0x42/0xa0
  [<ffffffff81549ab2>] ? entry_SYSCALL_64_fastpath+0x16/0x75
 Code: 48 85 c0 75 d8 f3 c3 31 c0 c3 f3 c3 66 66 66 66 66 66 2e 0f 1f 84 00 00
a 04 89 f0 33 02 44 89 c9 48 d3 e8 0f b6 4a 05 49 89
 RIP  [<ffffffff814cf6a0>] leaf_walk_rcu+0x10/0xe0
  RSP <ffff88011a05fda0>
 CR2: ffffc90001630024

Signed-off-by: Dave Forster <dforster@brocade.com>
Acked-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
---
 net/ipv4/fib_trie.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 744e5936c10d..e5a3ff210fec 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -2453,9 +2453,7 @@ struct fib_route_iter {
 static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter,
 					    loff_t pos)
 {
-	struct fib_table *tb = iter->main_tb;
 	struct key_vector *l, **tp = &iter->tnode;
-	struct trie *t;
 	t_key key;
 
 	/* use cache location of next-to-find key */
@@ -2463,8 +2461,6 @@ static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter,
 		pos -= iter->pos;
 		key = iter->key;
 	} else {
-		t = (struct trie *)tb->tb_data;
-		iter->tnode = t->kv;
 		iter->pos = 0;
 		key = 0;
 	}
@@ -2505,12 +2501,12 @@ static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos)
 		return NULL;
 
 	iter->main_tb = tb;
+	t = (struct trie *)tb->tb_data;
+	iter->tnode = t->kv;
 
 	if (*pos != 0)
 		return fib_route_get_idx(iter, *pos);
 
-	t = (struct trie *)tb->tb_data;
-	iter->tnode = t->kv;
 	iter->pos = 0;
 	iter->key = 0;
 

From ea7dd213c1e3be9eeb6786affb7766aeca232439 Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@codemonkey.org.uk>
Date: Fri, 2 Sep 2016 14:39:50 -0400
Subject: [PATCH 05/74] ipv6: release dst in ping_v6_sendmsg
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit 03c2778a938aaba0893f6d6cdc29511d91a79848 ]

Neither the failure or success paths of ping_v6_sendmsg release
the dst it acquires.  This leads to a flood of warnings from
"net/core/dst.c:288 dst_release" on older kernels that
don't have 8bf4ada2e21378816b28205427ee6b0e1ca4c5f1 backported.

That patch optimistically hoped this had been fixed post 3.10, but
it seems at least one case wasn't, where I've seen this triggered
a lot from machines doing unprivileged icmp sockets.

Cc: Martin Lau <kafai@fb.com>
Signed-off-by: Dave Jones <davej@codemonkey.org.uk>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
---
 net/ipv6/ping.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 263a5164a6f5..3e55447b63a4 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -150,8 +150,10 @@ int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	rt = (struct rt6_info *) dst;
 
 	np = inet6_sk(sk);
-	if (!np)
-		return -EBADF;
+	if (!np) {
+		err = -EBADF;
+		goto dst_err_out;
+	}
 
 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
 		fl6.flowi6_oif = np->mcast_oif;
@@ -186,6 +188,9 @@ int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	}
 	release_sock(sk);
 
+dst_err_out:
+	dst_release(dst);
+
 	if (err)
 		return err;
 

From 98418550e124ec047918df8b3bb587e60bd39a8a Mon Sep 17 00:00:00 2001
From: Artem Germanov <agermanov@anchorfree.com>
Date: Wed, 7 Sep 2016 10:49:36 -0700
Subject: [PATCH 06/74] tcp: cwnd does not increase in TCP YeAH
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit db7196a0d0984b933ccf2cd6a60e26abf466e8a3 ]

Commit 76174004a0f19785a328f40388e87e982bbf69b9
(tcp: do not slow start when cwnd equals ssthresh )
introduced regression in TCP YeAH. Using 100ms delay 1% loss virtual
ethernet link kernel 4.2 shows bandwidth ~500KB/s for single TCP
connection and kernel 4.3 and above (including 4.8-rc4) shows bandwidth
~100KB/s.
   That is caused by stalled cwnd when cwnd equals ssthresh. This patch
fixes it by proper increasing cwnd in this case.

Signed-off-by: Artem Germanov <agermanov@anchorfree.com>
Acked-by: Dmitry Adamushko <d.adamushko@anchorfree.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Holger Hoffstätte <holger@applied-asynchrony.com>
---
 net/ipv4/tcp_yeah.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index 3e6a472e6b88..92ab5bc91592 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -75,7 +75,7 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 	if (!tcp_is_cwnd_limited(sk))
 		return;
 
-	if (tp->snd_cwnd <= tp->snd_ssthresh)
+	if (tcp_in_slow_start(tp))
 		tcp_slow_start(tp, acked);
 
 	else if (!yeah->doing_reno_now) {

From 0f55fa7541d7ff34a6690438bb00b78521b98b54 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 17 Aug 2016 05:56:26 -0700
Subject: [PATCH 07/74] tcp: fix use after free in tcp_xmit_retransmit_queue()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit bb1fceca22492109be12640d49f5ea5a544c6bb4 ]

When tcp_sendmsg() allocates a fresh and empty skb, it puts it at the
tail of the write queue using tcp_add_write_queue_tail()

Then it attempts to copy user data into this fresh skb.

If the copy fails, we undo the work and remove the fresh skb.

Unfortunately, this undo lacks the change done to tp->highest_sack and
we can leave a dangling pointer (to a freed skb)

Later, tcp_xmit_retransmit_queue() can dereference this pointer and
access freed memory. For regular kernels where memory is not unmapped,
this might cause SACK bugs because tcp_highest_sack_seq() is buggy,
returning garbage instead of tp->snd_nxt, but with various debug
features like CONFIG_DEBUG_PAGEALLOC, this can crash the kernel.

This bug was found by Marco Grassi thanks to syzkaller.

Fixes: 6859d49475d4 ("[TCP]: Abstract tp->highest_sack accessing & point to next skb")
Reported-by: Marco Grassi <marco.gra@gmail.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Reviewed-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
---
 include/net/tcp.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 414d822bc1db..9c3ab544d3a8 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1510,6 +1510,8 @@ static inline void tcp_check_send_head(struct sock *sk, struct sk_buff *skb_unli
 {
 	if (sk->sk_send_head == skb_unlinked)
 		sk->sk_send_head = NULL;
+	if (tcp_sk(sk)->highest_sack == skb_unlinked)
+		tcp_sk(sk)->highest_sack = NULL;
 }
 
 static inline void tcp_init_send_head(struct sock *sk)

From c867a11289ee74c4594a96ccccac16f4cc29519e Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 22 Aug 2016 11:31:10 -0700
Subject: [PATCH 08/74] tcp: properly scale window in
 tcp_v[46]_reqsk_send_ack()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit 20a2b49fc538540819a0c552877086548cff8d8d ]

When sending an ack in SYN_RECV state, we must scale the offered
window if wscale option was negotiated and accepted.

Tested:
 Following packetdrill test demonstrates the issue :

0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0

+0 bind(3, ..., ...) = 0
+0 listen(3, 1) = 0

// Establish a connection.
+0 < S 0:0(0) win 20000 <mss 1000,sackOK,wscale 7, nop, TS val 100 ecr 0>
+0 > S. 0:0(0) ack 1 win 28960 <mss 1460,sackOK, TS val 100 ecr 100, nop, wscale 7>

+0 < . 1:11(10) ack 1 win 156 <nop,nop,TS val 99 ecr 100>
// check that window is properly scaled !
+0 > . 1:1(0) ack 1 win 226 <nop,nop,TS val 200 ecr 100>

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
---
 net/ipv4/tcp_ipv4.c | 8 +++++++-
 net/ipv6/tcp_ipv6.c | 8 +++++++-
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 048418b049d8..b5853cac3269 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -808,8 +808,14 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
 	u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 :
 					     tcp_sk(sk)->snd_nxt;
 
+	/* RFC 7323 2.3
+	 * The window field (SEG.WND) of every outgoing segment, with the
+	 * exception of <SYN> segments, MUST be right-shifted by
+	 * Rcv.Wind.Shift bits:
+	 */
 	tcp_v4_send_ack(sock_net(sk), skb, seq,
-			tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd,
+			tcp_rsk(req)->rcv_nxt,
+			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
 			tcp_time_stamp,
 			req->ts_recent,
 			0,
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 1a1cd3938fd0..2d81e2f33ef2 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -932,9 +932,15 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
 	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
 	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
 	 */
+	/* RFC 7323 2.3
+	 * The window field (SEG.WND) of every outgoing segment, with the
+	 * exception of <SYN> segments, MUST be right-shifted by
+	 * Rcv.Wind.Shift bits:
+	 */
 	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
 			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
-			tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd,
+			tcp_rsk(req)->rcv_nxt,
+			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
 			tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if,
 			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr),
 			0, 0);

From 3e2d986d8b2f95cfd4c2c0528cff74ce42a4e2a8 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 13 Sep 2016 09:48:53 +0100
Subject: [PATCH 09/74] crypto: arm64/aes-ctr - fix NULL dereference in tail
 processing

commit 2db34e78f126c6001d79d3b66ab1abb482dc7caa upstream.

The AES-CTR glue code avoids calling into the blkcipher API for the
tail portion of the walk, by comparing the remainder of walk.nbytes
modulo AES_BLOCK_SIZE with the residual nbytes, and jumping straight
into the tail processing block if they are equal. This tail processing
block checks whether nbytes != 0, and does nothing otherwise.

However, in case of an allocation failure in the blkcipher layer, we
may enter this code with walk.nbytes == 0, while nbytes > 0. In this
case, we should not dereference the source and destination pointers,
since they may be NULL. So instead of checking for nbytes != 0, check
for (walk.nbytes % AES_BLOCK_SIZE) != 0, which implies the former in
non-error conditions.

Fixes: 49788fe2a128 ("arm64/crypto: AES-ECB/CBC/CTR/XTS using ARMv8 NEON and Crypto Extensions")
Reported-by: xiakaixu <xiakaixu@huawei.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm64/crypto/aes-glue.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
index 05d9e16c0dfd..6a51dfccfe71 100644
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -211,7 +211,7 @@ static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
 		err = blkcipher_walk_done(desc, &walk,
 					  walk.nbytes % AES_BLOCK_SIZE);
 	}
-	if (nbytes) {
+	if (walk.nbytes % AES_BLOCK_SIZE) {
 		u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
 		u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
 		u8 __aligned(8) tail[AES_BLOCK_SIZE];

From 9246fd26f99d70c81d3ec90a4b8dece0c2d7b930 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 13 Sep 2016 09:48:52 +0100
Subject: [PATCH 10/74] crypto: arm/aes-ctr - fix NULL dereference in tail
 processing

commit f82e90b28654804ab72881d577d87c3d5c65e2bc upstream.

The AES-CTR glue code avoids calling into the blkcipher API for the
tail portion of the walk, by comparing the remainder of walk.nbytes
modulo AES_BLOCK_SIZE with the residual nbytes, and jumping straight
into the tail processing block if they are equal. This tail processing
block checks whether nbytes != 0, and does nothing otherwise.

However, in case of an allocation failure in the blkcipher layer, we
may enter this code with walk.nbytes == 0, while nbytes > 0. In this
case, we should not dereference the source and destination pointers,
since they may be NULL. So instead of checking for nbytes != 0, check
for (walk.nbytes % AES_BLOCK_SIZE) != 0, which implies the former in
non-error conditions.

Fixes: 86464859cc77 ("crypto: arm - AES in ECB/CBC/CTR/XTS modes using ARMv8 Crypto Extensions")
Reported-by: xiakaixu <xiakaixu@huawei.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/crypto/aes-ce-glue.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c
index b445a5d56f43..593da7ffb449 100644
--- a/arch/arm/crypto/aes-ce-glue.c
+++ b/arch/arm/crypto/aes-ce-glue.c
@@ -279,7 +279,7 @@ static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
 		err = blkcipher_walk_done(desc, &walk,
 					  walk.nbytes % AES_BLOCK_SIZE);
 	}
-	if (nbytes) {
+	if (walk.nbytes % AES_BLOCK_SIZE) {
 		u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
 		u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
 		u8 __aligned(8) tail[AES_BLOCK_SIZE];

From 1c95a8a481efa5716b847b75f9c8b26f65844362 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 13 Sep 2016 14:43:29 +0800
Subject: [PATCH 11/74] crypto: skcipher - Fix blkcipher walk OOM crash

commit acdb04d0b36769b3e05990c488dc74d8b7ac8060 upstream.

When we need to allocate a temporary blkcipher_walk_next and it
fails, the code is supposed to take the slow path of processing
the data block by block.  However, due to an unrelated change
we instead end up dereferencing the NULL pointer.

This patch fixes it by moving the unrelated bsize setting out
of the way so that we enter the slow path as inteded.

Fixes: 7607bd8ff03b ("[CRYPTO] blkcipher: Added blkcipher_walk_virt_block")
Reported-by: xiakaixu <xiakaixu@huawei.com>
Reported-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 crypto/blkcipher.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/crypto/blkcipher.c b/crypto/blkcipher.c
index 8cc1622b2ee0..dca7bc87dad9 100644
--- a/crypto/blkcipher.c
+++ b/crypto/blkcipher.c
@@ -234,6 +234,8 @@ static int blkcipher_walk_next(struct blkcipher_desc *desc,
 		return blkcipher_walk_done(desc, walk, -EINVAL);
 	}
 
+	bsize = min(walk->walk_blocksize, n);
+
 	walk->flags &= ~(BLKCIPHER_WALK_SLOW | BLKCIPHER_WALK_COPY |
 			 BLKCIPHER_WALK_DIFF);
 	if (!scatterwalk_aligned(&walk->in, walk->alignmask) ||
@@ -246,7 +248,6 @@ static int blkcipher_walk_next(struct blkcipher_desc *desc,
 		}
 	}
 
-	bsize = min(walk->walk_blocksize, n);
 	n = scatterwalk_clamp(&walk->in, n);
 	n = scatterwalk_clamp(&walk->out, n);
 

From 2426cdb3f4a33704f1a8f9ee167f82ef5be516ba Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 7 Sep 2016 18:42:08 +0800
Subject: [PATCH 12/74] crypto: echainiv - Replace chaining with multiplication

commit 53a5d5ddccf849dbc27a8c1bba0b43c3a45fb792 upstream.

The current implementation uses a global per-cpu array to store
data which are used to derive the next IV.  This is insecure as
the attacker may change the stored data.

This patch removes all traces of chaining and replaces it with
multiplication of the salt and the sequence number.

Fixes: a10f554fa7e0 ("crypto: echainiv - Add encrypted chain IV...")
Reported-by: Mathias Krause <minipli@googlemail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 crypto/echainiv.c | 117 ++++++++++------------------------------------
 1 file changed, 25 insertions(+), 92 deletions(-)

diff --git a/crypto/echainiv.c b/crypto/echainiv.c
index b96a84560b67..343a74e96e2a 100644
--- a/crypto/echainiv.c
+++ b/crypto/echainiv.c
@@ -1,8 +1,8 @@
 /*
  * echainiv: Encrypted Chain IV Generator
  *
- * This generator generates an IV based on a sequence number by xoring it
- * with a salt and then encrypting it with the same key as used to encrypt
+ * This generator generates an IV based on a sequence number by multiplying
+ * it with a salt and then encrypting it with the same key as used to encrypt
  * the plain text.  This algorithm requires that the block size be equal
  * to the IV size.  It is mainly useful for CBC.
  *
@@ -23,81 +23,17 @@
 #include <linux/err.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
-#include <linux/mm.h>
 #include <linux/module.h>
-#include <linux/percpu.h>
-#include <linux/spinlock.h>
+#include <linux/slab.h>
 #include <linux/string.h>
 
-#define MAX_IV_SIZE 16
-
-static DEFINE_PER_CPU(u32 [MAX_IV_SIZE / sizeof(u32)], echainiv_iv);
-
-/* We don't care if we get preempted and read/write IVs from the next CPU. */
-static void echainiv_read_iv(u8 *dst, unsigned size)
-{
-	u32 *a = (u32 *)dst;
-	u32 __percpu *b = echainiv_iv;
-
-	for (; size >= 4; size -= 4) {
-		*a++ = this_cpu_read(*b);
-		b++;
-	}
-}
-
-static void echainiv_write_iv(const u8 *src, unsigned size)
-{
-	const u32 *a = (const u32 *)src;
-	u32 __percpu *b = echainiv_iv;
-
-	for (; size >= 4; size -= 4) {
-		this_cpu_write(*b, *a);
-		a++;
-		b++;
-	}
-}
-
-static void echainiv_encrypt_complete2(struct aead_request *req, int err)
-{
-	struct aead_request *subreq = aead_request_ctx(req);
-	struct crypto_aead *geniv;
-	unsigned int ivsize;
-
-	if (err == -EINPROGRESS)
-		return;
-
-	if (err)
-		goto out;
-
-	geniv = crypto_aead_reqtfm(req);
-	ivsize = crypto_aead_ivsize(geniv);
-
-	echainiv_write_iv(subreq->iv, ivsize);
-
-	if (req->iv != subreq->iv)
-		memcpy(req->iv, subreq->iv, ivsize);
-
-out:
-	if (req->iv != subreq->iv)
-		kzfree(subreq->iv);
-}
-
-static void echainiv_encrypt_complete(struct crypto_async_request *base,
-					 int err)
-{
-	struct aead_request *req = base->data;
-
-	echainiv_encrypt_complete2(req, err);
-	aead_request_complete(req, err);
-}
-
 static int echainiv_encrypt(struct aead_request *req)
 {
 	struct crypto_aead *geniv = crypto_aead_reqtfm(req);
 	struct aead_geniv_ctx *ctx = crypto_aead_ctx(geniv);
 	struct aead_request *subreq = aead_request_ctx(req);
-	crypto_completion_t compl;
-	void *data;
+	__be64 nseqno;
+	u64 seqno;
 	u8 *info;
 	unsigned int ivsize = crypto_aead_ivsize(geniv);
 	int err;
@@ -107,8 +43,6 @@ static int echainiv_encrypt(struct aead_request *req)
 
 	aead_request_set_tfm(subreq, ctx->child);
 
-	compl = echainiv_encrypt_complete;
-	data = req;
 	info = req->iv;
 
 	if (req->src != req->dst) {
@@ -123,29 +57,30 @@ static int echainiv_encrypt(struct aead_request *req)
 			return err;
 	}
 
-	if (unlikely(!IS_ALIGNED((unsigned long)info,
-				 crypto_aead_alignmask(geniv) + 1))) {
-		info = kmalloc(ivsize, req->base.flags &
-				       CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL:
-								  GFP_ATOMIC);
-		if (!info)
-			return -ENOMEM;
-
-		memcpy(info, req->iv, ivsize);
-	}
-
-	aead_request_set_callback(subreq, req->base.flags, compl, data);
+	aead_request_set_callback(subreq, req->base.flags,
+				  req->base.complete, req->base.data);
 	aead_request_set_crypt(subreq, req->dst, req->dst,
 			       req->cryptlen, info);
 	aead_request_set_ad(subreq, req->assoclen);
 
-	crypto_xor(info, ctx->salt, ivsize);
-	scatterwalk_map_and_copy(info, req->dst, req->assoclen, ivsize, 1);
-	echainiv_read_iv(info, ivsize);
+	memcpy(&nseqno, info + ivsize - 8, 8);
+	seqno = be64_to_cpu(nseqno);
+	memset(info, 0, ivsize);
 
-	err = crypto_aead_encrypt(subreq);
-	echainiv_encrypt_complete2(req, err);
-	return err;
+	scatterwalk_map_and_copy(info, req->dst, req->assoclen, ivsize, 1);
+
+	do {
+		u64 a;
+
+		memcpy(&a, ctx->salt + ivsize - 8, 8);
+
+		a |= 1;
+		a *= seqno;
+
+		memcpy(info + ivsize - 8, &a, 8);
+	} while ((ivsize -= 8));
+
+	return crypto_aead_encrypt(subreq);
 }
 
 static int echainiv_decrypt(struct aead_request *req)
@@ -192,8 +127,7 @@ static int echainiv_aead_create(struct crypto_template *tmpl,
 	alg = crypto_spawn_aead_alg(spawn);
 
 	err = -EINVAL;
-	if (inst->alg.ivsize & (sizeof(u32) - 1) ||
-	    inst->alg.ivsize > MAX_IV_SIZE)
+	if (inst->alg.ivsize & (sizeof(u64) - 1) || !inst->alg.ivsize)
 		goto free_inst;
 
 	inst->alg.encrypt = echainiv_encrypt;
@@ -202,7 +136,6 @@ static int echainiv_aead_create(struct crypto_template *tmpl,
 	inst->alg.init = aead_init_geniv;
 	inst->alg.exit = aead_exit_geniv;
 
-	inst->alg.base.cra_alignmask |= __alignof__(u32) - 1;
 	inst->alg.base.cra_ctxsize = sizeof(struct aead_geniv_ctx);
 	inst->alg.base.cra_ctxsize += inst->alg.ivsize;
 

From f1ce664e687d3d0f83b9e0a4014b96353a641c93 Mon Sep 17 00:00:00 2001
From: Joseph Qi <joseph.qi@huawei.com>
Date: Mon, 19 Sep 2016 14:43:55 -0700
Subject: [PATCH 13/74] ocfs2/dlm: fix race between convert and migration

commit e6f0c6e6170fec175fe676495f29029aecdf486c upstream.

Commit ac7cf246dfdb ("ocfs2/dlm: fix race between convert and recovery")
checks if lockres master has changed to identify whether new master has
finished recovery or not.  This will introduce a race that right after
old master does umount ( means master will change), a new convert
request comes.

In this case, it will reset lockres state to DLM_RECOVERING and then
retry convert, and then fail with lockres->l_action being set to
OCFS2_AST_INVALID, which will cause inconsistent lock level between
ocfs2 and dlm, and then finally BUG.

Since dlm recovery will clear lock->convert_pending in
dlm_move_lockres_to_recovery_list, we can use it to correctly identify
the race case between convert and recovery.  So fix it.

Fixes: ac7cf246dfdb ("ocfs2/dlm: fix race between convert and recovery")
Link: http://lkml.kernel.org/r/57CE1569.8010704@huawei.com
Signed-off-by: Joseph Qi <joseph.qi@huawei.com>
Signed-off-by: Jun Piao <piaojun@huawei.com>
Cc: Mark Fasheh <mfasheh@suse.de>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ocfs2/dlm/dlmconvert.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c
index f90931335c6b..2e11658676eb 100644
--- a/fs/ocfs2/dlm/dlmconvert.c
+++ b/fs/ocfs2/dlm/dlmconvert.c
@@ -262,7 +262,6 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm,
 				  struct dlm_lock *lock, int flags, int type)
 {
 	enum dlm_status status;
-	u8 old_owner = res->owner;
 
 	mlog(0, "type=%d, convert_type=%d, busy=%d\n", lock->ml.type,
 	     lock->ml.convert_type, res->state & DLM_LOCK_RES_IN_PROGRESS);
@@ -329,7 +328,6 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm,
 
 	spin_lock(&res->spinlock);
 	res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
-	lock->convert_pending = 0;
 	/* if it failed, move it back to granted queue.
 	 * if master returns DLM_NORMAL and then down before sending ast,
 	 * it may have already been moved to granted queue, reset to
@@ -338,12 +336,14 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm,
 		if (status != DLM_NOTQUEUED)
 			dlm_error(status);
 		dlm_revert_pending_convert(res, lock);
-	} else if ((res->state & DLM_LOCK_RES_RECOVERING) ||
-			(old_owner != res->owner)) {
-		mlog(0, "res %.*s is in recovering or has been recovered.\n",
-				res->lockname.len, res->lockname.name);
+	} else if (!lock->convert_pending) {
+		mlog(0, "%s: res %.*s, owner died and lock has been moved back "
+				"to granted list, retry convert.\n",
+				dlm->name, res->lockname.len, res->lockname.name);
 		status = DLM_RECOVERING;
 	}
+
+	lock->convert_pending = 0;
 bail:
 	spin_unlock(&res->spinlock);
 

From cf5fa7b898f487def896acaf79f3ea3bc4827e8c Mon Sep 17 00:00:00 2001
From: Ashish Samant <ashish.samant@oracle.com>
Date: Mon, 19 Sep 2016 14:44:42 -0700
Subject: [PATCH 14/74] ocfs2: fix start offset to
 ocfs2_zero_range_for_truncate()

commit d21c353d5e99c56cdd5b5c1183ffbcaf23b8b960 upstream.

If we punch a hole on a reflink such that following conditions are met:

1. start offset is on a cluster boundary
2. end offset is not on a cluster boundary
3. (end offset is somewhere in another extent) or
   (hole range > MAX_CONTIG_BYTES(1MB)),

we dont COW the first cluster starting at the start offset.  But in this
case, we were wrongly passing this cluster to
ocfs2_zero_range_for_truncate() to zero out.  This will modify the
cluster in place and zero it in the source too.

Fix this by skipping this cluster in such a scenario.

To reproduce:

1. Create a random file of say 10 MB
     xfs_io -c 'pwrite -b 4k 0 10M' -f 10MBfile
2. Reflink  it
     reflink -f 10MBfile reflnktest
3. Punch a hole at starting at cluster boundary  with range greater that
1MB. You can also use a range that will put the end offset in another
extent.
     fallocate -p -o 0 -l 1048615 reflnktest
4. sync
5. Check the  first cluster in the source file. (It will be zeroed out).
    dd if=10MBfile iflag=direct bs=<cluster size> count=1 | hexdump -C

Link: http://lkml.kernel.org/r/1470957147-14185-1-git-send-email-ashish.samant@oracle.com
Signed-off-by: Ashish Samant <ashish.samant@oracle.com>
Reported-by: Saar Maoz <saar.maoz@oracle.com>
Reviewed-by: Srinivas Eeda <srinivas.eeda@oracle.com>
Cc: Mark Fasheh <mfasheh@suse.de>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Joseph Qi <joseph.qi@huawei.com>
Cc: Eric Ren <zren@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ocfs2/file.c | 34 ++++++++++++++++++++++++----------
 1 file changed, 24 insertions(+), 10 deletions(-)

diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 77d30cbd944d..56dd3957cc91 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1536,7 +1536,8 @@ static int ocfs2_zero_partial_clusters(struct inode *inode,
 				       u64 start, u64 len)
 {
 	int ret = 0;
-	u64 tmpend, end = start + len;
+	u64 tmpend = 0;
+	u64 end = start + len;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	unsigned int csize = osb->s_clustersize;
 	handle_t *handle;
@@ -1568,18 +1569,31 @@ static int ocfs2_zero_partial_clusters(struct inode *inode,
 	}
 
 	/*
-	 * We want to get the byte offset of the end of the 1st cluster.
+	 * If start is on a cluster boundary and end is somewhere in another
+	 * cluster, we have not COWed the cluster starting at start, unless
+	 * end is also within the same cluster. So, in this case, we skip this
+	 * first call to ocfs2_zero_range_for_truncate() truncate and move on
+	 * to the next one.
 	 */
-	tmpend = (u64)osb->s_clustersize + (start & ~(osb->s_clustersize - 1));
-	if (tmpend > end)
-		tmpend = end;
+	if ((start & (csize - 1)) != 0) {
+		/*
+		 * We want to get the byte offset of the end of the 1st
+		 * cluster.
+		 */
+		tmpend = (u64)osb->s_clustersize +
+			(start & ~(osb->s_clustersize - 1));
+		if (tmpend > end)
+			tmpend = end;
 
-	trace_ocfs2_zero_partial_clusters_range1((unsigned long long)start,
-						 (unsigned long long)tmpend);
+		trace_ocfs2_zero_partial_clusters_range1(
+			(unsigned long long)start,
+			(unsigned long long)tmpend);
 
-	ret = ocfs2_zero_range_for_truncate(inode, handle, start, tmpend);
-	if (ret)
-		mlog_errno(ret);
+		ret = ocfs2_zero_range_for_truncate(inode, handle, start,
+						    tmpend);
+		if (ret)
+			mlog_errno(ret);
+	}
 
 	if (tmpend < end) {
 		/*

From 252644d83bf7c46d199687c4af2f0ee125ca7f62 Mon Sep 17 00:00:00 2001
From: Michal Marek <mmarek@suse.cz>
Date: Thu, 10 Dec 2015 15:53:06 +0100
Subject: [PATCH 15/74] kbuild: Do not run modules_install and install in
 paralel

commit a85a41ed69f27c4c667d8c418df14b4fb220c4ad upstream.

Based on a x86-only patch by Andy Lutomirski <luto@amacapital.net>

With modular kernels, 'make install' is going to need the installed
modules at some point to generate the initramfs.

Signed-off-by: Michal Marek <mmarek@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Makefile | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/Makefile b/Makefile
index a6512f4eec9f..4a1cd02fa3c3 100644
--- a/Makefile
+++ b/Makefile
@@ -495,6 +495,12 @@ ifeq ($(KBUILD_EXTMOD),)
                 endif
         endif
 endif
+# install and module_install need also be processed one by one
+ifneq ($(filter install,$(MAKECMDGOALS)),)
+        ifneq ($(filter modules_install,$(MAKECMDGOALS)),)
+	        mixed-targets := 1
+        endif
+endif
 
 ifeq ($(mixed-targets),1)
 # ===========================================================================

From 97283248f58444a1b64c24b7ee1be5e708906e86 Mon Sep 17 00:00:00 2001
From: Wang YanQing <udknight@gmail.com>
Date: Fri, 11 Dec 2015 00:35:19 +0800
Subject: [PATCH 16/74] Makefile: revert "Makefile: Document ability to make
 file.lst and file.S" partially

commit 40ab87a4003c7952976ce901a2b9ece5ed833168 upstream.

Commit 627189797807 ("Makefile: Document ability to make file.lst
and file.S") document ability to make file.S, but there isn't such
ability in kbuild, so revert it.

Signed-off-by: Wang YanQing <udknight@gmail.com>
Signed-off-by: Michal Marek <mmarek@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 4a1cd02fa3c3..d61b0c628655 100644
--- a/Makefile
+++ b/Makefile
@@ -1266,7 +1266,7 @@ help:
 	@echo  '  firmware_install- Install all firmware to INSTALL_FW_PATH'
 	@echo  '                    (default: $$(INSTALL_MOD_PATH)/lib/firmware)'
 	@echo  '  dir/            - Build all files in dir and below'
-	@echo  '  dir/file.[oisS] - Build specified target only'
+	@echo  '  dir/file.[ois]  - Build specified target only'
 	@echo  '  dir/file.lst    - Build specified mixed source/assembly target only'
 	@echo  '                    (requires a recent binutils and recent build (System.map))'
 	@echo  '  dir/file.ko     - Build module including final link'

From 9b6bbc3d96729c18eef9a3c39021967220bcc5e4 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Thu, 3 Mar 2016 08:53:43 -0600
Subject: [PATCH 17/74] tools: Support relative directory path for 'O='

commit e17cf3a80d4ba0c4e40bf1a89deb1354c2e10e14 upstream.

Running "make O=foo" (with a relative directory path) fails with:

  scripts/Makefile.include:3: *** O=foo does not exist.  Stop.
  /home/jpoimboe/git/linux/Makefile:1547: recipe for target 'tools/objtool' failed

The tools Makefile gets confused by the relative path and tries to build
objtool in tools/foo.  Convert the output directory to an absolute path
before passing it to the tools Makefile.

Reported-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-next@vger.kernel.org
Cc: linux@roeck-us.net
Cc: live-patching@vger.kernel.org
Link: http://lkml.kernel.org/r/94a078c6c998fac9f01a14f574008bf7dff40191.1457016803.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index d61b0c628655..a50bb7c72b1c 100644
--- a/Makefile
+++ b/Makefile
@@ -1506,11 +1506,11 @@ image_name:
 # Clear a bunch of variables before executing the submake
 tools/: FORCE
 	$(Q)mkdir -p $(objtree)/tools
-	$(Q)$(MAKE) LDFLAGS= MAKEFLAGS="$(filter --j% -j,$(MAKEFLAGS))" O=$(O) subdir=tools -C $(src)/tools/
+	$(Q)$(MAKE) LDFLAGS= MAKEFLAGS="$(filter --j% -j,$(MAKEFLAGS))" O=$(shell cd $(objtree) && /bin/pwd) subdir=tools -C $(src)/tools/
 
 tools/%: FORCE
 	$(Q)mkdir -p $(objtree)/tools
-	$(Q)$(MAKE) LDFLAGS= MAKEFLAGS="$(filter --j% -j,$(MAKEFLAGS))" O=$(O) subdir=tools -C $(src)/tools/ $*
+	$(Q)$(MAKE) LDFLAGS= MAKEFLAGS="$(filter --j% -j,$(MAKEFLAGS))" O=$(shell cd $(objtree) && /bin/pwd) subdir=tools -C $(src)/tools/ $*
 
 # Single targets
 # ---------------------------------------------------------------------------

From d772ec1314f80de536aafc729855bcf3e691f997 Mon Sep 17 00:00:00 2001
From: Robert Jarzmik <robert.jarzmik@free.fr>
Date: Sat, 2 Apr 2016 21:38:53 +0200
Subject: [PATCH 18/74] kbuild: forbid kernel directory to contain spaces and
 colons

commit 51193b76bfff5027cf96ba63effae808ad67cca7 upstream.

When the kernel path contains a space or a colon somewhere in the path
name, the modules_install target doesn't work anymore, as the path names
are not enclosed in double quotes. It is also supposed that and O= build
will suffer from the same weakness as modules_install.

Instead of checking and improving kbuild to resist to directories
including these characters, error out early to prevent any build if the
kernel's main directory contains a space.

Signed-off-by: Robert Jarzmik <robert.jarzmik@free.fr>
Signed-off-by: Michal Marek <mmarek@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Makefile | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/Makefile b/Makefile
index a50bb7c72b1c..05d83a80464f 100644
--- a/Makefile
+++ b/Makefile
@@ -128,6 +128,10 @@ _all:
 # Cancel implicit rules on top Makefile
 $(CURDIR)/Makefile Makefile: ;
 
+ifneq ($(words $(subst :, ,$(CURDIR))), 1)
+  $(error main directory cannot contain spaces nor colons)
+endif
+
 ifneq ($(KBUILD_OUTPUT),)
 # Invoke a second make in the output directory, passing relevant variables
 # check that the output directory actually exists

From 605623774e714a460bec332d81ab3da194a26ae3 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 25 Apr 2016 17:35:28 +0200
Subject: [PATCH 19/74] Kbuild: disable 'maybe-uninitialized' warning for
 CONFIG_PROFILE_ALL_BRANCHES

commit 815eb71e7149ecce40db9dd0ad09c4dd9d33c60f upstream.

CONFIG_PROFILE_ALL_BRANCHES confuses gcc-5.x to the degree that it prints
incorrect warnings about a lot of variables that it thinks can be used
uninitialized, e.g.:

i2c/busses/i2c-diolan-u2c.c: In function 'diolan_usb_xfer':
i2c/busses/i2c-diolan-u2c.c:391:16: warning: 'byte' may be used uninitialized in this function
iio/gyro/itg3200_core.c: In function 'itg3200_probe':
iio/gyro/itg3200_core.c:213:6: warning: 'val' may be used uninitialized in this function
leds/leds-lp55xx-common.c: In function 'lp55xx_update_bits':
leds/leds-lp55xx-common.c:350:6: warning: 'tmp' may be used uninitialized in this function
misc/bmp085.c: In function 'show_pressure':
misc/bmp085.c:363:10: warning: 'pressure' may be used uninitialized in this function
power/ds2782_battery.c: In function 'ds2786_get_capacity':
power/ds2782_battery.c:214:17: warning: 'raw' may be used uninitialized in this function

These are all false positives that either rob someone's time when trying
to figure out whether they are real, or they get people to send wrong
patches to shut up the warnings.

Nobody normally wants to run a CONFIG_PROFILE_ALL_BRANCHES kernel in
production, so disabling the whole class of warnings for this configuration
has no serious downsides either.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Steven Rostedt <rostedtgoodmis.org>
Signed-off-by: Michal Marek <mmarek@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Makefile | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 05d83a80464f..d10cd0965388 100644
--- a/Makefile
+++ b/Makefile
@@ -620,7 +620,11 @@ KBUILD_CFLAGS	+= $(call cc-option,-fno-delete-null-pointer-checks,)
 ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
 KBUILD_CFLAGS	+= -Os $(call cc-disable-warning,maybe-uninitialized,)
 else
-KBUILD_CFLAGS	+= -O2
+ifdef CONFIG_PROFILE_ALL_BRANCHES
+KBUILD_CFLAGS	+= -O2 $(call cc-disable-warning,maybe-uninitialized,)
+else
+KBUILD_CFLAGS   += -O2
+endif
 endif
 
 # Tell gcc to never replace conditional load with a non-conditional one

From 7cd4d22328dc4a019e00508c66ecd4cd837597b5 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 25 Apr 2016 17:35:31 +0200
Subject: [PATCH 20/74] gcov: disable -Wmaybe-uninitialized warning

commit e72e2dfe7c16ffbfbabf9cb24adc6d9f93a4fe37 upstream.

When gcov profiling is enabled, we see a lot of spurious warnings about
possibly uninitialized variables being used:

arch/arm/mm/dma-mapping.c: In function 'arm_coherent_iommu_map_page':
arch/arm/mm/dma-mapping.c:1085:16: warning: 'start' may be used uninitialized in this function [-Wmaybe-uninitialized]
drivers/clk/st/clk-flexgen.c: In function 'st_of_flexgen_setup':
drivers/clk/st/clk-flexgen.c:323:9: warning: 'num_parents' may be used uninitialized in this function [-Wmaybe-uninitialized]
kernel/cgroup.c: In function 'cgroup_mount':
kernel/cgroup.c:2119:11: warning: 'root' may be used uninitialized in this function [-Wmaybe-uninitialized]

All of these are false positives, so it seems better to just disable
the warnings whenever GCOV is enabled. Most users don't enable GCOV,
and based on a prior patch, it is now also disabled for 'allmodconfig'
builds, so there should be no downsides of doing this.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
Signed-off-by: Michal Marek <mmarek@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index d10cd0965388..4cd7eb14863d 100644
--- a/Makefile
+++ b/Makefile
@@ -368,7 +368,7 @@ AFLAGS_MODULE   =
 LDFLAGS_MODULE  =
 CFLAGS_KERNEL	=
 AFLAGS_KERNEL	=
-CFLAGS_GCOV	= -fprofile-arcs -ftest-coverage -fno-tree-loop-im
+CFLAGS_GCOV	= -fprofile-arcs -ftest-coverage -fno-tree-loop-im -Wno-maybe-uninitialized
 
 
 # Use USERINCLUDE when you must reference the UAPI directories only.

From 3da2a4cb6e909dd45be3009379f0d29b9a9e369d Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 27 Jul 2016 13:17:41 -0700
Subject: [PATCH 21/74] Disable "maybe-uninitialized" warning globally

commit 6e8d666e925333c55378e8d5540a8a9ee0eea9c5 upstream.

Several build configurations had already disabled this warning because
it generates a lot of false positives.  But some had not, and it was
still enabled for "allmodconfig" builds, for example.

Looking at the warnings produced, every single one I looked at was a
false positive, and the warnings are frequent enough (and big enough)
that they can easily hide real problems that you don't notice in the
noise generated by -Wmaybe-uninitialized.

The warning is good in theory, but this is a classic case of a warning
that causes more problems than the warning can solve.

If gcc gets better at avoiding false positives, we may be able to
re-enable this warning.  But as is, we're better off without it, and I
want to be able to see the *real* warnings.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Makefile | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/Makefile b/Makefile
index 4cd7eb14863d..734579371fad 100644
--- a/Makefile
+++ b/Makefile
@@ -368,7 +368,7 @@ AFLAGS_MODULE   =
 LDFLAGS_MODULE  =
 CFLAGS_KERNEL	=
 AFLAGS_KERNEL	=
-CFLAGS_GCOV	= -fprofile-arcs -ftest-coverage -fno-tree-loop-im -Wno-maybe-uninitialized
+CFLAGS_GCOV	= -fprofile-arcs -ftest-coverage -fno-tree-loop-im
 
 
 # Use USERINCLUDE when you must reference the UAPI directories only.
@@ -616,12 +616,13 @@ ARCH_CFLAGS :=
 include arch/$(SRCARCH)/Makefile
 
 KBUILD_CFLAGS	+= $(call cc-option,-fno-delete-null-pointer-checks,)
+KBUILD_CFLAGS	+= $(call cc-disable-warning,maybe-uninitialized,)
 
 ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
-KBUILD_CFLAGS	+= -Os $(call cc-disable-warning,maybe-uninitialized,)
+KBUILD_CFLAGS	+= -Os
 else
 ifdef CONFIG_PROFILE_ALL_BRANCHES
-KBUILD_CFLAGS	+= -O2 $(call cc-disable-warning,maybe-uninitialized,)
+KBUILD_CFLAGS	+= -O2
 else
 KBUILD_CFLAGS   += -O2
 endif

From a521e942bd9b903935d4c79993bf6d0b6d5154c7 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 27 Jul 2016 19:03:04 -0700
Subject: [PATCH 22/74] Disable "frame-address" warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 124a3d88fa20e1869fc229d7d8c740cc81944264 upstream.

Newer versions of gcc warn about the use of __builtin_return_address()
with a non-zero argument when "-Wall" is specified:

  kernel/trace/trace_irqsoff.c: In function ‘stop_critical_timings’:
  kernel/trace/trace_irqsoff.c:433:86: warning: calling ‘__builtin_return_address’ with a nonzero argument is unsafe [-Wframe-address]
     stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1);
  [ .. repeats a few times for other similar cases .. ]

It is true that a non-zero argument is somewhat dangerous, and we do not
actually have very many uses of that in the kernel - but the ftrace code
does use it, and as Stephen Rostedt says:

 "We are well aware of the danger of using __builtin_return_address() of
  > 0.  In fact that's part of the reason for having the "thunk" code in
  x86 (See arch/x86/entry/thunk_{64,32}.S).  [..] it adds extra frames
  when tracking irqs off sections, to prevent __builtin_return_address()
  from accessing bad areas.  In fact the thunk_32.S states: 'Trampoline to
  trace irqs off.  (otherwise CALLER_ADDR1 might crash)'."

For now, __builtin_return_address() with a non-zero argument is the best
we can do, and the warning is not helpful and can end up making people
miss other warnings for real problems.

So disable the frame-address warning on compilers that need it.

Acked-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Makefile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Makefile b/Makefile
index 734579371fad..39250a7bd6a5 100644
--- a/Makefile
+++ b/Makefile
@@ -617,6 +617,7 @@ include arch/$(SRCARCH)/Makefile
 
 KBUILD_CFLAGS	+= $(call cc-option,-fno-delete-null-pointer-checks,)
 KBUILD_CFLAGS	+= $(call cc-disable-warning,maybe-uninitialized,)
+KBUILD_CFLAGS	+= $(call cc-disable-warning,frame-address,)
 
 ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
 KBUILD_CFLAGS	+= -Os

From a52031beb067ed3318c3ef226af5fa7223a7787d Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Thu, 28 Jul 2016 22:30:43 -0400
Subject: [PATCH 23/74] Makefile: Mute warning for __builtin_return_address(>0)
 for tracing only

commit 377ccbb483738f84400ddf5840c7dd8825716985 upstream.

With the latest gcc compilers, they give a warning if
__builtin_return_address() parameter is greater than 0. That is because if
it is used by a function called by a top level function (or in the case of
the kernel, by assembly), it can try to access stack frames outside the
stack and crash the system.

The tracing system uses __builtin_return_address() of up to 2! But it is
well aware of the dangers that it may have, and has even added precautions
to protect against it (see the thunk code in arch/x86/entry/thunk*.S)

Linus originally added KBUILD_CFLAGS that would suppress the warning for the
entire kernel, as simply adding KBUILD_CFLAGS to the tracing directory
wouldn't work. The tracing directory plays a bit with the CFLAGS and
requires a little more logic.

This adds that special logic to only suppress the warning for the tracing
directory. If it is used anywhere else outside of tracing, the warning will
still be triggered.

Link: http://lkml.kernel.org/r/20160728223043.51996267@grimm.local.home

Tested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Makefile              | 1 -
 kernel/trace/Makefile | 4 ++++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 39250a7bd6a5..734579371fad 100644
--- a/Makefile
+++ b/Makefile
@@ -617,7 +617,6 @@ include arch/$(SRCARCH)/Makefile
 
 KBUILD_CFLAGS	+= $(call cc-option,-fno-delete-null-pointer-checks,)
 KBUILD_CFLAGS	+= $(call cc-disable-warning,maybe-uninitialized,)
-KBUILD_CFLAGS	+= $(call cc-disable-warning,frame-address,)
 
 ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
 KBUILD_CFLAGS	+= -Os
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 9b1044e936a6..05ea5167e6bb 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -1,4 +1,8 @@
 
+# We are fully aware of the dangers of __builtin_return_address()
+FRAME_CFLAGS := $(call cc-disable-warning,frame-address)
+KBUILD_CFLAGS += $(FRAME_CFLAGS)
+
 # Do not instrument the tracer itself:
 
 ifdef CONFIG_FUNCTION_TRACER

From bc43ac8bd632ea7ea3dc210ab8ed11e723e28afd Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 14 Mar 2016 15:18:38 +0100
Subject: [PATCH 24/74] net: caif: fix misleading indentation

commit 8e0cc8c326d99e41468c96fea9785ab78883a281 upstream.

gcc points out code that is not indented the way it is
interpreted:

net/caif/cfpkt_skbuff.c: In function 'cfpkt_setlen':
net/caif/cfpkt_skbuff.c:289:4: error: statement is indented as if it were guarded by... [-Werror=misleading-indentation]
    return cfpkt_getlen(pkt);
    ^~~~~~
net/caif/cfpkt_skbuff.c:286:3: note: ...this 'else' clause, but it is not
   else
   ^~~~

It is clear from the context that not returning here would be
a bug, as we'd end up passing a negative length into a function
that takes a u16 length, so it is not missing curly braces
here, and I'm assuming that the indentation is the only part
that's wrong about it.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/caif/cfpkt_skbuff.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c
index f6c3b2137eea..59ce1fcc220c 100644
--- a/net/caif/cfpkt_skbuff.c
+++ b/net/caif/cfpkt_skbuff.c
@@ -286,7 +286,7 @@ int cfpkt_setlen(struct cfpkt *pkt, u16 len)
 		else
 			skb_trim(skb, len);
 
-			return cfpkt_getlen(pkt);
+		return cfpkt_getlen(pkt);
 	}
 
 	/* Need to expand SKB */

From c48692f59bca5792245bf723bc484bebd444a4f2 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 27 Jul 2016 20:03:31 -0700
Subject: [PATCH 25/74] =?UTF-8?q?Add=20braces=20to=20avoid=20"ambiguous=20?=
 =?UTF-8?q?=E2=80=98else=E2=80=99"=20compiler=20warnings?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 194dc870a5890e855ecffb30f3b80ba7c88f96d6 upstream.

Some of our "for_each_xyz()" macro constructs make gcc unhappy about
lack of braces around if-statements inside or outside the loop, because
the loop construct itself has a "if-then-else" statement inside of it.

The resulting warnings look something like this:

  drivers/gpu/drm/i915/i915_debugfs.c: In function ‘i915_dump_lrc’:
  drivers/gpu/drm/i915/i915_debugfs.c:2103:6: warning: suggest explicit braces to avoid ambiguous ‘else’ [-Wparentheses]
     if (ctx != dev_priv->kernel_context)
        ^

even if the code itself is fine.

Since the warning is fairly easy to avoid by adding a braces around the
if-statement near the for_each_xyz() construct, do so, rather than
disabling the otherwise potentially useful warning.

(The if-then-else statements used in the "for_each_xyz()" constructs are
designed to be inherently safe even with no braces, but in this case
it's quite understandable that gcc isn't really able to tell that).

This finally leaves the standard "allmodconfig" build with just a
handful of remaining warnings, so new and valid warnings hopefully will
stand out.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/iommu/dmar.c        | 3 ++-
 drivers/iommu/intel-iommu.c | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index 3821c4786662..565bb2c140ed 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -1858,10 +1858,11 @@ static int dmar_hp_remove_drhd(struct acpi_dmar_header *header, void *arg)
 	/*
 	 * All PCI devices managed by this unit should have been destroyed.
 	 */
-	if (!dmaru->include_all && dmaru->devices && dmaru->devices_cnt)
+	if (!dmaru->include_all && dmaru->devices && dmaru->devices_cnt) {
 		for_each_active_dev_scope(dmaru->devices,
 					  dmaru->devices_cnt, i, dev)
 			return -EBUSY;
+	}
 
 	ret = dmar_ir_hotplug(dmaru, false);
 	if (ret == 0)
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 24d81308a1a6..b7f852d824a3 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -4182,10 +4182,11 @@ int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
 	if (!atsru)
 		return 0;
 
-	if (!atsru->include_all && atsru->devices && atsru->devices_cnt)
+	if (!atsru->include_all && atsru->devices && atsru->devices_cnt) {
 		for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
 					  i, dev)
 			return -EBUSY;
+	}
 
 	return 0;
 }

From 7fb33fb721dcc75b523af322c3724e5cbf7f382c Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 14 Mar 2016 19:40:08 -0300
Subject: [PATCH 26/74] am437x-vfpe: fix typo in vpfe_get_app_input_index

commit 0fb504001192c1df62c847a8bb6558753c36ebef upstream.

gcc-6 points out an obviously silly comparison in vpfe_get_app_input_index():

drivers/media/platform/am437x/am437x-vpfe.c: In function 'vpfe_get_app_input_index':
drivers/media/platform/am437x/am437x-vpfe.c:1709:27: warning: self-comparison always evaluats to true [-Wtautological-compare]
       client->adapter->nr == client->adapter->nr) {
                           ^~

This was introduced in a slighly incorrect conversion, and it's
clear that the comparison was meant to compare the iterator
to the current subdev instead, as we do in the line above.

Fixes: d37232390fd4 ("[media] media: am437x-vpfe: match the OF node/i2c addr instead of name")

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Lad, Prabhakar <prabhakar.csengg@gmail.com>
Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/platform/am437x/am437x-vpfe.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/media/platform/am437x/am437x-vpfe.c b/drivers/media/platform/am437x/am437x-vpfe.c
index f0480d687f17..ba780c45f645 100644
--- a/drivers/media/platform/am437x/am437x-vpfe.c
+++ b/drivers/media/platform/am437x/am437x-vpfe.c
@@ -1706,7 +1706,7 @@ static int vpfe_get_app_input_index(struct vpfe_device *vpfe,
 		sdinfo = &cfg->sub_devs[i];
 		client = v4l2_get_subdevdata(sdinfo->sd);
 		if (client->addr == curr_client->addr &&
-		    client->adapter->nr == client->adapter->nr) {
+		    client->adapter->nr == curr_client->adapter->nr) {
 			if (vpfe->current_input >= 1)
 				return -1;
 			*app_input_index = j + vpfe->current_input;

From 1fff631e5218df89c508ac2a5cceb7c693ebde68 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 14 Mar 2016 15:18:37 +0100
Subject: [PATCH 27/74] ath9k: fix misleading indentation

commit 362210e0dff4eb7bb36a9b34dbef3b39d779d95e upstream.

A cleanup patch in linux-3.18 moved around some code in the ath9k
driver and left some code to be indented in a misleading way,
made worse by the addition of some new code for p2p mode, as
discovered by a new gcc-6 warning:

drivers/net/wireless/ath/ath9k/init.c: In function 'ath9k_set_hw_capab':
drivers/net/wireless/ath/ath9k/init.c:851:4: warning: statement is indented as if it were guarded by... [-Wmisleading-indentation]
    hw->wiphy->iface_combinations = if_comb;
    ^~
drivers/net/wireless/ath/ath9k/init.c:847:3: note: ...this 'if' clause, but it is not
   if (ath9k_is_chanctx_enabled())
   ^~

The code is in fact correct, but the indentation is not, so I'm
reformatting it as it should have been after the original cleanup.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: 499afaccf6f3 ("ath9k: Isolate ath9k_use_chanctx module parameter")
Fixes: eb61f9f623f7 ("ath9k: advertise p2p dev support when chanctx")
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/ath/ath9k/init.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/ath/ath9k/init.c b/drivers/net/wireless/ath/ath9k/init.c
index 1bdeacf7b257..bc70ce62bc03 100644
--- a/drivers/net/wireless/ath/ath9k/init.c
+++ b/drivers/net/wireless/ath/ath9k/init.c
@@ -869,8 +869,8 @@ static void ath9k_set_hw_capab(struct ath_softc *sc, struct ieee80211_hw *hw)
 			hw->wiphy->interface_modes |=
 					BIT(NL80211_IFTYPE_P2P_DEVICE);
 
-			hw->wiphy->iface_combinations = if_comb;
-			hw->wiphy->n_iface_combinations = ARRAY_SIZE(if_comb);
+		hw->wiphy->iface_combinations = if_comb;
+		hw->wiphy->n_iface_combinations = ARRAY_SIZE(if_comb);
 	}
 
 	hw->wiphy->flags &= ~WIPHY_FLAG_PS_ON_BY_DEFAULT;

From 682c360eb26f819be942956ce404064839123337 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 19 May 2016 09:58:49 +0200
Subject: [PATCH 28/74] iwlegacy: avoid warning about missing braces

commit 2cce76c3fab410520610a7d2f52faebc3cfcf843 upstream.

gcc-6 warns about code in il3945_hw_txq_ctx_free() being
somewhat ambiguous:

drivers/net/wireless/intel/iwlegacy/3945.c:1022:5: warning: suggest explicit braces to avoid ambiguous 'else' [-Wparentheses]

This adds a set of curly braces to avoid the warning.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Stanislaw Gruszka <sgruszka@redhat.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/iwlegacy/3945.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/iwlegacy/3945.c b/drivers/net/wireless/iwlegacy/3945.c
index 93bdf684babe..ae047ab7a4df 100644
--- a/drivers/net/wireless/iwlegacy/3945.c
+++ b/drivers/net/wireless/iwlegacy/3945.c
@@ -1019,12 +1019,13 @@ il3945_hw_txq_ctx_free(struct il_priv *il)
 	int txq_id;
 
 	/* Tx queues */
-	if (il->txq)
+	if (il->txq) {
 		for (txq_id = 0; txq_id < il->hw_params.max_txq_num; txq_id++)
 			if (txq_id == IL39_CMD_QUEUE_NUM)
 				il_cmd_queue_free(il);
 			else
 				il_tx_queue_free(il, txq_id);
+	}
 
 	/* free tx queue structure */
 	il_free_txq_mem(il);

From 2c4e9913a114c01991dc3ae455a1d126e440251e Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Sat, 23 Jan 2016 19:33:10 +0000
Subject: [PATCH 29/74] Staging: iio: adc: fix indent on break statement

commit b6acb0cfc21293a1bfc283e9217f58f7474ef728 upstream.

Fix indent warning when building with gcc 6:
drivers/staging/iio/adc/ad7192.c:239:4: warning: statement is indented
  as if it were guarded by... [-Wmisleading-indentation]

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/iio/adc/ad7192.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/iio/adc/ad7192.c b/drivers/staging/iio/adc/ad7192.c
index bb40f3728742..20314ff08be0 100644
--- a/drivers/staging/iio/adc/ad7192.c
+++ b/drivers/staging/iio/adc/ad7192.c
@@ -236,7 +236,7 @@ static int ad7192_setup(struct ad7192_state *st,
 			st->mclk = pdata->ext_clk_hz;
 		else
 			st->mclk = AD7192_INT_FREQ_MHZ;
-			break;
+		break;
 	default:
 		ret = -EINVAL;
 		goto out;

From e3718ed1df970f743f9f20cb4234dd56ed6f18da Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 14 Mar 2016 15:24:10 +0100
Subject: [PATCH 30/74] nouveau: fix nv40_perfctr_next() cleanup regression

commit 86d65b7e7a0c927d07d18605c276d0f142438ead upstream.

gcc-6 warns about code in the nouveau driver that is obviously silly:

drivers/gpu/drm/nouveau/nvkm/engine/pm/nv40.c: In function 'nv40_perfctr_next':
drivers/gpu/drm/nouveau/nvkm/engine/pm/nv40.c:62:19: warning: self-comparison always evaluats to false [-Wtautological-compare]
  if (pm->sequence != pm->sequence) {

The behavior was accidentally introduced in a patch described as "This is
purely preparation for upcoming commits, there should be no code changes here.".
As far as I can tell, that was true for the rest of that patch except for
this one function, which has been changed to a NOP.

This patch restores the original behavior.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: 8c1aeaa13954 ("drm/nouveau/pm: cosmetic changes")
Reviewed-by: Ben Skeggs <bskeggs@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/nouveau/nvkm/engine/pm/nv40.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/pm/nv40.c b/drivers/gpu/drm/nouveau/nvkm/engine/pm/nv40.c
index 4bef72a9d106..3fda594700e0 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/pm/nv40.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/pm/nv40.c
@@ -59,9 +59,11 @@ static void
 nv40_perfctr_next(struct nvkm_pm *pm, struct nvkm_perfdom *dom)
 {
 	struct nvkm_device *device = pm->engine.subdev.device;
-	if (pm->sequence != pm->sequence) {
+	struct nv40_pm *nv40pm = container_of(pm, struct nv40_pm, base);
+
+	if (nv40pm->sequence != pm->sequence) {
 		nvkm_wr32(device, 0x400084, 0x00000020);
-		pm->sequence = pm->sequence;
+		nv40pm->sequence = pm->sequence;
 	}
 }
 

From 56e5ad1e1d7a2ab2d79ef004a29d15e0a137a0d0 Mon Sep 17 00:00:00 2001
From: Maurizio Lombardi <mlombard@redhat.com>
Date: Fri, 22 Jan 2016 13:41:42 +0100
Subject: [PATCH 31/74] megaraid: fix null pointer check in
 megasas_detach_one().

commit 546e559c79b1a8d27c23262907a00fc209e392a0 upstream.

The pd_seq_sync pointer can't be NULL, we have to check its entries
instead.

Signed-off-by: Maurizio Lombardi <mlombard@redhat.com>
Acked-by: Sumit Saxena <sumit.saxena@broadcom.com>
Reviewed-by: Tomas Henzl <thenzl@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/megaraid/megaraid_sas_base.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c
index 3f8d357b1bac..278e10cd771f 100644
--- a/drivers/scsi/megaraid/megaraid_sas_base.c
+++ b/drivers/scsi/megaraid/megaraid_sas_base.c
@@ -5941,11 +5941,11 @@ static void megasas_detach_one(struct pci_dev *pdev)
 			if (fusion->ld_drv_map[i])
 				free_pages((ulong)fusion->ld_drv_map[i],
 					fusion->drv_map_pages);
-				if (fusion->pd_seq_sync)
-					dma_free_coherent(&instance->pdev->dev,
-						pd_seq_map_sz,
-						fusion->pd_seq_sync[i],
-						fusion->pd_seq_phys[i]);
+			if (fusion->pd_seq_sync[i])
+				dma_free_coherent(&instance->pdev->dev,
+					pd_seq_map_sz,
+					fusion->pd_seq_sync[i],
+					fusion->pd_seq_phys[i]);
 		}
 		free_pages((ulong)instance->ctrl_context,
 			instance->ctrl_context_pages);

From f357a79839f95f5ea7baf1c1366d64796d833bca Mon Sep 17 00:00:00 2001
From: Mahesh Bandewar <maheshb@google.com>
Date: Thu, 1 Sep 2016 22:18:34 -0700
Subject: [PATCH 32/74] bonding: Fix bonding crash

[ Upstream commit 24b27fc4cdf9e10c5e79e5923b6b7c2c5c95096c ]

Following few steps will crash kernel -

  (a) Create bonding master
      > modprobe bonding miimon=50
  (b) Create macvlan bridge on eth2
      > ip link add link eth2 dev mvl0 address aa:0:0:0:0:01 \
	   type macvlan
  (c) Now try adding eth2 into the bond
      > echo +eth2 > /sys/class/net/bond0/bonding/slaves
      <crash>

Bonding does lots of things before checking if the device enslaved is
busy or not.

In this case when the notifier call-chain sends notifications, the
bond_netdev_event() assumes that the rx_handler /rx_handler_data is
registered while the bond_enslave() hasn't progressed far enough to
register rx_handler for the new slave.

This patch adds a rx_handler check that can be performed right at the
beginning of the enslave code to avoid getting into this situation.

Signed-off-by: Mahesh Bandewar <maheshb@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/bonding/bond_main.c |  7 ++++---
 include/linux/netdevice.h       |  1 +
 net/core/dev.c                  | 16 ++++++++++++++++
 3 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index b3d70a7a5262..5dca77e0ffed 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1317,9 +1317,10 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 			    slave_dev->name);
 	}
 
-	/* already enslaved */
-	if (slave_dev->flags & IFF_SLAVE) {
-		netdev_dbg(bond_dev, "Error: Device was already enslaved\n");
+	/* already in-use? */
+	if (netdev_is_rx_handler_busy(slave_dev)) {
+		netdev_err(bond_dev,
+			   "Error: Device is in use and cannot be enslaved\n");
 		return -EBUSY;
 	}
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b97d6823ef3c..4e9c75226f07 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3036,6 +3036,7 @@ static inline void napi_free_frags(struct napi_struct *napi)
 	napi->skb = NULL;
 }
 
+bool netdev_is_rx_handler_busy(struct net_device *dev);
 int netdev_rx_handler_register(struct net_device *dev,
 			       rx_handler_func_t *rx_handler,
 			       void *rx_handler_data);
diff --git a/net/core/dev.c b/net/core/dev.c
index 9efbdb3ff78a..de4ed2b5a221 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3721,6 +3721,22 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
 	return skb;
 }
 
+/**
+ *	netdev_is_rx_handler_busy - check if receive handler is registered
+ *	@dev: device to check
+ *
+ *	Check if a receive handler is already registered for a given device.
+ *	Return true if there one.
+ *
+ *	The caller must hold the rtnl_mutex.
+ */
+bool netdev_is_rx_handler_busy(struct net_device *dev)
+{
+	ASSERT_RTNL();
+	return dev && rtnl_dereference(dev->rx_handler);
+}
+EXPORT_SYMBOL_GPL(netdev_is_rx_handler_busy);
+
 /**
  *	netdev_rx_handler_register - register receive handler
  *	@dev: device to register a handler for

From 941f6995440400a67cebce678e612459ea60b96b Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 1 Sep 2016 14:56:49 -0700
Subject: [PATCH 33/74] Revert "af_unix: Fix splice-bind deadlock"

commit 38f7bd94a97b542de86a2be9229289717e33a7a4 upstream.

This reverts commit c845acb324aa85a39650a14e7696982ceea75dc1.

It turns out that it just replaces one deadlock with another one: we can
still get the wrong lock ordering with the readlock due to overlayfs
calling back into the filesystem layer and still taking the vfs locks
after the readlock.

The proper solution ends up being to just split the readlock into two
pieces: the bind lock (taken *outside* the vfs locks) and the IO lock
(taken *inside* the filesystem locks).  The two locks are independent
anyway.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Reviewed-by: Shmulik Ladkani <shmulik.ladkani@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/unix/af_unix.c | 66 ++++++++++++++++++----------------------------
 1 file changed, 26 insertions(+), 40 deletions(-)

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 6579fd6e7459..f13f9bca8985 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -953,20 +953,32 @@ fail:
 	return NULL;
 }
 
-static int unix_mknod(struct dentry *dentry, struct path *path, umode_t mode,
-		      struct path *res)
+static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
 {
-	int err;
+	struct dentry *dentry;
+	struct path path;
+	int err = 0;
+	/*
+	 * Get the parent directory, calculate the hash for last
+	 * component.
+	 */
+	dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
+	err = PTR_ERR(dentry);
+	if (IS_ERR(dentry))
+		return err;
 
-	err = security_path_mknod(path, dentry, mode, 0);
+	/*
+	 * All right, let's create it.
+	 */
+	err = security_path_mknod(&path, dentry, mode, 0);
 	if (!err) {
-		err = vfs_mknod(d_inode(path->dentry), dentry, mode, 0);
+		err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
 		if (!err) {
-			res->mnt = mntget(path->mnt);
+			res->mnt = mntget(path.mnt);
 			res->dentry = dget(dentry);
 		}
 	}
-
+	done_path_create(&path, dentry);
 	return err;
 }
 
@@ -977,12 +989,10 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	struct unix_sock *u = unix_sk(sk);
 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
 	char *sun_path = sunaddr->sun_path;
-	int err, name_err;
+	int err;
 	unsigned int hash;
 	struct unix_address *addr;
 	struct hlist_head *list;
-	struct path path;
-	struct dentry *dentry;
 
 	err = -EINVAL;
 	if (sunaddr->sun_family != AF_UNIX)
@@ -998,34 +1008,14 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 		goto out;
 	addr_len = err;
 
-	name_err = 0;
-	dentry = NULL;
-	if (sun_path[0]) {
-		/* Get the parent directory, calculate the hash for last
-		 * component.
-		 */
-		dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
-
-		if (IS_ERR(dentry)) {
-			/* delay report until after 'already bound' check */
-			name_err = PTR_ERR(dentry);
-			dentry = NULL;
-		}
-	}
-
 	err = mutex_lock_interruptible(&u->readlock);
 	if (err)
-		goto out_path;
+		goto out;
 
 	err = -EINVAL;
 	if (u->addr)
 		goto out_up;
 
-	if (name_err) {
-		err = name_err == -EEXIST ? -EADDRINUSE : name_err;
-		goto out_up;
-	}
-
 	err = -ENOMEM;
 	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
 	if (!addr)
@@ -1036,11 +1026,11 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	addr->hash = hash ^ sk->sk_type;
 	atomic_set(&addr->refcnt, 1);
 
-	if (dentry) {
-		struct path u_path;
+	if (sun_path[0]) {
+		struct path path;
 		umode_t mode = S_IFSOCK |
 		       (SOCK_INODE(sock)->i_mode & ~current_umask());
-		err = unix_mknod(dentry, &path, mode, &u_path);
+		err = unix_mknod(sun_path, mode, &path);
 		if (err) {
 			if (err == -EEXIST)
 				err = -EADDRINUSE;
@@ -1048,9 +1038,9 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 			goto out_up;
 		}
 		addr->hash = UNIX_HASH_SIZE;
-		hash = d_real_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1);
+		hash = d_real_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
 		spin_lock(&unix_table_lock);
-		u->path = u_path;
+		u->path = path;
 		list = &unix_socket_table[hash];
 	} else {
 		spin_lock(&unix_table_lock);
@@ -1073,10 +1063,6 @@ out_unlock:
 	spin_unlock(&unix_table_lock);
 out_up:
 	mutex_unlock(&u->readlock);
-out_path:
-	if (dentry)
-		done_path_create(&path, dentry);
-
 out:
 	return err;
 }

From 9b5390d7b6abf1fe04ccc6437691881c369f7ff9 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 1 Sep 2016 14:43:53 -0700
Subject: [PATCH 34/74] af_unix: split 'u->readlock' into two: 'iolock' and
 'bindlock'

commit 6e1ce3c3451291142a57c4f3f6f999a29fb5b3bc upstream.

Right now we use the 'readlock' both for protecting some of the af_unix
IO path and for making the bind be single-threaded.

The two are independent, but using the same lock makes for a nasty
deadlock due to ordering with regards to filesystem locking.  The bind
locking would want to nest outside the VSF pathname locking, but the IO
locking wants to nest inside some of those same locks.

We tried to fix this earlier with commit c845acb324aa ("af_unix: Fix
splice-bind deadlock") which moved the readlock inside the vfs locks,
but that caused problems with overlayfs that will then call back into
filesystem routines that take the lock in the wrong order anyway.

Splitting the locks means that we can go back to having the bind lock be
the outermost lock, and we don't have any deadlocks with lock ordering.

Acked-by: Rainer Weikusat <rweikusat@cyberadapt.com>
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/af_unix.h |  2 +-
 net/unix/af_unix.c    | 41 +++++++++++++++++++++--------------------
 2 files changed, 22 insertions(+), 21 deletions(-)

diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 9b4c418bebd8..fd60eccb59a6 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -52,7 +52,7 @@ struct unix_sock {
 	struct sock		sk;
 	struct unix_address     *addr;
 	struct path		path;
-	struct mutex		readlock;
+	struct mutex		iolock, bindlock;
 	struct sock		*peer;
 	struct list_head	link;
 	atomic_long_t		inflight;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index f13f9bca8985..824cc1e160bc 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -661,11 +661,11 @@ static int unix_set_peek_off(struct sock *sk, int val)
 {
 	struct unix_sock *u = unix_sk(sk);
 
-	if (mutex_lock_interruptible(&u->readlock))
+	if (mutex_lock_interruptible(&u->iolock))
 		return -EINTR;
 
 	sk->sk_peek_off = val;
-	mutex_unlock(&u->readlock);
+	mutex_unlock(&u->iolock);
 
 	return 0;
 }
@@ -778,7 +778,8 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
 	spin_lock_init(&u->lock);
 	atomic_long_set(&u->inflight, 0);
 	INIT_LIST_HEAD(&u->link);
-	mutex_init(&u->readlock); /* single task reading lock */
+	mutex_init(&u->iolock); /* single task reading lock */
+	mutex_init(&u->bindlock); /* single task binding lock */
 	init_waitqueue_head(&u->peer_wait);
 	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
 	unix_insert_socket(unix_sockets_unbound(sk), sk);
@@ -847,7 +848,7 @@ static int unix_autobind(struct socket *sock)
 	int err;
 	unsigned int retries = 0;
 
-	err = mutex_lock_interruptible(&u->readlock);
+	err = mutex_lock_interruptible(&u->bindlock);
 	if (err)
 		return err;
 
@@ -894,7 +895,7 @@ retry:
 	spin_unlock(&unix_table_lock);
 	err = 0;
 
-out:	mutex_unlock(&u->readlock);
+out:	mutex_unlock(&u->bindlock);
 	return err;
 }
 
@@ -1008,7 +1009,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 		goto out;
 	addr_len = err;
 
-	err = mutex_lock_interruptible(&u->readlock);
+	err = mutex_lock_interruptible(&u->bindlock);
 	if (err)
 		goto out;
 
@@ -1062,7 +1063,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 out_unlock:
 	spin_unlock(&unix_table_lock);
 out_up:
-	mutex_unlock(&u->readlock);
+	mutex_unlock(&u->bindlock);
 out:
 	return err;
 }
@@ -1957,17 +1958,17 @@ static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
 	if (false) {
 alloc_skb:
 		unix_state_unlock(other);
-		mutex_unlock(&unix_sk(other)->readlock);
+		mutex_unlock(&unix_sk(other)->iolock);
 		newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
 					      &err, 0);
 		if (!newskb)
 			goto err;
 	}
 
-	/* we must acquire readlock as we modify already present
+	/* we must acquire iolock as we modify already present
 	 * skbs in the sk_receive_queue and mess with skb->len
 	 */
-	err = mutex_lock_interruptible(&unix_sk(other)->readlock);
+	err = mutex_lock_interruptible(&unix_sk(other)->iolock);
 	if (err) {
 		err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
 		goto err;
@@ -2034,7 +2035,7 @@ alloc_skb:
 	}
 
 	unix_state_unlock(other);
-	mutex_unlock(&unix_sk(other)->readlock);
+	mutex_unlock(&unix_sk(other)->iolock);
 
 	other->sk_data_ready(other);
 	scm_destroy(&scm);
@@ -2043,7 +2044,7 @@ alloc_skb:
 err_state_unlock:
 	unix_state_unlock(other);
 err_unlock:
-	mutex_unlock(&unix_sk(other)->readlock);
+	mutex_unlock(&unix_sk(other)->iolock);
 err:
 	kfree_skb(newskb);
 	if (send_sigpipe && !(flags & MSG_NOSIGNAL))
@@ -2108,7 +2109,7 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
 	if (flags&MSG_OOB)
 		goto out;
 
-	err = mutex_lock_interruptible(&u->readlock);
+	err = mutex_lock_interruptible(&u->iolock);
 	if (unlikely(err)) {
 		/* recvmsg() in non blocking mode is supposed to return -EAGAIN
 		 * sk_rcvtimeo is not honored by mutex_lock_interruptible()
@@ -2184,7 +2185,7 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
 out_free:
 	skb_free_datagram(sk, skb);
 out_unlock:
-	mutex_unlock(&u->readlock);
+	mutex_unlock(&u->iolock);
 out:
 	return err;
 }
@@ -2279,7 +2280,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state)
 	/* Lock the socket to prevent queue disordering
 	 * while sleeps in memcpy_tomsg
 	 */
-	mutex_lock(&u->readlock);
+	mutex_lock(&u->iolock);
 
 	if (flags & MSG_PEEK)
 		skip = sk_peek_offset(sk, flags);
@@ -2320,7 +2321,7 @@ again:
 				break;
 			}
 
-			mutex_unlock(&u->readlock);
+			mutex_unlock(&u->iolock);
 
 			timeo = unix_stream_data_wait(sk, timeo, last,
 						      last_len);
@@ -2331,7 +2332,7 @@ again:
 				goto out;
 			}
 
-			mutex_lock(&u->readlock);
+			mutex_lock(&u->iolock);
 			continue;
 unlock:
 			unix_state_unlock(sk);
@@ -2434,7 +2435,7 @@ unlock:
 		}
 	} while (size);
 
-	mutex_unlock(&u->readlock);
+	mutex_unlock(&u->iolock);
 	if (state->msg)
 		scm_recv(sock, state->msg, &scm, flags);
 	else
@@ -2475,9 +2476,9 @@ static ssize_t skb_unix_socket_splice(struct sock *sk,
 	int ret;
 	struct unix_sock *u = unix_sk(sk);
 
-	mutex_unlock(&u->readlock);
+	mutex_unlock(&u->iolock);
 	ret = splice_to_pipe(pipe, spd);
-	mutex_lock(&u->readlock);
+	mutex_lock(&u->iolock);
 
 	return ret;
 }

From 0bb225a04dc3b730844a9d2acb5b4f9307b6e1f9 Mon Sep 17 00:00:00 2001
From: Lance Richardson <lrichard@redhat.com>
Date: Tue, 9 Aug 2016 15:29:42 -0400
Subject: [PATCH 35/74] vti: flush x-netns xfrm cache when vti interface is
 removed

[ Upstream commit a5d0dc810abf3d6b241777467ee1d6efb02575fc ]

When executing the script included below, the netns delete operation
hangs with the following message (repeated at 10 second intervals):

  kernel:unregister_netdevice: waiting for lo to become free. Usage count = 1

This occurs because a reference to the lo interface in the "secure" netns
is still held by a dst entry in the xfrm bundle cache in the init netns.

Address this problem by garbage collecting the tunnel netns flow cache
when a cross-namespace vti interface receives a NETDEV_DOWN notification.

A more detailed description of the problem scenario (referencing commands
in the script below):

(1) ip link add vti_test type vti local 1.1.1.1 remote 1.1.1.2 key 1

  The vti_test interface is created in the init namespace. vti_tunnel_init()
  attaches a struct ip_tunnel to the vti interface's netdev_priv(dev),
  setting the tunnel net to &init_net.

(2) ip link set vti_test netns secure

  The vti_test interface is moved to the "secure" netns. Note that
  the associated struct ip_tunnel still has tunnel->net set to &init_net.

(3) ip netns exec secure ping -c 4 -i 0.02 -I 192.168.100.1 192.168.200.1

  The first packet sent using the vti device causes xfrm_lookup() to be
  called as follows:

      dst = xfrm_lookup(tunnel->net, skb_dst(skb), fl, NULL, 0);

  Note that tunnel->net is the init namespace, while skb_dst(skb) references
  the vti_test interface in the "secure" namespace. The returned dst
  references an interface in the init namespace.

  Also note that the first parameter to xfrm_lookup() determines which flow
  cache is used to store the computed xfrm bundle, so after xfrm_lookup()
  returns there will be a cached bundle in the init namespace flow cache
  with a dst referencing a device in the "secure" namespace.

(4) ip netns del secure

  Kernel begins to delete the "secure" namespace.  At some point the
  vti_test interface is deleted, at which point dst_ifdown() changes
  the dst->dev in the cached xfrm bundle flow from vti_test to lo (still
  in the "secure" namespace however).
  Since nothing has happened to cause the init namespace's flow cache
  to be garbage collected, this dst remains attached to the flow cache,
  so the kernel loops waiting for the last reference to lo to go away.

<Begin script>
ip link add br1 type bridge
ip link set dev br1 up
ip addr add dev br1 1.1.1.1/8

ip netns add secure
ip link add vti_test type vti local 1.1.1.1 remote 1.1.1.2 key 1
ip link set vti_test netns secure
ip netns exec secure ip link set vti_test up
ip netns exec secure ip link s lo up
ip netns exec secure ip addr add dev lo 192.168.100.1/24
ip netns exec secure ip route add 192.168.200.0/24 dev vti_test
ip xfrm policy flush
ip xfrm state flush
ip xfrm policy add dir out tmpl src 1.1.1.1 dst 1.1.1.2 \
   proto esp mode tunnel mark 1
ip xfrm policy add dir in tmpl src 1.1.1.2 dst 1.1.1.1 \
   proto esp mode tunnel mark 1
ip xfrm state add src 1.1.1.1 dst 1.1.1.2 proto esp spi 1 \
   mode tunnel enc des3_ede 0x112233445566778811223344556677881122334455667788
ip xfrm state add src 1.1.1.2 dst 1.1.1.1 proto esp spi 1 \
   mode tunnel enc des3_ede 0x112233445566778811223344556677881122334455667788

ip netns exec secure ping -c 4 -i 0.02 -I 192.168.100.1 192.168.200.1

ip netns del secure
<End script>

Reported-by: Hangbin Liu <haliu@redhat.com>
Reported-by: Jan Tluka <jtluka@redhat.com>
Signed-off-by: Lance Richardson <lrichard@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/ip_vti.c | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 4d8f0b698777..65036891e080 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -540,6 +540,33 @@ static struct rtnl_link_ops vti_link_ops __read_mostly = {
 	.get_link_net	= ip_tunnel_get_link_net,
 };
 
+static bool is_vti_tunnel(const struct net_device *dev)
+{
+	return dev->netdev_ops == &vti_netdev_ops;
+}
+
+static int vti_device_event(struct notifier_block *unused,
+			    unsigned long event, void *ptr)
+{
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	struct ip_tunnel *tunnel = netdev_priv(dev);
+
+	if (!is_vti_tunnel(dev))
+		return NOTIFY_DONE;
+
+	switch (event) {
+	case NETDEV_DOWN:
+		if (!net_eq(tunnel->net, dev_net(dev)))
+			xfrm_garbage_collect(tunnel->net);
+		break;
+	}
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block vti_notifier_block __read_mostly = {
+	.notifier_call = vti_device_event,
+};
+
 static int __init vti_init(void)
 {
 	const char *msg;
@@ -547,6 +574,8 @@ static int __init vti_init(void)
 
 	pr_info("IPv4 over IPsec tunneling driver\n");
 
+	register_netdevice_notifier(&vti_notifier_block);
+
 	msg = "tunnel device";
 	err = register_pernet_device(&vti_net_ops);
 	if (err < 0)
@@ -579,6 +608,7 @@ xfrm_proto_ah_failed:
 xfrm_proto_esp_failed:
 	unregister_pernet_device(&vti_net_ops);
 pernet_dev_failed:
+	unregister_netdevice_notifier(&vti_notifier_block);
 	pr_err("vti init: failed to register %s\n", msg);
 	return err;
 }
@@ -590,6 +620,7 @@ static void __exit vti_fini(void)
 	xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH);
 	xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP);
 	unregister_pernet_device(&vti_net_ops);
+	unregister_netdevice_notifier(&vti_notifier_block);
 }
 
 module_init(vti_init);

From 8d0d2ce6c81ddba2cc9ba7f7dc04c32b2f2812bb Mon Sep 17 00:00:00 2001
From: Vegard Nossum <vegard.nossum@oracle.com>
Date: Fri, 12 Aug 2016 10:29:13 +0200
Subject: [PATCH 36/74] net/irda: handle iriap_register_lsap() allocation
 failure

[ Upstream commit 5ba092efc7ddff040777ae7162f1d195f513571b ]

If iriap_register_lsap() fails to allocate memory, self->lsap is
set to NULL. However, none of the callers handle the failure and
irlmp_connect_request() will happily dereference it:

    iriap_register_lsap: Unable to allocated LSAP!
    ================================================================================
    UBSAN: Undefined behaviour in net/irda/irlmp.c:378:2
    member access within null pointer of type 'struct lsap_cb'
    CPU: 1 PID: 15403 Comm: trinity-c0 Not tainted 4.8.0-rc1+ #81
    Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.3-0-ge2fc41e-prebuilt.qemu-project.org
    04/01/2014
     0000000000000000 ffff88010c7e78a8 ffffffff82344f40 0000000041b58ab3
     ffffffff84f98000 ffffffff82344e94 ffff88010c7e78d0 ffff88010c7e7880
     ffff88010630ad00 ffffffff84a5fae0 ffffffff84d3f5c0 000000000000017a
    Call Trace:
     [<ffffffff82344f40>] dump_stack+0xac/0xfc
     [<ffffffff8242f5a8>] ubsan_epilogue+0xd/0x8a
     [<ffffffff824302bf>] __ubsan_handle_type_mismatch+0x157/0x411
     [<ffffffff83b7bdbc>] irlmp_connect_request+0x7ac/0x970
     [<ffffffff83b77cc0>] iriap_connect_request+0xa0/0x160
     [<ffffffff83b77f48>] state_s_disconnect+0x88/0xd0
     [<ffffffff83b78904>] iriap_do_client_event+0x94/0x120
     [<ffffffff83b77710>] iriap_getvaluebyclass_request+0x3e0/0x6d0
     [<ffffffff83ba6ebb>] irda_find_lsap_sel+0x1eb/0x630
     [<ffffffff83ba90c8>] irda_connect+0x828/0x12d0
     [<ffffffff833c0dfb>] SYSC_connect+0x22b/0x340
     [<ffffffff833c7e09>] SyS_connect+0x9/0x10
     [<ffffffff81007bd3>] do_syscall_64+0x1b3/0x4b0
     [<ffffffff845f946a>] entry_SYSCALL64_slow_path+0x25/0x25
    ================================================================================

The bug seems to have been around since forever.

There's more problems with missing error checks in iriap_init() (and
indeed all of irda_init()), but that's a bigger problem that needs
very careful review and testing. This patch will fix the most serious
bug (as it's easily reached from unprivileged userspace).

I have tested my patch with a reproducer.

Signed-off-by: Vegard Nossum <vegard.nossum@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/irda/iriap.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/net/irda/iriap.c b/net/irda/iriap.c
index 4a7ae32afa09..1138eaf5c682 100644
--- a/net/irda/iriap.c
+++ b/net/irda/iriap.c
@@ -185,8 +185,12 @@ struct iriap_cb *iriap_open(__u8 slsap_sel, int mode, void *priv,
 
 	self->magic = IAS_MAGIC;
 	self->mode = mode;
-	if (mode == IAS_CLIENT)
-		iriap_register_lsap(self, slsap_sel, mode);
+	if (mode == IAS_CLIENT) {
+		if (iriap_register_lsap(self, slsap_sel, mode)) {
+			kfree(self);
+			return NULL;
+		}
+	}
 
 	self->confirm = callback;
 	self->priv = priv;

From 4be4511af7966b7b0f93d9f129a876458794f2f3 Mon Sep 17 00:00:00 2001
From: Vegard Nossum <vegard.nossum@oracle.com>
Date: Sat, 23 Jul 2016 08:15:04 +0200
Subject: [PATCH 37/74] tipc: fix NULL pointer dereference in shutdown()

[ Upstream commit d2fbdf76b85bcdfe57b8ef2ba09d20e8ada79abd ]

tipc_msg_create() can return a NULL skb and if so, we shouldn't try to
call tipc_node_xmit_skb() on it.

    general protection fault: 0000 [#1] PREEMPT SMP KASAN
    CPU: 3 PID: 30298 Comm: trinity-c0 Not tainted 4.7.0-rc7+ #19
    Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
    task: ffff8800baf09980 ti: ffff8800595b8000 task.ti: ffff8800595b8000
    RIP: 0010:[<ffffffff830bb46b>]  [<ffffffff830bb46b>] tipc_node_xmit_skb+0x6b/0x140
    RSP: 0018:ffff8800595bfce8  EFLAGS: 00010246
    RAX: 0000000000000000 RBX: 0000000000000000 RCX: 000000003023b0e0
    RDX: 0000000000000000 RSI: dffffc0000000000 RDI: ffffffff83d12580
    RBP: ffff8800595bfd78 R08: ffffed000b2b7f32 R09: 0000000000000000
    R10: fffffbfff0759725 R11: 0000000000000000 R12: 1ffff1000b2b7f9f
    R13: ffff8800595bfd58 R14: ffffffff83d12580 R15: dffffc0000000000
    FS:  00007fcdde242700(0000) GS:ffff88011af80000(0000) knlGS:0000000000000000
    CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
    CR2: 00007fcddde1db10 CR3: 000000006874b000 CR4: 00000000000006e0
    DR0: 00007fcdde248000 DR1: 00007fcddd73d000 DR2: 00007fcdde248000
    DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000090602
    Stack:
     0000000000000018 0000000000000018 0000000041b58ab3 ffffffff83954208
     ffffffff830bb400 ffff8800595bfd30 ffffffff8309d767 0000000000000018
     0000000000000018 ffff8800595bfd78 ffffffff8309da1a 00000000810ee611
    Call Trace:
     [<ffffffff830c84a3>] tipc_shutdown+0x553/0x880
     [<ffffffff825b4a3b>] SyS_shutdown+0x14b/0x170
     [<ffffffff8100334c>] do_syscall_64+0x19c/0x410
     [<ffffffff83295ca5>] entry_SYSCALL64_slow_path+0x25/0x25
    Code: 90 00 b4 0b 83 c7 00 f1 f1 f1 f1 4c 8d 6d e0 c7 40 04 00 00 00 f4 c7 40 08 f3 f3 f3 f3 48 89 d8 48 c1 e8 03 c7 45 b4 00 00 00 00 <80> 3c 30 00 75 78 48 8d 7b 08 49 8d 75 c0 48 b8 00 00 00 00 00
    RIP  [<ffffffff830bb46b>] tipc_node_xmit_skb+0x6b/0x140
     RSP <ffff8800595bfce8>
    ---[ end trace 57b0484e351e71f1 ]---

I feel like we should maybe return -ENOMEM or -ENOBUFS, but I'm not sure
userspace is equipped to handle that. Anyway, this is better than a GPF
and looks somewhat consistent with other tipc_msg_create() callers.

Signed-off-by: Vegard Nossum <vegard.nossum@oracle.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Acked-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/tipc/socket.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 9b713e0ce00d..b26b7a127773 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2111,7 +2111,8 @@ restart:
 					      TIPC_CONN_MSG, SHORT_H_SIZE,
 					      0, dnode, onode, dport, oport,
 					      TIPC_CONN_SHUTDOWN);
-			tipc_node_xmit_skb(net, skb, dnode, tsk->portid);
+			if (skb)
+				tipc_node_xmit_skb(net, skb, dnode, tsk->portid);
 		}
 		tsk->connected = 0;
 		sock->state = SS_DISCONNECTING;

From c03c024fd36db587cb0797408d5c64adf096f678 Mon Sep 17 00:00:00 2001
From: Paul Blakey <paulb@mellanox.com>
Date: Thu, 18 Aug 2016 21:09:05 +0300
Subject: [PATCH 38/74] net/mlx5: Added missing check of msg length in
 verifying its signature

[ Upstream commit 2c0f8ce1b584a4d7b8ff53140d21dfed99834940 ]

Set and verify signature calculates the signature for each of the
mailbox nodes, even for those that are unused (from cache). Added
a missing length check to set and verify only those which are used.

While here, also moved the setting of msg's nodes token to where we
already go over them. This saves a pass because checksum is disabled,
and the only useful thing remaining that set signature does is setting
the token.

Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB
adapters')
Signed-off-by: Paul Blakey <paulb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 83 ++++++++++++-------
 1 file changed, 53 insertions(+), 30 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 037fc4cdf5af..cc199063612a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -143,13 +143,14 @@ static struct mlx5_cmd_layout *get_inst(struct mlx5_cmd *cmd, int idx)
 	return cmd->cmd_buf + (idx << cmd->log_stride);
 }
 
-static u8 xor8_buf(void *buf, int len)
+static u8 xor8_buf(void *buf, size_t offset, int len)
 {
 	u8 *ptr = buf;
 	u8 sum = 0;
 	int i;
+	int end = len + offset;
 
-	for (i = 0; i < len; i++)
+	for (i = offset; i < end; i++)
 		sum ^= ptr[i];
 
 	return sum;
@@ -157,41 +158,49 @@ static u8 xor8_buf(void *buf, int len)
 
 static int verify_block_sig(struct mlx5_cmd_prot_block *block)
 {
-	if (xor8_buf(block->rsvd0, sizeof(*block) - sizeof(block->data) - 1) != 0xff)
+	size_t rsvd0_off = offsetof(struct mlx5_cmd_prot_block, rsvd0);
+	int xor_len = sizeof(*block) - sizeof(block->data) - 1;
+
+	if (xor8_buf(block, rsvd0_off, xor_len) != 0xff)
 		return -EINVAL;
 
-	if (xor8_buf(block, sizeof(*block)) != 0xff)
+	if (xor8_buf(block, 0, sizeof(*block)) != 0xff)
 		return -EINVAL;
 
 	return 0;
 }
 
-static void calc_block_sig(struct mlx5_cmd_prot_block *block, u8 token,
-			   int csum)
+static void calc_block_sig(struct mlx5_cmd_prot_block *block)
 {
-	block->token = token;
-	if (csum) {
-		block->ctrl_sig = ~xor8_buf(block->rsvd0, sizeof(*block) -
-					    sizeof(block->data) - 2);
-		block->sig = ~xor8_buf(block, sizeof(*block) - 1);
-	}
+	int ctrl_xor_len = sizeof(*block) - sizeof(block->data) - 2;
+	size_t rsvd0_off = offsetof(struct mlx5_cmd_prot_block, rsvd0);
+
+	block->ctrl_sig = ~xor8_buf(block, rsvd0_off, ctrl_xor_len);
+	block->sig = ~xor8_buf(block, 0, sizeof(*block) - 1);
 }
 
-static void calc_chain_sig(struct mlx5_cmd_msg *msg, u8 token, int csum)
+static void calc_chain_sig(struct mlx5_cmd_msg *msg)
 {
 	struct mlx5_cmd_mailbox *next = msg->next;
+	int size = msg->len;
+	int blen = size - min_t(int, sizeof(msg->first.data), size);
+	int n = (blen + MLX5_CMD_DATA_BLOCK_SIZE - 1)
+		/ MLX5_CMD_DATA_BLOCK_SIZE;
+	int i = 0;
 
-	while (next) {
-		calc_block_sig(next->buf, token, csum);
+	for (i = 0; i < n && next; i++)  {
+		calc_block_sig(next->buf);
 		next = next->next;
 	}
 }
 
 static void set_signature(struct mlx5_cmd_work_ent *ent, int csum)
 {
-	ent->lay->sig = ~xor8_buf(ent->lay, sizeof(*ent->lay));
-	calc_chain_sig(ent->in, ent->token, csum);
-	calc_chain_sig(ent->out, ent->token, csum);
+	ent->lay->sig = ~xor8_buf(ent->lay, 0,  sizeof(*ent->lay));
+	if (csum) {
+		calc_chain_sig(ent->in);
+		calc_chain_sig(ent->out);
+	}
 }
 
 static void poll_timeout(struct mlx5_cmd_work_ent *ent)
@@ -222,12 +231,17 @@ static int verify_signature(struct mlx5_cmd_work_ent *ent)
 	struct mlx5_cmd_mailbox *next = ent->out->next;
 	int err;
 	u8 sig;
+	int size = ent->out->len;
+	int blen = size - min_t(int, sizeof(ent->out->first.data), size);
+	int n = (blen + MLX5_CMD_DATA_BLOCK_SIZE - 1)
+		/ MLX5_CMD_DATA_BLOCK_SIZE;
+	int i = 0;
 
-	sig = xor8_buf(ent->lay, sizeof(*ent->lay));
+	sig = xor8_buf(ent->lay, 0, sizeof(*ent->lay));
 	if (sig != 0xff)
 		return -EINVAL;
 
-	while (next) {
+	for (i = 0; i < n && next; i++) {
 		err = verify_block_sig(next->buf);
 		if (err)
 			return err;
@@ -641,7 +655,6 @@ static void cmd_work_handler(struct work_struct *work)
 		spin_unlock_irqrestore(&cmd->alloc_lock, flags);
 	}
 
-	ent->token = alloc_token(cmd);
 	cmd->ent_arr[ent->idx] = ent;
 	lay = get_inst(cmd, ent->idx);
 	ent->lay = lay;
@@ -755,7 +768,8 @@ static u8 *get_status_ptr(struct mlx5_outbox_hdr *out)
 static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
 			   struct mlx5_cmd_msg *out, void *uout, int uout_size,
 			   mlx5_cmd_cbk_t callback,
-			   void *context, int page_queue, u8 *status)
+			   void *context, int page_queue, u8 *status,
+			   u8 token)
 {
 	struct mlx5_cmd *cmd = &dev->cmd;
 	struct mlx5_cmd_work_ent *ent;
@@ -772,6 +786,8 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
 	if (IS_ERR(ent))
 		return PTR_ERR(ent);
 
+	ent->token = token;
+
 	if (!callback)
 		init_completion(&ent->done);
 
@@ -844,7 +860,8 @@ static const struct file_operations fops = {
 	.write	= dbg_write,
 };
 
-static int mlx5_copy_to_msg(struct mlx5_cmd_msg *to, void *from, int size)
+static int mlx5_copy_to_msg(struct mlx5_cmd_msg *to, void *from, int size,
+			    u8 token)
 {
 	struct mlx5_cmd_prot_block *block;
 	struct mlx5_cmd_mailbox *next;
@@ -870,6 +887,7 @@ static int mlx5_copy_to_msg(struct mlx5_cmd_msg *to, void *from, int size)
 		memcpy(block->data, from, copy);
 		from += copy;
 		size -= copy;
+		block->token = token;
 		next = next->next;
 	}
 
@@ -939,7 +957,8 @@ static void free_cmd_box(struct mlx5_core_dev *dev,
 }
 
 static struct mlx5_cmd_msg *mlx5_alloc_cmd_msg(struct mlx5_core_dev *dev,
-					       gfp_t flags, int size)
+					       gfp_t flags, int size,
+					       u8 token)
 {
 	struct mlx5_cmd_mailbox *tmp, *head = NULL;
 	struct mlx5_cmd_prot_block *block;
@@ -968,6 +987,7 @@ static struct mlx5_cmd_msg *mlx5_alloc_cmd_msg(struct mlx5_core_dev *dev,
 		tmp->next = head;
 		block->next = cpu_to_be64(tmp->next ? tmp->next->dma : 0);
 		block->block_num = cpu_to_be32(n - i - 1);
+		block->token = token;
 		head = tmp;
 	}
 	msg->next = head;
@@ -1351,7 +1371,7 @@ static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size,
 	}
 
 	if (IS_ERR(msg))
-		msg = mlx5_alloc_cmd_msg(dev, gfp, in_size);
+		msg = mlx5_alloc_cmd_msg(dev, gfp, in_size, 0);
 
 	return msg;
 }
@@ -1376,6 +1396,7 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
 	int err;
 	u8 status = 0;
 	u32 drv_synd;
+	u8 token;
 
 	if (pci_channel_offline(dev->pdev) ||
 	    dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
@@ -1394,20 +1415,22 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
 		return err;
 	}
 
-	err = mlx5_copy_to_msg(inb, in, in_size);
+	token = alloc_token(&dev->cmd);
+
+	err = mlx5_copy_to_msg(inb, in, in_size, token);
 	if (err) {
 		mlx5_core_warn(dev, "err %d\n", err);
 		goto out_in;
 	}
 
-	outb = mlx5_alloc_cmd_msg(dev, gfp, out_size);
+	outb = mlx5_alloc_cmd_msg(dev, gfp, out_size, token);
 	if (IS_ERR(outb)) {
 		err = PTR_ERR(outb);
 		goto out_in;
 	}
 
 	err = mlx5_cmd_invoke(dev, inb, outb, out, out_size, callback, context,
-			      pages_queue, &status);
+			      pages_queue, &status, token);
 	if (err)
 		goto out_out;
 
@@ -1475,7 +1498,7 @@ static int create_msg_cache(struct mlx5_core_dev *dev)
 	INIT_LIST_HEAD(&cmd->cache.med.head);
 
 	for (i = 0; i < NUM_LONG_LISTS; i++) {
-		msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, LONG_LIST_SIZE);
+		msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, LONG_LIST_SIZE, 0);
 		if (IS_ERR(msg)) {
 			err = PTR_ERR(msg);
 			goto ex_err;
@@ -1485,7 +1508,7 @@ static int create_msg_cache(struct mlx5_core_dev *dev)
 	}
 
 	for (i = 0; i < NUM_MED_LISTS; i++) {
-		msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, MED_LIST_SIZE);
+		msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, MED_LIST_SIZE, 0);
 		if (IS_ERR(msg)) {
 			err = PTR_ERR(msg);
 			goto ex_err;

From a3fb2b3bd4f095327a98c484260fb63851d3bb14 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Wed, 24 Aug 2016 11:01:20 -0700
Subject: [PATCH 39/74] net: dsa: bcm_sf2: Fix race condition while unmasking
 interrupts

[ Upstream commit 4f101c47791cdcb831b3ef1f831b1cc51e4fe03c ]

We kept shadow copies of which interrupt sources we have enabled and
disabled, but due to an order bug in how intrl2_mask_clear was defined,
we could run into the following scenario:

CPU0					CPU1
intrl2_1_mask_clear(..)
sets INTRL2_CPU_MASK_CLEAR
					bcm_sf2_switch_1_isr
					read INTRL2_CPU_STATUS and masks with stale
					irq1_mask value
updates irq1_mask value

Which would make us loop again and again trying to process and interrupt
we are not clearing since our copy of whether it was enabled before
still indicates it was not. Fix this by updating the shadow copy first,
and then unasking at the HW level.

Fixes: 246d7f773c13 ("net: dsa: add Broadcom SF2 switch driver")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/dsa/bcm_sf2.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/dsa/bcm_sf2.h b/drivers/net/dsa/bcm_sf2.h
index 6bba1c98d764..c7994e372284 100644
--- a/drivers/net/dsa/bcm_sf2.h
+++ b/drivers/net/dsa/bcm_sf2.h
@@ -187,8 +187,8 @@ static inline void name##_writeq(struct bcm_sf2_priv *priv, u64 val,	\
 static inline void intrl2_##which##_mask_clear(struct bcm_sf2_priv *priv, \
 						u32 mask)		\
 {									\
-	intrl2_##which##_writel(priv, mask, INTRL2_CPU_MASK_CLEAR);	\
 	priv->irq##which##_mask &= ~(mask);				\
+	intrl2_##which##_writel(priv, mask, INTRL2_CPU_MASK_CLEAR);	\
 }									\
 static inline void intrl2_##which##_mask_set(struct bcm_sf2_priv *priv, \
 						u32 mask)		\

From 339d61ab09bbb7fae7416ec025eb9a9509d5e818 Mon Sep 17 00:00:00 2001
From: Xander Huff <xander.huff@ni.com>
Date: Wed, 24 Aug 2016 16:47:53 -0500
Subject: [PATCH 40/74] Revert "phy: IRQ cannot be shared"

[ Upstream commit c3e70edd7c2eed6acd234627a6007627f5c76e8e ]

This reverts:
  commit 33c133cc7598 ("phy: IRQ cannot be shared")

On hardware with multiple PHY devices hooked up to the same IRQ line, allow
them to share it.

Sergei Shtylyov says:
  "I'm not sure now what was the reason I concluded that the IRQ sharing
  was impossible... most probably I thought that the kernel IRQ handling
  code exited the loop over the IRQ actions once IRQ_HANDLED was returned
  -- which is obviously not so in reality..."

Signed-off-by: Xander Huff <xander.huff@ni.com>
Signed-off-by: Nathan Sullivan <nathan.sullivan@ni.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/phy/phy.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 47cd306dbb3c..bba0ca786aaa 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -640,8 +640,10 @@ phy_err:
 int phy_start_interrupts(struct phy_device *phydev)
 {
 	atomic_set(&phydev->irq_disable, 0);
-	if (request_irq(phydev->irq, phy_interrupt, 0, "phy_interrupt",
-			phydev) < 0) {
+	if (request_irq(phydev->irq, phy_interrupt,
+				IRQF_SHARED,
+				"phy_interrupt",
+				phydev) < 0) {
 		pr_warn("%s: Can't get IRQ %d (PHY)\n",
 			phydev->bus->name, phydev->irq);
 		phydev->irq = PHY_POLL;

From 8c945f5aac28a81f20f6a5208a6ccab43b8e7a89 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Sat, 27 Aug 2016 17:33:03 +0100
Subject: [PATCH 41/74] net: smc91x: fix SMC accesses

[ Upstream commit 2fb04fdf30192ff1e2b5834e9b7745889ea8bbcb ]

Commit b70661c70830 ("net: smc91x: use run-time configuration on all ARM
machines") broke some ARM platforms through several mistakes.  Firstly,
the access size must correspond to the following rule:

(a) at least one of 16-bit or 8-bit access size must be supported
(b) 32-bit accesses are optional, and may be enabled in addition to
    the above.

Secondly, it provides no emulation of 16-bit accesses, instead blindly
making 16-bit accesses even when the platform specifies that only 8-bit
is supported.

Reorganise smc91x.h so we can make use of the existing 16-bit access
emulation already provided - if 16-bit accesses are supported, use
16-bit accesses directly, otherwise if 8-bit accesses are supported,
use the provided 16-bit access emulation.  If neither, BUG().  This
exactly reflects the driver behaviour prior to the commit being fixed.

Since the conversion incorrectly cut down the available access sizes on
several platforms, we also need to go through every platform and fix up
the overly-restrictive access size: Arnd assumed that if a platform can
perform 32-bit, 16-bit and 8-bit accesses, then only a 32-bit access
size needed to be specified - not so, all available access sizes must
be specified.

This likely fixes some performance regressions in doing this: if a
platform does not support 8-bit accesses, 8-bit accesses have been
emulated by performing a 16-bit read-modify-write access.

Tested on the Intel Assabet/Neponset platform, which supports only 8-bit
accesses, which was broken by the original commit.

Fixes: b70661c70830 ("net: smc91x: use run-time configuration on all ARM machines")
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Tested-by: Robert Jarzmik <robert.jarzmik@free.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mach-pxa/idp.c                    |  3 +-
 arch/arm/mach-pxa/xcep.c                   |  3 +-
 arch/arm/mach-realview/core.c              |  3 +-
 arch/arm/mach-sa1100/pleb.c                |  2 +-
 arch/blackfin/mach-bf561/boards/cm_bf561.c |  3 +-
 arch/blackfin/mach-bf561/boards/ezkit.c    |  3 +-
 drivers/net/ethernet/smsc/smc91x.c         |  7 +++
 drivers/net/ethernet/smsc/smc91x.h         | 65 +++++++++++++++-------
 include/linux/smc91x.h                     | 10 ++++
 9 files changed, 73 insertions(+), 26 deletions(-)

diff --git a/arch/arm/mach-pxa/idp.c b/arch/arm/mach-pxa/idp.c
index f6d02e4cbcda..5c87dff5d46e 100644
--- a/arch/arm/mach-pxa/idp.c
+++ b/arch/arm/mach-pxa/idp.c
@@ -83,7 +83,8 @@ static struct resource smc91x_resources[] = {
 };
 
 static struct smc91x_platdata smc91x_platdata = {
-	.flags = SMC91X_USE_32BIT | SMC91X_USE_DMA | SMC91X_NOWAIT,
+	.flags = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT |
+		 SMC91X_USE_DMA | SMC91X_NOWAIT,
 };
 
 static struct platform_device smc91x_device = {
diff --git a/arch/arm/mach-pxa/xcep.c b/arch/arm/mach-pxa/xcep.c
index 13b1d4586d7d..9001312710f7 100644
--- a/arch/arm/mach-pxa/xcep.c
+++ b/arch/arm/mach-pxa/xcep.c
@@ -120,7 +120,8 @@ static struct resource smc91x_resources[] = {
 };
 
 static struct smc91x_platdata xcep_smc91x_info = {
-	.flags	= SMC91X_USE_32BIT | SMC91X_NOWAIT | SMC91X_USE_DMA,
+	.flags	= SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT |
+		  SMC91X_NOWAIT | SMC91X_USE_DMA,
 };
 
 static struct platform_device smc91x_device = {
diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c
index 44575edc44b1..cf0a7c2359f0 100644
--- a/arch/arm/mach-realview/core.c
+++ b/arch/arm/mach-realview/core.c
@@ -95,7 +95,8 @@ static struct smsc911x_platform_config smsc911x_config = {
 };
 
 static struct smc91x_platdata smc91x_platdata = {
-	.flags = SMC91X_USE_32BIT | SMC91X_NOWAIT,
+	.flags = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT |
+		 SMC91X_NOWAIT,
 };
 
 static struct platform_device realview_eth_device = {
diff --git a/arch/arm/mach-sa1100/pleb.c b/arch/arm/mach-sa1100/pleb.c
index 1525d7b5f1b7..88149f85bc49 100644
--- a/arch/arm/mach-sa1100/pleb.c
+++ b/arch/arm/mach-sa1100/pleb.c
@@ -45,7 +45,7 @@ static struct resource smc91x_resources[] = {
 };
 
 static struct smc91x_platdata smc91x_platdata = {
-	.flags = SMC91X_USE_16BIT | SMC91X_NOWAIT,
+	.flags = SMC91X_USE_16BIT | SMC91X_USE_8BIT | SMC91X_NOWAIT,
 };
 
 static struct platform_device smc91x_device = {
diff --git a/arch/blackfin/mach-bf561/boards/cm_bf561.c b/arch/blackfin/mach-bf561/boards/cm_bf561.c
index c6db52ba3a06..10c57771822d 100644
--- a/arch/blackfin/mach-bf561/boards/cm_bf561.c
+++ b/arch/blackfin/mach-bf561/boards/cm_bf561.c
@@ -146,7 +146,8 @@ static struct platform_device hitachi_fb_device = {
 #include <linux/smc91x.h>
 
 static struct smc91x_platdata smc91x_info = {
-	.flags = SMC91X_USE_32BIT | SMC91X_NOWAIT,
+	.flags = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT |
+		 SMC91X_NOWAIT,
 	.leda = RPC_LED_100_10,
 	.ledb = RPC_LED_TX_RX,
 };
diff --git a/arch/blackfin/mach-bf561/boards/ezkit.c b/arch/blackfin/mach-bf561/boards/ezkit.c
index 2de71e8c104b..93c22468cc14 100644
--- a/arch/blackfin/mach-bf561/boards/ezkit.c
+++ b/arch/blackfin/mach-bf561/boards/ezkit.c
@@ -134,7 +134,8 @@ static struct platform_device net2272_bfin_device = {
 #include <linux/smc91x.h>
 
 static struct smc91x_platdata smc91x_info = {
-	.flags = SMC91X_USE_32BIT | SMC91X_NOWAIT,
+	.flags = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT |
+		 SMC91X_NOWAIT,
 	.leda = RPC_LED_100_10,
 	.ledb = RPC_LED_TX_RX,
 };
diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c
index 0e2fc1a844ab..8c44cf6ff7a2 100644
--- a/drivers/net/ethernet/smsc/smc91x.c
+++ b/drivers/net/ethernet/smsc/smc91x.c
@@ -2269,6 +2269,13 @@ static int smc_drv_probe(struct platform_device *pdev)
 	if (pd) {
 		memcpy(&lp->cfg, pd, sizeof(lp->cfg));
 		lp->io_shift = SMC91X_IO_SHIFT(lp->cfg.flags);
+
+		if (!SMC_8BIT(lp) && !SMC_16BIT(lp)) {
+			dev_err(&pdev->dev,
+				"at least one of 8-bit or 16-bit access support is required.\n");
+			ret = -ENXIO;
+			goto out_free_netdev;
+		}
 	}
 
 #if IS_BUILTIN(CONFIG_OF)
diff --git a/drivers/net/ethernet/smsc/smc91x.h b/drivers/net/ethernet/smsc/smc91x.h
index a3c129e1e40a..29df0465daf4 100644
--- a/drivers/net/ethernet/smsc/smc91x.h
+++ b/drivers/net/ethernet/smsc/smc91x.h
@@ -36,6 +36,27 @@
 #include <linux/dmaengine.h>
 #include <linux/smc91x.h>
 
+/*
+ * Any 16-bit access is performed with two 8-bit accesses if the hardware
+ * can't do it directly. Most registers are 16-bit so those are mandatory.
+ */
+#define SMC_outw_b(x, a, r)						\
+	do {								\
+		unsigned int __val16 = (x);				\
+		unsigned int __reg = (r);				\
+		SMC_outb(__val16, a, __reg);				\
+		SMC_outb(__val16 >> 8, a, __reg + (1 << SMC_IO_SHIFT));	\
+	} while (0)
+
+#define SMC_inw_b(a, r)							\
+	({								\
+		unsigned int __val16;					\
+		unsigned int __reg = r;					\
+		__val16  = SMC_inb(a, __reg);				\
+		__val16 |= SMC_inb(a, __reg + (1 << SMC_IO_SHIFT)) << 8; \
+		__val16;						\
+	})
+
 /*
  * Define your architecture specific bus configuration parameters here.
  */
@@ -55,10 +76,30 @@
 #define SMC_IO_SHIFT		(lp->io_shift)
 
 #define SMC_inb(a, r)		readb((a) + (r))
-#define SMC_inw(a, r)		readw((a) + (r))
+#define SMC_inw(a, r)							\
+	({								\
+		unsigned int __smc_r = r;				\
+		SMC_16BIT(lp) ? readw((a) + __smc_r) :			\
+		SMC_8BIT(lp) ? SMC_inw_b(a, __smc_r) :			\
+		({ BUG(); 0; });					\
+	})
+
 #define SMC_inl(a, r)		readl((a) + (r))
 #define SMC_outb(v, a, r)	writeb(v, (a) + (r))
+#define SMC_outw(v, a, r)						\
+	do {								\
+		unsigned int __v = v, __smc_r = r;			\
+		if (SMC_16BIT(lp))					\
+			__SMC_outw(__v, a, __smc_r);			\
+		else if (SMC_8BIT(lp))					\
+			SMC_outw_b(__v, a, __smc_r);			\
+		else							\
+			BUG();						\
+	} while (0)
+
 #define SMC_outl(v, a, r)	writel(v, (a) + (r))
+#define SMC_insb(a, r, p, l)	readsb((a) + (r), p, l)
+#define SMC_outsb(a, r, p, l)	writesb((a) + (r), p, l)
 #define SMC_insw(a, r, p, l)	readsw((a) + (r), p, l)
 #define SMC_outsw(a, r, p, l)	writesw((a) + (r), p, l)
 #define SMC_insl(a, r, p, l)	readsl((a) + (r), p, l)
@@ -66,7 +107,7 @@
 #define SMC_IRQ_FLAGS		(-1)	/* from resource */
 
 /* We actually can't write halfwords properly if not word aligned */
-static inline void SMC_outw(u16 val, void __iomem *ioaddr, int reg)
+static inline void __SMC_outw(u16 val, void __iomem *ioaddr, int reg)
 {
 	if ((machine_is_mainstone() || machine_is_stargate2() ||
 	     machine_is_pxa_idp()) && reg & 2) {
@@ -405,24 +446,8 @@ smc_pxa_dma_insw(void __iomem *ioaddr, struct smc_local *lp, int reg, int dma,
 
 #if ! SMC_CAN_USE_16BIT
 
-/*
- * Any 16-bit access is performed with two 8-bit accesses if the hardware
- * can't do it directly. Most registers are 16-bit so those are mandatory.
- */
-#define SMC_outw(x, ioaddr, reg)					\
-	do {								\
-		unsigned int __val16 = (x);				\
-		SMC_outb( __val16, ioaddr, reg );			\
-		SMC_outb( __val16 >> 8, ioaddr, reg + (1 << SMC_IO_SHIFT));\
-	} while (0)
-#define SMC_inw(ioaddr, reg)						\
-	({								\
-		unsigned int __val16;					\
-		__val16 =  SMC_inb( ioaddr, reg );			\
-		__val16 |= SMC_inb( ioaddr, reg + (1 << SMC_IO_SHIFT)) << 8; \
-		__val16;						\
-	})
-
+#define SMC_outw(x, ioaddr, reg)	SMC_outw_b(x, ioaddr, reg)
+#define SMC_inw(ioaddr, reg)		SMC_inw_b(ioaddr, reg)
 #define SMC_insw(a, r, p, l)		BUG()
 #define SMC_outsw(a, r, p, l)		BUG()
 
diff --git a/include/linux/smc91x.h b/include/linux/smc91x.h
index 76199b75d584..e302c447e057 100644
--- a/include/linux/smc91x.h
+++ b/include/linux/smc91x.h
@@ -1,6 +1,16 @@
 #ifndef __SMC91X_H__
 #define __SMC91X_H__
 
+/*
+ * These bits define which access sizes a platform can support, rather
+ * than the maximal access size.  So, if your platform can do 16-bit
+ * and 32-bit accesses to the SMC91x device, but not 8-bit, set both
+ * SMC91X_USE_16BIT and SMC91X_USE_32BIT.
+ *
+ * The SMC91x driver requires at least one of SMC91X_USE_8BIT or
+ * SMC91X_USE_16BIT to be supported - just setting SMC91X_USE_32BIT is
+ * an invalid configuration.
+ */
 #define SMC91X_USE_8BIT (1 << 0)
 #define SMC91X_USE_16BIT (1 << 1)
 #define SMC91X_USE_32BIT (1 << 2)

From fd2e3102adb91a481e34923633b1c0fd0fa77f0b Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Wed, 31 Aug 2016 14:16:44 +0200
Subject: [PATCH 42/74] bridge: re-introduce 'fix parsing of MLDv2 reports'

[ Upstream commit 9264251ee2a55bce8fb93826b3f581fb9eb7e2c2 ]

commit bc8c20acaea1 ("bridge: multicast: treat igmpv3 report with
INCLUDE and no sources as a leave") seems to have accidentally reverted
commit 47cc84ce0c2f ("bridge: fix parsing of MLDv2 reports"). This
commit brings back a change to br_ip6_multicast_mld2_report() where
parsing of MLDv2 reports stops when the first group is successfully
added to the MDB cache.

Fixes: bc8c20acaea1 ("bridge: multicast: treat igmpv3 report with INCLUDE and no sources as a leave")
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Acked-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Acked-by: Thadeu Lima de Souza Cascardo <cascardo@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/bridge/br_multicast.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 7173a685309a..9542e84a9455 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1113,7 +1113,7 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br,
 		} else {
 			err = br_ip6_multicast_add_group(br, port,
 							 &grec->grec_mca, vid);
-			if (!err)
+			if (err)
 				break;
 		}
 	}

From 9aea5e0ded7355172d751cdcacf59aa06d0d036e Mon Sep 17 00:00:00 2001
From: Thierry Reding <thierry.reding@gmail.com>
Date: Thu, 21 Jan 2016 15:04:59 +0100
Subject: [PATCH 43/74] pwm: Mark all devices as "might sleep"

commit ff01c944cfa939f3474c28d88223213494aedf0b upstream.

Commit d1cd21427747 ("pwm: Set enable state properly on failed call to
enable") introduced a mutex that is needed to protect internal state of
PWM devices. Since that mutex is acquired in pwm_set_polarity() and in
pwm_enable() and might potentially block, all PWM devices effectively
become "might sleep".

It's rather pointless to keep the .can_sleep field around, but given
that there are external users let's postpone the removal for the next
release cycle.

Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fixes: d1cd21427747 ("pwm: Set enable state properly on failed call to enable")
Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
---
 drivers/pwm/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c
index d24ca5f281b4..7831bc6b51dd 100644
--- a/drivers/pwm/core.c
+++ b/drivers/pwm/core.c
@@ -889,7 +889,7 @@ EXPORT_SYMBOL_GPL(devm_pwm_put);
   */
 bool pwm_can_sleep(struct pwm_device *pwm)
 {
-	return pwm->chip->can_sleep;
+	return true;
 }
 EXPORT_SYMBOL_GPL(pwm_can_sleep);
 

From 30b54a26af73c470395dd7f2b621e2a6a20a6a7c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 12 Jun 2016 11:24:46 -0400
Subject: [PATCH 44/74] autofs races

commit ea01a18494b3d7a91b2f1f2a6a5aaef4741bc294 upstream.

* make autofs4_expire_indirect() skip the dentries being in process of
expiry
* do *not* mess with list_move(); making sure that dentry with
AUTOFS_INF_EXPIRING are not picked for expiry is enough.
* do not remove NO_RCU when we set EXPIRING, don't bother with smp_mb()
there.  Clear it at the same time we clear EXPIRING.  Makes a bunch of
tests simpler.
* rename NO_RCU to WANT_EXPIRE, which is what it really is.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: Ian Kent <raven@themaw.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/autofs4/autofs_i.h |  8 ++++++--
 fs/autofs4/expire.c   | 27 ++++++++-------------------
 fs/autofs4/root.c     |  2 +-
 3 files changed, 15 insertions(+), 22 deletions(-)

diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index c37149b929be..502d3892d8a4 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -79,9 +79,13 @@ struct autofs_info {
 };
 
 #define AUTOFS_INF_EXPIRING	(1<<0) /* dentry is in the process of expiring */
-#define AUTOFS_INF_NO_RCU	(1<<1) /* the dentry is being considered
+#define AUTOFS_INF_WANT_EXPIRE	(1<<1) /* the dentry is being considered
 					* for expiry, so RCU_walk is
-					* not permitted
+					* not permitted.  If it progresses to
+					* actual expiry attempt, the flag is
+					* not cleared when EXPIRING is set -
+					* in that case it gets cleared only
+					* when it comes to clearing EXPIRING.
 					*/
 #define AUTOFS_INF_PENDING	(1<<2) /* dentry pending mount */
 
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 1cebc3c52fa5..4182d3e7a7db 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -315,19 +315,17 @@ struct dentry *autofs4_expire_direct(struct super_block *sb,
 	if (ino->flags & AUTOFS_INF_PENDING)
 		goto out;
 	if (!autofs4_direct_busy(mnt, root, timeout, do_now)) {
-		ino->flags |= AUTOFS_INF_NO_RCU;
+		ino->flags |= AUTOFS_INF_WANT_EXPIRE;
 		spin_unlock(&sbi->fs_lock);
 		synchronize_rcu();
 		spin_lock(&sbi->fs_lock);
 		if (!autofs4_direct_busy(mnt, root, timeout, do_now)) {
 			ino->flags |= AUTOFS_INF_EXPIRING;
-			smp_mb();
-			ino->flags &= ~AUTOFS_INF_NO_RCU;
 			init_completion(&ino->expire_complete);
 			spin_unlock(&sbi->fs_lock);
 			return root;
 		}
-		ino->flags &= ~AUTOFS_INF_NO_RCU;
+		ino->flags &= ~AUTOFS_INF_WANT_EXPIRE;
 	}
 out:
 	spin_unlock(&sbi->fs_lock);
@@ -444,7 +442,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
 	while ((dentry = get_next_positive_subdir(dentry, root))) {
 		spin_lock(&sbi->fs_lock);
 		ino = autofs4_dentry_ino(dentry);
-		if (ino->flags & AUTOFS_INF_NO_RCU)
+		if (ino->flags & AUTOFS_INF_WANT_EXPIRE)
 			expired = NULL;
 		else
 			expired = should_expire(dentry, mnt, timeout, how);
@@ -453,7 +451,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
 			continue;
 		}
 		ino = autofs4_dentry_ino(expired);
-		ino->flags |= AUTOFS_INF_NO_RCU;
+		ino->flags |= AUTOFS_INF_WANT_EXPIRE;
 		spin_unlock(&sbi->fs_lock);
 		synchronize_rcu();
 		spin_lock(&sbi->fs_lock);
@@ -463,7 +461,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
 			goto found;
 		}
 
-		ino->flags &= ~AUTOFS_INF_NO_RCU;
+		ino->flags &= ~AUTOFS_INF_WANT_EXPIRE;
 		if (expired != dentry)
 			dput(expired);
 		spin_unlock(&sbi->fs_lock);
@@ -473,17 +471,8 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
 found:
 	DPRINTK("returning %p %pd", expired, expired);
 	ino->flags |= AUTOFS_INF_EXPIRING;
-	smp_mb();
-	ino->flags &= ~AUTOFS_INF_NO_RCU;
 	init_completion(&ino->expire_complete);
 	spin_unlock(&sbi->fs_lock);
-	spin_lock(&sbi->lookup_lock);
-	spin_lock(&expired->d_parent->d_lock);
-	spin_lock_nested(&expired->d_lock, DENTRY_D_LOCK_NESTED);
-	list_move(&expired->d_parent->d_subdirs, &expired->d_child);
-	spin_unlock(&expired->d_lock);
-	spin_unlock(&expired->d_parent->d_lock);
-	spin_unlock(&sbi->lookup_lock);
 	return expired;
 }
 
@@ -494,7 +483,7 @@ int autofs4_expire_wait(struct dentry *dentry, int rcu_walk)
 	int status;
 
 	/* Block on any pending expire */
-	if (!(ino->flags & (AUTOFS_INF_EXPIRING | AUTOFS_INF_NO_RCU)))
+	if (!(ino->flags & AUTOFS_INF_WANT_EXPIRE))
 		return 0;
 	if (rcu_walk)
 		return -ECHILD;
@@ -551,7 +540,7 @@ int autofs4_expire_run(struct super_block *sb,
 	ino = autofs4_dentry_ino(dentry);
 	/* avoid rapid-fire expire attempts if expiry fails */
 	ino->last_used = now;
-	ino->flags &= ~AUTOFS_INF_EXPIRING;
+	ino->flags &= ~(AUTOFS_INF_EXPIRING|AUTOFS_INF_WANT_EXPIRE);
 	complete_all(&ino->expire_complete);
 	spin_unlock(&sbi->fs_lock);
 
@@ -579,7 +568,7 @@ int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
 		spin_lock(&sbi->fs_lock);
 		/* avoid rapid-fire expire attempts if expiry fails */
 		ino->last_used = now;
-		ino->flags &= ~AUTOFS_INF_EXPIRING;
+		ino->flags &= ~(AUTOFS_INF_EXPIRING|AUTOFS_INF_WANT_EXPIRE);
 		complete_all(&ino->expire_complete);
 		spin_unlock(&sbi->fs_lock);
 		dput(dentry);
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index c6d7d3dbd52a..7a54c6a867c8 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -455,7 +455,7 @@ static int autofs4_d_manage(struct dentry *dentry, bool rcu_walk)
 		 * a mount-trap.
 		 */
 		struct inode *inode;
-		if (ino->flags & (AUTOFS_INF_EXPIRING | AUTOFS_INF_NO_RCU))
+		if (ino->flags & AUTOFS_INF_WANT_EXPIRE)
 			return 0;
 		if (d_mountpoint(dentry))
 			return 0;

From 2ccb99b272a23b2f32327002f715b10d815a86f3 Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Mon, 19 Sep 2016 14:44:12 -0700
Subject: [PATCH 45/74] autofs: use dentry flags to block walks during expire

commit 7cbdb4a286a60c5d519cb9223fe2134d26870d39 upstream.

Somewhere along the way the autofs expire operation has changed to hold
a spin lock over expired dentry selection.  The autofs indirect mount
expired dentry selection is complicated and quite lengthy so it isn't
appropriate to hold a spin lock over the operation.

Commit 47be61845c77 ("fs/dcache.c: avoid soft-lockup in dput()") added a
might_sleep() to dput() causing a WARN_ONCE() about this usage to be
issued.

But the spin lock doesn't need to be held over this check, the autofs
dentry info.  flags are enough to block walks into dentrys during the
expire.

I've left the direct mount expire as it is (for now) because it is much
simpler and quicker than the indirect mount expire and adding spin lock
release and re-aquires would do nothing more than add overhead.

Fixes: 47be61845c77 ("fs/dcache.c: avoid soft-lockup in dput()")
Link: http://lkml.kernel.org/r/20160912014017.1773.73060.stgit@pluto.themaw.net
Signed-off-by: Ian Kent <raven@themaw.net>
Reported-by: Takashi Iwai <tiwai@suse.de>
Tested-by: Takashi Iwai <tiwai@suse.de>
Cc: Takashi Iwai <tiwai@suse.de>
Cc: NeilBrown <neilb@suse.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/autofs4/expire.c | 55 ++++++++++++++++++++++++++++++++++-----------
 1 file changed, 42 insertions(+), 13 deletions(-)

diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 4182d3e7a7db..7a5a598a2d94 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -415,6 +415,7 @@ static struct dentry *should_expire(struct dentry *dentry,
 	}
 	return NULL;
 }
+
 /*
  * Find an eligible tree to time-out
  * A tree is eligible if :-
@@ -430,6 +431,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
 	struct dentry *root = sb->s_root;
 	struct dentry *dentry;
 	struct dentry *expired;
+	struct dentry *found;
 	struct autofs_info *ino;
 
 	if (!root)
@@ -440,31 +442,46 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
 
 	dentry = NULL;
 	while ((dentry = get_next_positive_subdir(dentry, root))) {
+		int flags = how;
+
 		spin_lock(&sbi->fs_lock);
 		ino = autofs4_dentry_ino(dentry);
-		if (ino->flags & AUTOFS_INF_WANT_EXPIRE)
-			expired = NULL;
-		else
-			expired = should_expire(dentry, mnt, timeout, how);
-		if (!expired) {
+		if (ino->flags & AUTOFS_INF_WANT_EXPIRE) {
 			spin_unlock(&sbi->fs_lock);
 			continue;
 		}
+		spin_unlock(&sbi->fs_lock);
+
+		expired = should_expire(dentry, mnt, timeout, flags);
+		if (!expired)
+			continue;
+
+		spin_lock(&sbi->fs_lock);
 		ino = autofs4_dentry_ino(expired);
 		ino->flags |= AUTOFS_INF_WANT_EXPIRE;
 		spin_unlock(&sbi->fs_lock);
 		synchronize_rcu();
-		spin_lock(&sbi->fs_lock);
-		if (should_expire(expired, mnt, timeout, how)) {
-			if (expired != dentry)
-				dput(dentry);
-			goto found;
-		}
 
+		/* Make sure a reference is not taken on found if
+		 * things have changed.
+		 */
+		flags &= ~AUTOFS_EXP_LEAVES;
+		found = should_expire(expired, mnt, timeout, how);
+		if (!found || found != expired)
+			/* Something has changed, continue */
+			goto next;
+
+		if (expired != dentry)
+			dput(dentry);
+
+		spin_lock(&sbi->fs_lock);
+		goto found;
+next:
+		spin_lock(&sbi->fs_lock);
 		ino->flags &= ~AUTOFS_INF_WANT_EXPIRE;
+		spin_unlock(&sbi->fs_lock);
 		if (expired != dentry)
 			dput(expired);
-		spin_unlock(&sbi->fs_lock);
 	}
 	return NULL;
 
@@ -481,6 +498,7 @@ int autofs4_expire_wait(struct dentry *dentry, int rcu_walk)
 	struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
 	struct autofs_info *ino = autofs4_dentry_ino(dentry);
 	int status;
+	int state;
 
 	/* Block on any pending expire */
 	if (!(ino->flags & AUTOFS_INF_WANT_EXPIRE))
@@ -488,8 +506,19 @@ int autofs4_expire_wait(struct dentry *dentry, int rcu_walk)
 	if (rcu_walk)
 		return -ECHILD;
 
+retry:
 	spin_lock(&sbi->fs_lock);
-	if (ino->flags & AUTOFS_INF_EXPIRING) {
+	state = ino->flags & (AUTOFS_INF_WANT_EXPIRE | AUTOFS_INF_EXPIRING);
+	if (state == AUTOFS_INF_WANT_EXPIRE) {
+		spin_unlock(&sbi->fs_lock);
+		/*
+		 * Possibly being selected for expire, wait until
+		 * it's selected or not.
+		 */
+		schedule_timeout_uninterruptible(HZ/10);
+		goto retry;
+	}
+	if (state & AUTOFS_INF_EXPIRING) {
 		spin_unlock(&sbi->fs_lock);
 
 		DPRINTK("waiting for expire %p name=%pd", dentry, dentry);

From fc4edddc5186b74442933c4399cdd6fff914c683 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Fri, 26 Aug 2016 16:01:59 +1000
Subject: [PATCH 46/74] xfs: prevent dropping ioend completions during buftarg
 wait

commit 800b2694f890cc35a1bda63501fc71c94389d517 upstream.

xfs_wait_buftarg() waits for all pending I/O, drains the ioend
completion workqueue and walks the LRU until all buffers in the cache
have been released. This is traditionally an unmount operation` but the
mechanism is also reused during filesystem freeze.

xfs_wait_buftarg() invokes drain_workqueue() as part of the quiesce,
which is intended more for a shutdown sequence in that it indicates to
the queue that new operations are not expected once the drain has begun.
New work jobs after this point result in a WARN_ON_ONCE() and are
otherwise dropped.

With filesystem freeze, however, read operations are allowed and can
proceed during or after the workqueue drain. If such a read occurs
during the drain sequence, the workqueue infrastructure complains about
the queued ioend completion work item and drops it on the floor. As a
result, the buffer remains on the LRU and the freeze never completes.

Despite the fact that the overall buffer cache cleanup is not necessary
during freeze, fix up this operation such that it is safe to invoke
during non-unmount quiesce operations. Replace the drain_workqueue()
call with flush_workqueue(), which runs a similar serialization on
pending workqueue jobs without causing new jobs to be dropped. This is
safe for unmount as unmount independently locks out new operations by
the time xfs_wait_buftarg() is invoked.

cc: <stable@vger.kernel.org>
Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <david@fromorbit.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/xfs/xfs_buf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 39090fc56f09..eb1b8c8acfcb 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1535,7 +1535,7 @@ xfs_wait_buftarg(
 	 * ensure here that all reference counts have been dropped before we
 	 * start walking the LRU list.
 	 */
-	drain_workqueue(btp->bt_mount->m_buf_workqueue);
+	flush_workqueue(btp->bt_mount->m_buf_workqueue);
 
 	/* loop until there is nothing left on the lru list. */
 	while (list_lru_count(&btp->bt_lru)) {

From af426ec184a38d257457a22554542d4f9bb7eecd Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 19 Sep 2016 14:44:27 -0700
Subject: [PATCH 47/74] fsnotify: add a way to stop queueing events on group
 shutdown

commit 12703dbfeb15402260e7554d32a34ac40c233990 upstream.

Implement a function that can be called when a group is being shutdown
to stop queueing new events to the group.  Fanotify will use this.

Fixes: 5838d4442bd5 ("fanotify: fix double free of pending permission events")
Link: http://lkml.kernel.org/r/1473797711-14111-2-git-send-email-jack@suse.cz
Signed-off-by: Jan Kara <jack@suse.cz>
Reviewed-by: Miklos Szeredi <mszeredi@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/notify/group.c                | 19 +++++++++++++++++++
 fs/notify/notification.c         |  8 +++++++-
 include/linux/fsnotify_backend.h |  3 +++
 3 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/fs/notify/group.c b/fs/notify/group.c
index d16b62cb2854..18eb30c6bd8f 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -39,6 +39,17 @@ static void fsnotify_final_destroy_group(struct fsnotify_group *group)
 	kfree(group);
 }
 
+/*
+ * Stop queueing new events for this group. Once this function returns
+ * fsnotify_add_event() will not add any new events to the group's queue.
+ */
+void fsnotify_group_stop_queueing(struct fsnotify_group *group)
+{
+	mutex_lock(&group->notification_mutex);
+	group->shutdown = true;
+	mutex_unlock(&group->notification_mutex);
+}
+
 /*
  * Trying to get rid of a group. Remove all marks, flush all events and release
  * the group reference.
@@ -47,6 +58,14 @@ static void fsnotify_final_destroy_group(struct fsnotify_group *group)
  */
 void fsnotify_destroy_group(struct fsnotify_group *group)
 {
+	/*
+	 * Stop queueing new events. The code below is careful enough to not
+	 * require this but fanotify needs to stop queuing events even before
+	 * fsnotify_destroy_group() is called and this makes the other callers
+	 * of fsnotify_destroy_group() to see the same behavior.
+	 */
+	fsnotify_group_stop_queueing(group);
+
 	/* clear all inode marks for this group */
 	fsnotify_clear_marks_by_group(group);
 
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index a95d8e037aeb..3d76e65ff84f 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -82,7 +82,8 @@ void fsnotify_destroy_event(struct fsnotify_group *group,
  * Add an event to the group notification queue.  The group can later pull this
  * event off the queue to deal with.  The function returns 0 if the event was
  * added to the queue, 1 if the event was merged with some other queued event,
- * 2 if the queue of events has overflown.
+ * 2 if the event was not queued - either the queue of events has overflown
+ * or the group is shutting down.
  */
 int fsnotify_add_event(struct fsnotify_group *group,
 		       struct fsnotify_event *event,
@@ -96,6 +97,11 @@ int fsnotify_add_event(struct fsnotify_group *group,
 
 	mutex_lock(&group->notification_mutex);
 
+	if (group->shutdown) {
+		mutex_unlock(&group->notification_mutex);
+		return 2;
+	}
+
 	if (group->q_len >= group->max_events) {
 		ret = 2;
 		/* Queue overflow event only if it isn't already queued */
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 533c4408529a..5dfba9a8a7be 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -148,6 +148,7 @@ struct fsnotify_group {
 	#define FS_PRIO_1	1 /* fanotify content based access control */
 	#define FS_PRIO_2	2 /* fanotify pre-content access */
 	unsigned int priority;
+	bool shutdown;		/* group is being shut down, don't queue more events */
 
 	/* stores all fastpath marks assoc with this group so they can be cleaned on unregister */
 	struct mutex mark_mutex;	/* protect marks_list */
@@ -308,6 +309,8 @@ extern struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *op
 extern void fsnotify_get_group(struct fsnotify_group *group);
 /* drop reference on a group from fsnotify_alloc_group */
 extern void fsnotify_put_group(struct fsnotify_group *group);
+/* group destruction begins, stop queuing new events */
+extern void fsnotify_group_stop_queueing(struct fsnotify_group *group);
 /* destroy group */
 extern void fsnotify_destroy_group(struct fsnotify_group *group);
 /* fasync handler function */

From 6e67de3922f25ae43fc86be362d740d88167ae7f Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 19 Sep 2016 14:44:30 -0700
Subject: [PATCH 48/74] fanotify: fix list corruption in
 fanotify_get_response()

commit 96d41019e3ac55f6f0115b0ce97e4f24a3d636d2 upstream.

fanotify_get_response() calls fsnotify_remove_event() when it finds that
group is being released from fanotify_release() (bypass_perm is set).

However the event it removes need not be only in the group's notification
queue but it can have already moved to access_list (userspace read the
event before closing the fanotify instance fd) which is protected by a
different lock.  Thus when fsnotify_remove_event() races with
fanotify_release() operating on access_list, the list can get corrupted.

Fix the problem by moving all the logic removing permission events from
the lists to one place - fanotify_release().

Fixes: 5838d4442bd5 ("fanotify: fix double free of pending permission events")
Link: http://lkml.kernel.org/r/1473797711-14111-3-git-send-email-jack@suse.cz
Signed-off-by: Jan Kara <jack@suse.cz>
Reported-by: Miklos Szeredi <mszeredi@redhat.com>
Tested-by: Miklos Szeredi <mszeredi@redhat.com>
Reviewed-by: Miklos Szeredi <mszeredi@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/notify/fanotify/fanotify.c      | 13 +----------
 fs/notify/fanotify/fanotify_user.c | 36 ++++++++++++++++++++----------
 fs/notify/notification.c           | 15 -------------
 include/linux/fsnotify_backend.h   |  3 ---
 4 files changed, 25 insertions(+), 42 deletions(-)

diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index d2f97ecca6a5..e0e5f7c3c99f 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -67,18 +67,7 @@ static int fanotify_get_response(struct fsnotify_group *group,
 
 	pr_debug("%s: group=%p event=%p\n", __func__, group, event);
 
-	wait_event(group->fanotify_data.access_waitq, event->response ||
-				atomic_read(&group->fanotify_data.bypass_perm));
-
-	if (!event->response) {	/* bypass_perm set */
-		/*
-		 * Event was canceled because group is being destroyed. Remove
-		 * it from group's event list because we are responsible for
-		 * freeing the permission event.
-		 */
-		fsnotify_remove_event(group, &event->fae.fse);
-		return 0;
-	}
+	wait_event(group->fanotify_data.access_waitq, event->response);
 
 	/* userspace responded, convert to something usable */
 	switch (event->response) {
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 8e8e6bcd1d43..a64313868d3a 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -358,16 +358,20 @@ static int fanotify_release(struct inode *ignored, struct file *file)
 
 #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
 	struct fanotify_perm_event_info *event, *next;
+	struct fsnotify_event *fsn_event;
 
 	/*
-	 * There may be still new events arriving in the notification queue
-	 * but since userspace cannot use fanotify fd anymore, no event can
-	 * enter or leave access_list by now.
+	 * Stop new events from arriving in the notification queue. since
+	 * userspace cannot use fanotify fd anymore, no event can enter or
+	 * leave access_list by now either.
+	 */
+	fsnotify_group_stop_queueing(group);
+
+	/*
+	 * Process all permission events on access_list and notification queue
+	 * and simulate reply from userspace.
 	 */
 	spin_lock(&group->fanotify_data.access_lock);
-
-	atomic_inc(&group->fanotify_data.bypass_perm);
-
 	list_for_each_entry_safe(event, next, &group->fanotify_data.access_list,
 				 fae.fse.list) {
 		pr_debug("%s: found group=%p event=%p\n", __func__, group,
@@ -379,12 +383,21 @@ static int fanotify_release(struct inode *ignored, struct file *file)
 	spin_unlock(&group->fanotify_data.access_lock);
 
 	/*
-	 * Since bypass_perm is set, newly queued events will not wait for
-	 * access response. Wake up the already sleeping ones now.
-	 * synchronize_srcu() in fsnotify_destroy_group() will wait for all
-	 * processes sleeping in fanotify_handle_event() waiting for access
-	 * response and thus also for all permission events to be freed.
+	 * Destroy all non-permission events. For permission events just
+	 * dequeue them and set the response. They will be freed once the
+	 * response is consumed and fanotify_get_response() returns.
 	 */
+	mutex_lock(&group->notification_mutex);
+	while (!fsnotify_notify_queue_is_empty(group)) {
+		fsn_event = fsnotify_remove_first_event(group);
+		if (!(fsn_event->mask & FAN_ALL_PERM_EVENTS))
+			fsnotify_destroy_event(group, fsn_event);
+		else
+			FANOTIFY_PE(fsn_event)->response = FAN_ALLOW;
+	}
+	mutex_unlock(&group->notification_mutex);
+
+	/* Response for all permission events it set, wakeup waiters */
 	wake_up(&group->fanotify_data.access_waitq);
 #endif
 
@@ -755,7 +768,6 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
 	spin_lock_init(&group->fanotify_data.access_lock);
 	init_waitqueue_head(&group->fanotify_data.access_waitq);
 	INIT_LIST_HEAD(&group->fanotify_data.access_list);
-	atomic_set(&group->fanotify_data.bypass_perm, 0);
 #endif
 	switch (flags & FAN_ALL_CLASS_BITS) {
 	case FAN_CLASS_NOTIF:
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index 3d76e65ff84f..e455e83ceeeb 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -131,21 +131,6 @@ queue:
 	return ret;
 }
 
-/*
- * Remove @event from group's notification queue. It is the responsibility of
- * the caller to destroy the event.
- */
-void fsnotify_remove_event(struct fsnotify_group *group,
-			   struct fsnotify_event *event)
-{
-	mutex_lock(&group->notification_mutex);
-	if (!list_empty(&event->list)) {
-		list_del_init(&event->list);
-		group->q_len--;
-	}
-	mutex_unlock(&group->notification_mutex);
-}
-
 /*
  * Remove and return the first event from the notification list.  It is the
  * responsibility of the caller to destroy the obtained event
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 5dfba9a8a7be..850d8822e8ff 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -180,7 +180,6 @@ struct fsnotify_group {
 			spinlock_t access_lock;
 			struct list_head access_list;
 			wait_queue_head_t access_waitq;
-			atomic_t bypass_perm;
 #endif /* CONFIG_FANOTIFY_ACCESS_PERMISSIONS */
 			int f_flags;
 			unsigned int max_marks;
@@ -323,8 +322,6 @@ extern int fsnotify_add_event(struct fsnotify_group *group,
 			      struct fsnotify_event *event,
 			      int (*merge)(struct list_head *,
 					   struct fsnotify_event *));
-/* Remove passed event from groups notification queue */
-extern void fsnotify_remove_event(struct fsnotify_group *group, struct fsnotify_event *event);
 /* true if the group notification queue is empty */
 extern bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group);
 /* return, but do not dequeue the first event on the notification queue */

From 3f5d8326a870729ae21aa0a6b132cf8d9e9fcc1e Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ZenIV.linux.org.uk>
Date: Tue, 20 Sep 2016 20:07:42 +0100
Subject: [PATCH 49/74] fix fault_in_multipages_...() on architectures with
 no-op access_ok()

commit e23d4159b109167126e5bcd7f3775c95de7fee47 upstream.

Switching iov_iter fault-in to multipages variants has exposed an old
bug in underlying fault_in_multipages_...(); they break if the range
passed to them wraps around.  Normally access_ok() done by callers will
prevent such (and it's a guaranteed EFAULT - ERR_PTR() values fall into
such a range and they should not point to any valid objects).

However, on architectures where userland and kernel live in different
MMU contexts (e.g. s390) access_ok() is a no-op and on those a range
with a wraparound can reach fault_in_multipages_...().

Since any wraparound means EFAULT there, the fix is trivial - turn
those

    while (uaddr <= end)
	    ...
into

    if (unlikely(uaddr > end))
	    return -EFAULT;
    do
	    ...
    while (uaddr <= end);

Reported-by: Jan Stancek <jstancek@redhat.com>
Tested-by: Jan Stancek <jstancek@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/pagemap.h | 38 +++++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 26eabf5ec718..fbfadba81c5a 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -601,56 +601,56 @@ static inline int fault_in_pages_readable(const char __user *uaddr, int size)
  */
 static inline int fault_in_multipages_writeable(char __user *uaddr, int size)
 {
-	int ret = 0;
 	char __user *end = uaddr + size - 1;
 
 	if (unlikely(size == 0))
-		return ret;
+		return 0;
 
+	if (unlikely(uaddr > end))
+		return -EFAULT;
 	/*
 	 * Writing zeroes into userspace here is OK, because we know that if
 	 * the zero gets there, we'll be overwriting it.
 	 */
-	while (uaddr <= end) {
-		ret = __put_user(0, uaddr);
-		if (ret != 0)
-			return ret;
+	do {
+		if (unlikely(__put_user(0, uaddr) != 0))
+			return -EFAULT;
 		uaddr += PAGE_SIZE;
-	}
+	} while (uaddr <= end);
 
 	/* Check whether the range spilled into the next page. */
 	if (((unsigned long)uaddr & PAGE_MASK) ==
 			((unsigned long)end & PAGE_MASK))
-		ret = __put_user(0, end);
+		return __put_user(0, end);
 
-	return ret;
+	return 0;
 }
 
 static inline int fault_in_multipages_readable(const char __user *uaddr,
 					       int size)
 {
 	volatile char c;
-	int ret = 0;
 	const char __user *end = uaddr + size - 1;
 
 	if (unlikely(size == 0))
-		return ret;
+		return 0;
 
-	while (uaddr <= end) {
-		ret = __get_user(c, uaddr);
-		if (ret != 0)
-			return ret;
+	if (unlikely(uaddr > end))
+		return -EFAULT;
+
+	do {
+		if (unlikely(__get_user(c, uaddr) != 0))
+			return -EFAULT;
 		uaddr += PAGE_SIZE;
-	}
+	} while (uaddr <= end);
 
 	/* Check whether the range spilled into the next page. */
 	if (((unsigned long)uaddr & PAGE_MASK) ==
 			((unsigned long)end & PAGE_MASK)) {
-		ret = __get_user(c, end);
-		(void)c;
+		return __get_user(c, end);
 	}
 
-	return ret;
+	return 0;
 }
 
 int add_to_page_cache_locked(struct page *page, struct address_space *mapping,

From 45987838305b660f44b6edffc8b653361bef7390 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 15 Jul 2016 14:06:30 +0300
Subject: [PATCH 50/74] mtd: maps: sa1100-flash: potential NULL dereference

commit dc01a28d80a42cef08c94dfc595565aaebe46d15 upstream.

We check for NULL but then dereference "info->mtd" on the next line.

Fixes: 72169755cf36 ('mtd: maps: sa1100-flash: show parent device in sysfs')
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mtd/maps/sa1100-flash.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/mtd/maps/sa1100-flash.c b/drivers/mtd/maps/sa1100-flash.c
index 142fc3d79463..784c6e1a0391 100644
--- a/drivers/mtd/maps/sa1100-flash.c
+++ b/drivers/mtd/maps/sa1100-flash.c
@@ -230,8 +230,10 @@ static struct sa_info *sa1100_setup_mtd(struct platform_device *pdev,
 
 		info->mtd = mtd_concat_create(cdev, info->num_subdev,
 					      plat->name);
-		if (info->mtd == NULL)
+		if (info->mtd == NULL) {
 			ret = -ENXIO;
+			goto err;
+		}
 	}
 	info->mtd->dev.parent = &pdev->dev;
 

From a52c63ade338cfddb7a5e779cef913624b25fded Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 14 Jul 2016 13:44:56 +0300
Subject: [PATCH 51/74] mtd: pmcmsp-flash: Allocating too much in
 init_msp_flash()

commit 79ad07d45743721010e766e65dc004ad249bd429 upstream.

There is a cut and paste issue here.  The bug is that we are allocating
more memory than necessary for msp_maps.  We should be allocating enough
space for a map_info struct (144 bytes) but we instead allocate enough
for an mtd_info struct (1840 bytes).  It's a small waste.

The other part of this is not harmful but when we allocated msp_flash
then we allocated enough space fro a map_info pointer instead of an
mtd_info pointer.  But since pointers are the same size it works out
fine.

Anyway, I decided to clean up all three allocations a bit to make them
a bit more consistent and clear.

Fixes: 68aa0fa87f6d ('[MTD] PMC MSP71xx flash/rootfs mappings')
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mtd/maps/pmcmsp-flash.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/mtd/maps/pmcmsp-flash.c b/drivers/mtd/maps/pmcmsp-flash.c
index 744ca5cacc9b..f9fa3fad728e 100644
--- a/drivers/mtd/maps/pmcmsp-flash.c
+++ b/drivers/mtd/maps/pmcmsp-flash.c
@@ -75,15 +75,15 @@ static int __init init_msp_flash(void)
 
 	printk(KERN_NOTICE "Found %d PMC flash devices\n", fcnt);
 
-	msp_flash = kmalloc(fcnt * sizeof(struct map_info *), GFP_KERNEL);
+	msp_flash = kcalloc(fcnt, sizeof(*msp_flash), GFP_KERNEL);
 	if (!msp_flash)
 		return -ENOMEM;
 
-	msp_parts = kmalloc(fcnt * sizeof(struct mtd_partition *), GFP_KERNEL);
+	msp_parts = kcalloc(fcnt, sizeof(*msp_parts), GFP_KERNEL);
 	if (!msp_parts)
 		goto free_msp_flash;
 
-	msp_maps = kcalloc(fcnt, sizeof(struct mtd_info), GFP_KERNEL);
+	msp_maps = kcalloc(fcnt, sizeof(*msp_maps), GFP_KERNEL);
 	if (!msp_maps)
 		goto free_msp_parts;
 

From 3ee1b560e735449509791698d4772462a9205bda Mon Sep 17 00:00:00 2001
From: Arvind Yadav <arvind.yadav.cs@gmail.com>
Date: Fri, 12 Aug 2016 20:49:18 +0530
Subject: [PATCH 52/74] power: reset: hisi-reboot: Unmap region obtained by
 of_iomap

commit bae170efd6c42bf116f513a1dd07639d68fa71b9 upstream.

Free memory mapping, if probe is not successful.

Fixes: 4a9b37371822 ("power: reset: move hisilicon reboot code")
Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com>
Signed-off-by: Sebastian Reichel <sre@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/power/reset/hisi-reboot.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/power/reset/hisi-reboot.c b/drivers/power/reset/hisi-reboot.c
index 9ab7f562a83b..f69387e12c1e 100644
--- a/drivers/power/reset/hisi-reboot.c
+++ b/drivers/power/reset/hisi-reboot.c
@@ -53,13 +53,16 @@ static int hisi_reboot_probe(struct platform_device *pdev)
 
 	if (of_property_read_u32(np, "reboot-offset", &reboot_offset) < 0) {
 		pr_err("failed to find reboot-offset property\n");
+		iounmap(base);
 		return -EINVAL;
 	}
 
 	err = register_restart_handler(&hisi_restart_nb);
-	if (err)
+	if (err) {
 		dev_err(&pdev->dev, "cannot register restart handler (err=%d)\n",
 			err);
+		iounmap(base);
+	}
 
 	return err;
 }

From 369796a8843ec9fce05129c6a80bee9e4b90978b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 17 Sep 2016 18:31:46 -0400
Subject: [PATCH 53/74] fix memory leaks in tracing_buffers_splice_read()

commit 1ae2293dd6d2f5c823cf97e60b70d03631cd622f upstream.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/trace/trace.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8305cbb2d5a2..3a44903169ec 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -5766,9 +5766,6 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
 		return -EBUSY;
 #endif
 
-	if (splice_grow_spd(pipe, &spd))
-		return -ENOMEM;
-
 	if (*ppos & (PAGE_SIZE - 1))
 		return -EINVAL;
 
@@ -5778,6 +5775,9 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
 		len &= PAGE_MASK;
 	}
 
+	if (splice_grow_spd(pipe, &spd))
+		return -ENOMEM;
+
  again:
 	trace_access_lock(iter->cpu_file);
 	entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
@@ -5835,19 +5835,21 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
 	/* did we read anything? */
 	if (!spd.nr_pages) {
 		if (ret)
-			return ret;
+			goto out;
 
+		ret = -EAGAIN;
 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
-			return -EAGAIN;
+			goto out;
 
 		ret = wait_on_pipe(iter, true);
 		if (ret)
-			return ret;
+			goto out;
 
 		goto again;
 	}
 
 	ret = splice_to_pipe(pipe, &spd);
+out:
 	splice_shrink_spd(&spd);
 
 	return ret;

From 8b275b4522af2b19072df7aee87fd659e09d29bc Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Fri, 23 Sep 2016 22:57:13 -0400
Subject: [PATCH 54/74] tracing: Move mutex to protect against resetting of seq
 data

commit 1245800c0f96eb6ebb368593e251d66c01e61022 upstream.

The iter->seq can be reset outside the protection of the mutex. So can
reading of user data. Move the mutex up to the beginning of the function.

Fixes: d7350c3f45694 ("tracing/core: make the read callbacks reentrants")
Reported-by: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/trace/trace.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 3a44903169ec..059233abcfcf 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4727,19 +4727,20 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
 	struct trace_iterator *iter = filp->private_data;
 	ssize_t sret;
 
-	/* return any leftover data */
-	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
-	if (sret != -EBUSY)
-		return sret;
-
-	trace_seq_init(&iter->seq);
-
 	/*
 	 * Avoid more than one consumer on a single file descriptor
 	 * This is just a matter of traces coherency, the ring buffer itself
 	 * is protected.
 	 */
 	mutex_lock(&iter->mutex);
+
+	/* return any leftover data */
+	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
+	if (sret != -EBUSY)
+		goto out;
+
+	trace_seq_init(&iter->seq);
+
 	if (iter->trace->read) {
 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
 		if (sret)

From 7af3e4e129072b33ae95625555d38150f181adc3 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Fri, 23 Sep 2016 20:27:04 -0700
Subject: [PATCH 55/74] mm: delete unnecessary and unsafe init_tlb_ubc()

commit b385d21f27d86426472f6ae92a231095f7de2a8d upstream.

init_tlb_ubc() looked unnecessary to me: tlb_ubc is statically
initialized with zeroes in the init_task, and copied from parent to
child while it is quiescent in arch_dup_task_struct(); so I went to
delete it.

But inserted temporary debug WARN_ONs in place of init_tlb_ubc() to
check that it was always empty at that point, and found them firing:
because memcg reclaim can recurse into global reclaim (when allocating
biosets for swapout in my case), and arrive back at the init_tlb_ubc()
in shrink_node_memcg().

Resetting tlb_ubc.flush_required at that point is wrong: if the upper
level needs a deferred TLB flush, but the lower level turns out not to,
we miss a TLB flush.  But fortunately, that's the only part of the
protocol that does not nest: with the initialization removed, cpumask
collects bits from upper and lower levels, and flushes TLB when needed.

Fixes: 72b252aed506 ("mm: send one IPI per CPU to TLB flush all entries after unmapping pages")
Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/vmscan.c | 19 -------------------
 1 file changed, 19 deletions(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 0c114e2b01d3..0838e9f02b11 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2159,23 +2159,6 @@ out:
 	}
 }
 
-#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
-static void init_tlb_ubc(void)
-{
-	/*
-	 * This deliberately does not clear the cpumask as it's expensive
-	 * and unnecessary. If there happens to be data in there then the
-	 * first SWAP_CLUSTER_MAX pages will send an unnecessary IPI and
-	 * then will be cleared.
-	 */
-	current->tlb_ubc.flush_required = false;
-}
-#else
-static inline void init_tlb_ubc(void)
-{
-}
-#endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */
-
 /*
  * This is a basic per-zone page freer.  Used by both kswapd and direct reclaim.
  */
@@ -2210,8 +2193,6 @@ static void shrink_lruvec(struct lruvec *lruvec, int swappiness,
 	scan_adjusted = (global_reclaim(sc) && !current_is_kswapd() &&
 			 sc->priority == DEF_PRIORITY);
 
-	init_tlb_ubc();
-
 	blk_start_plug(&plug);
 	while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
 					nr[LRU_INACTIVE_FILE]) {

From a68022d9b59c2fc1ebe3c4133f0e2f32b4cde065 Mon Sep 17 00:00:00 2001
From: Fabio Estevam <fabio.estevam@nxp.com>
Date: Wed, 17 Aug 2016 12:41:08 -0300
Subject: [PATCH 56/74] can: flexcan: fix resume function

commit 4de349e786a3a2d51bd02d56f3de151bbc3c3df9 upstream.

On a imx6ul-pico board the following error is seen during system suspend:

dpm_run_callback(): platform_pm_resume+0x0/0x54 returns -110
PM: Device 2090000.flexcan failed to resume: error -110

The reason for this suspend error is because when the CAN interface is not
active the clocks are disabled and then flexcan_chip_enable() will
always fail due to a timeout error.

In order to fix this issue, only call flexcan_chip_enable/disable()
when the CAN interface is active.

Based on a patch from Dong Aisheng in the NXP kernel.

Signed-off-by: Fabio Estevam <fabio.estevam@nxp.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/can/flexcan.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c
index 41c0fc9f3b14..16f7cadda5c3 100644
--- a/drivers/net/can/flexcan.c
+++ b/drivers/net/can/flexcan.c
@@ -1268,11 +1268,10 @@ static int __maybe_unused flexcan_suspend(struct device *device)
 	struct flexcan_priv *priv = netdev_priv(dev);
 	int err;
 
-	err = flexcan_chip_disable(priv);
-	if (err)
-		return err;
-
 	if (netif_running(dev)) {
+		err = flexcan_chip_disable(priv);
+		if (err)
+			return err;
 		netif_stop_queue(dev);
 		netif_device_detach(dev);
 	}
@@ -1285,13 +1284,17 @@ static int __maybe_unused flexcan_resume(struct device *device)
 {
 	struct net_device *dev = dev_get_drvdata(device);
 	struct flexcan_priv *priv = netdev_priv(dev);
+	int err;
 
 	priv->can.state = CAN_STATE_ERROR_ACTIVE;
 	if (netif_running(dev)) {
 		netif_device_attach(dev);
 		netif_start_queue(dev);
+		err = flexcan_chip_enable(priv);
+		if (err)
+			return err;
 	}
-	return flexcan_chip_enable(priv);
+	return 0;
 }
 
 static SIMPLE_DEV_PM_OPS(flexcan_pm_ops, flexcan_suspend, flexcan_resume);

From f1b01a340b2b4bc18e90fe5e5a3e99d7d3cffe32 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 13 Sep 2016 15:53:55 +0200
Subject: [PATCH 57/74] nl80211: validate number of probe response CSA counters

commit ad5987b47e96a0fb6d13fea250e936aed000093c upstream.

Due to an apparent copy/paste bug, the number of counters for the
beacon configuration were checked twice, instead of checking the
number of probe response counters. Fix this to check the number of
probe response counters before parsing those.

Fixes: 9a774c78e211 ("cfg80211: Support multiple CSA counters")
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/wireless/nl80211.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 5d89f13a98db..bf65f31bd55e 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -6628,7 +6628,7 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info)
 
 		params.n_counter_offsets_presp = len / sizeof(u16);
 		if (rdev->wiphy.max_num_csa_counters &&
-		    (params.n_counter_offsets_beacon >
+		    (params.n_counter_offsets_presp >
 		     rdev->wiphy.max_num_csa_counters))
 			return -EINVAL;
 

From e944e69815ec4d6212514e658a4c7d155793c8f0 Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@suse.com>
Date: Wed, 21 Sep 2016 08:31:29 -0400
Subject: [PATCH 58/74] btrfs: ensure that file descriptor used with subvol
 ioctls is a dir

commit 325c50e3cebb9208009083e841550f98a863bfa0 upstream.

If the subvol/snapshot create/destroy ioctls are passed a regular file
with execute permissions set, we'll eventually Oops while trying to do
inode->i_op->lookup via lookup_one_len.

This patch ensures that the file descriptor refers to a directory.

Fixes: cb8e70901d (Btrfs: Fix subvolume creation locking rules)
Fixes: 76dda93c6a (Btrfs: add snapshot/subvolume destroy ioctl)
Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Chris Mason <clm@fb.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/ioctl.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 65f30b3b04f9..a7e18dbadf74 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1619,6 +1619,9 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
 	int namelen;
 	int ret = 0;
 
+	if (!S_ISDIR(file_inode(file)->i_mode))
+		return -ENOTDIR;
+
 	ret = mnt_want_write_file(file);
 	if (ret)
 		goto out;
@@ -1676,6 +1679,9 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
 	struct btrfs_ioctl_vol_args *vol_args;
 	int ret;
 
+	if (!S_ISDIR(file_inode(file)->i_mode))
+		return -ENOTDIR;
+
 	vol_args = memdup_user(arg, sizeof(*vol_args));
 	if (IS_ERR(vol_args))
 		return PTR_ERR(vol_args);
@@ -1699,6 +1705,9 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
 	bool readonly = false;
 	struct btrfs_qgroup_inherit *inherit = NULL;
 
+	if (!S_ISDIR(file_inode(file)->i_mode))
+		return -ENOTDIR;
+
 	vol_args = memdup_user(arg, sizeof(*vol_args));
 	if (IS_ERR(vol_args))
 		return PTR_ERR(vol_args);
@@ -2345,6 +2354,9 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
 	int ret;
 	int err = 0;
 
+	if (!S_ISDIR(dir->i_mode))
+		return -ENOTDIR;
+
 	vol_args = memdup_user(arg, sizeof(*vol_args));
 	if (IS_ERR(vol_args))
 		return PTR_ERR(vol_args);

From bbf1c4d2ea4a299d38874646e441327006d52c14 Mon Sep 17 00:00:00 2001
From: "Yadi.hu" <yadi.hu@windriver.com>
Date: Sun, 18 Sep 2016 18:52:31 +0800
Subject: [PATCH 59/74] i2c-eg20t: fix race between i2c init and interrupt
 enable

commit 371a015344b6e270e7e3632107d9554ec6d27a6b upstream.

the eg20t driver call request_irq() function before the pch_base_address,
base address of i2c controller's register, is assigned an effective value.

there is one possible scenario that an interrupt which isn't inside eg20t
arrives immediately after request_irq() is executed when i2c controller
shares an interrupt number with others. since the interrupt handler
pch_i2c_handler() has already active as shared action, it will be called
and read its own register to determine if this interrupt is from itself.

At that moment, since base address of i2c registers is not remapped
in kernel space yet,so the INT handler will access an illegal address
and then a error occurs.

Signed-off-by: Yadi.hu <yadi.hu@windriver.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/i2c/busses/i2c-eg20t.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/drivers/i2c/busses/i2c-eg20t.c b/drivers/i2c/busses/i2c-eg20t.c
index 76e699f9ed97..eef3aa6007f1 100644
--- a/drivers/i2c/busses/i2c-eg20t.c
+++ b/drivers/i2c/busses/i2c-eg20t.c
@@ -773,13 +773,6 @@ static int pch_i2c_probe(struct pci_dev *pdev,
 	/* Set the number of I2C channel instance */
 	adap_info->ch_num = id->driver_data;
 
-	ret = request_irq(pdev->irq, pch_i2c_handler, IRQF_SHARED,
-		  KBUILD_MODNAME, adap_info);
-	if (ret) {
-		pch_pci_err(pdev, "request_irq FAILED\n");
-		goto err_request_irq;
-	}
-
 	for (i = 0; i < adap_info->ch_num; i++) {
 		pch_adap = &adap_info->pch_data[i].pch_adapter;
 		adap_info->pch_i2c_suspended = false;
@@ -796,6 +789,17 @@ static int pch_i2c_probe(struct pci_dev *pdev,
 		adap_info->pch_data[i].pch_base_address = base_addr + 0x100 * i;
 
 		pch_adap->dev.parent = &pdev->dev;
+	}
+
+	ret = request_irq(pdev->irq, pch_i2c_handler, IRQF_SHARED,
+		  KBUILD_MODNAME, adap_info);
+	if (ret) {
+		pch_pci_err(pdev, "request_irq FAILED\n");
+		goto err_request_irq;
+	}
+
+	for (i = 0; i < adap_info->ch_num; i++) {
+		pch_adap = &adap_info->pch_data[i].pch_adapter;
 
 		pch_i2c_init(&adap_info->pch_data[i]);
 

From 05e5e96324402c5066a763315d8159c9b8c94e22 Mon Sep 17 00:00:00 2001
From: Sudeep Holla <Sudeep.Holla@arm.com>
Date: Thu, 25 Aug 2016 12:23:39 +0100
Subject: [PATCH 60/74] i2c: qup: skip qup_i2c_suspend if the device is already
 runtime suspended

commit 331dcf421c34d227784d07943eb01e4023a42b0a upstream.

If the i2c device is already runtime suspended, if qup_i2c_suspend is
executed during suspend-to-idle or suspend-to-ram it will result in the
following splat:

WARNING: CPU: 3 PID: 1593 at drivers/clk/clk.c:476 clk_core_unprepare+0x80/0x90
Modules linked in:

CPU: 3 PID: 1593 Comm: bash Tainted: G        W       4.8.0-rc3 #14
Hardware name: Qualcomm Technologies, Inc. APQ 8016 SBC (DT)
PC is at clk_core_unprepare+0x80/0x90
LR is at clk_unprepare+0x28/0x40
pc : [<ffff0000086eecf0>] lr : [<ffff0000086f0c58>] pstate: 60000145
Call trace:
 clk_core_unprepare+0x80/0x90
 qup_i2c_disable_clocks+0x2c/0x68
 qup_i2c_suspend+0x10/0x20
 platform_pm_suspend+0x24/0x68
 ...

This patch fixes the issue by executing qup_i2c_pm_suspend_runtime
conditionally in qup_i2c_suspend.

Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
Reviewed-by: Andy Gross <andy.gross@linaro.org>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/i2c/busses/i2c-qup.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/i2c-qup.c b/drivers/i2c/busses/i2c-qup.c
index fdcbdab808e9..33b11563cde7 100644
--- a/drivers/i2c/busses/i2c-qup.c
+++ b/drivers/i2c/busses/i2c-qup.c
@@ -727,7 +727,8 @@ static int qup_i2c_pm_resume_runtime(struct device *device)
 #ifdef CONFIG_PM_SLEEP
 static int qup_i2c_suspend(struct device *device)
 {
-	qup_i2c_pm_suspend_runtime(device);
+	if (!pm_runtime_suspended(device))
+		return qup_i2c_pm_suspend_runtime(device);
 	return 0;
 }
 

From 18720392db1b57693beb130d37ce9d9201efb58b Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Fri, 23 Sep 2016 15:13:53 +0100
Subject: [PATCH 61/74] MIPS: Fix pre-r6 emulation FPU initialisation

commit 7e956304eb8a285304a78582e4537e72c6365f20 upstream.

In the mipsr2_decoder() function, used to emulate pre-MIPSr6
instructions that were removed in MIPSr6, the init_fpu() function is
called if a removed pre-MIPSr6 floating point instruction is the first
floating point instruction used by the task. However, init_fpu()
performs varous actions that rely upon not being migrated. For example
in the most basic case it sets the coprocessor 0 Status.CU1 bit to
enable the FPU & then loads FP register context into the FPU registers.
If the task were to migrate during this time, it may end up attempting
to load FP register context on a different CPU where it hasn't set the
CU1 bit, leading to errors such as:

    do_cpu invoked from kernel context![#2]:
    CPU: 2 PID: 7338 Comm: fp-prctl Tainted: G      D         4.7.0-00424-g49b0c82 #2
    task: 838e4000 ti: 88d38000 task.ti: 88d38000
    $ 0   : 00000000 00000001 ffffffff 88d3fef8
    $ 4   : 838e4000 88d38004 00000000 00000001
    $ 8   : 3400fc01 801f8020 808e9100 24000000
    $12   : dbffffff 807b69d8 807b0000 00000000
    $16   : 00000000 80786150 00400fc4 809c0398
    $20   : 809c0338 0040273c 88d3ff28 808e9d30
    $24   : 808e9d30 00400fb4
    $28   : 88d38000 88d3fe88 00000000 8011a2ac
    Hi    : 0040273c
    Lo    : 88d3ff28
    epc   : 80114178 _restore_fp+0x10/0xa0
    ra    : 8011a2ac mipsr2_decoder+0xd5c/0x1660
    Status: 1400fc03	KERNEL EXL IE
    Cause : 1080002c (ExcCode 0b)
    PrId  : 0001a920 (MIPS I6400)
    Modules linked in:
    Process fp-prctl (pid: 7338, threadinfo=88d38000, task=838e4000, tls=766527d0)
    Stack : 00000000 00000000 00000000 88d3fe98 00000000 00000000 809c0398 809c0338
    	  808e9100 00000000 88d3ff28 00400fc4 00400fc4 0040273c 7fb69e18 004a0000
    	  004a0000 004a0000 7664add0 8010de18 00000000 00000000 88d3fef8 88d3ff28
    	  808e9100 00000000 766527d0 8010e534 000c0000 85755000 8181d580 00000000
    	  00000000 00000000 004a0000 00000000 766527d0 7fb69e18 004a0000 80105c20
    	  ...
    Call Trace:
    [<80114178>] _restore_fp+0x10/0xa0
    [<8011a2ac>] mipsr2_decoder+0xd5c/0x1660
    [<8010de18>] do_ri+0x90/0x6b8
    [<80105c20>] ret_from_exception+0x0/0x10

Fix this by disabling preemption around the call to init_fpu(), ensuring
that it starts & completes on one CPU.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Fixes: b0a668fb2038 ("MIPS: kernel: mips-r2-to-r6-emul: Add R2 emulator for MIPS R6")
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/14305/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/kernel/mips-r2-to-r6-emul.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/mips/kernel/mips-r2-to-r6-emul.c b/arch/mips/kernel/mips-r2-to-r6-emul.c
index 4674a74a08b5..af27334d6809 100644
--- a/arch/mips/kernel/mips-r2-to-r6-emul.c
+++ b/arch/mips/kernel/mips-r2-to-r6-emul.c
@@ -1164,7 +1164,9 @@ fpu_emul:
 		regs->regs[31] = r31;
 		regs->cp0_epc = epc;
 		if (!used_math()) {     /* First time FPU user.  */
+			preempt_disable();
 			err = init_fpu();
+			preempt_enable();
 			set_used_math();
 		}
 		lose_fpu(1);    /* Save FPU state for the emulator. */

From b8a84b8488fbdf38b27fc14bd9e5d49211a38fc7 Mon Sep 17 00:00:00 2001
From: Matt Redfearn <matt.redfearn@imgtec.com>
Date: Thu, 22 Sep 2016 17:15:47 +0100
Subject: [PATCH 62/74] MIPS: SMP: Fix possibility of deadlock when bringing
 CPUs online

commit 8f46cca1e6c06a058374816887059bcc017b382f upstream.

This patch fixes the possibility of a deadlock when bringing up
secondary CPUs.
The deadlock occurs because the set_cpu_online() is called before
synchronise_count_slave(). This can cause a deadlock if the boot CPU,
having scheduled another thread, attempts to send an IPI to the
secondary CPU, which it sees has been marked online. The secondary is
blocked in synchronise_count_slave() waiting for the boot CPU to enter
synchronise_count_master(), but the boot cpu is blocked in
smp_call_function_many() waiting for the secondary to respond to it's
IPI request.

Fix this by marking the CPU online in cpu_callin_map and synchronising
counters before declaring the CPU online and calculating the maps for
IPIs.

Signed-off-by: Matt Redfearn <matt.redfearn@imgtec.com>
Reported-by: Justin Chen <justinpopo6@gmail.com>
Tested-by: Justin Chen <justinpopo6@gmail.com>
Cc: Florian Fainelli <f.fainelli@gmail.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/14302/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/kernel/smp.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 2b521e07b860..7fef02a9eb85 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -174,6 +174,9 @@ asmlinkage void start_secondary(void)
 	cpumask_set_cpu(cpu, &cpu_coherent_mask);
 	notify_cpu_starting(cpu);
 
+	cpumask_set_cpu(cpu, &cpu_callin_map);
+	synchronise_count_slave(cpu);
+
 	set_cpu_online(cpu, true);
 
 	set_cpu_sibling_map(cpu);
@@ -181,10 +184,6 @@ asmlinkage void start_secondary(void)
 
 	calculate_cpu_foreign_map();
 
-	cpumask_set_cpu(cpu, &cpu_callin_map);
-
-	synchronise_count_slave(cpu);
-
 	/*
 	 * irq will be enabled in ->smp_finish(), enabling it too early
 	 * is dangerous.

From 450abddb1cb4d5f3e1cde42670348ead50a25b91 Mon Sep 17 00:00:00 2001
From: James Hogan <james.hogan@imgtec.com>
Date: Wed, 7 Sep 2016 13:37:01 +0100
Subject: [PATCH 63/74] MIPS: vDSO: Fix Malta EVA mapping to vDSO page structs

commit 554af0c396380baf416f54c439b99b495180b2f4 upstream.

The page structures associated with the vDSO pages in the kernel image
are calculated using virt_to_page(), which uses __pa() under the hood to
find the pfn associated with the virtual address. The vDSO data pointers
however point to kernel symbols, so __pa_symbol() should really be used
instead.

Since there is no equivalent to virt_to_page() which uses __pa_symbol(),
fix init_vdso_image() to work directly with pfns, calculated with
__phys_to_pfn(__pa_symbol(...)).

This issue broke the Malta Enhanced Virtual Addressing (EVA)
configuration which has a non-default implementation of __pa_symbol().
This is because it uses a physical alias so that the kernel executes
from KSeg0 (VA 0x80000000 -> PA 0x00000000), while RAM is provided to
the kernel in the KUSeg range (VA 0x00000000 -> PA 0x80000000) which
uses the same underlying RAM.

Since there are no page structures associated with the low physical
address region, some arbitrary kernel memory would be interpreted as a
page structure for the vDSO pages and badness ensues.

Fixes: ebb5e78cc634 ("MIPS: Initial implementation of a VDSO")
Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: Leonid Yegoshin <leonid.yegoshin@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/14229/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/kernel/vdso.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/mips/kernel/vdso.c b/arch/mips/kernel/vdso.c
index 975e99759bab..5649a9e429e0 100644
--- a/arch/mips/kernel/vdso.c
+++ b/arch/mips/kernel/vdso.c
@@ -39,16 +39,16 @@ static struct vm_special_mapping vdso_vvar_mapping = {
 static void __init init_vdso_image(struct mips_vdso_image *image)
 {
 	unsigned long num_pages, i;
+	unsigned long data_pfn;
 
 	BUG_ON(!PAGE_ALIGNED(image->data));
 	BUG_ON(!PAGE_ALIGNED(image->size));
 
 	num_pages = image->size / PAGE_SIZE;
 
-	for (i = 0; i < num_pages; i++) {
-		image->mapping.pages[i] =
-			virt_to_page(image->data + (i * PAGE_SIZE));
-	}
+	data_pfn = __phys_to_pfn(__pa_symbol(image->data));
+	for (i = 0; i < num_pages; i++)
+		image->mapping.pages[i] = pfn_to_page(data_pfn + i);
 }
 
 static int __init init_vdso(void)

From 9b30cac4946a74a25f2569e6929bcd081cfbdeac Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Mon, 12 Sep 2016 10:58:06 +0100
Subject: [PATCH 64/74] MIPS: Remove compact branch policy Kconfig entries

commit b03c1e3b8eed9026733c473071d1f528358a0e50 upstream.

Commit c1a0e9bc885d ("MIPS: Allow compact branch policy to be changed")
added Kconfig entries allowing for the compact branch policy used by the
compiler for MIPSr6 kernels to be specified. This can be useful for
debugging, particularly in systems where compact branches have recently
been introduced.

Unfortunately mainline gcc 5.x supports MIPSr6 but not the
-mcompact-branches compiler flag, leading to MIPSr6 kernels failing to
build with gcc 5.x with errors such as:

  mipsel-linux-gnu-gcc: error: unrecognized command line option '-mcompact-branches=optimal'
  make[2]: *** [kernel/bounds.s] Error 1

Fixing this by hiding the Kconfig entry behind another seems to be more
hassle than it's worth, as MIPSr6 & compact branches have been around
for a while now and if policy does need to be set for debug it can be
done easily enough with KCFLAGS. Therefore remove the compact branch
policy Kconfig entries & their handling in the Makefile.

This reverts commit c1a0e9bc885d ("MIPS: Allow compact branch policy to
be changed").

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Reported-by: kbuild test robot <fengguang.wu@intel.com>
Fixes: c1a0e9bc885d ("MIPS: Allow compact branch policy to be changed")
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/14241/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/Kconfig.debug | 36 ------------------------------------
 arch/mips/Makefile      |  4 ----
 2 files changed, 40 deletions(-)

diff --git a/arch/mips/Kconfig.debug b/arch/mips/Kconfig.debug
index f0e314ceb8ba..7f975b20b20c 100644
--- a/arch/mips/Kconfig.debug
+++ b/arch/mips/Kconfig.debug
@@ -113,42 +113,6 @@ config SPINLOCK_TEST
 	help
 	  Add several files to the debugfs to test spinlock speed.
 
-if CPU_MIPSR6
-
-choice
-	prompt "Compact branch policy"
-	default MIPS_COMPACT_BRANCHES_OPTIMAL
-
-config MIPS_COMPACT_BRANCHES_NEVER
-	bool "Never (force delay slot branches)"
-	help
-	  Pass the -mcompact-branches=never flag to the compiler in order to
-	  force it to always emit branches with delay slots, and make no use
-	  of the compact branch instructions introduced by MIPSr6. This is
-	  useful if you suspect there may be an issue with compact branches in
-	  either the compiler or the CPU.
-
-config MIPS_COMPACT_BRANCHES_OPTIMAL
-	bool "Optimal (use where beneficial)"
-	help
-	  Pass the -mcompact-branches=optimal flag to the compiler in order for
-	  it to make use of compact branch instructions where it deems them
-	  beneficial, and use branches with delay slots elsewhere. This is the
-	  default compiler behaviour, and should be used unless you have a
-	  reason to choose otherwise.
-
-config MIPS_COMPACT_BRANCHES_ALWAYS
-	bool "Always (force compact branches)"
-	help
-	  Pass the -mcompact-branches=always flag to the compiler in order to
-	  force it to always emit compact branches, making no use of branch
-	  instructions with delay slots. This can result in more compact code
-	  which may be beneficial in some scenarios.
-
-endchoice
-
-endif # CPU_MIPSR6
-
 config SCACHE_DEBUGFS
 	bool "L2 cache debugfs entries"
 	depends on DEBUG_FS
diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index 3f70ba54ae21..252e347958f3 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -204,10 +204,6 @@ toolchain-msa				:= $(call cc-option-yn,$(mips-cflags) -mhard-float -mfp64 -Wa$(
 cflags-$(toolchain-msa)			+= -DTOOLCHAIN_SUPPORTS_MSA
 endif
 
-cflags-$(CONFIG_MIPS_COMPACT_BRANCHES_NEVER)	+= -mcompact-branches=never
-cflags-$(CONFIG_MIPS_COMPACT_BRANCHES_OPTIMAL)	+= -mcompact-branches=optimal
-cflags-$(CONFIG_MIPS_COMPACT_BRANCHES_ALWAYS)	+= -mcompact-branches=always
-
 #
 # Firmware support
 #

From 49cded2ae19b0624be81bbf6ba3badd51c1375a8 Mon Sep 17 00:00:00 2001
From: Marcin Nowakowski <marcin.nowakowski@imgtec.com>
Date: Wed, 31 Aug 2016 12:33:23 +0200
Subject: [PATCH 65/74] MIPS: Avoid a BUG warning during prctl(PR_SET_FP_MODE,
 ...)

commit b244614a60ab7ce54c12a9cbe15cfbf8d79d0967 upstream.

cpu_has_fpu macro uses smp_processor_id() and is currently executed
with preemption enabled, that triggers the warning at runtime.

It is assumed throughout the kernel that if any CPU has an FPU, then all
CPUs would have an FPU as well, so it is safe to perform the check with
preemption enabled - change the code to use raw_ variant of the check to
avoid the warning.

Signed-off-by: Marcin Nowakowski <marcin.nowakowski@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/14125/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/kernel/process.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index 89847bee2b53..44a6f25e902e 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -593,14 +593,14 @@ int mips_set_process_fp_mode(struct task_struct *task, unsigned int value)
 		return -EOPNOTSUPP;
 
 	/* Avoid inadvertently triggering emulation */
-	if ((value & PR_FP_MODE_FR) && cpu_has_fpu &&
-	    !(current_cpu_data.fpu_id & MIPS_FPIR_F64))
+	if ((value & PR_FP_MODE_FR) && raw_cpu_has_fpu &&
+	    !(raw_current_cpu_data.fpu_id & MIPS_FPIR_F64))
 		return -EOPNOTSUPP;
-	if ((value & PR_FP_MODE_FRE) && cpu_has_fpu && !cpu_has_fre)
+	if ((value & PR_FP_MODE_FRE) && raw_cpu_has_fpu && !cpu_has_fre)
 		return -EOPNOTSUPP;
 
 	/* FR = 0 not supported in MIPS R6 */
-	if (!(value & PR_FP_MODE_FR) && cpu_has_fpu && cpu_has_mips_r6)
+	if (!(value & PR_FP_MODE_FR) && raw_cpu_has_fpu && cpu_has_mips_r6)
 		return -EOPNOTSUPP;
 
 	/* Proceed with the mode switch */

From aebd3358fdb6f74b53ebd17d4eac5e67269dd9ab Mon Sep 17 00:00:00 2001
From: Huacai Chen <chenhc@lemote.com>
Date: Mon, 5 Sep 2016 08:48:03 +0800
Subject: [PATCH 66/74] MIPS: Add a missing ".set pop" in an early commit

commit 3cbc6fc9c99f1709203711f125bc3b79487aba06 upstream.

Commit 842dfc11ea9a21 ("MIPS: Fix build with binutils 2.24.51+") missing
a ".set pop" in macro fpu_restore_16even, so add it.

Signed-off-by: Huacai Chen <chenhc@lemote.com>
Acked-by: Manuel Lauss <manuel.lauss@gmail.com>
Cc: Steven J . Hill <Steven.Hill@caviumnetworks.com>
Cc: Fuxin Zhang <zhangfx@lemote.com>
Cc: Zhangjin Wu <wuzhangjin@gmail.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/14210/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/include/asm/asmmacro.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/mips/include/asm/asmmacro.h b/arch/mips/include/asm/asmmacro.h
index e689b894353c..8dedee1def83 100644
--- a/arch/mips/include/asm/asmmacro.h
+++ b/arch/mips/include/asm/asmmacro.h
@@ -135,6 +135,7 @@
 	ldc1	$f28, THREAD_FPR28(\thread)
 	ldc1	$f30, THREAD_FPR30(\thread)
 	ctc1	\tmp, fcr31
+	.set	pop
 	.endm
 
 	.macro	fpu_restore_16odd thread

From da40055ecfbc0244f1e2906d627f0dda54b685df Mon Sep 17 00:00:00 2001
From: Matt Redfearn <matt.redfearn@imgtec.com>
Date: Mon, 5 Sep 2016 15:43:40 +0100
Subject: [PATCH 67/74] MIPS: paravirt: Fix undefined reference to
 smp_bootstrap

commit 951c39cd3bc0aedf67fbd8fb4b9380287e6205d1 upstream.

If the paravirt machine is compiles without CONFIG_SMP, the following
linker error occurs

arch/mips/kernel/head.o: In function `kernel_entry':
(.ref.text+0x10): undefined reference to `smp_bootstrap'

due to the kernel entry macro always including SMP startup code.
Wrap this code in CONFIG_SMP to fix the error.

Signed-off-by: Matt Redfearn <matt.redfearn@imgtec.com>
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/14212/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/include/asm/mach-paravirt/kernel-entry-init.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/mips/include/asm/mach-paravirt/kernel-entry-init.h b/arch/mips/include/asm/mach-paravirt/kernel-entry-init.h
index 2f82bfa3a773..c9f5769dfc8f 100644
--- a/arch/mips/include/asm/mach-paravirt/kernel-entry-init.h
+++ b/arch/mips/include/asm/mach-paravirt/kernel-entry-init.h
@@ -11,11 +11,13 @@
 #define CP0_EBASE $15, 1
 
 	.macro  kernel_entry_setup
+#ifdef CONFIG_SMP
 	mfc0	t0, CP0_EBASE
 	andi	t0, t0, 0x3ff		# CPUNum
 	beqz	t0, 1f
 	# CPUs other than zero goto smp_bootstrap
 	j	smp_bootstrap
+#endif /* CONFIG_SMP */
 
 1:
 	.endm

From 119c348a8ef2765fb19c96f76480d4aaefdcae23 Mon Sep 17 00:00:00 2001
From: Thomas Garnier <thgarnie@google.com>
Date: Thu, 11 Aug 2016 14:49:29 -0700
Subject: [PATCH 68/74] PM / hibernate: Restore processor state before using
 per-CPU variables

commit 62822e2ec4ad091ba31f823f577ef80db52e3c2c upstream.

Restore the processor state before calling any other functions to
ensure per-CPU variables can be used with KASLR memory randomization.

Tracing functions use per-CPU variables (GS based on x86) and one was
called just before restoring the processor state fully. It resulted
in a double fault when both the tracing & the exception handler
functions tried to use a per-CPU variable.

Fixes: bb3632c6101b (PM / sleep: trace events for suspend/resume)
Reported-and-tested-by: Borislav Petkov <bp@suse.de>
Reported-by: Jiri Kosina <jikos@kernel.org>
Tested-by: Rafael J. Wysocki <rafael@kernel.org>
Tested-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Thomas Garnier <thgarnie@google.com>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/power/hibernate.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index b7dd5718836e..3124cebaec31 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -299,12 +299,12 @@ static int create_image(int platform_mode)
 	save_processor_state();
 	trace_suspend_resume(TPS("machine_suspend"), PM_EVENT_HIBERNATE, true);
 	error = swsusp_arch_suspend();
+	/* Restore control flow magically appears here */
+	restore_processor_state();
 	trace_suspend_resume(TPS("machine_suspend"), PM_EVENT_HIBERNATE, false);
 	if (error)
 		printk(KERN_ERR "PM: Error %d creating hibernation image\n",
 			error);
-	/* Restore control flow magically appears here */
-	restore_processor_state();
 	if (!in_suspend)
 		events_check_enabled = false;
 

From 116fcd882e3b6dc3aeb0fd29f7c93b5628337bc3 Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Tue, 16 Aug 2016 10:46:38 +0100
Subject: [PATCH 69/74] PM / hibernate: Fix rtree_next_node() to avoid walking
 off list ends

commit 924d8696751c4b9e58263bc82efdafcf875596a6 upstream.

rtree_next_node() walks the linked list of leaf nodes to find the next
block of pages in the struct memory_bitmap. If it walks off the end of
the list of nodes, it walks the list of memory zones to find the next
region of memory. If it walks off the end of the list of zones, it
returns false.

This leaves the struct bm_position's node and zone pointers pointing
at their respective struct list_heads in struct mem_zone_bm_rtree.

memory_bm_find_bit() uses struct bm_position's node and zone pointers
to avoid walking lists and trees if the next bit appears in the same
node/zone. It handles these values being stale.

Swap rtree_next_node()s 'step then test' to 'test-next then step',
this means if we reach the end of memory we return false and leave
the node and zone pointers as they were.

This fixes a panic on resume using AMD Seattle with 64K pages:
[    6.868732] Freezing user space processes ... (elapsed 0.000 seconds) done.
[    6.875753] Double checking all user space processes after OOM killer disable... (elapsed 0.000 seconds)
[    6.896453] PM: Using 3 thread(s) for decompression.
[    6.896453] PM: Loading and decompressing image data (5339 pages)...
[    7.318890] PM: Image loading progress:   0%
[    7.323395] Unable to handle kernel paging request at virtual address 00800040
[    7.330611] pgd = ffff000008df0000
[    7.334003] [00800040] *pgd=00000083fffe0003, *pud=00000083fffe0003, *pmd=00000083fffd0003, *pte=0000000000000000
[    7.344266] Internal error: Oops: 96000005 [#1] PREEMPT SMP
[    7.349825] Modules linked in:
[    7.352871] CPU: 2 PID: 1 Comm: swapper/0 Tainted: G        W I     4.8.0-rc1 #4737
[    7.360512] Hardware name: AMD Overdrive/Supercharger/Default string, BIOS ROD1002C 04/08/2016
[    7.369109] task: ffff8003c0220000 task.stack: ffff8003c0280000
[    7.375020] PC is at set_bit+0x18/0x30
[    7.378758] LR is at memory_bm_set_bit+0x24/0x30
[    7.383362] pc : [<ffff00000835bbc8>] lr : [<ffff0000080faf18>] pstate: 60000045
[    7.390743] sp : ffff8003c0283b00
[    7.473551]
[    7.475031] Process swapper/0 (pid: 1, stack limit = 0xffff8003c0280020)
[    7.481718] Stack: (0xffff8003c0283b00 to 0xffff8003c0284000)
[    7.800075] Call trace:
[    7.887097] [<ffff00000835bbc8>] set_bit+0x18/0x30
[    7.891876] [<ffff0000080fb038>] duplicate_memory_bitmap.constprop.38+0x54/0x70
[    7.899172] [<ffff0000080fcc40>] snapshot_write_next+0x22c/0x47c
[    7.905166] [<ffff0000080fe1b4>] load_image_lzo+0x754/0xa88
[    7.910725] [<ffff0000080ff0a8>] swsusp_read+0x144/0x230
[    7.916025] [<ffff0000080fa338>] load_image_and_restore+0x58/0x90
[    7.922105] [<ffff0000080fa660>] software_resume+0x2f0/0x338
[    7.927752] [<ffff000008083350>] do_one_initcall+0x38/0x11c
[    7.933314] [<ffff000008b40cc0>] kernel_init_freeable+0x14c/0x1ec
[    7.939395] [<ffff0000087ce564>] kernel_init+0x10/0xfc
[    7.944520] [<ffff000008082e90>] ret_from_fork+0x10/0x40
[    7.949820] Code: d2800022 8b400c21 f9800031 9ac32043 (c85f7c22)
[    7.955909] ---[ end trace 0024a5986e6ff323 ]---
[    7.960529] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b

Here struct mem_zone_bm_rtree's start_pfn has been returned instead of
struct rtree_node's addr as the node/zone pointers are corrupt after
we walked off the end of the lists during mark_unsafe_pages().

This behaviour was exposed by commit 6dbecfd345a6 ("PM / hibernate:
Simplify mark_unsafe_pages()"), which caused mark_unsafe_pages() to call
duplicate_memory_bitmap(), which uses memory_bm_find_bit() after walking
off the end of the memory bitmap.

Fixes: 3a20cb177961 (PM / Hibernate: Implement position keeping in radix tree)
Signed-off-by: James Morse <james.morse@arm.com>
[ rjw: Subject ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/power/snapshot.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 3a970604308f..f155c62f1f2c 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -765,9 +765,9 @@ static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn)
  */
 static bool rtree_next_node(struct memory_bitmap *bm)
 {
-	bm->cur.node = list_entry(bm->cur.node->list.next,
-				  struct rtree_node, list);
-	if (&bm->cur.node->list != &bm->cur.zone->leaves) {
+	if (!list_is_last(&bm->cur.node->list, &bm->cur.zone->leaves)) {
+		bm->cur.node = list_entry(bm->cur.node->list.next,
+					  struct rtree_node, list);
 		bm->cur.node_pfn += BM_BITS_PER_BLOCK;
 		bm->cur.node_bit  = 0;
 		touch_softlockup_watchdog();
@@ -775,9 +775,9 @@ static bool rtree_next_node(struct memory_bitmap *bm)
 	}
 
 	/* No more nodes, goto next zone */
-	bm->cur.zone = list_entry(bm->cur.zone->list.next,
+	if (!list_is_last(&bm->cur.zone->list, &bm->zones)) {
+		bm->cur.zone = list_entry(bm->cur.zone->list.next,
 				  struct mem_zone_bm_rtree, list);
-	if (&bm->cur.zone->list != &bm->zones) {
 		bm->cur.node = list_entry(bm->cur.zone->leaves.next,
 					  struct rtree_node, list);
 		bm->cur.node_pfn = 0;

From a596ebc52657e2b7840c5d1a8adffe4393156604 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyj.lk@gmail.com>
Date: Tue, 26 Jul 2016 14:49:04 +0000
Subject: [PATCH 70/74] power_supply: tps65217-charger: fix missing
 platform_set_drvdata()

commit 33e7664a0af6e9a516f01014f39737aaa119b6d9 upstream.

Add missing platform_set_drvdata() in tps65217_charger_probe(), otherwise
calling platform_get_drvdata() in remove returns NULL.

This is detected by Coccinelle semantic patch.

Fixes: 3636859b280c ("power_supply: Add support for tps65217-charger")
Signed-off-by: Wei Yongjun <weiyj.lk@gmail.com>
Signed-off-by: Sebastian Reichel <sre@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/power/tps65217_charger.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/power/tps65217_charger.c b/drivers/power/tps65217_charger.c
index d9f56730c735..040a40b4b173 100644
--- a/drivers/power/tps65217_charger.c
+++ b/drivers/power/tps65217_charger.c
@@ -205,6 +205,7 @@ static int tps65217_charger_probe(struct platform_device *pdev)
 	if (!charger)
 		return -ENOMEM;
 
+	platform_set_drvdata(pdev, charger);
 	charger->tps = tps;
 	charger->dev = &pdev->dev;
 

From 97f9aa7c027126e9cb024d95a45b00505198746e Mon Sep 17 00:00:00 2001
From: Sven Van Asbroeck <thesven73@gmail.com>
Date: Fri, 12 Aug 2016 09:10:27 -0400
Subject: [PATCH 71/74] power: supply: max17042_battery: fix model download
 bug.

commit 5381cfb6f0422da24cfa9da35b0433c0415830e0 upstream.

The device's model download function returns the model data as
an array of u32s, which is later compared to the reference
model data. However, since the latter is an array of u16s,
the comparison does not happen correctly, and model verification
fails. This in turn breaks the POR initialization sequence.

Fixes: 39e7213edc4f3 ("max17042_battery: Support regmap to access device's registers")
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Sven Van Asbroeck <TheSven73@googlemail.com>
Reviewed-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Signed-off-by: Sebastian Reichel <sre@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/power/max17042_battery.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/drivers/power/max17042_battery.c b/drivers/power/max17042_battery.c
index 9c65f134d447..da7a75f82489 100644
--- a/drivers/power/max17042_battery.c
+++ b/drivers/power/max17042_battery.c
@@ -457,13 +457,16 @@ static inline void max17042_write_model_data(struct max17042_chip *chip,
 }
 
 static inline void max17042_read_model_data(struct max17042_chip *chip,
-					u8 addr, u32 *data, int size)
+					u8 addr, u16 *data, int size)
 {
 	struct regmap *map = chip->regmap;
 	int i;
+	u32 tmp;
 
-	for (i = 0; i < size; i++)
-		regmap_read(map, addr + i, &data[i]);
+	for (i = 0; i < size; i++) {
+		regmap_read(map, addr + i, &tmp);
+		data[i] = (u16)tmp;
+	}
 }
 
 static inline int max17042_model_data_compare(struct max17042_chip *chip,
@@ -486,7 +489,7 @@ static int max17042_init_model(struct max17042_chip *chip)
 {
 	int ret;
 	int table_size = ARRAY_SIZE(chip->pdata->config_data->cell_char_tbl);
-	u32 *temp_data;
+	u16 *temp_data;
 
 	temp_data = kcalloc(table_size, sizeof(*temp_data), GFP_KERNEL);
 	if (!temp_data)
@@ -501,7 +504,7 @@ static int max17042_init_model(struct max17042_chip *chip)
 	ret = max17042_model_data_compare(
 		chip,
 		chip->pdata->config_data->cell_char_tbl,
-		(u16 *)temp_data,
+		temp_data,
 		table_size);
 
 	max10742_lock_model(chip);
@@ -514,7 +517,7 @@ static int max17042_verify_model_lock(struct max17042_chip *chip)
 {
 	int i;
 	int table_size = ARRAY_SIZE(chip->pdata->config_data->cell_char_tbl);
-	u32 *temp_data;
+	u16 *temp_data;
 	int ret = 0;
 
 	temp_data = kcalloc(table_size, sizeof(*temp_data), GFP_KERNEL);

From 69b10e10f5a8c99d8df4769053be1bd71a97f770 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Mon, 11 Jul 2016 11:46:33 +0300
Subject: [PATCH 72/74] qxl: check for kmap failures

commit f4cceb2affcd1285d4ce498089e8a79f4cd2fa66 upstream.

If kmap fails, it leads to memory corruption.

Fixes: f64122c1f6ad ('drm: add new QXL driver. (v1.4)')
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20160711084633.GA31411@mwanda
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/qxl/qxl_draw.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/qxl/qxl_draw.c b/drivers/gpu/drm/qxl/qxl_draw.c
index 56e1d633875e..6e6c76080d6a 100644
--- a/drivers/gpu/drm/qxl/qxl_draw.c
+++ b/drivers/gpu/drm/qxl/qxl_draw.c
@@ -136,6 +136,8 @@ static int qxl_palette_create_1bit(struct qxl_bo *palette_bo,
 				 * correctly globaly, since that would require
 				 * tracking all of our palettes. */
 	ret = qxl_bo_kmap(palette_bo, (void **)&pal);
+	if (ret)
+		return ret;
 	pal->num_ents = 2;
 	pal->unique = unique++;
 	if (visual == FB_VISUAL_TRUECOLOR || visual == FB_VISUAL_DIRECTCOLOR) {

From 7b251d3404dfd0384274b7493722f65a1ceaa724 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 13 Jul 2016 13:12:34 +0300
Subject: [PATCH 73/74] hostfs: Freeing an ERR_PTR in hostfs_fill_sb_common()

commit 8a545f185145e3c09348cd74326268ecfc6715a3 upstream.

We can't pass error pointers to kfree() or it causes an oops.

Fixes: 52b209f7b848 ('get rid of hostfs_read_inode()')
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Richard Weinberger <richard@nod.at>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/hostfs/hostfs_kern.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 5a7b3229b956..f34d6f5a5aca 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -959,10 +959,11 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
 
 	if (S_ISLNK(root_inode->i_mode)) {
 		char *name = follow_link(host_root_path);
-		if (IS_ERR(name))
+		if (IS_ERR(name)) {
 			err = PTR_ERR(name);
-		else
-			err = read_name(root_inode, name);
+			goto out_put;
+		}
+		err = read_name(root_inode, name);
 		kfree(name);
 		if (err)
 			goto out_put;

From d19e48fe5da7b83d02ed4aec3567f08ae02a168c Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 30 Sep 2016 10:20:43 +0200
Subject: [PATCH 74/74] Linux 4.4.23

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 734579371fad..95421b688f23 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 4
-SUBLEVEL = 22
+SUBLEVEL = 23
 EXTRAVERSION =
 NAME = Blurry Fish Butt