From d68a4e380ff55d2c9fe630ec7cb984bb57eeb6b3 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Sat, 18 Feb 2017 19:00:45 -0500 Subject: [PATCH 001/100] ipv6: release dst on error in ip6_dst_lookup_tail commit 00ea1ceebe0d9f2dc1cc2b7bd575a00100c27869 upstream. If ip6_dst_lookup_tail has acquired a dst and fails the IPv4-mapped check, release the dst before returning an error. Fixes: ec5e3b0a1d41 ("ipv6: Inhibit IPv4-mapped src address on the wire.") Signed-off-by: Willem de Bruijn Acked-by: Eric Dumazet Signed-off-by: David S. Miller Cc: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_output.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 19c0d67ce8c4..7d339fc1057f 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1005,8 +1005,10 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, } #endif if (ipv6_addr_v4mapped(&fl6->saddr) && - !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) - return -EAFNOSUPPORT; + !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) { + err = -EAFNOSUPPORT; + goto out_err_release; + } return 0; From e79948e2d90bfe19267b35850c83c7cd98a78a60 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Tue, 6 Jun 2017 15:56:54 +0200 Subject: [PATCH 002/100] net: don't call strlen on non-terminated string in dev_set_alias() [ Upstream commit c28294b941232931fbd714099798eb7aa7e865d7 ] KMSAN reported a use of uninitialized memory in dev_set_alias(), which was caused by calling strlcpy() (which in turn called strlen()) on the user-supplied non-terminated string. Signed-off-by: Alexander Potapenko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 87b8754f34ac..524d8b28e690 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1246,8 +1246,9 @@ int dev_set_alias(struct net_device *dev, const char *alias, size_t len) if (!new_ifalias) return -ENOMEM; dev->ifalias = new_ifalias; + memcpy(dev->ifalias, alias, len); + dev->ifalias[len] = 0; - strlcpy(dev->ifalias, alias, len+1); return len; } From dedb088a1d189961285d603257dfe5c3a9fb37aa Mon Sep 17 00:00:00 2001 From: Mateusz Jurczyk Date: Wed, 7 Jun 2017 16:14:29 +0200 Subject: [PATCH 003/100] decnet: dn_rtmsg: Improve input length sanitization in dnrmg_receive_user_skb [ Upstream commit dd0da17b209ed91f39872766634ca967c170ada1 ] Verify that the length of the socket buffer is sufficient to cover the nlmsghdr structure before accessing the nlh->nlmsg_len field for further input sanitization. If the client only supplies 1-3 bytes of data in sk_buff, then nlh->nlmsg_len remains partially uninitialized and contains leftover memory from the corresponding kernel allocation. Operating on such data may result in indeterminate evaluation of the nlmsg_len < sizeof(*nlh) expression. The bug was discovered by a runtime instrumentation designed to detect use of uninitialized memory in the kernel. The patch prevents this and other similar tools (e.g. KMSAN) from flagging this behavior in the future. Signed-off-by: Mateusz Jurczyk Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/decnet/netfilter/dn_rtmsg.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index 85f2fdc360c2..29246bc9a7b4 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -102,7 +102,9 @@ static inline void dnrmg_receive_user_skb(struct sk_buff *skb) { struct nlmsghdr *nlh = nlmsg_hdr(skb); - if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len) + if (skb->len < sizeof(*nlh) || + nlh->nlmsg_len < sizeof(*nlh) || + skb->len < nlh->nlmsg_len) return; if (!netlink_capable(skb, CAP_NET_ADMIN)) From e2c3ee003280ce0e6cd02f305dd6c1ced17f286c Mon Sep 17 00:00:00 2001 From: "Mintz, Yuval" Date: Wed, 7 Jun 2017 21:00:33 +0300 Subject: [PATCH 004/100] net: Zero ifla_vf_info in rtnl_fill_vfinfo() [ Upstream commit 0eed9cf58446b28b233388b7f224cbca268b6986 ] Some of the structure's fields are not initialized by the rtnetlink. If driver doesn't set those in ndo_get_vf_config(), they'd leak memory to user. Signed-off-by: Yuval Mintz CC: Michal Schmidt Reviewed-by: Greg Rose Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/rtnetlink.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index d43544ce7550..4e04c415d7d4 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1089,6 +1089,8 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, struct ifla_vf_mac vf_mac; struct ifla_vf_info ivi; + memset(&ivi, 0, sizeof(ivi)); + /* Not all SR-IOV capable drivers support the * spoofcheck and "RSS query enable" query. Preset to * -1 so the user space tool can detect that the driver @@ -1097,7 +1099,6 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, ivi.spoofchk = -1; ivi.rss_query_en = -1; ivi.trusted = -1; - memset(ivi.mac, 0, sizeof(ivi.mac)); /* The default value for VF link state is "auto" * IFLA_VF_LINK_STATE_AUTO which equals zero */ From 0fc0fad07722e7ff1e4322e2155b8cd4d963e42a Mon Sep 17 00:00:00 2001 From: Mateusz Jurczyk Date: Thu, 8 Jun 2017 11:13:36 +0200 Subject: [PATCH 005/100] af_unix: Add sockaddr length checks before accessing sa_family in bind and connect handlers [ Upstream commit defbcf2decc903a28d8398aa477b6881e711e3ea ] Verify that the caller-provided sockaddr structure is large enough to contain the sa_family field, before accessing it in bind() and connect() handlers of the AF_UNIX socket. Since neither syscall enforces a minimum size of the corresponding memory region, very short sockaddrs (zero or one byte long) result in operating on uninitialized memory while referencing .sa_family. Signed-off-by: Mateusz Jurczyk Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/unix/af_unix.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index b2e934ff2448..e05ec54ac53f 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -997,7 +997,8 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) struct path path = { NULL, NULL }; err = -EINVAL; - if (sunaddr->sun_family != AF_UNIX) + if (addr_len < offsetofend(struct sockaddr_un, sun_family) || + sunaddr->sun_family != AF_UNIX) goto out; if (addr_len == sizeof(short)) { @@ -1108,6 +1109,10 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, unsigned int hash; int err; + err = -EINVAL; + if (alen < offsetofend(struct sockaddr, sa_family)) + goto out; + if (addr->sa_family != AF_UNSPEC) { err = unix_mkname(sunaddr, alen, &hash); if (err < 0) From 030a77d2f904a3a2cdf9690744d06d075fa2f512 Mon Sep 17 00:00:00 2001 From: Krister Johansen Date: Thu, 8 Jun 2017 13:12:38 -0700 Subject: [PATCH 006/100] Fix an intermittent pr_emerg warning about lo becoming free. [ Upstream commit f186ce61bb8235d80068c390dc2aad7ca427a4c2 ] It looks like this: Message from syslogd@flamingo at Apr 26 00:45:00 ... kernel:unregister_netdevice: waiting for lo to become free. Usage count = 4 They seem to coincide with net namespace teardown. The message is emitted by netdev_wait_allrefs(). Forced a kdump in netdev_run_todo, but found that the refcount on the lo device was already 0 at the time we got to the panic. Used bcc to check the blocking in netdev_run_todo. The only places where we're off cpu there are in the rcu_barrier() and msleep() calls. That behavior is expected. The msleep time coincides with the amount of time we spend waiting for the refcount to reach zero; the rcu_barrier() wait times are not excessive. After looking through the list of callbacks that the netdevice notifiers invoke in this path, it appears that the dst_dev_event is the most interesting. The dst_ifdown path places a hold on the loopback_dev as part of releasing the dev associated with the original dst cache entry. Most of our notifier callbacks are straight-forward, but this one a) looks complex, and b) places a hold on the network interface in question. I constructed a new bcc script that watches various events in the liftime of a dst cache entry. Note that dst_ifdown will take a hold on the loopback device until the invalidated dst entry gets freed. [ __dst_free] on DST: ffff883ccabb7900 IF tap1008300eth0 invoked at 1282115677036183 __dst_free rcu_nocb_kthread kthread ret_from_fork Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/dst.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/net/core/dst.c b/net/core/dst.c index d7ad628bf64e..e72d706f8d0c 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -462,6 +462,20 @@ static int dst_dev_event(struct notifier_block *this, unsigned long event, spin_lock_bh(&dst_garbage.lock); dst = dst_garbage.list; dst_garbage.list = NULL; + /* The code in dst_ifdown places a hold on the loopback device. + * If the gc entry processing is set to expire after a lengthy + * interval, this hold can cause netdev_wait_allrefs() to hang + * out and wait for a long time -- until the the loopback + * interface is released. If we're really unlucky, it'll emit + * pr_emerg messages to console too. Reset the interval here, + * so dst cleanups occur in a more timely fashion. + */ + if (dst_garbage.timer_inc > DST_GC_INC) { + dst_garbage.timer_inc = DST_GC_INC; + dst_garbage.timer_expires = DST_GC_MIN; + mod_delayed_work(system_wq, &dst_gc_work, + dst_garbage.timer_expires); + } spin_unlock_bh(&dst_garbage.lock); if (last) From 7de53eed6fda3354ef8f2d3748d0ecfef88e0475 Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Sat, 10 Jun 2017 16:49:39 +0800 Subject: [PATCH 007/100] net: caif: Fix a sleep-in-atomic bug in cfpkt_create_pfx [ Upstream commit f146e872eb12ebbe92d8e583b2637e0741440db3 ] The kernel may sleep under a rcu read lock in cfpkt_create_pfx, and the function call path is: cfcnfg_linkup_rsp (acquire the lock by rcu_read_lock) cfctrl_linkdown_req cfpkt_create cfpkt_create_pfx alloc_skb(GFP_KERNEL) --> may sleep cfserl_receive (acquire the lock by rcu_read_lock) cfpkt_split cfpkt_create_pfx alloc_skb(GFP_KERNEL) --> may sleep There is "in_interrupt" in cfpkt_create_pfx to decide use "GFP_KERNEL" or "GFP_ATOMIC". In this situation, "GFP_KERNEL" is used because the function is called under a rcu read lock, instead in interrupt. To fix it, only "GFP_ATOMIC" is used in cfpkt_create_pfx. Signed-off-by: Jia-Ju Bai Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/caif/cfpkt_skbuff.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c index 59ce1fcc220c..71b6ab240dea 100644 --- a/net/caif/cfpkt_skbuff.c +++ b/net/caif/cfpkt_skbuff.c @@ -81,11 +81,7 @@ static struct cfpkt *cfpkt_create_pfx(u16 len, u16 pfx) { struct sk_buff *skb; - if (likely(in_interrupt())) - skb = alloc_skb(len + pfx, GFP_ATOMIC); - else - skb = alloc_skb(len + pfx, GFP_KERNEL); - + skb = alloc_skb(len + pfx, GFP_ATOMIC); if (unlikely(skb == NULL)) return NULL; From ee8d5f9fd17e6169ac478a39308f0f94a98a4a67 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Mon, 12 Jun 2017 09:52:26 -0700 Subject: [PATCH 008/100] igmp: acquire pmc lock for ip_mc_clear_src() [ Upstream commit c38b7d327aafd1e3ad7ff53eefac990673b65667 ] Andrey reported a use-after-free in add_grec(): for (psf = *psf_list; psf; psf = psf_next) { ... psf_next = psf->sf_next; where the struct ip_sf_list's were already freed by: kfree+0xe8/0x2b0 mm/slub.c:3882 ip_mc_clear_src+0x69/0x1c0 net/ipv4/igmp.c:2078 ip_mc_dec_group+0x19a/0x470 net/ipv4/igmp.c:1618 ip_mc_drop_socket+0x145/0x230 net/ipv4/igmp.c:2609 inet_release+0x4e/0x1c0 net/ipv4/af_inet.c:411 sock_release+0x8d/0x1e0 net/socket.c:597 sock_close+0x16/0x20 net/socket.c:1072 This happens because we don't hold pmc->lock in ip_mc_clear_src() and a parallel mr_ifc_timer timer could jump in and access them. The RCU lock is there but it is merely for pmc itself, this spinlock could actually ensure we don't access them in parallel. Thanks to Eric and Long for discussion on this bug. Reported-by: Andrey Konovalov Cc: Eric Dumazet Cc: Xin Long Signed-off-by: Cong Wang Reviewed-by: Xin Long Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/igmp.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 17adfdaf5795..e71901986790 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2026,21 +2026,26 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, static void ip_mc_clear_src(struct ip_mc_list *pmc) { - struct ip_sf_list *psf, *nextpsf; + struct ip_sf_list *psf, *nextpsf, *tomb, *sources; - for (psf = pmc->tomb; psf; psf = nextpsf) { - nextpsf = psf->sf_next; - kfree(psf); - } + spin_lock_bh(&pmc->lock); + tomb = pmc->tomb; pmc->tomb = NULL; - for (psf = pmc->sources; psf; psf = nextpsf) { - nextpsf = psf->sf_next; - kfree(psf); - } + sources = pmc->sources; pmc->sources = NULL; pmc->sfmode = MCAST_EXCLUDE; pmc->sfcount[MCAST_INCLUDE] = 0; pmc->sfcount[MCAST_EXCLUDE] = 1; + spin_unlock_bh(&pmc->lock); + + for (psf = tomb; psf; psf = nextpsf) { + nextpsf = psf->sf_next; + kfree(psf); + } + for (psf = sources; psf; psf = nextpsf) { + nextpsf = psf->sf_next; + kfree(psf); + } } /* Join a multicast group From 4feb6121aa5e664eeb101c56d7e9adfb8e462c9a Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Tue, 20 Jun 2017 10:46:27 -0700 Subject: [PATCH 009/100] igmp: add a missing spin_lock_init() [ Upstream commit b4846fc3c8559649277e3e4e6b5cec5348a8d208 ] Andrey reported a lockdep warning on non-initialized spinlock: INFO: trying to register non-static key. the code is fine but needs lockdep annotation. turning off the locking correctness validator. CPU: 1 PID: 4099 Comm: a.out Not tainted 4.12.0-rc6+ #9 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:16 dump_stack+0x292/0x395 lib/dump_stack.c:52 register_lock_class+0x717/0x1aa0 kernel/locking/lockdep.c:755 ? 0xffffffffa0000000 __lock_acquire+0x269/0x3690 kernel/locking/lockdep.c:3255 lock_acquire+0x22d/0x560 kernel/locking/lockdep.c:3855 __raw_spin_lock_bh ./include/linux/spinlock_api_smp.h:135 _raw_spin_lock_bh+0x36/0x50 kernel/locking/spinlock.c:175 spin_lock_bh ./include/linux/spinlock.h:304 ip_mc_clear_src+0x27/0x1e0 net/ipv4/igmp.c:2076 igmpv3_clear_delrec+0xee/0x4f0 net/ipv4/igmp.c:1194 ip_mc_destroy_dev+0x4e/0x190 net/ipv4/igmp.c:1736 We miss a spin_lock_init() in igmpv3_add_delrec(), probably because previously we never use it on this code path. Since we already unlink it from the global mc_tomb list, it is probably safe not to acquire this spinlock here. It does not harm to have it although, to avoid conditional locking. Fixes: c38b7d327aaf ("igmp: acquire pmc lock for ip_mc_clear_src()") Reported-by: Andrey Konovalov Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/igmp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index e71901986790..3809d523d012 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1102,6 +1102,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im) pmc = kzalloc(sizeof(*pmc), GFP_KERNEL); if (!pmc) return; + spin_lock_init(&pmc->lock); spin_lock_bh(&im->lock); pmc->interface = im->interface; in_dev_hold(in_dev); From 0d1effe95ebe07848677e78162364aa80443ab69 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 15 Jun 2017 16:33:58 +0800 Subject: [PATCH 010/100] ipv6: fix calling in6_ifa_hold incorrectly for dad work [ Upstream commit f8a894b218138888542a5058d0e902378fd0d4ec ] Now when starting the dad work in addrconf_mod_dad_work, if the dad work is idle and queued, it needs to hold ifa. The problem is there's one gap in [1], during which if the pending dad work is removed elsewhere. It will miss to hold ifa, but the dad word is still idea and queue. if (!delayed_work_pending(&ifp->dad_work)) in6_ifa_hold(ifp); <--------------[1] mod_delayed_work(addrconf_wq, &ifp->dad_work, delay); An use-after-free issue can be caused by this. Chen Wei found this issue when WARN_ON(!hlist_unhashed(&ifp->addr_lst)) in net6_ifa_finish_destroy was hit because of it. As Hannes' suggestion, this patch is to fix it by holding ifa first in addrconf_mod_dad_work, then calling mod_delayed_work and putting ifa if the dad_work is already in queue. Note that this patch did not choose to fix it with: if (!mod_delayed_work(delay)) in6_ifa_hold(ifp); As with it, when delay == 0, dad_work would be scheduled immediately, all addrconf_mod_dad_work(0) callings had to be moved under ifp->lock. Reported-by: Wei Chen Suggested-by: Hannes Frederic Sowa Acked-by: Hannes Frederic Sowa Signed-off-by: Xin Long Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/addrconf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 8cf3fc7c2932..03dadbf6cc5e 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -291,9 +291,9 @@ static void addrconf_mod_rs_timer(struct inet6_dev *idev, static void addrconf_mod_dad_work(struct inet6_ifaddr *ifp, unsigned long delay) { - if (!delayed_work_pending(&ifp->dad_work)) - in6_ifa_hold(ifp); - mod_delayed_work(addrconf_wq, &ifp->dad_work, delay); + in6_ifa_hold(ifp); + if (mod_delayed_work(addrconf_wq, &ifp->dad_work, delay)) + in6_ifa_put(ifp); } static int snmp6_alloc_dev(struct inet6_dev *idev) From 93911697a9f205f1e8f84c4760b754d4d04459da Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 8 Jun 2017 11:33:16 -0500 Subject: [PATCH 011/100] net/mlx5: Wait for FW readiness before initializing command interface [ Upstream commit 6c780a0267b8a1075f40b39851132eeaefefcff5 ] Before attempting to initialize the command interface we must wait till the fw_initializing bit is clear. If we fail to meet this condition the hardware will drop our configuration, specifically the descriptors page address. This scenario can happen when the firmware is still executing an FLR flow and did not finish yet so the driver needs to wait for that to finish. Fixes: e3297246c2c8 ('net/mlx5_core: Wait for FW readiness on startup') Signed-off-by: Eli Cohen Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 1e611980cf99..f5c1f4acc57b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -153,8 +153,9 @@ static struct mlx5_profile profile[] = { }, }; -#define FW_INIT_TIMEOUT_MILI 2000 -#define FW_INIT_WAIT_MS 2 +#define FW_INIT_TIMEOUT_MILI 2000 +#define FW_INIT_WAIT_MS 2 +#define FW_PRE_INIT_TIMEOUT_MILI 10000 static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili) { @@ -934,6 +935,15 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) */ dev->state = MLX5_DEVICE_STATE_UP; + /* wait for firmware to accept initialization segments configurations + */ + err = wait_fw_init(dev, FW_PRE_INIT_TIMEOUT_MILI); + if (err) { + dev_err(&dev->pdev->dev, "Firmware over %d MS in pre-initializing state, aborting\n", + FW_PRE_INIT_TIMEOUT_MILI); + goto out; + } + err = mlx5_cmd_init(dev); if (err) { dev_err(&pdev->dev, "Failed initializing command interface, aborting\n"); From f50f2e0cb1a3733e8846d8d3a0bee3ab24378874 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Fri, 16 Jun 2017 10:46:37 -0700 Subject: [PATCH 012/100] decnet: always not take dst->__refcnt when inserting dst into hash table [ Upstream commit 76371d2e3ad1f84426a30ebcd8c3b9b98f4c724f ] In the existing dn_route.c code, dn_route_output_slow() takes dst->__refcnt before calling dn_insert_route() while dn_route_input_slow() does not take dst->__refcnt before calling dn_insert_route(). This makes the whole routing code very buggy. In dn_dst_check_expire(), dnrt_free() is called when rt expires. This makes the routes inserted by dn_route_output_slow() not able to be freed as the refcnt is not released. In dn_dst_gc(), dnrt_drop() is called to release rt which could potentially cause the dst->__refcnt to be dropped to -1. In dn_run_flush(), dst_free() is called to release all the dst. Again, it makes the dst inserted by dn_route_output_slow() not able to be released and also, it does not wait on the rcu and could potentially cause crash in the path where other users still refer to this dst. This patch makes sure both input and output path do not take dst->__refcnt before calling dn_insert_route() and also makes sure dnrt_free()/dst_free() is called when removing dst from the hash table. The only difference between those 2 calls is that dnrt_free() waits on the rcu while dst_free() does not. Signed-off-by: Wei Wang Acked-by: Martin KaFai Lau Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/decnet/dn_route.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index b1dc096d22f8..403593bd2b83 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -188,12 +188,6 @@ static inline void dnrt_free(struct dn_route *rt) call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free); } -static inline void dnrt_drop(struct dn_route *rt) -{ - dst_release(&rt->dst); - call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free); -} - static void dn_dst_check_expire(unsigned long dummy) { int i; @@ -248,7 +242,7 @@ static int dn_dst_gc(struct dst_ops *ops) } *rtp = rt->dst.dn_next; rt->dst.dn_next = NULL; - dnrt_drop(rt); + dnrt_free(rt); break; } spin_unlock_bh(&dn_rt_hash_table[i].lock); @@ -350,7 +344,7 @@ static int dn_insert_route(struct dn_route *rt, unsigned int hash, struct dn_rou dst_use(&rth->dst, now); spin_unlock_bh(&dn_rt_hash_table[hash].lock); - dnrt_drop(rt); + dst_free(&rt->dst); *rp = rth; return 0; } @@ -380,7 +374,7 @@ static void dn_run_flush(unsigned long dummy) for(; rt; rt = next) { next = rcu_dereference_raw(rt->dst.dn_next); RCU_INIT_POINTER(rt->dst.dn_next, NULL); - dst_free((struct dst_entry *)rt); + dnrt_free(rt); } nothing_to_declare: @@ -1187,7 +1181,7 @@ make_route: if (dev_out->flags & IFF_LOOPBACK) flags |= RTCF_LOCAL; - rt = dst_alloc(&dn_dst_ops, dev_out, 1, DST_OBSOLETE_NONE, DST_HOST); + rt = dst_alloc(&dn_dst_ops, dev_out, 0, DST_OBSOLETE_NONE, DST_HOST); if (rt == NULL) goto e_nobufs; From 1f8bb6053249dbe9aeefb6bae6901d6cde4586fa Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Fri, 16 Jun 2017 15:00:02 +0800 Subject: [PATCH 013/100] net: 8021q: Fix one possible panic caused by BUG_ON in free_netdev [ Upstream commit 9745e362add89432d2c951272a99b0a5fe4348a9 ] The register_vlan_device would invoke free_netdev directly, when register_vlan_dev failed. It would trigger the BUG_ON in free_netdev if the dev was already registered. In this case, the netdev would be freed in netdev_run_todo later. So add one condition check now. Only when dev is not registered, then free it directly. The following is the part coredump when netdev_upper_dev_link failed in register_vlan_dev. I removed the lines which are too long. [ 411.237457] ------------[ cut here ]------------ [ 411.237458] kernel BUG at net/core/dev.c:7998! [ 411.237484] invalid opcode: 0000 [#1] SMP [ 411.237705] [last unloaded: 8021q] [ 411.237718] CPU: 1 PID: 12845 Comm: vconfig Tainted: G E 4.12.0-rc5+ #6 [ 411.237737] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 07/02/2015 [ 411.237764] task: ffff9cbeb6685580 task.stack: ffffa7d2807d8000 [ 411.237782] RIP: 0010:free_netdev+0x116/0x120 [ 411.237794] RSP: 0018:ffffa7d2807dbdb0 EFLAGS: 00010297 [ 411.237808] RAX: 0000000000000002 RBX: ffff9cbeb6ba8fd8 RCX: 0000000000001878 [ 411.237826] RDX: 0000000000000001 RSI: 0000000000000282 RDI: 0000000000000000 [ 411.237844] RBP: ffffa7d2807dbdc8 R08: 0002986100029841 R09: 0002982100029801 [ 411.237861] R10: 0004000100029980 R11: 0004000100029980 R12: ffff9cbeb6ba9000 [ 411.238761] R13: ffff9cbeb6ba9060 R14: ffff9cbe60f1a000 R15: ffff9cbeb6ba9000 [ 411.239518] FS: 00007fb690d81700(0000) GS:ffff9cbebb640000(0000) knlGS:0000000000000000 [ 411.239949] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 411.240454] CR2: 00007f7115624000 CR3: 0000000077cdf000 CR4: 00000000003406e0 [ 411.240936] Call Trace: [ 411.241462] vlan_ioctl_handler+0x3f1/0x400 [8021q] [ 411.241910] sock_ioctl+0x18b/0x2c0 [ 411.242394] do_vfs_ioctl+0xa1/0x5d0 [ 411.242853] ? sock_alloc_file+0xa6/0x130 [ 411.243465] SyS_ioctl+0x79/0x90 [ 411.243900] entry_SYSCALL_64_fastpath+0x1e/0xa9 [ 411.244425] RIP: 0033:0x7fb69089a357 [ 411.244863] RSP: 002b:00007ffcd04e0fc8 EFLAGS: 00000202 ORIG_RAX: 0000000000000010 [ 411.245445] RAX: ffffffffffffffda RBX: 00007ffcd04e2884 RCX: 00007fb69089a357 [ 411.245903] RDX: 00007ffcd04e0fd0 RSI: 0000000000008983 RDI: 0000000000000003 [ 411.246527] RBP: 00007ffcd04e0fd0 R08: 0000000000000000 R09: 1999999999999999 [ 411.246976] R10: 000000000000053f R11: 0000000000000202 R12: 0000000000000004 [ 411.247414] R13: 00007ffcd04e1128 R14: 00007ffcd04e2888 R15: 0000000000000001 [ 411.249129] RIP: free_netdev+0x116/0x120 RSP: ffffa7d2807dbdb0 Signed-off-by: Gao Feng Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/8021q/vlan.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index ad8d6e6b87ca..e20ae2d3c498 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -278,7 +278,8 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id) return 0; out_free_newdev: - free_netdev(new_dev); + if (new_dev->reg_state == NETREG_UNINITIALIZED) + free_netdev(new_dev); return err; } From 9de17701a3bc0f53d29468d064e224337f94d127 Mon Sep 17 00:00:00 2001 From: Bert Kenward Date: Fri, 16 Jun 2017 09:45:08 +0100 Subject: [PATCH 014/100] sfc: provide dummy definitions of vswitch functions efx_probe_all() calls efx->type->vswitching_probe during probe. For SFC4000 (Falcon) NICs this function is not defined, leading to a BUG with the top of the call stack similar to: ? efx_pci_probe_main+0x29a/0x830 efx_pci_probe+0x7d3/0xe70 vswitching_restore and vswitching_remove also need to be defined. Fixed in mainline by: commit 5a6681e22c14 ("sfc: separate out SFC4000 ("Falcon") support into new sfc-falcon driver") Fixes: 6d8aaaf6f798 ("sfc: create VEB vswitch and vport above default firmware setup") Signed-off-by: Bert Kenward Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/sfc/falcon.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/ethernet/sfc/falcon.c b/drivers/net/ethernet/sfc/falcon.c index d790cb8d9db3..8e832ba8ab24 100644 --- a/drivers/net/ethernet/sfc/falcon.c +++ b/drivers/net/ethernet/sfc/falcon.c @@ -2796,6 +2796,11 @@ const struct efx_nic_type falcon_a1_nic_type = { .timer_period_max = 1 << FRF_AB_TC_TIMER_VAL_WIDTH, .offload_features = NETIF_F_IP_CSUM, .mcdi_max_ver = -1, +#ifdef CONFIG_SFC_SRIOV + .vswitching_probe = efx_port_dummy_op_int, + .vswitching_restore = efx_port_dummy_op_int, + .vswitching_remove = efx_port_dummy_op_void, +#endif }; const struct efx_nic_type falcon_b0_nic_type = { @@ -2897,4 +2902,9 @@ const struct efx_nic_type falcon_b0_nic_type = { .offload_features = NETIF_F_IP_CSUM | NETIF_F_RXHASH | NETIF_F_NTUPLE, .mcdi_max_ver = -1, .max_rx_ip_filters = FR_BZ_RX_FILTER_TBL0_ROWS, +#ifdef CONFIG_SFC_SRIOV + .vswitching_probe = efx_port_dummy_op_int, + .vswitching_restore = efx_port_dummy_op_int, + .vswitching_remove = efx_port_dummy_op_void, +#endif }; From 640a09c64ec6bd0b6e387bc60a0ed45397e1381a Mon Sep 17 00:00:00 2001 From: Serhey Popovych Date: Tue, 20 Jun 2017 13:29:25 +0300 Subject: [PATCH 015/100] ipv6: Do not leak throw route references [ Upstream commit 07f615574f8ac499875b21c1142f26308234a92c ] While commit 73ba57bfae4a ("ipv6: fix backtracking for throw routes") does good job on error propagation to the fib_rules_lookup() in fib rules core framework that also corrects throw routes handling, it does not solve route reference leakage problem happened when we return -EAGAIN to the fib_rules_lookup() and leave routing table entry referenced in arg->result. If rule with matched throw route isn't last matched in the list we overwrite arg->result losing reference on throw route stored previously forever. We also partially revert commit ab997ad40839 ("ipv6: fix the incorrect return value of throw route") since we never return routing table entry with dst.error == -EAGAIN when CONFIG_IPV6_MULTIPLE_TABLES is on. Also there is no point to check for RTF_REJECT flag since it is always set throw route. Fixes: 73ba57bfae4a ("ipv6: fix backtracking for throw routes") Signed-off-by: Serhey Popovych Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/fib6_rules.c | 22 ++++++---------------- net/ipv6/ip6_fib.c | 3 +-- 2 files changed, 7 insertions(+), 18 deletions(-) diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index ed33abf57abd..9ac4f0cef27d 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -32,7 +32,6 @@ struct fib6_rule { struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, int flags, pol_lookup_t lookup) { - struct rt6_info *rt; struct fib_lookup_arg arg = { .lookup_ptr = lookup, .flags = FIB_LOOKUP_NOREF, @@ -41,21 +40,11 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, fib_rules_lookup(net->ipv6.fib6_rules_ops, flowi6_to_flowi(fl6), flags, &arg); - rt = arg.result; + if (arg.result) + return arg.result; - if (!rt) { - dst_hold(&net->ipv6.ip6_null_entry->dst); - return &net->ipv6.ip6_null_entry->dst; - } - - if (rt->rt6i_flags & RTF_REJECT && - rt->dst.error == -EAGAIN) { - ip6_rt_put(rt); - rt = net->ipv6.ip6_null_entry; - dst_hold(&rt->dst); - } - - return &rt->dst; + dst_hold(&net->ipv6.ip6_null_entry->dst); + return &net->ipv6.ip6_null_entry->dst; } static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, @@ -116,7 +105,8 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, flp6->saddr = saddr; } err = rt->dst.error; - goto out; + if (err != -EAGAIN) + goto out; } again: ip6_rt_put(rt); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 85bf86458706..1ac06723f0d7 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -290,8 +290,7 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, struct rt6_info *rt; rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags); - if (rt->rt6i_flags & RTF_REJECT && - rt->dst.error == -EAGAIN) { + if (rt->dst.error == -EAGAIN) { ip6_rt_put(rt); rt = net->ipv6.ip6_null_entry; dst_hold(&rt->dst); From 095a41128cb6df913e24f95e43c57f378b5088bc Mon Sep 17 00:00:00 2001 From: Serhey Popovych Date: Tue, 20 Jun 2017 14:35:23 +0300 Subject: [PATCH 016/100] rtnetlink: add IFLA_GROUP to ifla_policy [ Upstream commit db833d40ad3263b2ee3b59a1ba168bb3cfed8137 ] Network interface groups support added while ago, however there is no IFLA_GROUP attribute description in policy and netlink message size calculations until now. Add IFLA_GROUP attribute to the policy. Fixes: cbda10fa97d7 ("net_device: add support for network device groups") Signed-off-by: Serhey Popovych Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/rtnetlink.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 4e04c415d7d4..2ec5324a7ff7 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -897,6 +897,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(1) /* IFLA_LINKMODE */ + nla_total_size(4) /* IFLA_CARRIER_CHANGES */ + nla_total_size(4) /* IFLA_LINK_NETNSID */ + + nla_total_size(4) /* IFLA_GROUP */ + nla_total_size(ext_filter_mask & RTEXT_FILTER_VF ? 4 : 0) /* IFLA_NUM_VF */ + rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */ @@ -1371,6 +1372,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_PHYS_SWITCH_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN }, [IFLA_LINK_NETNSID] = { .type = NLA_S32 }, [IFLA_PROTO_DOWN] = { .type = NLA_U8 }, + [IFLA_GROUP] = { .type = NLA_U32 }, }; static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { From 234e649840d191379cd132d89f4b01a2495cfcc3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 3 Apr 2017 10:55:11 -0700 Subject: [PATCH 017/100] netfilter: xt_TCPMSS: add more sanity tests on tcph->doff commit 2638fd0f92d4397884fd991d8f4925cb3f081901 upstream. Denys provided an awesome KASAN report pointing to an use after free in xt_TCPMSS I have provided three patches to fix this issue, either in xt_TCPMSS or in xt_tcpudp.c. It seems xt_TCPMSS patch has the smallest possible impact. Signed-off-by: Eric Dumazet Reported-by: Denys Fedoryshchenko Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/xt_TCPMSS.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index b7c43def0dc6..00f798b20b20 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -104,7 +104,7 @@ tcpmss_mangle_packet(struct sk_buff *skb, tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff); tcp_hdrlen = tcph->doff * 4; - if (len < tcp_hdrlen) + if (len < tcp_hdrlen || tcp_hdrlen < sizeof(struct tcphdr)) return -1; if (info->mss == XT_TCPMSS_CLAMP_PMTU) { @@ -156,6 +156,10 @@ tcpmss_mangle_packet(struct sk_buff *skb, if (len > tcp_hdrlen) return 0; + /* tcph->doff has 4 bits, do not wrap it to 0 */ + if (tcp_hdrlen >= 15 * 4) + return 0; + /* * MSS Option not found ?! add it.. */ From e052be55a598eed299af2ec01a8835696bdd64c8 Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Thu, 11 May 2017 18:56:38 +0200 Subject: [PATCH 018/100] netfilter: synproxy: fix conntrackd interaction commit 87e94dbc210a720a34be5c1174faee5c84be963e upstream. This patch fixes the creation of connection tracking entry from netlink when synproxy is used. It was missing the addition of the synproxy extension. This was causing kernel crashes when a conntrack entry created by conntrackd was used after the switch of traffic from active node to the passive node. Signed-off-by: Eric Leblond Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_conntrack_netlink.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 9f5272968abb..e565b2becb14 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -45,6 +45,8 @@ #include #include #include +#include +#include #ifdef CONFIG_NF_NAT_NEEDED #include #include @@ -1798,6 +1800,8 @@ ctnetlink_create_conntrack(struct net *net, nf_ct_tstamp_ext_add(ct, GFP_ATOMIC); nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC); nf_ct_labels_ext_add(ct); + nfct_seqadj_ext_add(ct); + nfct_synproxy_ext_add(ct); /* we must add conntrack extensions before confirmation. */ ct->status |= IPS_CONFIRMED; From 5424427100216e9ef949623ab781a33152da2cb8 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Mon, 6 Mar 2017 22:29:14 +0800 Subject: [PATCH 019/100] NFSv4: fix a reference leak caused WARNING messages commit 366a1569bff3fe14abfdf9285e31e05e091745f5 upstream. Because nfs4_opendata_access() has close the state when access is denied, so the state isn't leak. Rather than revert the commit a974deee47, I'd like clean the strange state close. [ 1615.094218] ------------[ cut here ]------------ [ 1615.094607] WARNING: CPU: 0 PID: 23702 at lib/list_debug.c:31 __list_add_valid+0x8e/0xa0 [ 1615.094913] list_add double add: new=ffff9d7901d9f608, prev=ffff9d7901d9f608, next=ffff9d7901ee8dd0. [ 1615.095458] Modules linked in: nfsv4(E) nfs(E) nfsd(E) tun bridge stp llc fuse ip_set nfnetlink vmw_vsock_vmci_transport vsock f2fs snd_seq_midi snd_seq_midi_event fscrypto coretemp ppdev crct10dif_pclmul crc32_pclmul ghash_clmulni_intel intel_rapl_perf vmw_balloon snd_ens1371 joydev gameport snd_ac97_codec ac97_bus snd_seq snd_pcm snd_rawmidi snd_timer snd_seq_device snd soundcore nfit parport_pc parport acpi_cpufreq tpm_tis tpm_tis_core tpm i2c_piix4 vmw_vmci shpchp auth_rpcgss nfs_acl lockd(E) grace sunrpc(E) xfs libcrc32c vmwgfx drm_kms_helper ttm drm crc32c_intel mptspi e1000 serio_raw scsi_transport_spi mptscsih mptbase ata_generic pata_acpi fjes [last unloaded: nfs] [ 1615.097663] CPU: 0 PID: 23702 Comm: fstest Tainted: G W E 4.11.0-rc1+ #517 [ 1615.098015] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 07/02/2015 [ 1615.098807] Call Trace: [ 1615.099183] dump_stack+0x63/0x86 [ 1615.099578] __warn+0xcb/0xf0 [ 1615.099967] warn_slowpath_fmt+0x5f/0x80 [ 1615.100370] __list_add_valid+0x8e/0xa0 [ 1615.100760] nfs4_put_state_owner+0x75/0xc0 [nfsv4] [ 1615.101136] __nfs4_close+0x109/0x140 [nfsv4] [ 1615.101524] nfs4_close_state+0x15/0x20 [nfsv4] [ 1615.101949] nfs4_close_context+0x21/0x30 [nfsv4] [ 1615.102691] __put_nfs_open_context+0xb8/0x110 [nfs] [ 1615.103155] put_nfs_open_context+0x10/0x20 [nfs] [ 1615.103586] nfs4_file_open+0x13b/0x260 [nfsv4] [ 1615.103978] do_dentry_open+0x20a/0x2f0 [ 1615.104369] ? nfs4_copy_file_range+0x30/0x30 [nfsv4] [ 1615.104739] vfs_open+0x4c/0x70 [ 1615.105106] ? may_open+0x5a/0x100 [ 1615.105469] path_openat+0x623/0x1420 [ 1615.105823] do_filp_open+0x91/0x100 [ 1615.106174] ? __alloc_fd+0x3f/0x170 [ 1615.106568] do_sys_open+0x130/0x220 [ 1615.106920] ? __put_cred+0x3d/0x50 [ 1615.107256] SyS_open+0x1e/0x20 [ 1615.107588] entry_SYSCALL_64_fastpath+0x1a/0xa9 [ 1615.107922] RIP: 0033:0x7fab599069b0 [ 1615.108247] RSP: 002b:00007ffcf0600d78 EFLAGS: 00000246 ORIG_RAX: 0000000000000002 [ 1615.108575] RAX: ffffffffffffffda RBX: 00007fab59bcfae0 RCX: 00007fab599069b0 [ 1615.108896] RDX: 0000000000000200 RSI: 0000000000000200 RDI: 00007ffcf060255e [ 1615.109211] RBP: 0000000000040010 R08: 0000000000000000 R09: 0000000000000016 [ 1615.109515] R10: 00000000000006a1 R11: 0000000000000246 R12: 0000000000041000 [ 1615.109806] R13: 0000000000040010 R14: 0000000000001000 R15: 0000000000002710 [ 1615.110152] ---[ end trace 96ed63b1306bf2f3 ]--- Fixes: a974deee47 ("NFSv4: Fix memory and state leak in...") Signed-off-by: Kinglong Mee Signed-off-by: Anna Schumaker Cc: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4proc.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 4e3679b25b9b..8e425f2c5ddd 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2188,8 +2188,6 @@ static int nfs4_opendata_access(struct rpc_cred *cred, if ((mask & ~cache.mask & (MAY_READ | MAY_EXEC)) == 0) return 0; - /* even though OPEN succeeded, access is denied. Close the file */ - nfs4_close_state(state, fmode); return -EACCES; } From a9e5044b6804bd50b5eef8c0bc7cee7179fb852c Mon Sep 17 00:00:00 2001 From: Russell Currey Date: Fri, 17 Feb 2017 14:33:01 +1100 Subject: [PATCH 020/100] drm/ast: Handle configuration without P2A bridge commit 71f677a91046599ece96ebab21df956ce909c456 upstream. The ast driver configures a window to enable access into BMC memory space in order to read some configuration registers. If this window is disabled, which it can be from the BMC side, the ast driver can't function. Closing this window is a necessity for security if a machine's host side and BMC side are controlled by different parties; i.e. a cloud provider offering machines "bare metal". A recent patch went in to try to check if that window is open but it does so by trying to access the registers in question and testing if the result is 0xffffffff. This method will trigger a PCIe error when the window is closed which on some systems will be fatal (it will trigger an EEH for example on POWER which will take out the device). This patch improves this in two ways: - First, if the firmware has put properties in the device-tree containing the relevant configuration information, we use these. - Otherwise, a bit in one of the SCU scratch registers (which are readable via the VGA register space and writeable by the BMC) will indicate if the BMC has closed the window. This bit has been defined by Y.C Chen from Aspeed. If the window is closed and the configuration isn't available from the device-tree, some sane defaults are used. Those defaults are hopefully sufficient for standard video modes used on a server. Signed-off-by: Russell Currey Acked-by: Joel Stanley Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Dave Airlie Cc: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/ast/ast_drv.h | 6 +- drivers/gpu/drm/ast/ast_main.c | 266 ++++++++++++++++++++------------- drivers/gpu/drm/ast/ast_post.c | 7 +- 3 files changed, 169 insertions(+), 110 deletions(-) diff --git a/drivers/gpu/drm/ast/ast_drv.h b/drivers/gpu/drm/ast/ast_drv.h index b92139e9b9d8..b5c64edeb668 100644 --- a/drivers/gpu/drm/ast/ast_drv.h +++ b/drivers/gpu/drm/ast/ast_drv.h @@ -113,7 +113,11 @@ struct ast_private { struct ttm_bo_kmap_obj cache_kmap; int next_cursor; bool support_wide_screen; - bool DisableP2A; + enum { + ast_use_p2a, + ast_use_dt, + ast_use_defaults + } config_mode; enum ast_tx_chip tx_chip_type; u8 dp501_maxclk; diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c index 6c021165ca67..498a94069e6b 100644 --- a/drivers/gpu/drm/ast/ast_main.c +++ b/drivers/gpu/drm/ast/ast_main.c @@ -62,13 +62,84 @@ uint8_t ast_get_index_reg_mask(struct ast_private *ast, return ret; } +static void ast_detect_config_mode(struct drm_device *dev, u32 *scu_rev) +{ + struct device_node *np = dev->pdev->dev.of_node; + struct ast_private *ast = dev->dev_private; + uint32_t data, jregd0, jregd1; + + /* Defaults */ + ast->config_mode = ast_use_defaults; + *scu_rev = 0xffffffff; + + /* Check if we have device-tree properties */ + if (np && !of_property_read_u32(np, "aspeed,scu-revision-id", + scu_rev)) { + /* We do, disable P2A access */ + ast->config_mode = ast_use_dt; + DRM_INFO("Using device-tree for configuration\n"); + return; + } + + /* Not all families have a P2A bridge */ + if (dev->pdev->device != PCI_CHIP_AST2000) + return; + + /* + * The BMC will set SCU 0x40 D[12] to 1 if the P2 bridge + * is disabled. We force using P2A if VGA only mode bit + * is set D[7] + */ + jregd0 = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xd0, 0xff); + jregd1 = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xd1, 0xff); + if (!(jregd0 & 0x80) || !(jregd1 & 0x10)) { + /* Double check it's actually working */ + data = ast_read32(ast, 0xf004); + if (data != 0xFFFFFFFF) { + /* P2A works, grab silicon revision */ + ast->config_mode = ast_use_p2a; + + DRM_INFO("Using P2A bridge for configuration\n"); + + /* Read SCU7c (silicon revision register) */ + ast_write32(ast, 0xf004, 0x1e6e0000); + ast_write32(ast, 0xf000, 0x1); + *scu_rev = ast_read32(ast, 0x1207c); + return; + } + } + + /* We have a P2A bridge but it's disabled */ + DRM_INFO("P2A bridge disabled, using default configuration\n"); +} static int ast_detect_chip(struct drm_device *dev, bool *need_post) { struct ast_private *ast = dev->dev_private; - uint32_t data, jreg; + uint32_t jreg, scu_rev; + + /* + * If VGA isn't enabled, we need to enable now or subsequent + * access to the scratch registers will fail. We also inform + * our caller that it needs to POST the chip + * (Assumption: VGA not enabled -> need to POST) + */ + if (!ast_is_vga_enabled(dev)) { + ast_enable_vga(dev); + DRM_INFO("VGA not enabled on entry, requesting chip POST\n"); + *need_post = true; + } else + *need_post = false; + + + /* Enable extended register access */ + ast_enable_mmio(dev); ast_open_key(ast); + /* Find out whether P2A works or whether to use device-tree */ + ast_detect_config_mode(dev, &scu_rev); + + /* Identify chipset */ if (dev->pdev->device == PCI_CHIP_AST1180) { ast->chip = AST1100; DRM_INFO("AST 1180 detected\n"); @@ -80,12 +151,7 @@ static int ast_detect_chip(struct drm_device *dev, bool *need_post) ast->chip = AST2300; DRM_INFO("AST 2300 detected\n"); } else if (dev->pdev->revision >= 0x10) { - uint32_t data; - ast_write32(ast, 0xf004, 0x1e6e0000); - ast_write32(ast, 0xf000, 0x1); - - data = ast_read32(ast, 0x1207c); - switch (data & 0x0300) { + switch (scu_rev & 0x0300) { case 0x0200: ast->chip = AST1100; DRM_INFO("AST 1100 detected\n"); @@ -110,26 +176,6 @@ static int ast_detect_chip(struct drm_device *dev, bool *need_post) } } - /* - * If VGA isn't enabled, we need to enable now or subsequent - * access to the scratch registers will fail. We also inform - * our caller that it needs to POST the chip - * (Assumption: VGA not enabled -> need to POST) - */ - if (!ast_is_vga_enabled(dev)) { - ast_enable_vga(dev); - ast_enable_mmio(dev); - DRM_INFO("VGA not enabled on entry, requesting chip POST\n"); - *need_post = true; - } else - *need_post = false; - - /* Check P2A Access */ - ast->DisableP2A = true; - data = ast_read32(ast, 0xf004); - if (data != 0xFFFFFFFF) - ast->DisableP2A = false; - /* Check if we support wide screen */ switch (ast->chip) { case AST1180: @@ -146,17 +192,12 @@ static int ast_detect_chip(struct drm_device *dev, bool *need_post) ast->support_wide_screen = true; else { ast->support_wide_screen = false; - if (ast->DisableP2A == false) { - /* Read SCU7c (silicon revision register) */ - ast_write32(ast, 0xf004, 0x1e6e0000); - ast_write32(ast, 0xf000, 0x1); - data = ast_read32(ast, 0x1207c); - data &= 0x300; - if (ast->chip == AST2300 && data == 0x0) /* ast1300 */ - ast->support_wide_screen = true; - if (ast->chip == AST2400 && data == 0x100) /* ast1400 */ - ast->support_wide_screen = true; - } + if (ast->chip == AST2300 && + (scu_rev & 0x300) == 0x0) /* ast1300 */ + ast->support_wide_screen = true; + if (ast->chip == AST2400 && + (scu_rev & 0x300) == 0x100) /* ast1400 */ + ast->support_wide_screen = true; } break; } @@ -220,85 +261,102 @@ static int ast_detect_chip(struct drm_device *dev, bool *need_post) static int ast_get_dram_info(struct drm_device *dev) { + struct device_node *np = dev->pdev->dev.of_node; struct ast_private *ast = dev->dev_private; - uint32_t data, data2; - uint32_t denum, num, div, ref_pll; + uint32_t mcr_cfg, mcr_scu_mpll, mcr_scu_strap; + uint32_t denum, num, div, ref_pll, dsel; - if (ast->DisableP2A) - { + switch (ast->config_mode) { + case ast_use_dt: + /* + * If some properties are missing, use reasonable + * defaults for AST2400 + */ + if (of_property_read_u32(np, "aspeed,mcr-configuration", + &mcr_cfg)) + mcr_cfg = 0x00000577; + if (of_property_read_u32(np, "aspeed,mcr-scu-mpll", + &mcr_scu_mpll)) + mcr_scu_mpll = 0x000050C0; + if (of_property_read_u32(np, "aspeed,mcr-scu-strap", + &mcr_scu_strap)) + mcr_scu_strap = 0; + break; + case ast_use_p2a: + ast_write32(ast, 0xf004, 0x1e6e0000); + ast_write32(ast, 0xf000, 0x1); + mcr_cfg = ast_read32(ast, 0x10004); + mcr_scu_mpll = ast_read32(ast, 0x10120); + mcr_scu_strap = ast_read32(ast, 0x10170); + break; + case ast_use_defaults: + default: ast->dram_bus_width = 16; ast->dram_type = AST_DRAM_1Gx16; ast->mclk = 396; + return 0; } + + if (mcr_cfg & 0x40) + ast->dram_bus_width = 16; else - { - ast_write32(ast, 0xf004, 0x1e6e0000); - ast_write32(ast, 0xf000, 0x1); - data = ast_read32(ast, 0x10004); + ast->dram_bus_width = 32; - if (data & 0x40) - ast->dram_bus_width = 16; - else - ast->dram_bus_width = 32; - - if (ast->chip == AST2300 || ast->chip == AST2400) { - switch (data & 0x03) { - case 0: - ast->dram_type = AST_DRAM_512Mx16; - break; - default: - case 1: - ast->dram_type = AST_DRAM_1Gx16; - break; - case 2: - ast->dram_type = AST_DRAM_2Gx16; - break; - case 3: - ast->dram_type = AST_DRAM_4Gx16; - break; - } - } else { - switch (data & 0x0c) { - case 0: - case 4: - ast->dram_type = AST_DRAM_512Mx16; - break; - case 8: - if (data & 0x40) - ast->dram_type = AST_DRAM_1Gx16; - else - ast->dram_type = AST_DRAM_512Mx32; - break; - case 0xc: - ast->dram_type = AST_DRAM_1Gx32; - break; - } - } - - data = ast_read32(ast, 0x10120); - data2 = ast_read32(ast, 0x10170); - if (data2 & 0x2000) - ref_pll = 14318; - else - ref_pll = 12000; - - denum = data & 0x1f; - num = (data & 0x3fe0) >> 5; - data = (data & 0xc000) >> 14; - switch (data) { - case 3: - div = 0x4; - break; - case 2: - case 1: - div = 0x2; + if (ast->chip == AST2300 || ast->chip == AST2400) { + switch (mcr_cfg & 0x03) { + case 0: + ast->dram_type = AST_DRAM_512Mx16; break; default: - div = 0x1; + case 1: + ast->dram_type = AST_DRAM_1Gx16; + break; + case 2: + ast->dram_type = AST_DRAM_2Gx16; + break; + case 3: + ast->dram_type = AST_DRAM_4Gx16; + break; + } + } else { + switch (mcr_cfg & 0x0c) { + case 0: + case 4: + ast->dram_type = AST_DRAM_512Mx16; + break; + case 8: + if (mcr_cfg & 0x40) + ast->dram_type = AST_DRAM_1Gx16; + else + ast->dram_type = AST_DRAM_512Mx32; + break; + case 0xc: + ast->dram_type = AST_DRAM_1Gx32; break; } - ast->mclk = ref_pll * (num + 2) / (denum + 2) * (div * 1000); } + + if (mcr_scu_strap & 0x2000) + ref_pll = 14318; + else + ref_pll = 12000; + + denum = mcr_scu_mpll & 0x1f; + num = (mcr_scu_mpll & 0x3fe0) >> 5; + dsel = (mcr_scu_mpll & 0xc000) >> 14; + switch (dsel) { + case 3: + div = 0x4; + break; + case 2: + case 1: + div = 0x2; + break; + default: + div = 0x1; + break; + } + ast->mclk = ref_pll * (num + 2) / (denum + 2) * (div * 1000); return 0; } diff --git a/drivers/gpu/drm/ast/ast_post.c b/drivers/gpu/drm/ast/ast_post.c index 270e8fb2803f..c7c58becb25d 100644 --- a/drivers/gpu/drm/ast/ast_post.c +++ b/drivers/gpu/drm/ast/ast_post.c @@ -375,17 +375,14 @@ void ast_post_gpu(struct drm_device *dev) ast_enable_mmio(dev); ast_set_def_ext_reg(dev); - if (ast->DisableP2A == false) - { + if (ast->config_mode == ast_use_p2a) { if (ast->chip == AST2300 || ast->chip == AST2400) ast_init_dram_2300(dev); else ast_init_dram_reg(dev); ast_init_3rdtx(dev); - } - else - { + } else { if (ast->tx_chip_type != AST_TX_NONE) ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xa3, 0xcf, 0x80); /* Enable DVO */ } From 74de12dbfa7c05876d88e1466f8a0113a8dcead1 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Fri, 7 Apr 2017 16:05:00 -0700 Subject: [PATCH 021/100] mm, swap_cgroup: reschedule when neeed in swap_cgroup_swapoff() commit 460bcec84e11c75122ace5976214abbc596eb91b upstream. We got need_resched() warnings in swap_cgroup_swapoff() because swap_cgroup_ctrl[type].length is particularly large. Reschedule when needed. Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1704061315270.80559@chino.kir.corp.google.com Signed-off-by: David Rientjes Acked-by: Michal Hocko Cc: Johannes Weiner Cc: Vladimir Davydov Cc: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Cc: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- mm/swap_cgroup.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/swap_cgroup.c b/mm/swap_cgroup.c index 40dd0f9b00d6..09f733b0424a 100644 --- a/mm/swap_cgroup.c +++ b/mm/swap_cgroup.c @@ -205,6 +205,8 @@ void swap_cgroup_swapoff(int type) struct page *page = map[i]; if (page) __free_page(page); + if (!(i % SWAP_CLUSTER_MAX)) + cond_resched(); } vfree(map); } From cb611ead80a2fb8ad233d163a759c108069a2458 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Thu, 29 Jun 2017 15:05:04 +0100 Subject: [PATCH 022/100] MIPS: Avoid accidental raw backtrace commit 854236363370995a609a10b03e35fd3dc5e9e4a1 upstream. Since commit 81a76d7119f6 ("MIPS: Avoid using unwind_stack() with usermode") show_backtrace() invokes the raw backtracer when cp0_status & ST0_KSU indicates user mode to fix issues on EVA kernels where user and kernel address spaces overlap. However this is used by show_stack() which creates its own pt_regs on the stack and leaves cp0_status uninitialised in most of the code paths. This results in the non deterministic use of the raw back tracer depending on the previous stack content. show_stack() deals exclusively with kernel mode stacks anyway, so explicitly initialise regs.cp0_status to KSU_KERNEL (i.e. 0) to ensure we get a useful backtrace. Fixes: 81a76d7119f6 ("MIPS: Avoid using unwind_stack() with usermode") Signed-off-by: James Hogan Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/16656/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/traps.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 99a402231f4d..31ca2edd7218 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -194,6 +194,8 @@ void show_stack(struct task_struct *task, unsigned long *sp) { struct pt_regs regs; mm_segment_t old_fs = get_fs(); + + regs.cp0_status = KSU_KERNEL; if (sp) { regs.regs[29] = (unsigned long)sp; regs.regs[31] = 0; From 93206654a0b2660569b1360aaa9139e25c6be04d Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Thu, 2 Mar 2017 14:02:40 -0800 Subject: [PATCH 023/100] MIPS: pm-cps: Drop manual cache-line alignment of ready_count commit 161c51ccb7a6faf45ffe09aa5cf1ad85ccdad503 upstream. We allocate memory for a ready_count variable per-CPU, which is accessed via a cached non-coherent TLB mapping to perform synchronisation between threads within the core using LL/SC instructions. In order to ensure that the variable is contained within its own data cache line we allocate 2 lines worth of memory & align the resulting pointer to a line boundary. This is however unnecessary, since kmalloc is guaranteed to return memory which is at least cache-line aligned (see ARCH_DMA_MINALIGN). Stop the redundant manual alignment. Besides cleaning up the code & avoiding needless work, this has the side effect of avoiding an arithmetic error found by Bryan on 64 bit systems due to the 32 bit size of the former dlinesz. This led the ready_count variable to have its upper 32b cleared erroneously for MIPS64 kernels, causing problems when ready_count was later used on MIPS64 via cpuidle. Signed-off-by: Paul Burton Fixes: 3179d37ee1ed ("MIPS: pm-cps: add PM state entry code for CPS systems") Reported-by: Bryan O'Donoghue Reviewed-by: Bryan O'Donoghue Tested-by: Bryan O'Donoghue Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/15383/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/pm-cps.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/arch/mips/kernel/pm-cps.c b/arch/mips/kernel/pm-cps.c index f63a289977cc..0b3e58a3189f 100644 --- a/arch/mips/kernel/pm-cps.c +++ b/arch/mips/kernel/pm-cps.c @@ -55,7 +55,6 @@ DECLARE_BITMAP(state_support, CPS_PM_STATE_COUNT); * state. Actually per-core rather than per-CPU. */ static DEFINE_PER_CPU_ALIGNED(u32*, ready_count); -static DEFINE_PER_CPU_ALIGNED(void*, ready_count_alloc); /* Indicates online CPUs coupled with the current CPU */ static DEFINE_PER_CPU_ALIGNED(cpumask_t, online_coupled); @@ -625,7 +624,6 @@ static int __init cps_gen_core_entries(unsigned cpu) { enum cps_pm_state state; unsigned core = cpu_data[cpu].core; - unsigned dlinesz = cpu_data[cpu].dcache.linesz; void *entry_fn, *core_rc; for (state = CPS_PM_NC_WAIT; state < CPS_PM_STATE_COUNT; state++) { @@ -645,16 +643,11 @@ static int __init cps_gen_core_entries(unsigned cpu) } if (!per_cpu(ready_count, core)) { - core_rc = kmalloc(dlinesz * 2, GFP_KERNEL); + core_rc = kmalloc(sizeof(u32), GFP_KERNEL); if (!core_rc) { pr_err("Failed allocate core %u ready_count\n", core); return -ENOMEM; } - per_cpu(ready_count_alloc, core) = core_rc; - - /* Ensure ready_count is aligned to a cacheline boundary */ - core_rc += dlinesz - 1; - core_rc = (void *)((unsigned long)core_rc & ~(dlinesz - 1)); per_cpu(ready_count, core) = core_rc; } From db60a2ec9acc938c2427cbba367f20f1b0505267 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 3 Mar 2017 15:26:05 -0800 Subject: [PATCH 024/100] MIPS: Fix IRQ tracing & lockdep when rescheduling commit d8550860d910c6b7b70f830f59003b33daaa52c9 upstream. When the scheduler sets TIF_NEED_RESCHED & we call into the scheduler from arch/mips/kernel/entry.S we disable interrupts. This is true regardless of whether we reach work_resched from syscall_exit_work, resume_userspace or by looping after calling schedule(). Although we disable interrupts in these paths we don't call trace_hardirqs_off() before calling into C code which may acquire locks, and we therefore leave lockdep with an inconsistent view of whether interrupts are disabled or not when CONFIG_PROVE_LOCKING & CONFIG_DEBUG_LOCKDEP are both enabled. Without tracing this interrupt state lockdep will print warnings such as the following once a task returns from a syscall via syscall_exit_partial with TIF_NEED_RESCHED set: [ 49.927678] ------------[ cut here ]------------ [ 49.934445] WARNING: CPU: 0 PID: 1 at kernel/locking/lockdep.c:3687 check_flags.part.41+0x1dc/0x1e8 [ 49.946031] DEBUG_LOCKS_WARN_ON(current->hardirqs_enabled) [ 49.946355] CPU: 0 PID: 1 Comm: init Not tainted 4.10.0-00439-gc9fd5d362289-dirty #197 [ 49.963505] Stack : 0000000000000000 ffffffff81bb5d6a 0000000000000006 ffffffff801ce9c4 [ 49.974431] 0000000000000000 0000000000000000 0000000000000000 000000000000004a [ 49.985300] ffffffff80b7e487 ffffffff80a24498 a8000000ff160000 ffffffff80ede8b8 [ 49.996194] 0000000000000001 0000000000000000 0000000000000000 0000000077c8030c [ 50.007063] 000000007fd8a510 ffffffff801cd45c 0000000000000000 a8000000ff127c88 [ 50.017945] 0000000000000000 ffffffff801cf928 0000000000000001 ffffffff80a24498 [ 50.028827] 0000000000000000 0000000000000001 0000000000000000 0000000000000000 [ 50.039688] 0000000000000000 a8000000ff127bd0 0000000000000000 ffffffff805509bc [ 50.050575] 00000000140084e0 0000000000000000 0000000000000000 0000000000040a00 [ 50.061448] 0000000000000000 ffffffff8010e1b0 0000000000000000 ffffffff805509bc [ 50.072327] ... [ 50.076087] Call Trace: [ 50.079869] [] show_stack+0x80/0xa8 [ 50.086577] [] dump_stack+0x10c/0x190 [ 50.093498] [] __warn+0xf0/0x108 [ 50.099889] [] warn_slowpath_fmt+0x3c/0x48 [ 50.107241] [] check_flags.part.41+0x1dc/0x1e8 [ 50.114961] [] lock_is_held_type+0x8c/0xb0 [ 50.122291] [] __schedule+0x8c0/0x10f8 [ 50.129221] [] schedule+0x30/0x98 [ 50.135659] [] work_resched+0x8/0x34 [ 50.142397] ---[ end trace 0cb4f6ef5b99fe21 ]--- [ 50.148405] possible reason: unannotated irqs-off. [ 50.154600] irq event stamp: 400463 [ 50.159566] hardirqs last enabled at (400463): [] _raw_spin_unlock_irqrestore+0x40/0xa8 [ 50.171981] hardirqs last disabled at (400462): [] _raw_spin_lock_irqsave+0x30/0xb0 [ 50.183897] softirqs last enabled at (400450): [] __do_softirq+0x4ac/0x6a8 [ 50.195015] softirqs last disabled at (400425): [] irq_exit+0x110/0x128 Fix this by using the TRACE_IRQS_OFF macro to call trace_hardirqs_off() when CONFIG_TRACE_IRQFLAGS is enabled. This is done before invoking schedule() following the work_resched label because: 1) Interrupts are disabled regardless of the path we take to reach work_resched() & schedule(). 2) Performing the tracing here avoids the need to do it in paths which disable interrupts but don't call out to C code before hitting a path which uses the RESTORE_SOME macro that will call trace_hardirqs_on() or trace_hardirqs_off() as appropriate. We call trace_hardirqs_on() using the TRACE_IRQS_ON macro before calling syscall_trace_leave() for similar reasons, ensuring that lockdep has a consistent view of state after we re-enable interrupts. Signed-off-by: Paul Burton Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/15385/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/entry.S | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/mips/kernel/entry.S b/arch/mips/kernel/entry.S index 7791840cf22c..db07793f7b43 100644 --- a/arch/mips/kernel/entry.S +++ b/arch/mips/kernel/entry.S @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -137,6 +138,7 @@ work_pending: andi t0, a2, _TIF_NEED_RESCHED # a2 is preloaded with TI_FLAGS beqz t0, work_notifysig work_resched: + TRACE_IRQS_OFF jal schedule local_irq_disable # make sure need_resched and @@ -173,6 +175,7 @@ syscall_exit_work: beqz t0, work_pending # trace bit set? local_irq_enable # could let syscall_trace_leave() # call schedule() instead + TRACE_IRQS_ON move a0, sp jal syscall_trace_leave b resume_userspace From 11327be3570e4640bf9c19e60b724f9fbb21eeb6 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 28 Jun 2017 12:02:02 +0200 Subject: [PATCH 025/100] ALSA: hda - Fix endless loop of codec configure commit d94815f917da770d42c377786dc428f542e38f71 upstream. azx_codec_configure() loops over the codecs found on the given controller via a linked list. The code used to work in the past, but in the current version, this may lead to an endless loop when a codec binding returns an error. The culprit is that the snd_hda_codec_configure() unregisters the device upon error, and this eventually deletes the given codec object from the bus. Since the list is initialized via list_del_init(), the next object points to the same device itself. This behavior change was introduced at splitting the HD-audio code code, and forgotten to adapt it here. For fixing this bug, just use a *_safe() version of list iteration. Fixes: d068ebc25e6e ("ALSA: hda - Move some codes up to hdac_bus struct") Reported-by: Daniel Vetter Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_codec.h | 2 ++ sound/pci/hda/hda_controller.c | 8 ++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/hda_codec.h b/sound/pci/hda/hda_codec.h index 373fcad840ea..776dffa88aee 100644 --- a/sound/pci/hda/hda_codec.h +++ b/sound/pci/hda/hda_codec.h @@ -294,6 +294,8 @@ struct hda_codec { #define list_for_each_codec(c, bus) \ list_for_each_entry(c, &(bus)->core.codec_list, core.list) +#define list_for_each_codec_safe(c, n, bus) \ + list_for_each_entry_safe(c, n, &(bus)->core.codec_list, core.list) /* snd_hda_codec_read/write optional flags */ #define HDA_RW_NO_RESPONSE_FALLBACK (1 << 0) diff --git a/sound/pci/hda/hda_controller.c b/sound/pci/hda/hda_controller.c index 5baf8b56b6e7..9c6e10fb479f 100644 --- a/sound/pci/hda/hda_controller.c +++ b/sound/pci/hda/hda_controller.c @@ -1128,8 +1128,12 @@ EXPORT_SYMBOL_GPL(azx_probe_codecs); /* configure each codec instance */ int azx_codec_configure(struct azx *chip) { - struct hda_codec *codec; - list_for_each_codec(codec, &chip->bus) { + struct hda_codec *codec, *next; + + /* use _safe version here since snd_hda_codec_configure() deregisters + * the device upon error and deletes itself from the bus list. + */ + list_for_each_codec_safe(codec, next, &chip->bus) { snd_hda_codec_configure(codec); } return 0; From c70e2006d06a868fb53d922e8b4057561774b081 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Wed, 28 Jun 2017 08:59:16 +0800 Subject: [PATCH 026/100] ALSA: hda - set input_path bitmap to zero after moving it to new place commit a8f20fd25bdce81a8e41767c39f456d346b63427 upstream. Recently we met a problem, the codec has valid adcs and input pins, and they can form valid input paths, but the driver does not build valid controls for them like "Mic boost", "Capture Volume" and "Capture Switch". Through debugging, I found the driver needs to shrink the invalid adcs and input paths for this machine, so it will move the whole column bitmap value to the previous column, after moving it, the driver forgets to set the original column bitmap value to zero, as a result, the driver will invalidate the path whose index value is the original colume bitmap value. After executing this function, all valid input paths are invalidated by a mistake, there are no any valid input paths, so the driver won't build controls for them. Fixes: 3a65bcdc577a ("ALSA: hda - Fix inconsistent input_paths after ADC reduction") Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_generic.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c index dc2fa576d60d..689df78f640a 100644 --- a/sound/pci/hda/hda_generic.c +++ b/sound/pci/hda/hda_generic.c @@ -3190,6 +3190,7 @@ static int check_dyn_adc_switch(struct hda_codec *codec) spec->input_paths[i][nums]); spec->input_paths[i][nums] = spec->input_paths[i][n]; + spec->input_paths[i][n] = 0; } } nums++; From 04686ab28a35842fce97a8ddfab8a8b00656f1e8 Mon Sep 17 00:00:00 2001 From: Deepak Rawat Date: Mon, 26 Jun 2017 14:39:08 +0200 Subject: [PATCH 027/100] drm/vmwgfx: Free hash table allocated by cmdbuf managed res mgr commit 82fcee526ba8ca2c5d378bdf51b21b7eb058fe3a upstream. The hash table created during vmw_cmdbuf_res_man_create was never freed. This causes memory leak in context creation. Added the corresponding drm_ht_remove in vmw_cmdbuf_res_man_destroy. Tested for memory leak by running piglit overnight and kernel memory is not inflated which earlier was. Signed-off-by: Deepak Rawat Reviewed-by: Sinclair Yeh Signed-off-by: Thomas Hellstrom Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c index 13db8a2851ed..1f013d45c9e9 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c @@ -321,6 +321,7 @@ void vmw_cmdbuf_res_man_destroy(struct vmw_cmdbuf_res_manager *man) list_for_each_entry_safe(entry, next, &man->list, head) vmw_cmdbuf_res_free(man, entry); + drm_ht_remove(&man->resources); kfree(man); } From 1e0f216195a6d106ed50c386abffdf60f496d518 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Thu, 8 Dec 2016 19:55:22 +0800 Subject: [PATCH 028/100] usb: gadget: f_fs: Fix possibe deadlock commit b3ce3ce02d146841af012d08506b4071db8ffde3 upstream. When system try to close /dev/usb-ffs/adb/ep0 on one core, at the same time another core try to attach new UDC, which will cause deadlock as below scenario. Thus we should release ffs lock before issuing unregister_gadget_item(). [ 52.642225] c1 ====================================================== [ 52.642228] c1 [ INFO: possible circular locking dependency detected ] [ 52.642236] c1 4.4.6+ #1 Tainted: G W O [ 52.642241] c1 ------------------------------------------------------- [ 52.642245] c1 usb ffs open/2808 is trying to acquire lock: [ 52.642270] c0 (udc_lock){+.+.+.}, at: [] usb_gadget_unregister_driver+0x3c/0xc8 [ 52.642272] c1 but task is already holding lock: [ 52.642283] c0 (ffs_lock){+.+.+.}, at: [] ffs_data_clear+0x30/0x140 [ 52.642285] c1 which lock already depends on the new lock. [ 52.642287] c1 the existing dependency chain (in reverse order) is: [ 52.642295] c0 -> #1 (ffs_lock){+.+.+.}: [ 52.642307] c0 [] __lock_acquire+0x20f0/0x2238 [ 52.642314] c0 [] lock_acquire+0xe4/0x298 [ 52.642322] c0 [] mutex_lock_nested+0x7c/0x3cc [ 52.642328] c0 [] ffs_func_bind+0x504/0x6e8 [ 52.642334] c0 [] usb_add_function+0x84/0x184 [ 52.642340] c0 [] configfs_composite_bind+0x264/0x39c [ 52.642346] c0 [] udc_bind_to_driver+0x58/0x11c [ 52.642352] c0 [] usb_udc_attach_driver+0x90/0xc8 [ 52.642358] c0 [] gadget_dev_desc_UDC_store+0xd4/0x128 [ 52.642369] c0 [] configfs_write_file+0xd0/0x13c [ 52.642376] c0 [] vfs_write+0xb8/0x214 [ 52.642381] c0 [] SyS_write+0x54/0xb0 [ 52.642388] c0 [] el0_svc_naked+0x24/0x28 [ 52.642395] c0 -> #0 (udc_lock){+.+.+.}: [ 52.642401] c0 [] print_circular_bug+0x84/0x2e4 [ 52.642407] c0 [] __lock_acquire+0x2138/0x2238 [ 52.642412] c0 [] lock_acquire+0xe4/0x298 [ 52.642420] c0 [] mutex_lock_nested+0x7c/0x3cc [ 52.642427] c0 [] usb_gadget_unregister_driver+0x3c/0xc8 [ 52.642432] c0 [] unregister_gadget_item+0x28/0x44 [ 52.642439] c0 [] ffs_data_clear+0x138/0x140 [ 52.642444] c0 [] ffs_data_reset+0x20/0x6c [ 52.642450] c0 [] ffs_data_closed+0xac/0x12c [ 52.642454] c0 [] ffs_ep0_release+0x20/0x2c [ 52.642460] c0 [] __fput+0xb0/0x1f4 [ 52.642466] c0 [] ____fput+0x20/0x2c [ 52.642473] c0 [] task_work_run+0xb4/0xe8 [ 52.642482] c0 [] do_exit+0x360/0xb9c [ 52.642487] c0 [] do_group_exit+0x4c/0xb0 [ 52.642494] c0 [] get_signal+0x380/0x89c [ 52.642501] c0 [] do_signal+0x154/0x518 [ 52.642507] c0 [] do_notify_resume+0x70/0x78 [ 52.642512] c0 [] work_pending+0x1c/0x20 [ 52.642514] c1 other info that might help us debug this: [ 52.642517] c1 Possible unsafe locking scenario: [ 52.642518] c1 CPU0 CPU1 [ 52.642520] c1 ---- ---- [ 52.642525] c0 lock(ffs_lock); [ 52.642529] c0 lock(udc_lock); [ 52.642533] c0 lock(ffs_lock); [ 52.642537] c0 lock(udc_lock); [ 52.642539] c1 *** DEADLOCK *** [ 52.642543] c1 1 lock held by usb ffs open/2808: [ 52.642555] c0 #0: (ffs_lock){+.+.+.}, at: [] ffs_data_clear+0x30/0x140 [ 52.642557] c1 stack backtrace: [ 52.642563] c1 CPU: 1 PID: 2808 Comm: usb ffs open Tainted: G [ 52.642565] c1 Hardware name: Spreadtrum SP9860g Board (DT) [ 52.642568] c1 Call trace: [ 52.642573] c1 [] dump_backtrace+0x0/0x170 [ 52.642577] c1 [] show_stack+0x20/0x28 [ 52.642583] c1 [] dump_stack+0xa8/0xe0 [ 52.642587] c1 [] print_circular_bug+0x1fc/0x2e4 [ 52.642591] c1 [] __lock_acquire+0x2138/0x2238 [ 52.642595] c1 [] lock_acquire+0xe4/0x298 [ 52.642599] c1 [] mutex_lock_nested+0x7c/0x3cc [ 52.642604] c1 [] usb_gadget_unregister_driver+0x3c/0xc8 [ 52.642608] c1 [] unregister_gadget_item+0x28/0x44 [ 52.642613] c1 [] ffs_data_clear+0x138/0x140 [ 52.642618] c1 [] ffs_data_reset+0x20/0x6c [ 52.642621] c1 [] ffs_data_closed+0xac/0x12c [ 52.642625] c1 [] ffs_ep0_release+0x20/0x2c [ 52.642629] c1 [] __fput+0xb0/0x1f4 [ 52.642633] c1 [] ____fput+0x20/0x2c [ 52.642636] c1 [] task_work_run+0xb4/0xe8 [ 52.642640] c1 [] do_exit+0x360/0xb9c [ 52.642644] c1 [] do_group_exit+0x4c/0xb0 [ 52.642647] c1 [] get_signal+0x380/0x89c [ 52.642651] c1 [] do_signal+0x154/0x518 [ 52.642656] c1 [] do_notify_resume+0x70/0x78 [ 52.642659] c1 [] work_pending+0x1c/0x20 Acked-by: Michal Nazarewicz Signed-off-by: Baolin Wang Signed-off-by: Felipe Balbi Cc: Jerry Zhang Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_fs.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 6d8f865a2fb7..732e6ed5d7b4 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -3463,6 +3463,7 @@ static void ffs_closed(struct ffs_data *ffs) { struct ffs_dev *ffs_obj; struct f_fs_opts *opts; + struct config_item *ci; ENTER(); ffs_dev_lock(); @@ -3486,8 +3487,11 @@ static void ffs_closed(struct ffs_data *ffs) || !atomic_read(&opts->func_inst.group.cg_item.ci_kref.refcount)) goto done; - unregister_gadget_item(ffs_obj->opts-> - func_inst.group.cg_item.ci_parent->ci_parent); + ci = opts->func_inst.group.cg_item.ci_parent->ci_parent; + ffs_dev_unlock(); + + unregister_gadget_item(ci); + return; done: ffs_dev_unlock(); } From 2449a71eb98204fc54ff55ddd6825cc5141ce176 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 20 Jan 2016 15:00:45 -0800 Subject: [PATCH 029/100] sysctl: enable strict writes commit 41662f5cc55335807d39404371cfcbb1909304c4 upstream. SYSCTL_WRITES_WARN was added in commit f4aacea2f5d1 ("sysctl: allow for strict write position handling"), and released in v3.16 in August of 2014. Since then I can find only 1 instance of non-zero offset writing[1], and it was fixed immediately in CRIU[2]. As such, it appears safe to flip this to the strict state now. [1] https://www.google.com/search?q="when%20file%20position%20was%20not%200" [2] http://lists.openvz.org/pipermail/criu/2015-April/019819.html Signed-off-by: Kees Cook Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Cc: Sumit Semwal Signed-off-by: Greg Kroah-Hartman --- Documentation/sysctl/kernel.txt | 15 +++++++-------- kernel/sysctl.c | 2 +- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index af70d1541d3a..be61d53e997f 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -810,14 +810,13 @@ via the /proc/sys interface: Each write syscall must fully contain the sysctl value to be written, and multiple writes on the same sysctl file descriptor will rewrite the sysctl value, regardless of file position. - 0 - (default) Same behavior as above, but warn about processes that - perform writes to a sysctl file descriptor when the file position - is not 0. - 1 - Respect file position when writing sysctl strings. Multiple writes - will append to the sysctl value buffer. Anything past the max length - of the sysctl value buffer will be ignored. Writes to numeric sysctl - entries must always be at file position 0 and the value must be - fully contained in the buffer sent in the write syscall. + 0 - Same behavior as above, but warn about processes that perform writes + to a sysctl file descriptor when the file position is not 0. + 1 - (default) Respect file position when writing sysctl strings. Multiple + writes will append to the sysctl value buffer. Anything past the max + length of the sysctl value buffer will be ignored. Writes to numeric + sysctl entries must always be at file position 0 and the value must + be fully contained in the buffer sent in the write syscall. ============================================================== diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 300d64162aff..464a7864e4c5 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -174,7 +174,7 @@ extern int no_unaligned_warning; #define SYSCTL_WRITES_WARN 0 #define SYSCTL_WRITES_STRICT 1 -static int sysctl_writes_strict = SYSCTL_WRITES_WARN; +static int sysctl_writes_strict = SYSCTL_WRITES_STRICT; static int proc_do_cad_pid(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); From 21d7c733251a5afd87374d3928738026c93e6261 Mon Sep 17 00:00:00 2001 From: Roman Pen Date: Tue, 9 Feb 2016 12:33:35 -0700 Subject: [PATCH 030/100] block: fix module reference leak on put_disk() call for cgroups throttle commit 39a169b62b415390398291080dafe63aec751e0a upstream. get_disk(),get_gendisk() calls have non explicit side effect: they increase the reference on the disk owner module. The following is the correct sequence how to get a disk reference and to put it: disk = get_gendisk(...); /* use disk */ owner = disk->fops->owner; put_disk(disk); module_put(owner); fs/block_dev.c is aware of this required module_put() call, but f.e. blkg_conf_finish(), which is located in block/blk-cgroup.c, does not put a module reference. To see a leakage in action cgroups throttle config can be used. In the following script I'm removing throttle for /dev/ram0 (actually this is NOP, because throttle was never set for this device): # lsmod | grep brd brd 5175 0 # i=100; while [ $i -gt 0 ]; do echo "1:0 0" > \ /sys/fs/cgroup/blkio/blkio.throttle.read_bps_device; i=$(($i - 1)); \ done # lsmod | grep brd brd 5175 100 Now brd module has 100 references. The issue is fixed by calling module_put() just right away put_disk(). Signed-off-by: Roman Pen Cc: Gi-Oh Kim Cc: Tejun Heo Cc: Jens Axboe Cc: linux-block@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Jens Axboe Cc: Sumit Semwal Signed-off-by: Greg Kroah-Hartman --- block/blk-cgroup.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 9d359e05fad7..8161090a1970 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -788,6 +788,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, { struct gendisk *disk; struct blkcg_gq *blkg; + struct module *owner; unsigned int major, minor; int key_len, part, ret; char *body; @@ -804,7 +805,9 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, if (!disk) return -ENODEV; if (part) { + owner = disk->fops->owner; put_disk(disk); + module_put(owner); return -ENODEV; } @@ -820,7 +823,9 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, ret = PTR_ERR(blkg); rcu_read_unlock(); spin_unlock_irq(disk->queue->queue_lock); + owner = disk->fops->owner; put_disk(disk); + module_put(owner); /* * If queue was bypassing, we should retry. Do so after a * short msleep(). It isn't strictly necessary but queue @@ -851,9 +856,13 @@ EXPORT_SYMBOL_GPL(blkg_conf_prep); void blkg_conf_finish(struct blkg_conf_ctx *ctx) __releases(ctx->disk->queue->queue_lock) __releases(rcu) { + struct module *owner; + spin_unlock_irq(ctx->disk->queue->queue_lock); rcu_read_unlock(); + owner = ctx->disk->fops->owner; put_disk(ctx->disk); + module_put(owner); } EXPORT_SYMBOL_GPL(blkg_conf_finish); From cdbf92675fad445f6dffbd6bf852875fb5c02b7c Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 16 Jun 2017 14:02:34 -0700 Subject: [PATCH 031/100] mm: numa: avoid waiting on freed migrated pages commit 3c226c637b69104f6b9f1c6ec5b08d7b741b3229 upstream. In do_huge_pmd_numa_page(), we attempt to handle a migrating thp pmd by waiting until the pmd is unlocked before we return and retry. However, we can race with migrate_misplaced_transhuge_page(): // do_huge_pmd_numa_page // migrate_misplaced_transhuge_page() // Holds 0 refs on page // Holds 2 refs on page vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); /* ... */ if (pmd_trans_migrating(*vmf->pmd)) { page = pmd_page(*vmf->pmd); spin_unlock(vmf->ptl); ptl = pmd_lock(mm, pmd); if (page_count(page) != 2)) { /* roll back */ } /* ... */ mlock_migrate_page(new_page, page); /* ... */ spin_unlock(ptl); put_page(page); put_page(page); // page freed here wait_on_page_locked(page); goto out; } This can result in the freed page having its waiters flag set unexpectedly, which trips the PAGE_FLAGS_CHECK_AT_PREP checks in the page alloc/free functions. This has been observed on arm64 KVM guests. We can avoid this by having do_huge_pmd_numa_page() take a reference on the page before dropping the pmd lock, mirroring what we do in __migration_entry_wait(). When we hit the race, migrate_misplaced_transhuge_page() will see the reference and abort the migration, as it may do today in other cases. Fixes: b8916634b77bffb2 ("mm: Prevent parallel splits during THP migration") Link: http://lkml.kernel.org/r/1497349722-6731-2-git-send-email-will.deacon@arm.com Signed-off-by: Mark Rutland Signed-off-by: Will Deacon Acked-by: Steve Capper Acked-by: Kirill A. Shutemov Acked-by: Vlastimil Babka Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/huge_memory.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 47b469663822..6c6f5ccfcda1 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1363,8 +1363,11 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, */ if (unlikely(pmd_trans_migrating(*pmdp))) { page = pmd_page(*pmdp); + if (!get_page_unless_zero(page)) + goto out_unlock; spin_unlock(ptl); wait_on_page_locked(page); + put_page(page); goto out; } @@ -1396,8 +1399,11 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, /* Migration could have started since the pmd_trans_migrating check */ if (!page_locked) { + if (!get_page_unless_zero(page)) + goto out_unlock; spin_unlock(ptl); wait_on_page_locked(page); + put_page(page); page_nid = -1; goto out; } From b92f9f6a2c09683bab8c3ac806f8dd0235bb9003 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Tue, 17 Jan 2017 14:51:04 +0100 Subject: [PATCH 032/100] KVM: x86: fix fixing of hypercalls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit ce2e852ecc9a42e4b8dabb46025cfef63209234a ] emulator_fix_hypercall() replaces hypercall with vmcall instruction, but it does not handle GP exception properly when writes the new instruction. It can return X86EMUL_PROPAGATE_FAULT without setting exception information. This leads to incorrect emulation and triggers WARN_ON(ctxt->exception.vector > 0x1f) in x86_emulate_insn() as discovered by syzkaller fuzzer: WARNING: CPU: 2 PID: 18646 at arch/x86/kvm/emulate.c:5558 Call Trace: warn_slowpath_null+0x2c/0x40 kernel/panic.c:582 x86_emulate_insn+0x16a5/0x4090 arch/x86/kvm/emulate.c:5572 x86_emulate_instruction+0x403/0x1cc0 arch/x86/kvm/x86.c:5618 emulate_instruction arch/x86/include/asm/kvm_host.h:1127 [inline] handle_exception+0x594/0xfd0 arch/x86/kvm/vmx.c:5762 vmx_handle_exit+0x2b7/0x38b0 arch/x86/kvm/vmx.c:8625 vcpu_enter_guest arch/x86/kvm/x86.c:6888 [inline] vcpu_run arch/x86/kvm/x86.c:6947 [inline] Set exception information when write in emulator_fix_hypercall() fails. Signed-off-by: Dmitry Vyukov Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Wanpeng Li Cc: kvm@vger.kernel.org Cc: syzkaller@googlegroups.com Signed-off-by: Radim Krčmář Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6c82792487e9..cba30e791e7d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5974,7 +5974,8 @@ static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt) kvm_x86_ops->patch_hypercall(vcpu, instruction); - return emulator_write_emulated(ctxt, rip, instruction, 3, NULL); + return emulator_write_emulated(ctxt, rip, instruction, 3, + &ctxt->exception); } static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu) From 5c982bac211ccc3923519a980f2e9a53ea8e6b60 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 12 Jan 2017 15:25:10 +0900 Subject: [PATCH 033/100] scsi: sd: Fix wrong DPOFUA disable in sd_read_cache_type [ Upstream commit 26f2819772af891dee2843e1f8662c58e5129d5f ] Zoned block devices force the use of READ/WRITE(16) commands by setting sdkp->use_16_for_rw and clearing sdkp->use_10_for_rw. This result in DPOFUA always being disabled for these drives as the assumed use of the deprecated READ/WRITE(6) commands only looks at sdkp->use_10_for_rw. Strenghten the test by also checking that sdkp->use_16_for_rw is false. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/sd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 4d5207dff960..8750c86f95f9 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2566,7 +2566,8 @@ sd_read_cache_type(struct scsi_disk *sdkp, unsigned char *buffer) if (sdp->broken_fua) { sd_first_printk(KERN_NOTICE, sdkp, "Disabling FUA\n"); sdkp->DPOFUA = 0; - } else if (sdkp->DPOFUA && !sdkp->device->use_10_for_rw) { + } else if (sdkp->DPOFUA && !sdkp->device->use_10_for_rw && + !sdkp->device->use_16_for_rw) { sd_first_printk(KERN_NOTICE, sdkp, "Uses READ/WRITE(6), disabling FUA\n"); sdkp->DPOFUA = 0; From 8c721e38055a36c881a904c1b4a5b6fd80c67edd Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Tue, 10 Jan 2017 12:05:54 +0100 Subject: [PATCH 034/100] scsi: lpfc: Set elsiocb contexts to NULL after freeing it [ Upstream commit 8667f515952feefebb3c0f8d9a9266c91b101a46 ] Set the elsiocb contexts to NULL after freeing as others depend on it. Signed-off-by: Johannes Thumshirn Acked-by: Dick Kennedy Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/lpfc/lpfc_els.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 59ced8864b2f..0e6aaef9a038 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -3563,12 +3563,14 @@ lpfc_els_free_iocb(struct lpfc_hba *phba, struct lpfc_iocbq *elsiocb) } else { buf_ptr1 = (struct lpfc_dmabuf *) elsiocb->context2; lpfc_els_free_data(phba, buf_ptr1); + elsiocb->context2 = NULL; } } if (elsiocb->context3) { buf_ptr = (struct lpfc_dmabuf *) elsiocb->context3; lpfc_els_free_bpl(phba, buf_ptr); + elsiocb->context3 = NULL; } lpfc_sli_release_iocbq(phba, elsiocb); return 0; From 50e18570d8ea12338f13e8835381a087773f8f08 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Fri, 23 Dec 2016 18:06:13 -0800 Subject: [PATCH 035/100] qla2xxx: Fix erroneous invalid handle message [ Upstream commit 4f060736f29a960aba8e781a88837464756200a8 ] Termination of Immediate Notify IOCB was using wrong IOCB handle. IOCB completion code was unable to find appropriate code path due to wrong handle. Following message is seen in the logs. "Error entry - invalid handle/queue (ffff)." Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Reviewed-by: Christoph Hellwig [ bvanassche: Fixed word order in patch title ] Signed-off-by: Bart Van Assche Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_isr.c | 4 ++++ drivers/scsi/qla2xxx/qla_target.c | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index 0e59731f95ad..1f6a3b86965f 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -2466,6 +2466,10 @@ qla2x00_error_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, sts_entry_t *pkt) if (pkt->entry_status & RF_BUSY) res = DID_BUS_BUSY << 16; + if (pkt->entry_type == NOTIFY_ACK_TYPE && + pkt->handle == QLA_TGT_SKIP_HANDLE) + return; + sp = qla2x00_get_sp_from_handle(vha, func, req, pkt); if (sp) { sp->done(ha, sp, res); diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index f57d96984ae4..e6faa0b050d1 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -2865,7 +2865,7 @@ static int __qlt_send_term_imm_notif(struct scsi_qla_host *vha, pkt->entry_type = NOTIFY_ACK_TYPE; pkt->entry_count = 1; - pkt->handle = QLA_TGT_SKIP_HANDLE | CTIO_COMPLETION_HANDLE_MARK; + pkt->handle = QLA_TGT_SKIP_HANDLE; nack = (struct nack_to_isp *)pkt; nack->ox_id = ntfy->ox_id; From 0012ba253767ee157c20d70d5246b428affb6c9f Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Thu, 2 Mar 2017 19:21:32 -0500 Subject: [PATCH 036/100] ARM: dts: BCM5301X: Correct GIC_PPI interrupt flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 0c2bf9f95983fe30aa2f6463cb761cd42c2d521a upstream. GIC_PPI flags were misconfigured for the timers, resulting in errors like: [ 0.000000] GIC: PPI11 is secure or misconfigured Changing them to being edge triggered corrects the issue Suggested-by: Rafał Miłecki Signed-off-by: Jon Mason Fixes: d27509f1 ("ARM: BCM5301X: add dts files for BCM4708 SoC") Signed-off-by: Florian Fainelli [AmitP: Resolved minor cherry-pick conflict] Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/bcm5301x.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/bcm5301x.dtsi b/arch/arm/boot/dts/bcm5301x.dtsi index 6f50f672efbd..de8ac998604d 100644 --- a/arch/arm/boot/dts/bcm5301x.dtsi +++ b/arch/arm/boot/dts/bcm5301x.dtsi @@ -54,14 +54,14 @@ timer@0200 { compatible = "arm,cortex-a9-global-timer"; reg = <0x0200 0x100>; - interrupts = ; + interrupts = ; clocks = <&clk_periph>; }; local-timer@0600 { compatible = "arm,cortex-a9-twd-timer"; reg = <0x0600 0x100>; - interrupts = ; + interrupts = ; clocks = <&clk_periph>; }; From 753be27f77eb1f301cda9d8c992325d3fb5ddea3 Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Thu, 4 Feb 2016 22:09:23 +0100 Subject: [PATCH 037/100] net: mvneta: Fix for_each_present_cpu usage commit 129219e4950a3fcf9323b3bbd8b224c7aa873985 upstream. This patch convert the for_each_present in on_each_cpu, instead of applying on the present cpus it will be applied only on the online cpus. This fix a bug reported on http://thread.gmane.org/gmane.linux.ports.arm.kernel/468173. Using the macro on_each_cpu (instead of a for_each_* loop) also ensures that all the calls will be done all at once. Fixes: f86428854480 ("net: mvneta: Statically assign queues to CPUs") Reported-by: Stefan Roese Suggested-by: Jisheng Zhang Suggested-by: Russell King Signed-off-by: Gregory CLEMENT Signed-off-by: David S. Miller Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/marvell/mvneta.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 71ec9cb08e06..15056f06754a 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -2446,7 +2446,7 @@ static void mvneta_start_dev(struct mvneta_port *pp) mvneta_port_enable(pp); /* Enable polling on the port */ - for_each_present_cpu(cpu) { + for_each_online_cpu(cpu) { struct mvneta_pcpu_port *port = per_cpu_ptr(pp->ports, cpu); napi_enable(&port->napi); @@ -2472,7 +2472,7 @@ static void mvneta_stop_dev(struct mvneta_port *pp) phy_stop(pp->phy_dev); - for_each_present_cpu(cpu) { + for_each_online_cpu(cpu) { struct mvneta_pcpu_port *port = per_cpu_ptr(pp->ports, cpu); napi_disable(&port->napi); @@ -2902,13 +2902,11 @@ err_cleanup_rxqs: static int mvneta_stop(struct net_device *dev) { struct mvneta_port *pp = netdev_priv(dev); - int cpu; mvneta_stop_dev(pp); mvneta_mdio_remove(pp); unregister_cpu_notifier(&pp->cpu_notifier); - for_each_present_cpu(cpu) - smp_call_function_single(cpu, mvneta_percpu_disable, pp, true); + on_each_cpu(mvneta_percpu_disable, pp, true); free_percpu_irq(dev->irq, pp->ports); mvneta_cleanup_rxqs(pp); mvneta_cleanup_txqs(pp); From ef0cb4c9d496939d78235304e29d3073a0e4afb9 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 16 May 2016 19:51:55 +0200 Subject: [PATCH 038/100] MIPS: ath79: fix regression in PCI window initialization commit 9184dc8ffa56844352b3b9860e562ec4ee41176f upstream. ath79_ddr_pci_win_base has the type void __iomem *, so register offsets need to be a multiple of 4. Cc: Alban Bedel Fixes: 24b0e3e84fbf ("MIPS: ath79: Improve the DDR controller interface") Signed-off-by: Felix Fietkau Cc: sergei.shtylyov@cogentembedded.com Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/13258/ Signed-off-by: Ralf Baechle Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- arch/mips/ath79/common.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/mips/ath79/common.c b/arch/mips/ath79/common.c index 3cedd1f95e0f..8ae4067a5eda 100644 --- a/arch/mips/ath79/common.c +++ b/arch/mips/ath79/common.c @@ -76,14 +76,14 @@ void ath79_ddr_set_pci_windows(void) { BUG_ON(!ath79_ddr_pci_win_base); - __raw_writel(AR71XX_PCI_WIN0_OFFS, ath79_ddr_pci_win_base + 0); - __raw_writel(AR71XX_PCI_WIN1_OFFS, ath79_ddr_pci_win_base + 1); - __raw_writel(AR71XX_PCI_WIN2_OFFS, ath79_ddr_pci_win_base + 2); - __raw_writel(AR71XX_PCI_WIN3_OFFS, ath79_ddr_pci_win_base + 3); - __raw_writel(AR71XX_PCI_WIN4_OFFS, ath79_ddr_pci_win_base + 4); - __raw_writel(AR71XX_PCI_WIN5_OFFS, ath79_ddr_pci_win_base + 5); - __raw_writel(AR71XX_PCI_WIN6_OFFS, ath79_ddr_pci_win_base + 6); - __raw_writel(AR71XX_PCI_WIN7_OFFS, ath79_ddr_pci_win_base + 7); + __raw_writel(AR71XX_PCI_WIN0_OFFS, ath79_ddr_pci_win_base + 0x0); + __raw_writel(AR71XX_PCI_WIN1_OFFS, ath79_ddr_pci_win_base + 0x4); + __raw_writel(AR71XX_PCI_WIN2_OFFS, ath79_ddr_pci_win_base + 0x8); + __raw_writel(AR71XX_PCI_WIN3_OFFS, ath79_ddr_pci_win_base + 0xc); + __raw_writel(AR71XX_PCI_WIN4_OFFS, ath79_ddr_pci_win_base + 0x10); + __raw_writel(AR71XX_PCI_WIN5_OFFS, ath79_ddr_pci_win_base + 0x14); + __raw_writel(AR71XX_PCI_WIN6_OFFS, ath79_ddr_pci_win_base + 0x18); + __raw_writel(AR71XX_PCI_WIN7_OFFS, ath79_ddr_pci_win_base + 0x1c); } EXPORT_SYMBOL_GPL(ath79_ddr_set_pci_windows); From e1688f1677e65eb5e49851d11d363e2542c437a1 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 23 Dec 2016 19:56:56 -0800 Subject: [PATCH 039/100] net: korina: Fix NAPI versus resources freeing commit e6afb1ad88feddf2347ea779cfaf4d03d3cd40b6 upstream. Commit beb0babfb77e ("korina: disable napi on close and restart") introduced calls to napi_disable() that were missing before, unfortunately this leaves a small window during which NAPI has a chance to run, yet we just freed resources since korina_free_ring() has been called: Fix this by disabling NAPI first then freeing resource, and make sure that we also cancel the restart task before doing the resource freeing. Fixes: beb0babfb77e ("korina: disable napi on close and restart") Reported-by: Alexandros C. Couloumbis Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/korina.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c index d74f5f4e5782..07eabf72c480 100644 --- a/drivers/net/ethernet/korina.c +++ b/drivers/net/ethernet/korina.c @@ -900,10 +900,10 @@ static void korina_restart_task(struct work_struct *work) DMA_STAT_DONE | DMA_STAT_HALT | DMA_STAT_ERR, &lp->rx_dma_regs->dmasm); - korina_free_ring(dev); - napi_disable(&lp->napi); + korina_free_ring(dev); + if (korina_init(dev) < 0) { printk(KERN_ERR "%s: cannot restart device\n", dev->name); return; @@ -1064,12 +1064,12 @@ static int korina_close(struct net_device *dev) tmp = tmp | DMA_STAT_DONE | DMA_STAT_HALT | DMA_STAT_ERR; writel(tmp, &lp->rx_dma_regs->dmasm); - korina_free_ring(dev); - napi_disable(&lp->napi); cancel_work_sync(&lp->restart_task); + korina_free_ring(dev); + free_irq(lp->rx_irq, dev); free_irq(lp->tx_irq, dev); free_irq(lp->ovr_irq, dev); From 9a23a35a4fc6882dd0b3a6b125e51356c58dbb46 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Mon, 4 Jan 2016 20:23:56 +0100 Subject: [PATCH 040/100] MIPS: ralink: MT7688 pinmux fixes commit e906a5f67e5a3337d696ec848e9c28fc68b39aa3 upstream. A few fixes to the pinmux data, 2 new muxes and a minor whitespace cleanup. Signed-off-by: John Crispin Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/11991/ Signed-off-by: Ralf Baechle Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- arch/mips/ralink/mt7620.c | 78 ++++++++++++++++++++++++--------------- 1 file changed, 49 insertions(+), 29 deletions(-) diff --git a/arch/mips/ralink/mt7620.c b/arch/mips/ralink/mt7620.c index dfb04fcedb04..733768e9877c 100644 --- a/arch/mips/ralink/mt7620.c +++ b/arch/mips/ralink/mt7620.c @@ -107,31 +107,31 @@ static struct rt2880_pmx_group mt7620a_pinmux_data[] = { }; static struct rt2880_pmx_func pwm1_grp_mt7628[] = { - FUNC("sdcx", 3, 19, 1), + FUNC("sdxc d6", 3, 19, 1), FUNC("utif", 2, 19, 1), FUNC("gpio", 1, 19, 1), - FUNC("pwm", 0, 19, 1), + FUNC("pwm1", 0, 19, 1), }; static struct rt2880_pmx_func pwm0_grp_mt7628[] = { - FUNC("sdcx", 3, 18, 1), + FUNC("sdxc d7", 3, 18, 1), FUNC("utif", 2, 18, 1), FUNC("gpio", 1, 18, 1), - FUNC("pwm", 0, 18, 1), + FUNC("pwm0", 0, 18, 1), }; static struct rt2880_pmx_func uart2_grp_mt7628[] = { - FUNC("sdcx", 3, 20, 2), + FUNC("sdxc d5 d4", 3, 20, 2), FUNC("pwm", 2, 20, 2), FUNC("gpio", 1, 20, 2), - FUNC("uart", 0, 20, 2), + FUNC("uart2", 0, 20, 2), }; static struct rt2880_pmx_func uart1_grp_mt7628[] = { - FUNC("sdcx", 3, 45, 2), + FUNC("sw_r", 3, 45, 2), FUNC("pwm", 2, 45, 2), FUNC("gpio", 1, 45, 2), - FUNC("uart", 0, 45, 2), + FUNC("uart1", 0, 45, 2), }; static struct rt2880_pmx_func i2c_grp_mt7628[] = { @@ -143,21 +143,21 @@ static struct rt2880_pmx_func i2c_grp_mt7628[] = { static struct rt2880_pmx_func refclk_grp_mt7628[] = { FUNC("reclk", 0, 36, 1) }; static struct rt2880_pmx_func perst_grp_mt7628[] = { FUNC("perst", 0, 37, 1) }; -static struct rt2880_pmx_func wdt_grp_mt7628[] = { FUNC("wdt", 0, 15, 38) }; +static struct rt2880_pmx_func wdt_grp_mt7628[] = { FUNC("wdt", 0, 38, 1) }; static struct rt2880_pmx_func spi_grp_mt7628[] = { FUNC("spi", 0, 7, 4) }; static struct rt2880_pmx_func sd_mode_grp_mt7628[] = { FUNC("jtag", 3, 22, 8), FUNC("utif", 2, 22, 8), FUNC("gpio", 1, 22, 8), - FUNC("sdcx", 0, 22, 8), + FUNC("sdxc", 0, 22, 8), }; static struct rt2880_pmx_func uart0_grp_mt7628[] = { FUNC("-", 3, 12, 2), FUNC("-", 2, 12, 2), FUNC("gpio", 1, 12, 2), - FUNC("uart", 0, 12, 2), + FUNC("uart0", 0, 12, 2), }; static struct rt2880_pmx_func i2s_grp_mt7628[] = { @@ -171,7 +171,7 @@ static struct rt2880_pmx_func spi_cs1_grp_mt7628[] = { FUNC("-", 3, 6, 1), FUNC("refclk", 2, 6, 1), FUNC("gpio", 1, 6, 1), - FUNC("spi", 0, 6, 1), + FUNC("spi cs1", 0, 6, 1), }; static struct rt2880_pmx_func spis_grp_mt7628[] = { @@ -188,28 +188,44 @@ static struct rt2880_pmx_func gpio_grp_mt7628[] = { FUNC("gpio", 0, 11, 1), }; -#define MT7628_GPIO_MODE_MASK 0x3 +static struct rt2880_pmx_func wled_kn_grp_mt7628[] = { + FUNC("rsvd", 3, 35, 1), + FUNC("rsvd", 2, 35, 1), + FUNC("gpio", 1, 35, 1), + FUNC("wled_kn", 0, 35, 1), +}; -#define MT7628_GPIO_MODE_PWM1 30 -#define MT7628_GPIO_MODE_PWM0 28 -#define MT7628_GPIO_MODE_UART2 26 -#define MT7628_GPIO_MODE_UART1 24 -#define MT7628_GPIO_MODE_I2C 20 -#define MT7628_GPIO_MODE_REFCLK 18 -#define MT7628_GPIO_MODE_PERST 16 -#define MT7628_GPIO_MODE_WDT 14 -#define MT7628_GPIO_MODE_SPI 12 -#define MT7628_GPIO_MODE_SDMODE 10 -#define MT7628_GPIO_MODE_UART0 8 -#define MT7628_GPIO_MODE_I2S 6 -#define MT7628_GPIO_MODE_CS1 4 -#define MT7628_GPIO_MODE_SPIS 2 -#define MT7628_GPIO_MODE_GPIO 0 +static struct rt2880_pmx_func wled_an_grp_mt7628[] = { + FUNC("rsvd", 3, 35, 1), + FUNC("rsvd", 2, 35, 1), + FUNC("gpio", 1, 35, 1), + FUNC("wled_an", 0, 35, 1), +}; + +#define MT7628_GPIO_MODE_MASK 0x3 + +#define MT7628_GPIO_MODE_WLED_KN 48 +#define MT7628_GPIO_MODE_WLED_AN 32 +#define MT7628_GPIO_MODE_PWM1 30 +#define MT7628_GPIO_MODE_PWM0 28 +#define MT7628_GPIO_MODE_UART2 26 +#define MT7628_GPIO_MODE_UART1 24 +#define MT7628_GPIO_MODE_I2C 20 +#define MT7628_GPIO_MODE_REFCLK 18 +#define MT7628_GPIO_MODE_PERST 16 +#define MT7628_GPIO_MODE_WDT 14 +#define MT7628_GPIO_MODE_SPI 12 +#define MT7628_GPIO_MODE_SDMODE 10 +#define MT7628_GPIO_MODE_UART0 8 +#define MT7628_GPIO_MODE_I2S 6 +#define MT7628_GPIO_MODE_CS1 4 +#define MT7628_GPIO_MODE_SPIS 2 +#define MT7628_GPIO_MODE_GPIO 0 static struct rt2880_pmx_group mt7628an_pinmux_data[] = { GRP_G("pmw1", pwm1_grp_mt7628, MT7628_GPIO_MODE_MASK, 1, MT7628_GPIO_MODE_PWM1), - GRP_G("pmw1", pwm0_grp_mt7628, MT7628_GPIO_MODE_MASK, + GRP_G("pmw0", pwm0_grp_mt7628, MT7628_GPIO_MODE_MASK, 1, MT7628_GPIO_MODE_PWM0), GRP_G("uart2", uart2_grp_mt7628, MT7628_GPIO_MODE_MASK, 1, MT7628_GPIO_MODE_UART2), @@ -233,6 +249,10 @@ static struct rt2880_pmx_group mt7628an_pinmux_data[] = { 1, MT7628_GPIO_MODE_SPIS), GRP_G("gpio", gpio_grp_mt7628, MT7628_GPIO_MODE_MASK, 1, MT7628_GPIO_MODE_GPIO), + GRP_G("wled_an", wled_an_grp_mt7628, MT7628_GPIO_MODE_MASK, + 1, MT7628_GPIO_MODE_WLED_AN), + GRP_G("wled_kn", wled_kn_grp_mt7628, MT7628_GPIO_MODE_MASK, + 1, MT7628_GPIO_MODE_WLED_KN), { 0 } }; From ad310161f513b4ad8cf82eb4fc3de990e9412270 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Mon, 4 Jan 2016 20:23:57 +0100 Subject: [PATCH 041/100] MIPS: ralink: fix USB frequency scaling commit fad2522272ed5ed451d2d7b1dc547ddf3781cc7e upstream. Commit 418d29c87061 ("MIPS: ralink: Unify SoC id handling") was not fully correct. The logic for the SoC check got inverted. We need to check if it is not a MT76x8. Signed-off-by: John Crispin Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/11992/ Signed-off-by: Ralf Baechle Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- arch/mips/ralink/mt7620.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/ralink/mt7620.c b/arch/mips/ralink/mt7620.c index 733768e9877c..4c17dc6e8ae9 100644 --- a/arch/mips/ralink/mt7620.c +++ b/arch/mips/ralink/mt7620.c @@ -459,7 +459,7 @@ void __init ralink_clk_init(void) ralink_clk_add("10000c00.uartlite", periph_rate); ralink_clk_add("10180000.wmac", xtal_rate); - if (IS_ENABLED(CONFIG_USB) && is_mt76x8()) { + if (IS_ENABLED(CONFIG_USB) && !is_mt76x8()) { /* * When the CPU goes into sleep mode, the BUS clock will be * too low for USB to function properly. Adjust the busses From c9336bbdd924bdd4c13300f17357d99c7a9387a4 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Mon, 4 Jan 2016 20:23:58 +0100 Subject: [PATCH 042/100] MIPS: ralink: Fix invalid assignment of SoC type commit 0af3a40f09a2a85089037a0b5b51471fa48b229e upstream. Commit 418d29c87061 ("MIPS: ralink: Unify SoC id handling") introduced broken code. We obviously need to assign the value. Signed-off-by: John Crispin Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/11993/ Signed-off-by: Ralf Baechle Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- arch/mips/ralink/rt288x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/ralink/rt288x.c b/arch/mips/ralink/rt288x.c index 15506a1ff22a..9dd67749c592 100644 --- a/arch/mips/ralink/rt288x.c +++ b/arch/mips/ralink/rt288x.c @@ -109,5 +109,5 @@ void prom_soc_init(struct ralink_soc_info *soc_info) soc_info->mem_size_max = RT2880_MEM_SIZE_MAX; rt2880_pinmux_data = rt2880_pinmux_data_act; - ralink_soc == RT2880_SOC; + ralink_soc = RT2880_SOC; } From cd1fe5c31e912f2e4df403b852f8e8142543d32a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Fern=C3=A1ndez=20Rojas?= Date: Thu, 19 May 2016 22:07:34 +0200 Subject: [PATCH 043/100] MIPS: ralink: fix MT7628 pinmux typos MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d7146829c9da24e285cb1b1f2156b5b3e2d40c07 upstream. Signed-off-by: Álvaro Fernández Rojas Cc: john@phrozen.org Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/13306/ Signed-off-by: Ralf Baechle Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- arch/mips/ralink/mt7620.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/mips/ralink/mt7620.c b/arch/mips/ralink/mt7620.c index 4c17dc6e8ae9..37cfc7d3c185 100644 --- a/arch/mips/ralink/mt7620.c +++ b/arch/mips/ralink/mt7620.c @@ -223,9 +223,9 @@ static struct rt2880_pmx_func wled_an_grp_mt7628[] = { #define MT7628_GPIO_MODE_GPIO 0 static struct rt2880_pmx_group mt7628an_pinmux_data[] = { - GRP_G("pmw1", pwm1_grp_mt7628, MT7628_GPIO_MODE_MASK, + GRP_G("pwm1", pwm1_grp_mt7628, MT7628_GPIO_MODE_MASK, 1, MT7628_GPIO_MODE_PWM1), - GRP_G("pmw0", pwm0_grp_mt7628, MT7628_GPIO_MODE_MASK, + GRP_G("pwm0", pwm0_grp_mt7628, MT7628_GPIO_MODE_MASK, 1, MT7628_GPIO_MODE_PWM0), GRP_G("uart2", uart2_grp_mt7628, MT7628_GPIO_MODE_MASK, 1, MT7628_GPIO_MODE_UART2), From e025a30dd8b04971d73091fd3433195572d1c05f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Fern=C3=A1ndez=20Rojas?= Date: Thu, 19 May 2016 22:07:35 +0200 Subject: [PATCH 044/100] MIPS: ralink: fix MT7628 wled_an pinmux gpio MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 07b50db6e685172a41b9978aebffb2438166d9b6 upstream. Signed-off-by: Álvaro Fernández Rojas Cc: john@phrozen.org Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/13307/ Signed-off-by: Ralf Baechle Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- arch/mips/ralink/mt7620.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/mips/ralink/mt7620.c b/arch/mips/ralink/mt7620.c index 37cfc7d3c185..48d6349fd9d7 100644 --- a/arch/mips/ralink/mt7620.c +++ b/arch/mips/ralink/mt7620.c @@ -196,10 +196,10 @@ static struct rt2880_pmx_func wled_kn_grp_mt7628[] = { }; static struct rt2880_pmx_func wled_an_grp_mt7628[] = { - FUNC("rsvd", 3, 35, 1), - FUNC("rsvd", 2, 35, 1), - FUNC("gpio", 1, 35, 1), - FUNC("wled_an", 0, 35, 1), + FUNC("rsvd", 3, 44, 1), + FUNC("rsvd", 2, 44, 1), + FUNC("gpio", 1, 44, 1), + FUNC("wled_an", 0, 44, 1), }; #define MT7628_GPIO_MODE_MASK 0x3 From 11e4bb957f1aa2adf5d746b00ee0e2ab7602a3be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Sat, 5 Dec 2015 02:09:43 +0100 Subject: [PATCH 045/100] mtd: bcm47xxpart: limit scanned flash area on BCM47XX (MIPS) only MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 2a36a5c30eab9cd1c9d2d08bd27cd763325d70c5 upstream. We allowed using bcm47xxpart on BCM5301X arch with commit: 9e3afa5f5c7 ("mtd: bcm47xxpart: allow enabling on ARCH_BCM_5301X") BCM5301X devices may contain some partitions in higher memory, e.g. Netgear R8000 has board_data at 0x2600000. To detect them we should use size limit on MIPS only. Signed-off-by: Rafał Miłecki Signed-off-by: Brian Norris Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/bcm47xxpart.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/bcm47xxpart.c b/drivers/mtd/bcm47xxpart.c index 5abab8800891..546170a77466 100644 --- a/drivers/mtd/bcm47xxpart.c +++ b/drivers/mtd/bcm47xxpart.c @@ -118,8 +118,8 @@ static int bcm47xxpart_parse(struct mtd_info *master, /* Parse block by block looking for magics */ for (offset = 0; offset <= master->size - blocksize; offset += blocksize) { - /* Nothing more in higher memory */ - if (offset >= 0x2000000) + /* Nothing more in higher memory on BCM47XX (MIPS) */ + if (config_enabled(CONFIG_BCM47XX) && offset >= 0x2000000) break; if (curr_part >= BCM47XXPART_MAX_PARTS) { From 1124701061d8cb3f0c2906a8d23288114e046ebf Mon Sep 17 00:00:00 2001 From: wangweidong Date: Wed, 13 Jan 2016 11:06:41 +0800 Subject: [PATCH 046/100] bgmac: fix a missing check for build_skb commit f1640c3ddeec12804bc9a21feee85fc15aca95f6 upstream. when build_skb failed, it may occure a NULL pointer. So add a 'NULL check' for it. Signed-off-by: Weidong Wang Signed-off-by: David S. Miller Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bgmac.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index b56c9c581359..b7e10e1b9131 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -469,6 +469,11 @@ static int bgmac_dma_rx_read(struct bgmac *bgmac, struct bgmac_dma_ring *ring, len -= ETH_FCS_LEN; skb = build_skb(buf, BGMAC_RX_ALLOC_SIZE); + if (unlikely(skb)) { + bgmac_err(bgmac, "build_skb failed\n"); + put_page(virt_to_head_page(buf)); + break; + } skb_put(skb, BGMAC_RX_FRAME_OFFSET + BGMAC_RX_BUF_OFFSET + len); skb_pull(skb, BGMAC_RX_FRAME_OFFSET + From ebfa83ab5a53a1a904c9b4196a43d5e2fc6dc13d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Sun, 6 Dec 2015 11:31:38 +0100 Subject: [PATCH 047/100] mtd: bcm47xxpart: don't fail because of bit-flips MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 36bcc0c9c2bc8f56569cd735ba531a51358d7c2b upstream. Bit-flip errors may occur on NAND flashes and are harmless. Handle them gracefully as read content is still reliable and can be parsed. Signed-off-by: Rafał Miłecki Signed-off-by: Brian Norris Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/bcm47xxpart.c | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/drivers/mtd/bcm47xxpart.c b/drivers/mtd/bcm47xxpart.c index 546170a77466..9190057535e6 100644 --- a/drivers/mtd/bcm47xxpart.c +++ b/drivers/mtd/bcm47xxpart.c @@ -66,11 +66,13 @@ static const char *bcm47xxpart_trx_data_part_name(struct mtd_info *master, { uint32_t buf; size_t bytes_read; + int err; - if (mtd_read(master, offset, sizeof(buf), &bytes_read, - (uint8_t *)&buf) < 0) { - pr_err("mtd_read error while parsing (offset: 0x%X)!\n", - offset); + err = mtd_read(master, offset, sizeof(buf), &bytes_read, + (uint8_t *)&buf); + if (err && !mtd_is_bitflip(err)) { + pr_err("mtd_read error while parsing (offset: 0x%X): %d\n", + offset, err); goto out_default; } @@ -95,6 +97,7 @@ static int bcm47xxpart_parse(struct mtd_info *master, int trx_part = -1; int last_trx_part = -1; int possible_nvram_sizes[] = { 0x8000, 0xF000, 0x10000, }; + int err; /* * Some really old flashes (like AT45DB*) had smaller erasesize-s, but @@ -128,10 +131,11 @@ static int bcm47xxpart_parse(struct mtd_info *master, } /* Read beginning of the block */ - if (mtd_read(master, offset, BCM47XXPART_BYTES_TO_READ, - &bytes_read, (uint8_t *)buf) < 0) { - pr_err("mtd_read error while parsing (offset: 0x%X)!\n", - offset); + err = mtd_read(master, offset, BCM47XXPART_BYTES_TO_READ, + &bytes_read, (uint8_t *)buf); + if (err && !mtd_is_bitflip(err)) { + pr_err("mtd_read error while parsing (offset: 0x%X): %d\n", + offset, err); continue; } @@ -252,10 +256,11 @@ static int bcm47xxpart_parse(struct mtd_info *master, } /* Read middle of the block */ - if (mtd_read(master, offset + 0x8000, 0x4, - &bytes_read, (uint8_t *)buf) < 0) { - pr_err("mtd_read error while parsing (offset: 0x%X)!\n", - offset); + err = mtd_read(master, offset + 0x8000, 0x4, &bytes_read, + (uint8_t *)buf); + if (err && !mtd_is_bitflip(err)) { + pr_err("mtd_read error while parsing (offset: 0x%X): %d\n", + offset, err); continue; } @@ -275,10 +280,11 @@ static int bcm47xxpart_parse(struct mtd_info *master, } offset = master->size - possible_nvram_sizes[i]; - if (mtd_read(master, offset, 0x4, &bytes_read, - (uint8_t *)buf) < 0) { - pr_err("mtd_read error while reading at offset 0x%X!\n", - offset); + err = mtd_read(master, offset, 0x4, &bytes_read, + (uint8_t *)buf); + if (err && !mtd_is_bitflip(err)) { + pr_err("mtd_read error while reading (offset 0x%X): %d\n", + offset, err); continue; } From 992048f8ae84df92373e5424fd3fcc3748ac5d87 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 15 Jan 2016 16:07:13 -0500 Subject: [PATCH 048/100] bgmac: Fix reversed test of build_skb() return value. commit 750afbf8ee9c6a1c74a1fe5fc9852146b1d72687 upstream. Fixes: f1640c3ddeec ("bgmac: fix a missing check for build_skb") Signed-off-by: David S. Miller Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bgmac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index b7e10e1b9131..20356755b708 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -469,7 +469,7 @@ static int bgmac_dma_rx_read(struct bgmac *bgmac, struct bgmac_dma_ring *ring, len -= ETH_FCS_LEN; skb = build_skb(buf, BGMAC_RX_ALLOC_SIZE); - if (unlikely(skb)) { + if (unlikely(!skb)) { bgmac_err(bgmac, "build_skb failed\n"); put_page(virt_to_head_page(buf)); break; From f01babed64e64f2043e163c3f071aae783d135d8 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 23 Jun 2016 14:23:12 -0700 Subject: [PATCH 049/100] net: bgmac: Fix SOF bit checking commit d2b13233879ca1268a1c027d4573109e5a777811 upstream. We are checking for the Start of Frame bit in the ctl1 word, while this bit is set in the ctl0 word instead. Read the ctl0 word and update the check to verify that. Fixes: 9cde94506eac ("bgmac: implement scatter/gather support") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bgmac.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index 20356755b708..50de83bfcc2e 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -255,15 +255,16 @@ static void bgmac_dma_tx_free(struct bgmac *bgmac, struct bgmac_dma_ring *ring) while (ring->start != ring->end) { int slot_idx = ring->start % BGMAC_TX_RING_SLOTS; struct bgmac_slot_info *slot = &ring->slots[slot_idx]; - u32 ctl1; + u32 ctl0, ctl1; int len; if (slot_idx == empty_slot) break; + ctl0 = le32_to_cpu(ring->cpu_base[slot_idx].ctl0); ctl1 = le32_to_cpu(ring->cpu_base[slot_idx].ctl1); len = ctl1 & BGMAC_DESC_CTL1_LEN; - if (ctl1 & BGMAC_DESC_CTL0_SOF) + if (ctl0 & BGMAC_DESC_CTL0_SOF) /* Unmap no longer used buffer */ dma_unmap_single(dma_dev, slot->dma_addr, len, DMA_TO_DEVICE); From e66647f0e1ff59a3a335112d6cef70b21e8094c1 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 23 Jun 2016 14:25:32 -0700 Subject: [PATCH 050/100] net: bgmac: Start transmit queue in bgmac_open commit c3897f2a69e54dd113fc9abd2daf872e5b495798 upstream. The driver does not start the transmit queue in bgmac_open(). If the queue was stopped prior to closing then re-opening the interface, we would never be able to wake-up again. Fixes: dd4544f05469 ("bgmac: driver for GBit MAC core on BCMA bus") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bgmac.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index 50de83bfcc2e..a01f62ec12bb 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -1309,6 +1309,9 @@ static int bgmac_open(struct net_device *net_dev) phy_start(bgmac->phy_dev); netif_carrier_on(net_dev); + + netif_start_queue(net_dev); + return 0; } From e1db592de73543db89d248c69b5b239be0ee5d2d Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 23 Jun 2016 14:25:33 -0700 Subject: [PATCH 051/100] net: bgmac: Remove superflous netif_carrier_on() commit 3894396e64994f31c3ef5c7e6f63dded0593e567 upstream. bgmac_open() calls phy_start() to initialize the PHY state machine, which will set the interface's carrier state accordingly, no need to force that as this could be conflicting with the PHY state determined by PHYLIB. Fixes: dd4544f05469 ("bgmac: driver for GBit MAC core on BCMA bus") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Amit Pundir Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/bgmac.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index a01f62ec12bb..70da30095b89 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -1308,8 +1308,6 @@ static int bgmac_open(struct net_device *net_dev) phy_start(bgmac->phy_dev); - netif_carrier_on(net_dev); - netif_start_queue(net_dev); return 0; From 477a2359c881e0ce4c4039cf28669de07e269fa5 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 6 Jan 2017 10:39:49 +1100 Subject: [PATCH 052/100] powerpc/eeh: Enable IO path on permanent error [ Upstream commit 387bbc974f6adf91aa635090f73434ed10edd915 ] We give up recovery on permanent error, simply shutdown the affected devices and remove them. If the devices can't be put into quiet state, they spew more traffic that is likely to cause another unexpected EEH error. This was observed on "p8dtu2u" machine: 0002:00:00.0 PCI bridge: IBM Device 03dc 0002:01:00.0 Ethernet controller: Intel Corporation \ Ethernet Controller X710/X557-AT 10GBASE-T (rev 02) 0002:01:00.1 Ethernet controller: Intel Corporation \ Ethernet Controller X710/X557-AT 10GBASE-T (rev 02) 0002:01:00.2 Ethernet controller: Intel Corporation \ Ethernet Controller X710/X557-AT 10GBASE-T (rev 02) 0002:01:00.3 Ethernet controller: Intel Corporation \ Ethernet Controller X710/X557-AT 10GBASE-T (rev 02) On P8 PowerNV platform, the IO path is frozen when shutdowning the devices, meaning the memory registers are inaccessible. It is why the devices can't be put into quiet state before removing them. This fixes the issue by enabling IO path prior to putting the devices into quiet state. Reported-by: Pridhiviraj Paidipeddi Signed-off-by: Gavin Shan Acked-by: Russell Currey Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/eeh.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 98949b0df00a..6696c1986844 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -304,9 +304,17 @@ void eeh_slot_error_detail(struct eeh_pe *pe, int severity) * * For pHyp, we have to enable IO for log retrieval. Otherwise, * 0xFF's is always returned from PCI config space. + * + * When the @severity is EEH_LOG_PERM, the PE is going to be + * removed. Prior to that, the drivers for devices included in + * the PE will be closed. The drivers rely on working IO path + * to bring the devices to quiet state. Otherwise, PCI traffic + * from those devices after they are removed is like to cause + * another unexpected EEH error. */ if (!(pe->type & EEH_PE_PHB)) { - if (eeh_has_flag(EEH_ENABLE_IO_FOR_LOG)) + if (eeh_has_flag(EEH_ENABLE_IO_FOR_LOG) || + severity == EEH_LOG_PERM) eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); /* From 961efcd54e5be36326d809a5b3c934c022e5b024 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Jan 2017 19:44:42 -0800 Subject: [PATCH 053/100] gianfar: Do not reuse pages from emergency reserve [ Upstream commit 69fed99baac186013840ced3524562841296034f ] A driver using dev_alloc_page() must not reuse a page that had to use emergency memory reserve. Otherwise all packets using this page will be immediately dropped, unless for very specific sockets having SOCK_MEMALLOC bit set. This issue might be hard to debug, because only a fraction of the RX ring buffer would suffer from drops. Fixes: 75354148ce69 ("gianfar: Add paged allocation and Rx S/G") Signed-off-by: Eric Dumazet Cc: Claudiu Manoil Acked-by: Claudiu Manoil Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/freescale/gianfar.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 6a061f17a44f..4cd2a7d0124f 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -2939,7 +2939,7 @@ static bool gfar_add_rx_frag(struct gfar_rx_buff *rxb, u32 lstatus, size, GFAR_RXB_TRUESIZE); /* try reuse page */ - if (unlikely(page_count(page) != 1)) + if (unlikely(page_count(page) != 1 || page_is_pfmemalloc(page))) return false; /* change offset to the other half */ From 6e1116a0b3e23b1c0d8a7aa9a365de603d2dc3fe Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Thu, 1 Dec 2016 13:43:31 -0800 Subject: [PATCH 054/100] Btrfs: fix truncate down when no_holes feature is enabled [ Upstream commit 91298eec05cd8d4e828cf7ee5d4a6334f70cf69a ] For such a file mapping, [0-4k][hole][8k-12k] In NO_HOLES mode, we don't have the [hole] extent any more. Commit c1aa45759e90 ("Btrfs: fix shrinking truncate when the no_holes feature is enabled") fixed disk isize not being updated in NO_HOLES mode when data is not flushed. However, even if data has been flushed, we can still have trouble in updating disk isize since we updated disk isize to 'start' of the last evicted extent. Reviewed-by: Chris Mason Signed-off-by: Liu Bo Signed-off-by: David Sterba Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 863fa0f1972b..a61926cb01c0 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4397,8 +4397,19 @@ search_again: if (found_type > min_type) { del_item = 1; } else { - if (item_end < new_size) + if (item_end < new_size) { + /* + * With NO_HOLES mode, for the following mapping + * + * [0-4k][hole][8k-12k] + * + * if truncating isize down to 6k, it ends up + * isize being 8k. + */ + if (btrfs_fs_incompat(root->fs_info, NO_HOLES)) + last_size = new_size; break; + } if (found_key.offset >= new_size) del_item = 1; else From 23c7f01691a131c4d29c9d7d00f89d888f2b008a Mon Sep 17 00:00:00 2001 From: "G. Campana" Date: Thu, 19 Jan 2017 23:37:46 +0200 Subject: [PATCH 055/100] virtio_console: fix a crash in config_work_handler [ Upstream commit 8379cadf71c3ee8173a1c6fc1ea7762a9638c047 ] Using control_work instead of config_work as the 3rd argument to container_of results in an invalid portdev pointer. Indeed, the work structure is initialized as below: INIT_WORK(&portdev->config_work, &config_work_handler); It leads to a crash when portdev->vdev is dereferenced later. This bug is triggered when the guest uses a virtio-console without multiport feature and receives a config_changed virtio interrupt. Signed-off-by: G. Campana Reviewed-by: Amit Shah Signed-off-by: Michael S. Tsirkin Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/char/virtio_console.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 31e8ae916ba0..be0b09a0fb44 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -1864,7 +1864,7 @@ static void config_work_handler(struct work_struct *work) { struct ports_device *portdev; - portdev = container_of(work, struct ports_device, control_work); + portdev = container_of(work, struct ports_device, config_work); if (!use_multiport(portdev)) { struct virtio_device *vdev; struct port *port; From 8df98ff6c39491360c6bdad55729e237da9b1d94 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Thu, 19 Jan 2017 10:39:09 -0800 Subject: [PATCH 056/100] swiotlb-xen: update dev_addr after swapping pages [ Upstream commit f1225ee4c8fcf09afaa199b8b1f0450f38b8cd11 ] In xen_swiotlb_map_page and xen_swiotlb_map_sg_attrs, if the original page is not suitable, we swap it for another page from the swiotlb pool. In these cases, we don't update the previously calculated dma address for the page before calling xen_dma_map_page. Thus, we end up calling xen_dma_map_page passing the wrong dev_addr, resulting in xen_dma_map_page mistakenly assuming that the page is foreign when it is local. Fix the bug by updating dev_addr appropriately. This change has no effect on x86, because xen_dma_map_page is a stub there. Signed-off-by: Stefano Stabellini Signed-off-by: Pooya Keshavarzi Tested-by: Pooya Keshavarzi Reviewed-by: Boris Ostrovsky Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/xen/swiotlb-xen.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 7399782c0998..8a58bbc14de2 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -409,9 +409,9 @@ dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page, if (map == SWIOTLB_MAP_ERROR) return DMA_ERROR_CODE; + dev_addr = xen_phys_to_bus(map); xen_dma_map_page(dev, pfn_to_page(map >> PAGE_SHIFT), dev_addr, map & ~PAGE_MASK, size, dir, attrs); - dev_addr = xen_phys_to_bus(map); /* * Ensure that the address returned is DMA'ble @@ -567,13 +567,14 @@ xen_swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, sg_dma_len(sgl) = 0; return 0; } + dev_addr = xen_phys_to_bus(map); xen_dma_map_page(hwdev, pfn_to_page(map >> PAGE_SHIFT), dev_addr, map & ~PAGE_MASK, sg->length, dir, attrs); - sg->dma_address = xen_phys_to_bus(map); + sg->dma_address = dev_addr; } else { /* we are not interested in the dma_addr returned by * xen_dma_map_page, only in the potential cache flushes executed From 230fe9c7d8141d846ee9d30fd67204a44fc53943 Mon Sep 17 00:00:00 2001 From: Vineeth Remanan Pillai Date: Thu, 19 Jan 2017 08:35:39 -0800 Subject: [PATCH 057/100] xen-netfront: Fix Rx stall during network stress and OOM [ Upstream commit 90c311b0eeead647b708a723dbdde1eda3dcad05 ] During an OOM scenario, request slots could not be created as skb allocation fails. So the netback cannot pass in packets and netfront wrongly assumes that there is no more work to be done and it disables polling. This causes Rx to stall. The issue is with the retry logic which schedules the timer if the created slots are less than NET_RX_SLOTS_MIN. The count of new request slots to be pushed are calculated as a difference between new req_prod and rsp_cons which could be more than the actual slots, if there are unconsumed responses. The fix is to calculate the count of newly created slots as the difference between new req_prod and old req_prod. Signed-off-by: Vineeth Remanan Pillai Reviewed-by: Juergen Gross Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/xen-netfront.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 888e9cfef51a..34a062ccb11d 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -321,7 +321,7 @@ static void xennet_alloc_rx_buffers(struct netfront_queue *queue) queue->rx.req_prod_pvt = req_prod; /* Not enough requests? Try again later. */ - if (req_prod - queue->rx.rsp_cons < NET_RX_SLOTS_MIN) { + if (req_prod - queue->rx.sring->req_prod < NET_RX_SLOTS_MIN) { mod_timer(&queue->rx_refill_timer, jiffies + (HZ/10)); return; } From 666c821b0ae39f40aa30ef1de71e4e5083f6a165 Mon Sep 17 00:00:00 2001 From: Eric Farman Date: Fri, 13 Jan 2017 12:48:06 -0500 Subject: [PATCH 058/100] scsi: virtio_scsi: Reject commands when virtqueue is broken [ Upstream commit 773c7220e22d193e5667c352fcbf8d47eefc817f ] In the case of a graceful set of detaches, where the virtio-scsi-ccw disk is removed from the guest prior to the controller, the guest behaves quite normally. Specifically, the detach gets us into sd_sync_cache to issue a Synchronize Cache(10) command, which immediately fails (and is retried a couple of times) because the device has been removed. Later, the removal of the controller sees two CRWs presented, but there's no further indication of the removal from the guest viewpoint. [ 17.217458] sd 0:0:0:0: [sda] Synchronizing SCSI cache [ 17.219257] sd 0:0:0:0: [sda] Synchronize Cache(10) failed: Result: hostbyte=DID_BAD_TARGET driverbyte=DRIVER_OK [ 21.449400] crw_info : CRW reports slct=0, oflw=0, chn=1, rsc=3, anc=0, erc=4, rsid=2 [ 21.449406] crw_info : CRW reports slct=0, oflw=0, chn=0, rsc=3, anc=0, erc=4, rsid=0 However, on s390, the SCSI disks can be removed "by surprise" when an entire controller (host) is removed and all associated disks are removed via the loop in scsi_forget_host. The same call to sd_sync_cache is made, but because the controller has already been removed, the Synchronize Cache(10) command is neither issued (and then failed) nor rejected. That the I/O isn't returned means the guest cannot have other devices added nor removed, and other tasks (such as shutdown or reboot) issued by the guest will not complete either. The virtio ring has already been marked as broken (via virtio_break_device in virtio_ccw_remove), but we still attempt to queue the command only to have it remain there. The calling sequence provides a bit of distinction for us: virtscsi_queuecommand() -> virtscsi_kick_cmd() -> virtscsi_add_cmd() -> virtqueue_add_sgs() -> virtqueue_add() if success return 0 elseif vq->broken or vring_mapping_error() return -EIO else return -ENOSPC A return of ENOSPC is generally a temporary condition, so returning "host busy" from virtscsi_queuecommand makes sense here, to have it redriven in a moment or two. But the EIO return code is more of a permanent error and so it would be wise to return the I/O itself and allow the calling thread to finish gracefully. The result is these four kernel messages in the guest (the fourth one does not occur prior to this patch): [ 22.921562] crw_info : CRW reports slct=0, oflw=0, chn=1, rsc=3, anc=0, erc=4, rsid=2 [ 22.921580] crw_info : CRW reports slct=0, oflw=0, chn=0, rsc=3, anc=0, erc=4, rsid=0 [ 22.921978] sd 0:0:0:0: [sda] Synchronizing SCSI cache [ 22.921993] sd 0:0:0:0: [sda] Synchronize Cache(10) failed: Result: hostbyte=DID_BAD_TARGET driverbyte=DRIVER_OK I opted to fill in the same response data that is returned from the more graceful device detach, where the disk device is removed prior to the controller device. Signed-off-by: Eric Farman Reviewed-by: Fam Zheng Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/virtio_scsi.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index 7dbbb29d24c6..03a2aadf0d3c 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -533,7 +533,9 @@ static int virtscsi_queuecommand(struct virtio_scsi *vscsi, { struct Scsi_Host *shost = virtio_scsi_host(vscsi->vdev); struct virtio_scsi_cmd *cmd = scsi_cmd_priv(sc); + unsigned long flags; int req_size; + int ret; BUG_ON(scsi_sg_count(sc) > shost->sg_tablesize); @@ -561,8 +563,15 @@ static int virtscsi_queuecommand(struct virtio_scsi *vscsi, req_size = sizeof(cmd->req.cmd); } - if (virtscsi_kick_cmd(req_vq, cmd, req_size, sizeof(cmd->resp.cmd)) != 0) + ret = virtscsi_kick_cmd(req_vq, cmd, req_size, sizeof(cmd->resp.cmd)); + if (ret == -EIO) { + cmd->resp.cmd.response = VIRTIO_SCSI_S_BAD_TARGET; + spin_lock_irqsave(&req_vq->vq_lock, flags); + virtscsi_complete_cmd(vscsi, cmd); + spin_unlock_irqrestore(&req_vq->vq_lock, flags); + } else if (ret != 0) { return SCSI_MLQUEUE_HOST_BUSY; + } return 0; } From dac30e3dbd72c0c141de72338b1ca50d7f585771 Mon Sep 17 00:00:00 2001 From: Zach Ploskey Date: Sun, 22 Jan 2017 00:47:19 -0800 Subject: [PATCH 059/100] platform/x86: ideapad-laptop: handle ACPI event 1 [ Upstream commit cfee5d63767b2e7997c1f36420d008abbe61565c ] On Ideapad laptops, ACPI event 1 is currently not handled. Many models log "ideapad_laptop: Unknown event: 1" every 20 seconds or so while running on battery power. Some convertible laptops receive this event when switching in and out of tablet mode. This adds and additional case for event 1 in ideapad_acpi_notify to call ideapad_input_report(priv, vpc_bit), so that the event is reported to userspace and we avoid unnecessary logging. Fixes bug #107481 (https://bugzilla.kernel.org/show_bug.cgi?id=107481) Fixes bug #65751 (https://bugzilla.kernel.org/show_bug.cgi?id=65751) Signed-off-by: Zach Ploskey Signed-off-by: Andy Shevchenko Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/ideapad-laptop.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index be3bc2f4edd4..09cc64b3b695 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -807,6 +807,7 @@ static void ideapad_acpi_notify(acpi_handle handle, u32 event, void *data) case 11: case 7: case 6: + case 1: ideapad_input_report(priv, vpc_bit); break; case 5: From 3d2cbbcbde3d755b012dfce8115b35f53408f648 Mon Sep 17 00:00:00 2001 From: "Lendacky, Thomas" Date: Fri, 20 Jan 2017 12:14:13 -0600 Subject: [PATCH 060/100] amd-xgbe: Check xgbe_init() return code [ Upstream commit 738f7f647371ff4cfc9646c99dba5b58ad142db3 ] The xgbe_init() routine returns a return code indicating success or failure, but the return code is not checked. Add code to xgbe_init() to issue a message when failures are seen and add code to check the xgbe_init() return code. Signed-off-by: Tom Lendacky Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/amd/xgbe/xgbe-dev.c | 4 +++- drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c index 5e6238e0b2bd..75e6e7e6baed 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c @@ -2732,8 +2732,10 @@ static int xgbe_init(struct xgbe_prv_data *pdata) /* Flush Tx queues */ ret = xgbe_flush_tx_queues(pdata); - if (ret) + if (ret) { + netdev_err(pdata->netdev, "error flushing TX queues\n"); return ret; + } /* * Initialize DMA related features diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 865b7e0b133b..64034ff081a0 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -877,7 +877,9 @@ static int xgbe_start(struct xgbe_prv_data *pdata) DBGPR("-->xgbe_start\n"); - hw_if->init(pdata); + ret = hw_if->init(pdata); + if (ret) + return ret; ret = phy_if->phy_start(pdata); if (ret) From 58a766c460b1426ecd4743bb535c530f29628dac Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 20 Jan 2017 16:05:05 -0800 Subject: [PATCH 061/100] net: dsa: Check return value of phy_connect_direct() [ Upstream commit 4078b76cac68e50ccf1f76a74e7d3d5788aec3fe ] We need to check the return value of phy_connect_direct() in dsa_slave_phy_connect() otherwise we may be continuing the initialization of a slave network device with a PHY that already attached somewhere else and which will soon be in error because the PHY device is in error. The conditions for such an error to occur are that we have a port of our switch that is not disabled, and has the same port number as a PHY address (say both 5) that can be probed using the DSA slave MII bus. We end-up having this slave network device find a PHY at the same address as our port number, and we try to attach to it. A slave network (e.g: port 0) has already attached to our PHY device, and we try to re-attach it with a different network device, but since we ignore the error we would end-up initializating incorrect device references by the time the slave network interface is opened. The code has been (re)organized several times, making it hard to provide an exact Fixes tag, this is a bugfix nonetheless. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/dsa/slave.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 8dfe9fb7ad36..554c2a961ad5 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1006,10 +1006,8 @@ static int dsa_slave_phy_connect(struct dsa_slave_priv *p, /* Use already configured phy mode */ if (p->phy_interface == PHY_INTERFACE_MODE_NA) p->phy_interface = p->phy->interface; - phy_connect_direct(slave_dev, p->phy, dsa_slave_adjust_link, - p->phy_interface); - - return 0; + return phy_connect_direct(slave_dev, p->phy, dsa_slave_adjust_link, + p->phy_interface); } static int dsa_slave_phy_setup(struct dsa_slave_priv *p, From d3be5e0471abd6b517fca283186b1039c6d337d1 Mon Sep 17 00:00:00 2001 From: Ding Pixel Date: Wed, 18 Jan 2017 17:26:38 +0800 Subject: [PATCH 062/100] drm/amdgpu: check ring being ready before using MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c5f21c9f878b8dcd54d0b9739c025ca73cb4c091 ] Return success when the ring is properly initialized, otherwise return failure. Tonga SRIOV VF doesn't have UVD and VCE engines, the initialization of these IPs is bypassed. The system crashes if application submit IB to their rings which are not ready to use. It could be a common issue if IP having ring buffer is disabled for some reason on specific ASIC, so it should check the ring being ready to use. Bug: amdgpu_test crashes system on Tonga VF. Signed-off-by: Ding Pixel Reviewed-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 25a3e2485cc2..2bc17a907ecf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -124,6 +124,13 @@ int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type, } break; } + + if (!(*out_ring && (*out_ring)->adev)) { + DRM_ERROR("Ring %d is not initialized on IP %d\n", + ring, ip_type); + return -EINVAL; + } + return 0; } From c64f4194a65b0d76a3f8dc0b275b290704b6195a Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Tue, 24 Jan 2017 17:50:26 +0100 Subject: [PATCH 063/100] vfio/spapr: fail tce_iommu_attach_group() when iommu_data is null [ Upstream commit bd00fdf198e2da475a2f4265a83686ab42d998a8 ] The recently added mediated VFIO driver doesn't know about powerpc iommu. It thus doesn't register a struct iommu_table_group in the iommu group upon device creation. The iommu_data pointer hence remains null. This causes a kernel oops when userspace tries to set the iommu type of a container associated with a mediated device to VFIO_SPAPR_TCE_v2_IOMMU. [ 82.585440] mtty mtty: MDEV: Registered [ 87.655522] iommu: Adding device 83b8f4f2-509f-382f-3c1e-e6bfe0fa1001 to group 10 [ 87.655527] vfio_mdev 83b8f4f2-509f-382f-3c1e-e6bfe0fa1001: MDEV: group_id = 10 [ 116.297184] Unable to handle kernel paging request for data at address 0x00000030 [ 116.297389] Faulting instruction address: 0xd000000007870524 [ 116.297465] Oops: Kernel access of bad area, sig: 11 [#1] [ 116.297611] SMP NR_CPUS=2048 [ 116.297611] NUMA [ 116.297627] PowerNV ... [ 116.297954] CPU: 33 PID: 7067 Comm: qemu-system-ppc Not tainted 4.10.0-rc5-mdev-test #8 [ 116.297993] task: c000000e7718b680 task.stack: c000000e77214000 [ 116.298025] NIP: d000000007870524 LR: d000000007870518 CTR: 0000000000000000 [ 116.298064] REGS: c000000e77217990 TRAP: 0300 Not tainted (4.10.0-rc5-mdev-test) [ 116.298103] MSR: 9000000000009033 [ 116.298107] CR: 84004444 XER: 00000000 [ 116.298154] CFAR: c00000000000888c DAR: 0000000000000030 DSISR: 40000000 SOFTE: 1 GPR00: d000000007870518 c000000e77217c10 d00000000787b0ed c000000eed2103c0 GPR04: 0000000000000000 0000000000000000 c000000eed2103e0 0000000f24320000 GPR08: 0000000000000104 0000000000000001 0000000000000000 d0000000078729b0 GPR12: c00000000025b7e0 c00000000fe08400 0000000000000001 000001002d31d100 GPR16: 000001002c22c850 00003ffff315c750 0000000043145680 0000000043141bc0 GPR20: ffffffffffffffed fffffffffffff000 0000000020003b65 d000000007706018 GPR24: c000000f16cf0d98 d000000007706000 c000000003f42980 c000000003f42980 GPR28: c000000f1575ac00 c000000003f429c8 0000000000000000 c000000eed2103c0 [ 116.298504] NIP [d000000007870524] tce_iommu_attach_group+0x10c/0x360 [vfio_iommu_spapr_tce] [ 116.298555] LR [d000000007870518] tce_iommu_attach_group+0x100/0x360 [vfio_iommu_spapr_tce] [ 116.298601] Call Trace: [ 116.298610] [c000000e77217c10] [d000000007870518] tce_iommu_attach_group+0x100/0x360 [vfio_iommu_spapr_tce] (unreliable) [ 116.298671] [c000000e77217cb0] [d0000000077033a0] vfio_fops_unl_ioctl+0x278/0x3e0 [vfio] [ 116.298713] [c000000e77217d40] [c0000000002a3ebc] do_vfs_ioctl+0xcc/0x8b0 [ 116.298745] [c000000e77217de0] [c0000000002a4700] SyS_ioctl+0x60/0xc0 [ 116.298782] [c000000e77217e30] [c00000000000b220] system_call+0x38/0xfc [ 116.298812] Instruction dump: [ 116.298828] 7d3f4b78 409effc8 3d220000 e9298020 3c800140 38a00018 608480c0 e8690028 [ 116.298869] 4800249d e8410018 7c7f1b79 41820230 2fa90000 419e0114 e9090020 [ 116.298914] ---[ end trace 1e10b0ced08b9120 ]--- This patch fixes the oops. Reported-by: Vaibhav Jain Signed-off-by: Greg Kurz Signed-off-by: Alex Williamson Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/vfio/vfio_iommu_spapr_tce.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 1a9f18b40be6..34e4b3ad8b92 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -1163,6 +1163,10 @@ static int tce_iommu_attach_group(void *iommu_data, /* pr_debug("tce_vfio: Attaching group #%u to iommu %p\n", iommu_group_id(iommu_group), iommu_group); */ table_group = iommu_group_get_iommudata(iommu_group); + if (!table_group) { + ret = -ENODEV; + goto unlock_exit; + } if (tce_groups_attached(container) && (!table_group->ops || !table_group->ops->take_ownership || From 00e83abf8e62fa94bea16656f095503b028b07f8 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 23 Jan 2017 21:37:52 +0200 Subject: [PATCH 064/100] virtio_net: fix PAGE_SIZE > 64k [ Upstream commit d0fa28f00052391b5df328f502fbbdd4444938b7 ] I don't have any guests with PAGE_SIZE > 64k but the code seems to be clearly broken in that case as PAGE_SIZE / MERGEABLE_BUFFER_ALIGN will need more than 8 bit and so the code in mergeable_ctx_to_buf_address does not give us the actual true size. Cc: John Fastabend Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/virtio_net.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 7f7c87762bc6..8dfc75250583 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -47,8 +47,16 @@ module_param(gso, bool, 0444); */ DECLARE_EWMA(pkt_len, 1, 64) +/* With mergeable buffers we align buffer address and use the low bits to + * encode its true size. Buffer size is up to 1 page so we need to align to + * square root of page size to ensure we reserve enough bits to encode the true + * size. + */ +#define MERGEABLE_BUFFER_MIN_ALIGN_SHIFT ((PAGE_SHIFT + 1) / 2) + /* Minimum alignment for mergeable packet buffers. */ -#define MERGEABLE_BUFFER_ALIGN max(L1_CACHE_BYTES, 256) +#define MERGEABLE_BUFFER_ALIGN max(L1_CACHE_BYTES, \ + 1 << MERGEABLE_BUFFER_MIN_ALIGN_SHIFT) #define VIRTNET_DRIVER_VERSION "1.0.0" From 8fa301abb31ac72116834f72f369cda083701ec1 Mon Sep 17 00:00:00 2001 From: Balakrishnan Raman Date: Mon, 23 Jan 2017 20:44:33 -0800 Subject: [PATCH 065/100] vxlan: do not age static remote mac entries [ Upstream commit efb5f68f32995c146944a9d4257c3cf8eae2c4a1 ] Mac aging is applicable only for dynamically learnt remote mac entries. Check for user configured static remote mac entries and skip aging. Signed-off-by: Balakrishnan Raman Signed-off-by: Roopa Prabhu Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/vxlan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 9a986ccd42e5..dab3bf6649e6 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2240,7 +2240,7 @@ static void vxlan_cleanup(unsigned long arg) = container_of(p, struct vxlan_fdb, hlist); unsigned long timeout; - if (f->state & NUD_PERMANENT) + if (f->state & (NUD_PERMANENT | NUD_NOARP)) continue; timeout = f->used + vxlan->cfg.age_interval * HZ; From e4272ebefc83eb1238f78c86fdd2cad0a25563bf Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Tue, 24 Jan 2017 07:28:41 +0100 Subject: [PATCH 066/100] ibmveth: Add a proper check for the availability of the checksum features [ Upstream commit 23d28a859fb847fd7fcfbd31acb3b160abb5d6ae ] When using the ibmveth driver in a KVM/QEMU based VM, it currently always prints out a scary error message like this when it is started: ibmveth 71000003 (unregistered net_device): unable to change checksum offload settings. 1 rc=-2 ret_attr=71000003 This happens because the driver always tries to enable the checksum offloading without checking for the availability of this feature first. QEMU does not support checksum offloading for the spapr-vlan device, thus we always get the error message here. According to the LoPAPR specification, the "ibm,illan-options" property of the corresponding device tree node should be checked first to see whether the H_ILLAN_ATTRIUBTES hypercall and thus the checksum offloading feature is available. Thus let's do this in the ibmveth driver, too, so that the error message is really only limited to cases where something goes wrong, and does not occur if the feature is just missing. Signed-off-by: Thomas Huth Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/ibm/ibmveth.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index f9e4988ea30e..2f9b12cf9ee5 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -1602,8 +1602,11 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id) netdev->netdev_ops = &ibmveth_netdev_ops; netdev->ethtool_ops = &netdev_ethtool_ops; SET_NETDEV_DEV(netdev, &dev->dev); - netdev->hw_features = NETIF_F_SG | NETIF_F_RXCSUM | - NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; + netdev->hw_features = NETIF_F_SG; + if (vio_get_attribute(dev, "ibm,illan-options", NULL) != NULL) { + netdev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | + NETIF_F_RXCSUM; + } netdev->features |= netdev->hw_features; From 70f41003b9d18e24ad90b81be5a47a5f32e756d3 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Tue, 24 Jan 2017 15:18:29 -0800 Subject: [PATCH 067/100] kernel/panic.c: add missing \n [ Upstream commit ff7a28a074ccbea999dadbb58c46212cf90984c6 ] When a system panics, the "Rebooting in X seconds.." message is never printed because it lacks a new line. Fix it. Link: http://lkml.kernel.org/r/20170119114751.2724-1-jslaby@suse.cz Signed-off-by: Jiri Slaby Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- kernel/panic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/panic.c b/kernel/panic.c index 41e2b54f36b5..1d07cf9af849 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -167,7 +167,7 @@ void panic(const char *fmt, ...) * Delay timeout seconds before rebooting the machine. * We can't use the "normal" timers since we just panicked. */ - pr_emerg("Rebooting in %d seconds..", panic_timeout); + pr_emerg("Rebooting in %d seconds..\n", panic_timeout); for (i = 0; i < panic_timeout * 1000; i += PANIC_TIMER_STEP) { touch_nmi_watchdog(); From 4ac60b20f703f15a6a0e6607fd2d570510abb99a Mon Sep 17 00:00:00 2001 From: Brendan McGrath Date: Sat, 7 Jan 2017 08:01:38 +1100 Subject: [PATCH 068/100] HID: i2c-hid: Add sleep between POWER ON and RESET [ Upstream commit a89af4abdf9b353cdd6f61afc0eaaac403304873 ] Support for the Asus Touchpad was recently added. It turns out this device can fail initialisation (and become unusable) when the RESET command is sent too soon after the POWER ON command. Unfortunately the i2c-hid specification does not specify the need for a delay between these two commands. But it was discovered the Windows driver has a 1ms delay. As a result, this patch modifies the i2c-hid module to add a sleep inbetween the POWER ON and RESET commands which lasts between 1ms and 5ms. See https://github.com/vlasenko/hid-asus-dkms/issues/24 for further details. Signed-off-by: Brendan McGrath Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/hid/i2c-hid/i2c-hid.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/hid/i2c-hid/i2c-hid.c b/drivers/hid/i2c-hid/i2c-hid.c index 0b80633bae91..d4d655a10df1 100644 --- a/drivers/hid/i2c-hid/i2c-hid.c +++ b/drivers/hid/i2c-hid/i2c-hid.c @@ -364,6 +364,15 @@ static int i2c_hid_hwreset(struct i2c_client *client) if (ret) return ret; + /* + * The HID over I2C specification states that if a DEVICE needs time + * after the PWR_ON request, it should utilise CLOCK stretching. + * However, it has been observered that the Windows driver provides a + * 1ms sleep between the PWR_ON and RESET requests and that some devices + * rely on this. + */ + usleep_range(1000, 5000); + i2c_hid_dbg(ihid, "resetting...\n"); ret = i2c_hid_command(client, &hid_reset_cmd, NULL, 0); From be91b09750bf6c68076b3ce68d234d0d0829d2e0 Mon Sep 17 00:00:00 2001 From: Roberto Sassu Date: Wed, 11 Jan 2017 11:06:42 +0100 Subject: [PATCH 069/100] scsi: lpfc: avoid double free of resource identifiers [ Upstream commit cd60be4916ae689387d04b86b6fc15931e4c95ae ] Set variables initialized in lpfc_sli4_alloc_resource_identifiers() to NULL if an error occurred. Otherwise, lpfc_sli4_driver_resource_unset() attempts to free the memory again. Signed-off-by: Roberto Sassu Signed-off-by: Johannes Thumshirn Acked-by: James Smart Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/lpfc/lpfc_sli.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index f5aeda8f014f..38e90d9c2ced 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -5887,18 +5887,25 @@ lpfc_sli4_alloc_resource_identifiers(struct lpfc_hba *phba) free_vfi_bmask: kfree(phba->sli4_hba.vfi_bmask); + phba->sli4_hba.vfi_bmask = NULL; free_xri_ids: kfree(phba->sli4_hba.xri_ids); + phba->sli4_hba.xri_ids = NULL; free_xri_bmask: kfree(phba->sli4_hba.xri_bmask); + phba->sli4_hba.xri_bmask = NULL; free_vpi_ids: kfree(phba->vpi_ids); + phba->vpi_ids = NULL; free_vpi_bmask: kfree(phba->vpi_bmask); + phba->vpi_bmask = NULL; free_rpi_ids: kfree(phba->sli4_hba.rpi_ids); + phba->sli4_hba.rpi_ids = NULL; free_rpi_bmask: kfree(phba->sli4_hba.rpi_bmask); + phba->sli4_hba.rpi_bmask = NULL; err_exit: return rc; } From 11dd9e2c480324b46118ff708ea2ca8d7022539b Mon Sep 17 00:00:00 2001 From: Kevin Hilman Date: Wed, 11 Jan 2017 18:18:40 -0800 Subject: [PATCH 070/100] spi: davinci: use dma_mapping_error() [ Upstream commit c5a2a394835f473ae23931eda5066d3771d7b2f8 ] The correct error checking for dma_map_single() is to use dma_mapping_error(). Signed-off-by: Kevin Hilman Signed-off-by: Mark Brown Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-davinci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-davinci.c b/drivers/spi/spi-davinci.c index 7d3af3eacf57..1ddba9ae8c0f 100644 --- a/drivers/spi/spi-davinci.c +++ b/drivers/spi/spi-davinci.c @@ -651,7 +651,7 @@ static int davinci_spi_bufs(struct spi_device *spi, struct spi_transfer *t) buf = t->rx_buf; t->rx_dma = dma_map_single(&spi->dev, buf, t->len, DMA_FROM_DEVICE); - if (!t->rx_dma) { + if (dma_mapping_error(&spi->dev, !t->rx_dma)) { ret = -EFAULT; goto err_rx_map; } @@ -665,7 +665,7 @@ static int davinci_spi_bufs(struct spi_device *spi, struct spi_transfer *t) buf = (void *)t->tx_buf; t->tx_dma = dma_map_single(&spi->dev, buf, t->len, DMA_TO_DEVICE); - if (!t->tx_dma) { + if (dma_mapping_error(&spi->dev, t->tx_dma)) { ret = -EFAULT; goto err_tx_map; } From 14339b018bc21d085ab5425709c14fdbe954aa04 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 13 Jan 2017 11:28:25 +0100 Subject: [PATCH 071/100] mac80211: initialize SMPS field in HT capabilities [ Upstream commit 43071d8fb3b7f589d72663c496a6880fb097533c ] ibss and mesh modes copy the ht capabilites from the band without overriding the SMPS state. Unfortunately the default value 0 for the SMPS field means static SMPS instead of disabled. This results in HT ibss and mesh setups using only single-stream rates, even though SMPS is not supposed to be active. Initialize SMPS to disabled for all bands on ieee80211_hw_register to ensure that the value is sane where it is not overriden with the real SMPS state. Reported-by: Elektra Wagenrad Signed-off-by: Felix Fietkau [move VHT TODO comment to a better place] Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/mac80211/main.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 175ffcf7fb06..2ee53dc1ddf7 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -891,12 +891,17 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) supp_ht = supp_ht || sband->ht_cap.ht_supported; supp_vht = supp_vht || sband->vht_cap.vht_supported; - if (sband->ht_cap.ht_supported) - local->rx_chains = - max(ieee80211_mcs_to_chains(&sband->ht_cap.mcs), - local->rx_chains); + if (!sband->ht_cap.ht_supported) + continue; /* TODO: consider VHT for RX chains, hopefully it's the same */ + local->rx_chains = + max(ieee80211_mcs_to_chains(&sband->ht_cap.mcs), + local->rx_chains); + + /* no need to mask, SM_PS_DISABLED has all bits set */ + sband->ht_cap.cap |= WLAN_HT_CAP_SM_PS_DISABLED << + IEEE80211_HT_CAP_SM_PS_SHIFT; } /* if low-level driver supports AP, we also support VLAN */ From c20bdc08af9b120f4b2ea4a8325d8454bd6a3810 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Thu, 12 Jan 2017 16:53:11 +0100 Subject: [PATCH 072/100] x86/mpx: Use compatible types in comparison to fix sparse error [ Upstream commit 453828625731d0ba7218242ef6ec88f59408f368 ] info->si_addr is of type void __user *, so it should be compared against something from the same address space. This fixes the following sparse error: arch/x86/mm/mpx.c:296:27: error: incompatible types in comparison expression (different address spaces) Signed-off-by: Tobias Klauser Cc: Dave Hansen Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/mpx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index ef05755a1900..ec12d7aa9cb2 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -293,7 +293,7 @@ siginfo_t *mpx_generate_siginfo(struct pt_regs *regs) * We were not able to extract an address from the instruction, * probably because there was something invalid in it. */ - if (info->si_addr == (void *)-1) { + if (info->si_addr == (void __user *)-1) { err = -EINVAL; goto err_out; } From 878f37efac3e1c2bd2fef7f402837dfcc9a83734 Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Wed, 11 Jan 2017 13:25:00 -0600 Subject: [PATCH 073/100] coredump: Ensure proper size of sparse core files [ Upstream commit 4d22c75d4c7b5c5f4bd31054f09103ee490878fd ] If the last section of a core file ends with an unmapped or zero page, the size of the file does not correspond with the last dump_skip() call. gdb complains that the file is truncated and can be confusing to users. After all of the vma sections are written, make sure that the file size is no smaller than the current file position. This problem can be demonstrated with gdb's bigcore testcase on the sparc architecture. Signed-off-by: Dave Kleikamp Cc: Alexander Viro Cc: linux-fsdevel@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Al Viro Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- fs/binfmt_elf.c | 1 + fs/coredump.c | 18 ++++++++++++++++++ include/linux/coredump.h | 1 + 3 files changed, 20 insertions(+) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 3a93755e880f..29ef427c0652 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -2295,6 +2295,7 @@ static int elf_core_dump(struct coredump_params *cprm) goto end_coredump; } } + dump_truncate(cprm); if (!elf_core_write_extra_data(cprm)) goto end_coredump; diff --git a/fs/coredump.c b/fs/coredump.c index 5d15c4975ba1..a8852293038a 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -810,3 +810,21 @@ int dump_align(struct coredump_params *cprm, int align) return mod ? dump_skip(cprm, align - mod) : 1; } EXPORT_SYMBOL(dump_align); + +/* + * Ensures that file size is big enough to contain the current file + * postion. This prevents gdb from complaining about a truncated file + * if the last "write" to the file was dump_skip. + */ +void dump_truncate(struct coredump_params *cprm) +{ + struct file *file = cprm->file; + loff_t offset; + + if (file->f_op->llseek && file->f_op->llseek != no_llseek) { + offset = file->f_op->llseek(file, 0, SEEK_CUR); + if (i_size_read(file->f_mapping->host) < offset) + do_truncate(file->f_path.dentry, offset, 0, file); + } +} +EXPORT_SYMBOL(dump_truncate); diff --git a/include/linux/coredump.h b/include/linux/coredump.h index d016a121a8c4..28ffa94aed6b 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -14,6 +14,7 @@ struct coredump_params; extern int dump_skip(struct coredump_params *cprm, size_t nr); extern int dump_emit(struct coredump_params *cprm, const void *addr, int nr); extern int dump_align(struct coredump_params *cprm, int align); +extern void dump_truncate(struct coredump_params *cprm); #ifdef CONFIG_COREDUMP extern void do_coredump(const siginfo_t *siginfo); #else From 9a536d5872460def17d2279f81a4a4d09e36f620 Mon Sep 17 00:00:00 2001 From: Nikita Yushchenko Date: Wed, 11 Jan 2017 21:56:31 +0300 Subject: [PATCH 074/100] swiotlb: ensure that page-sized mappings are page-aligned [ Upstream commit 602d9858f07c72eab64f5f00e2fae55f9902cfbe ] Some drivers do depend on page mappings to be page aligned. Swiotlb already enforces such alignment for mappings greater than page, extend that to page-sized mappings as well. Without this fix, nvme hits BUG() in nvme_setup_prps(), because that routine assumes page-aligned mappings. Signed-off-by: Nikita Yushchenko Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- lib/swiotlb.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 76f29ecba8f4..771234d050c7 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -452,11 +452,11 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT); /* - * For mappings greater than a page, we limit the stride (and - * hence alignment) to a page size. + * For mappings greater than or equal to a page, we limit the stride + * (and hence alignment) to a page size. */ nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; - if (size > PAGE_SIZE) + if (size >= PAGE_SIZE) stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT)); else stride = 1; From 5416a88c2fef39a9d75ef59057625a2002959dff Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 28 Dec 2016 11:33:48 +0100 Subject: [PATCH 075/100] s390/ctl_reg: make __ctl_load a full memory barrier [ Upstream commit e991c24d68b8c0ba297eeb7af80b1e398e98c33f ] We have quite a lot of code that depends on the order of the __ctl_load inline assemby and subsequent memory accesses, like e.g. disabling lowcore protection and the writing to lowcore. Since the __ctl_load macro does not have memory barrier semantics, nor any other dependencies the compiler is, theoretically, free to shuffle code around. Or in other words: storing to lowcore could happen before lowcore protection is disabled. In order to avoid this class of potential bugs simply add a full memory barrier to the __ctl_load macro. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- arch/s390/include/asm/ctl_reg.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h index d7697ab802f6..8e136b88cdf4 100644 --- a/arch/s390/include/asm/ctl_reg.h +++ b/arch/s390/include/asm/ctl_reg.h @@ -15,7 +15,9 @@ BUILD_BUG_ON(sizeof(addrtype) != (high - low + 1) * sizeof(long));\ asm volatile( \ " lctlg %1,%2,%0\n" \ - : : "Q" (*(addrtype *)(&array)), "i" (low), "i" (high));\ + : \ + : "Q" (*(addrtype *)(&array)), "i" (low), "i" (high) \ + : "memory"); \ } #define __ctl_store(array, low, high) { \ From 0593fa8f34a257143de31d8390d8ba72da79b5f1 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Fri, 13 Jan 2017 22:38:27 +0100 Subject: [PATCH 076/100] be2net: fix status check in be_cmd_pmac_add() [ Upstream commit fe68d8bfe59c561664aa87d827aa4b320eb08895 ] Return value from be_mcc_notify_wait() contains a base completion status together with an additional status. The base_status() macro need to be used to access base status. Fixes: e3a7ae2 be2net: Changing MAC Address of a VF was broken Cc: Sathya Perla Cc: Ajit Khaparde Cc: Sriharsha Basavapatna Cc: Somnath Kotur Signed-off-by: Ivan Vecera Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/emulex/benet/be_cmds.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index 1795c935ff02..7b8638ddb673 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -1052,7 +1052,7 @@ int be_cmd_pmac_add(struct be_adapter *adapter, u8 *mac_addr, err: spin_unlock_bh(&adapter->mcc_lock); - if (status == MCC_STATUS_UNAUTHORIZED_REQUEST) + if (base_status(status) == MCC_STATUS_UNAUTHORIZED_REQUEST) status = -EPERM; return status; From 9837392c2b3edd0b818bcc36277ea51b05be25a9 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 11 Jan 2017 14:59:38 +0900 Subject: [PATCH 077/100] perf probe: Fix to show correct locations for events on modules [ Upstream commit d2d4edbebe07ddb77980656abe7b9bc7a9e0cdf7 ] Fix to show correct locations for events on modules by relocating given address instead of retrying after failure. This happens when the module text size is big enough, bigger than sh_addr, because the original code retries with given address + sh_addr if it failed to find CU DIE at the given address. Any address smaller than sh_addr always fails and it retries with the correct address, but addresses bigger than sh_addr will get a CU DIE which is on the given address (not adjusted by sh_addr). In my environment(x86-64), the sh_addr of ".text" section is 0x10030. Since i915 is a huge kernel module, we can see this issue as below. $ grep "[Tt] .*\[i915\]" /proc/kallsyms | sort | head -n1 ffffffffc0270000 t i915_switcheroo_can_switch [i915] ffffffffc0270000 + 0x10030 = ffffffffc0280030, so we'll check symbols cross this boundary. $ grep "[Tt] .*\[i915\]" /proc/kallsyms | grep -B1 ^ffffffffc028\ | head -n 2 ffffffffc027ff80 t haswell_init_clock_gating [i915] ffffffffc0280110 t valleyview_init_clock_gating [i915] So setup probes on both function and see what happen. $ sudo ./perf probe -m i915 -a haswell_init_clock_gating \ -a valleyview_init_clock_gating Added new events: probe:haswell_init_clock_gating (on haswell_init_clock_gating in i915) probe:valleyview_init_clock_gating (on valleyview_init_clock_gating in i915) You can now use it in all perf tools, such as: perf record -e probe:valleyview_init_clock_gating -aR sleep 1 $ sudo ./perf probe -l probe:haswell_init_clock_gating (on haswell_init_clock_gating@gpu/drm/i915/intel_pm.c in i915) probe:valleyview_init_clock_gating (on i915_vga_set_decode:4@gpu/drm/i915/i915_drv.c in i915) As you can see, haswell_init_clock_gating is correctly shown, but valleyview_init_clock_gating is not. With this patch, both events are shown correctly. $ sudo ./perf probe -l probe:haswell_init_clock_gating (on haswell_init_clock_gating@gpu/drm/i915/intel_pm.c in i915) probe:valleyview_init_clock_gating (on valleyview_init_clock_gating@gpu/drm/i915/intel_pm.c in i915) Committer notes: In my case: # perf probe -m i915 -a haswell_init_clock_gating -a valleyview_init_clock_gating Added new events: probe:haswell_init_clock_gating (on haswell_init_clock_gating in i915) probe:valleyview_init_clock_gating (on valleyview_init_clock_gating in i915) You can now use it in all perf tools, such as: perf record -e probe:valleyview_init_clock_gating -aR sleep 1 # perf probe -l probe:haswell_init_clock_gating (on i915_getparam+432@gpu/drm/i915/i915_drv.c in i915) probe:valleyview_init_clock_gating (on __i915_printk+240@gpu/drm/i915/i915_drv.c in i915) # # readelf -SW /lib/modules/4.9.0+/build/vmlinux | egrep -w '.text|Name' [Nr] Name Type Address Off Size ES Flg Lk Inf Al [ 1] .text PROGBITS ffffffff81000000 200000 822fd3 00 AX 0 0 4096 # So both are b0rked, now with the fix: # perf probe -m i915 -a haswell_init_clock_gating -a valleyview_init_clock_gating Added new events: probe:haswell_init_clock_gating (on haswell_init_clock_gating in i915) probe:valleyview_init_clock_gating (on valleyview_init_clock_gating in i915) You can now use it in all perf tools, such as: perf record -e probe:valleyview_init_clock_gating -aR sleep 1 # perf probe -l probe:haswell_init_clock_gating (on haswell_init_clock_gating@gpu/drm/i915/intel_pm.c in i915) probe:valleyview_init_clock_gating (on valleyview_init_clock_gating@gpu/drm/i915/intel_pm.c in i915) # Both looks correct. Signed-off-by: Masami Hiramatsu Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/148411436777.9978.1440275861947194930.stgit@devbox Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/probe-finder.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 05012bb178d7..fdd87c7e3e91 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -1460,16 +1460,12 @@ int debuginfo__find_probe_point(struct debuginfo *dbg, unsigned long addr, Dwarf_Addr _addr = 0, baseaddr = 0; const char *fname = NULL, *func = NULL, *basefunc = NULL, *tmp; int baseline = 0, lineno = 0, ret = 0; - bool reloc = false; -retry: + /* We always need to relocate the address for aranges */ + if (debuginfo__get_text_offset(dbg, &baseaddr) == 0) + addr += baseaddr; /* Find cu die */ if (!dwarf_addrdie(dbg->dbg, (Dwarf_Addr)addr, &cudie)) { - if (!reloc && debuginfo__get_text_offset(dbg, &baseaddr) == 0) { - addr += baseaddr; - reloc = true; - goto retry; - } pr_warning("Failed to find debug information for address %lx\n", addr); ret = -EINVAL; From d521e9c384f9737ac22b76b48c787375f0fb988a Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Mon, 16 Jan 2017 18:31:39 +0200 Subject: [PATCH 078/100] net/mlx4_core: Eliminate warning messages for SRQ_LIMIT under SRIOV [ Upstream commit 9577b174cd0323d287c994ef0891db71666d0765 ] When running SRIOV, warnings for SRQ LIMIT events flood the Hypervisor's message log when (correct, normally operating) apps use SRQ LIMIT events as a trigger to post WQEs to SRQs. Add more information to the existing debug printout for SRQ_LIMIT, and output the warning messages only for the SRQ CATAS ERROR event. Fixes: acba2420f9d2 ("mlx4_core: Add wrapper functions and comm channel and slave event support to EQs") Fixes: e0debf9cb50d ("mlx4_core: Reduce warning message for SRQ_LIMIT event to debug level") Signed-off-by: Jack Morgenstein Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx4/eq.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index 603d1c3d3b2e..ff77b8b608bd 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -542,8 +542,9 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) break; case MLX4_EVENT_TYPE_SRQ_LIMIT: - mlx4_dbg(dev, "%s: MLX4_EVENT_TYPE_SRQ_LIMIT\n", - __func__); + mlx4_dbg(dev, "%s: MLX4_EVENT_TYPE_SRQ_LIMIT. srq_no=0x%x, eq 0x%x\n", + __func__, be32_to_cpu(eqe->event.srq.srqn), + eq->eqn); case MLX4_EVENT_TYPE_SRQ_CATAS_ERROR: if (mlx4_is_master(dev)) { /* forward only to slave owning the SRQ */ @@ -558,15 +559,19 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) eq->eqn, eq->cons_index, ret); break; } - mlx4_warn(dev, "%s: slave:%d, srq_no:0x%x, event: %02x(%02x)\n", - __func__, slave, - be32_to_cpu(eqe->event.srq.srqn), - eqe->type, eqe->subtype); + if (eqe->type == + MLX4_EVENT_TYPE_SRQ_CATAS_ERROR) + mlx4_warn(dev, "%s: slave:%d, srq_no:0x%x, event: %02x(%02x)\n", + __func__, slave, + be32_to_cpu(eqe->event.srq.srqn), + eqe->type, eqe->subtype); if (!ret && slave != dev->caps.function) { - mlx4_warn(dev, "%s: sending event %02x(%02x) to slave:%d\n", - __func__, eqe->type, - eqe->subtype, slave); + if (eqe->type == + MLX4_EVENT_TYPE_SRQ_CATAS_ERROR) + mlx4_warn(dev, "%s: sending event %02x(%02x) to slave:%d\n", + __func__, eqe->type, + eqe->subtype, slave); mlx4_slave_event(dev, slave, eqe); break; } From db1323b77c2b35e5633867fc5e8a79c65f130119 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 7 Feb 2017 20:56:08 +0800 Subject: [PATCH 079/100] sctp: check af before verify address in sctp_addr_id2transport [ Upstream commit 912964eacb111551db73429719eb5fadcab0ff8a ] Commit 6f29a1306131 ("sctp: sctp_addr_id2transport should verify the addr before looking up assoc") invoked sctp_verify_addr to verify the addr. But it didn't check af variable beforehand, once users pass an address with family = 0 through sockopt, sctp_get_af_specific will return NULL and NULL pointer dereference will be caused by af->sockaddr_len. This patch is to fix it by returning NULL if af variable is NULL. Fixes: 6f29a1306131 ("sctp: sctp_addr_id2transport should verify the addr before looking up assoc") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- net/sctp/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 956141b71619..3ebf3b652d60 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -239,7 +239,7 @@ static struct sctp_transport *sctp_addr_id2transport(struct sock *sk, union sctp_addr *laddr = (union sctp_addr *)addr; struct sctp_transport *transport; - if (sctp_verify_addr(sk, laddr, af->sockaddr_len)) + if (!af || sctp_verify_addr(sk, laddr, af->sockaddr_len)) return NULL; addr_asoc = sctp_endpoint_lookup_assoc(sctp_sk(sk)->ep, From 53ae0c2ffef75c947c30c4aab88aed44b3c1b6de Mon Sep 17 00:00:00 2001 From: Eugeniu Rosca Date: Tue, 6 Jun 2017 00:08:10 +0200 Subject: [PATCH 080/100] ravb: Fix use-after-free on `ifconfig eth0 down` [ Upstream commit 79514ef670e9e575a1fe36922268c439d0f0ca8a ] Commit a47b70ea86bd ("ravb: unmap descriptors when freeing rings") has introduced the issue seen in [1] reproduced on H3ULCB board. Fix this by relocating the RX skb ringbuffer free operation, so that swiotlb page unmapping can be done first. Freeing of aligned TX buffers is not relevant to the issue seen in [1]. Still, reposition TX free calls as well, to have all kfree() operations performed consistently _after_ dma_unmap_*()/dma_free_*(). [1] Console screenshot with the problem reproduced: salvator-x login: root root@salvator-x:~# ifconfig eth0 up Micrel KSZ9031 Gigabit PHY e6800000.ethernet-ffffffff:00: \ attached PHY driver [Micrel KSZ9031 Gigabit PHY] \ (mii_bus:phy_addr=e6800000.ethernet-ffffffff:00, irq=235) IPv6: ADDRCONF(NETDEV_UP): eth0: link is not ready root@salvator-x:~# root@salvator-x:~# ifconfig eth0 down ================================================================== BUG: KASAN: use-after-free in swiotlb_tbl_unmap_single+0xc4/0x35c Write of size 1538 at addr ffff8006d884f780 by task ifconfig/1649 CPU: 0 PID: 1649 Comm: ifconfig Not tainted 4.12.0-rc4-00004-g112eb07287d1 #32 Hardware name: Renesas H3ULCB board based on r8a7795 (DT) Call trace: [] dump_backtrace+0x0/0x3a4 [] show_stack+0x14/0x1c [] dump_stack+0xf8/0x150 [] print_address_description+0x7c/0x330 [] kasan_report+0x2e0/0x2f4 [] check_memory_region+0x20/0x14c [] memcpy+0x48/0x68 [] swiotlb_tbl_unmap_single+0xc4/0x35c [] unmap_single+0x90/0xa4 [] swiotlb_unmap_page+0xc/0x14 [] __swiotlb_unmap_page+0xcc/0xe4 [] ravb_ring_free+0x514/0x870 [] ravb_close+0x288/0x36c [] __dev_close_many+0x14c/0x174 [] __dev_close+0xc8/0x144 [] __dev_change_flags+0xd8/0x194 [] dev_change_flags+0x60/0xb0 [] devinet_ioctl+0x484/0x9d4 [] inet_ioctl+0x190/0x194 [] sock_do_ioctl+0x78/0xa8 [] sock_ioctl+0x110/0x3c4 [] vfs_ioctl+0x90/0xa0 [] do_vfs_ioctl+0x148/0xc38 [] SyS_ioctl+0x44/0x74 [] el0_svc_naked+0x24/0x28 The buggy address belongs to the page: page:ffff7e001b6213c0 count:0 mapcount:0 mapping: (null) index:0x0 flags: 0x4000000000000000() raw: 4000000000000000 0000000000000000 0000000000000000 00000000ffffffff raw: 0000000000000000 ffff7e001b6213e0 0000000000000000 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8006d884f680: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ffff8006d884f700: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff >ffff8006d884f780: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ^ ffff8006d884f800: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ffff8006d884f880: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ================================================================== Disabling lock debugging due to kernel taint root@salvator-x:~# Fixes: a47b70ea86bd ("ravb: unmap descriptors when freeing rings") Signed-off-by: Eugeniu Rosca Acked-by: Sergei Shtylyov Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/renesas/ravb_main.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 1e61d4da72db..585e90f8341d 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -221,18 +221,6 @@ static void ravb_ring_free(struct net_device *ndev, int q) int ring_size; int i; - /* Free RX skb ringbuffer */ - if (priv->rx_skb[q]) { - for (i = 0; i < priv->num_rx_ring[q]; i++) - dev_kfree_skb(priv->rx_skb[q][i]); - } - kfree(priv->rx_skb[q]); - priv->rx_skb[q] = NULL; - - /* Free aligned TX buffers */ - kfree(priv->tx_align[q]); - priv->tx_align[q] = NULL; - if (priv->rx_ring[q]) { for (i = 0; i < priv->num_rx_ring[q]; i++) { struct ravb_ex_rx_desc *desc = &priv->rx_ring[q][i]; @@ -261,6 +249,18 @@ static void ravb_ring_free(struct net_device *ndev, int q) priv->tx_ring[q] = NULL; } + /* Free RX skb ringbuffer */ + if (priv->rx_skb[q]) { + for (i = 0; i < priv->num_rx_ring[q]; i++) + dev_kfree_skb(priv->rx_skb[q][i]); + } + kfree(priv->rx_skb[q]); + priv->rx_skb[q] = NULL; + + /* Free aligned TX buffers */ + kfree(priv->tx_align[q]); + priv->tx_align[q] = NULL; + /* Free TX skb ringbuffer. * SKBs are freed by ravb_tx_free() call above. */ From 736b342cc9b4e804fe3ee94af1483b7203cce6cc Mon Sep 17 00:00:00 2001 From: Gleb Fotengauer-Malinovskiy Date: Mon, 20 Mar 2017 20:15:53 +0300 Subject: [PATCH 081/100] jump label: fix passing kbuild_cflags when checking for asm goto support commit 7292ae3d5a18fb922be496e6bb687647193569b4 upstream. The latest change of asm goto support check added passing of KBUILD_CFLAGS to compiler. When these flags reference gcc plugins that are not built yet, the check fails. When one runs "make bzImage" followed by "make modules", the kernel is always built with HAVE_JUMP_LABEL disabled, while the modules are built depending on CONFIG_JUMP_LABEL. If HAVE_JUMP_LABEL macro happens to be different, modules are built with undefined references, e.g.: ERROR: "static_key_slow_inc" [net/netfilter/xt_TEE.ko] undefined! ERROR: "static_key_slow_dec" [net/netfilter/xt_TEE.ko] undefined! ERROR: "static_key_slow_dec" [net/netfilter/nft_meta.ko] undefined! ERROR: "static_key_slow_inc" [net/netfilter/nft_meta.ko] undefined! ERROR: "nf_hooks_needed" [net/netfilter/ipvs/ip_vs.ko] undefined! ERROR: "nf_hooks_needed" [net/ipv6/ipv6.ko] undefined! ERROR: "static_key_count" [net/ipv6/ipv6.ko] undefined! ERROR: "static_key_slow_inc" [net/ipv6/ipv6.ko] undefined! This change moves the check before all these references are added to KBUILD_CFLAGS. This is correct because subsequent KBUILD_CFLAGS modifications are not relevant to this check. Reported-by: Anton V. Boyarshinov Fixes: 35f860f9ba6a ("jump label: pass kbuild_cflags when checking for asm goto support") Signed-off-by: Gleb Fotengauer-Malinovskiy Signed-off-by: Dmitry V. Levin Acked-by: Steven Rostedt (VMware) Acked-by: David Lin Signed-off-by: Masahiro Yamada Signed-off-by: Greg Kroah-Hartman --- Makefile | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index 696d15d8ad5d..36d7be749587 100644 --- a/Makefile +++ b/Makefile @@ -633,6 +633,12 @@ endif # Tell gcc to never replace conditional load with a non-conditional one KBUILD_CFLAGS += $(call cc-option,--param=allow-store-data-races=0) +# check for 'asm goto' +ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC) $(KBUILD_CFLAGS)), y) + KBUILD_CFLAGS += -DCC_HAVE_ASM_GOTO + KBUILD_AFLAGS += -DCC_HAVE_ASM_GOTO +endif + ifdef CONFIG_READABLE_ASM # Disable optimizations that make assembler listings hard to read. # reorder blocks reorders the control in the function @@ -788,12 +794,6 @@ KBUILD_CFLAGS += $(call cc-option,-Werror=date-time) # use the deterministic mode of AR if available KBUILD_ARFLAGS := $(call ar-option,D) -# check for 'asm goto' -ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC) $(KBUILD_CFLAGS)), y) - KBUILD_CFLAGS += -DCC_HAVE_ASM_GOTO - KBUILD_AFLAGS += -DCC_HAVE_ASM_GOTO -endif - include scripts/Makefile.kasan include scripts/Makefile.extrawarn From 398ac7a19f17386d17f563ebfe273ced37c4897a Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 3 May 2017 16:43:19 +0200 Subject: [PATCH 082/100] xfrm: fix stack access out of bounds with CONFIG_XFRM_SUB_POLICY commit 9b3eb54106cf6acd03f07cf0ab01c13676a226c2 upstream. When CONFIG_XFRM_SUB_POLICY=y, xfrm_dst stores a copy of the flowi for that dst. Unfortunately, the code that allocates and fills this copy doesn't care about what type of flowi (flowi, flowi4, flowi6) gets passed. In multiple code paths (from raw_sendmsg, from TCP when replying to a FIN, in vxlan, geneve, and gre), the flowi that gets passed to xfrm is actually an on-stack flowi4, so we end up reading stuff from the stack past the end of the flowi4 struct. Since xfrm_dst->origin isn't used anywhere following commit ca116922afa8 ("xfrm: Eliminate "fl" and "pol" args to xfrm_bundle_ok()."), just get rid of it. xfrm_dst->partner isn't used either, so get rid of that too. Fixes: 9d6ec938019c ("ipv4: Use flowi4 in public route lookup interfaces.") Signed-off-by: Sabrina Dubroca Signed-off-by: Steffen Klassert Signed-off-by: Greg Kroah-Hartman --- include/net/xfrm.h | 10 --------- net/xfrm/xfrm_policy.c | 47 ------------------------------------------ 2 files changed, 57 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index d6f6e5006ee9..185fb037b332 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -948,10 +948,6 @@ struct xfrm_dst { struct flow_cache_object flo; struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; int num_pols, num_xfrms; -#ifdef CONFIG_XFRM_SUB_POLICY - struct flowi *origin; - struct xfrm_selector *partner; -#endif u32 xfrm_genid; u32 policy_genid; u32 route_mtu_cached; @@ -967,12 +963,6 @@ static inline void xfrm_dst_destroy(struct xfrm_dst *xdst) dst_release(xdst->route); if (likely(xdst->u.dst.xfrm)) xfrm_state_put(xdst->u.dst.xfrm); -#ifdef CONFIG_XFRM_SUB_POLICY - kfree(xdst->origin); - xdst->origin = NULL; - kfree(xdst->partner); - xdst->partner = NULL; -#endif } #endif diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 36a50ef9295d..8a0fdd870395 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1776,43 +1776,6 @@ free_dst: goto out; } -#ifdef CONFIG_XFRM_SUB_POLICY -static int xfrm_dst_alloc_copy(void **target, const void *src, int size) -{ - if (!*target) { - *target = kmalloc(size, GFP_ATOMIC); - if (!*target) - return -ENOMEM; - } - - memcpy(*target, src, size); - return 0; -} -#endif - -static int xfrm_dst_update_parent(struct dst_entry *dst, - const struct xfrm_selector *sel) -{ -#ifdef CONFIG_XFRM_SUB_POLICY - struct xfrm_dst *xdst = (struct xfrm_dst *)dst; - return xfrm_dst_alloc_copy((void **)&(xdst->partner), - sel, sizeof(*sel)); -#else - return 0; -#endif -} - -static int xfrm_dst_update_origin(struct dst_entry *dst, - const struct flowi *fl) -{ -#ifdef CONFIG_XFRM_SUB_POLICY - struct xfrm_dst *xdst = (struct xfrm_dst *)dst; - return xfrm_dst_alloc_copy((void **)&(xdst->origin), fl, sizeof(*fl)); -#else - return 0; -#endif -} - static int xfrm_expand_policies(const struct flowi *fl, u16 family, struct xfrm_policy **pols, int *num_pols, int *num_xfrms) @@ -1884,16 +1847,6 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, xdst = (struct xfrm_dst *)dst; xdst->num_xfrms = err; - if (num_pols > 1) - err = xfrm_dst_update_parent(dst, &pols[1]->selector); - else - err = xfrm_dst_update_origin(dst, fl); - if (unlikely(err)) { - dst_free(dst); - XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR); - return ERR_PTR(err); - } - xdst->num_pols = num_pols; memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols); xdst->policy_genid = atomic_read(&pols[0]->genid); From ac78351c96e8e58cf93e336ffdf0560a8f691b50 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 14 Jun 2017 13:35:37 +0300 Subject: [PATCH 083/100] xfrm: NULL dereference on allocation failure commit e747f64336fc15e1c823344942923195b800aa1e upstream. The default error code in pfkey_msg2xfrm_state() is -ENOBUFS. We added a new call to security_xfrm_state_alloc() which sets "err" to zero so there several places where we can return ERR_PTR(0) if kmalloc() fails. The caller is expecting error pointers so it leads to a NULL dereference. Fixes: df71837d5024 ("[LSM-IPSec]: Security association restriction.") Signed-off-by: Dan Carpenter Signed-off-by: Steffen Klassert Signed-off-by: Greg Kroah-Hartman --- net/key/af_key.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/key/af_key.c b/net/key/af_key.c index f9c9ecb0cdd3..4f5992966924 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1135,6 +1135,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, goto out; } + err = -ENOBUFS; key = ext_hdrs[SADB_EXT_KEY_AUTH - 1]; if (sa->sadb_sa_auth) { int keysize = 0; From f99737ce2e56eb7ee7cc6e7c9923d20ebd66714f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 14 Jun 2017 13:34:05 +0300 Subject: [PATCH 084/100] xfrm: Oops on error in pfkey_msg2xfrm_state() commit 1e3d0c2c70cd3edb5deed186c5f5c75f2b84a633 upstream. There are some missing error codes here so we accidentally return NULL instead of an error pointer. It results in a NULL pointer dereference. Fixes: df71837d5024 ("[LSM-IPSec]: Security association restriction.") Signed-off-by: Dan Carpenter Signed-off-by: Steffen Klassert Signed-off-by: Greg Kroah-Hartman --- net/key/af_key.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/net/key/af_key.c b/net/key/af_key.c index 4f5992966924..e67c28e614b9 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1147,8 +1147,10 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, if (key) keysize = (key->sadb_key_bits + 7) / 8; x->aalg = kmalloc(sizeof(*x->aalg) + keysize, GFP_KERNEL); - if (!x->aalg) + if (!x->aalg) { + err = -ENOMEM; goto out; + } strcpy(x->aalg->alg_name, a->name); x->aalg->alg_key_len = 0; if (key) { @@ -1167,8 +1169,10 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, goto out; } x->calg = kmalloc(sizeof(*x->calg), GFP_KERNEL); - if (!x->calg) + if (!x->calg) { + err = -ENOMEM; goto out; + } strcpy(x->calg->alg_name, a->name); x->props.calgo = sa->sadb_sa_encrypt; } else { @@ -1182,8 +1186,10 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, if (key) keysize = (key->sadb_key_bits + 7) / 8; x->ealg = kmalloc(sizeof(*x->ealg) + keysize, GFP_KERNEL); - if (!x->ealg) + if (!x->ealg) { + err = -ENOMEM; goto out; + } strcpy(x->ealg->alg_name, a->name); x->ealg->alg_key_len = 0; if (key) { @@ -1228,8 +1234,10 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, struct xfrm_encap_tmpl *natt; x->encap = kmalloc(sizeof(*x->encap), GFP_KERNEL); - if (!x->encap) + if (!x->encap) { + err = -ENOMEM; goto out; + } natt = x->encap; n_type = ext_hdrs[SADB_X_EXT_NAT_T_TYPE-1]; From f6115ec092257994cb01cb14394f60d4626e3766 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 27 Apr 2017 18:02:32 -0700 Subject: [PATCH 085/100] watchdog: bcm281xx: Fix use of uninitialized spinlock. commit fedf266f9955d9a019643cde199a2fd9a0259f6f upstream. The bcm_kona_wdt_set_resolution_reg() call takes the spinlock, so initialize it earlier. Fixes a warning at boot with lock debugging enabled. Fixes: 6adb730dc208 ("watchdog: bcm281xx: Watchdog Driver") Signed-off-by: Eric Anholt Reviewed-by: Florian Fainelli Reviewed-by: Guenter Roeck Signed-off-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck Signed-off-by: Greg Kroah-Hartman --- drivers/watchdog/bcm_kona_wdt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/watchdog/bcm_kona_wdt.c b/drivers/watchdog/bcm_kona_wdt.c index e0c98423f2c9..11a72bc2c71b 100644 --- a/drivers/watchdog/bcm_kona_wdt.c +++ b/drivers/watchdog/bcm_kona_wdt.c @@ -304,6 +304,8 @@ static int bcm_kona_wdt_probe(struct platform_device *pdev) if (!wdt) return -ENOMEM; + spin_lock_init(&wdt->lock); + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); wdt->base = devm_ioremap_resource(dev, res); if (IS_ERR(wdt->base)) @@ -316,7 +318,6 @@ static int bcm_kona_wdt_probe(struct platform_device *pdev) return ret; } - spin_lock_init(&wdt->lock); platform_set_drvdata(pdev, wdt); watchdog_set_drvdata(&bcm_kona_wdt_wdd, wdt); bcm_kona_wdt_wdd.parent = &pdev->dev; From 6ca11db55f62ea484c4ecf28b1fa9da14536f31e Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 17 Feb 2017 12:07:30 +0000 Subject: [PATCH 086/100] sched/loadavg: Avoid loadavg spikes caused by delayed NO_HZ accounting commit 6e5f32f7a43f45ee55c401c0b9585eb01f9629a8 upstream. If we crossed a sample window while in NO_HZ we will add LOAD_FREQ to the pending sample window time on exit, setting the next update not one window into the future, but two. This situation on exiting NO_HZ is described by: this_rq->calc_load_update < jiffies < calc_load_update In this scenario, what we should be doing is: this_rq->calc_load_update = calc_load_update [ next window ] But what we actually do is: this_rq->calc_load_update = calc_load_update + LOAD_FREQ [ next+1 window ] This has the effect of delaying load average updates for potentially up to ~9seconds. This can result in huge spikes in the load average values due to per-cpu uninterruptible task counts being out of sync when accumulated across all CPUs. It's safe to update the per-cpu active count if we wake between sample windows because any load that we left in 'calc_load_idle' will have been zero'd when the idle load was folded in calc_global_load(). This issue is easy to reproduce before, commit 9d89c257dfb9 ("sched/fair: Rewrite runnable load and utilization average tracking") just by forking short-lived process pipelines built from ps(1) and grep(1) in a loop. I'm unable to reproduce the spikes after that commit, but the bug still seems to be present from code review. Signed-off-by: Matt Fleming Signed-off-by: Peter Zijlstra (Intel) Cc: Frederic Weisbecker Cc: Linus Torvalds Cc: Mike Galbraith Cc: Mike Galbraith Cc: Morten Rasmussen Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vincent Guittot Fixes: commit 5167e8d ("sched/nohz: Rewrite and fix load-avg computation -- again") Link: http://lkml.kernel.org/r/20170217120731.11868-2-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/sched/loadavg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/sched/loadavg.c b/kernel/sched/loadavg.c index b0b93fd33af9..f8e8d68ed3fd 100644 --- a/kernel/sched/loadavg.c +++ b/kernel/sched/loadavg.c @@ -201,8 +201,9 @@ void calc_load_exit_idle(void) struct rq *this_rq = this_rq(); /* - * If we're still before the sample window, we're done. + * If we're still before the pending sample window, we're done. */ + this_rq->calc_load_update = calc_load_update; if (time_before(jiffies, this_rq->calc_load_update)) return; @@ -211,7 +212,6 @@ void calc_load_exit_idle(void) * accounted through the nohz accounting, so skip the entire deal and * sync up for the next window. */ - this_rq->calc_load_update = calc_load_update; if (time_before(jiffies, this_rq->calc_load_update + 10)) this_rq->calc_load_update += LOAD_FREQ; } From d4960d58158be2689a837a853e80caeaae88293b Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Fri, 26 May 2017 17:40:02 +0100 Subject: [PATCH 087/100] ARM64/ACPI: Fix BAD_MADT_GICC_ENTRY() macro implementation commit cb7cf772d83d2d4e6995c5bb9e0fb59aea8f7080 upstream. The BAD_MADT_GICC_ENTRY() macro checks if a GICC MADT entry passes muster from an ACPI specification standpoint. Current macro detects the MADT GICC entry length through ACPI firmware version (it changed from 76 to 80 bytes in the transition from ACPI 5.1 to ACPI 6.0 specification) but always uses (erroneously) the ACPICA (latest) struct (ie struct acpi_madt_generic_interrupt - that is 80-bytes long) length to check if the current GICC entry memory record exceeds the MADT table end in memory as defined by the MADT table header itself, which may result in false negatives depending on the ACPI firmware version and how the MADT entries are laid out in memory (ie on ACPI 5.1 firmware MADT GICC entries are 76 bytes long, so by adding 80 to a GICC entry start address in memory the resulting address may well be past the actual MADT end, triggering a false negative). Fix the BAD_MADT_GICC_ENTRY() macro by reshuffling the condition checks and update them to always use the firmware version specific MADT GICC entry length in order to carry out boundary checks. Fixes: b6cfb277378e ("ACPI / ARM64: add BAD_MADT_GICC_ENTRY() macro") Reported-by: Julien Grall Acked-by: Will Deacon Acked-by: Marc Zyngier Signed-off-by: Lorenzo Pieralisi Cc: Julien Grall Cc: Hanjun Guo Cc: Al Stone Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/acpi.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index caafd63b8092..40d1351e7573 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -22,9 +22,9 @@ #define ACPI_MADT_GICC_LENGTH \ (acpi_gbl_FADT.header.revision < 6 ? 76 : 80) -#define BAD_MADT_GICC_ENTRY(entry, end) \ - (!(entry) || (unsigned long)(entry) + sizeof(*(entry)) > (end) || \ - (entry)->header.length != ACPI_MADT_GICC_LENGTH) +#define BAD_MADT_GICC_ENTRY(entry, end) \ + (!(entry) || (entry)->header.length != ACPI_MADT_GICC_LENGTH || \ + (unsigned long)(entry) + ACPI_MADT_GICC_LENGTH > (end)) /* Basic configuration for ACPI */ #ifdef CONFIG_ACPI From 7cd8c49031714872d4f69b09c41fb863235f5069 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Thu, 29 Jun 2017 18:41:36 +0100 Subject: [PATCH 088/100] ARM: 8685/1: ensure memblock-limit is pmd-aligned commit 9e25ebfe56ece7541cd10a20d715cbdd148a2e06 upstream. The pmd containing memblock_limit is cleared by prepare_page_table() which creates the opportunity for early_alloc() to allocate unmapped memory if memblock_limit is not pmd aligned causing a boot-time hang. Commit 965278dcb8ab ("ARM: 8356/1: mm: handle non-pmd-aligned end of RAM") attempted to resolve this problem, but there is a path through the adjust_lowmem_bounds() routine where if all memory regions start and end on pmd-aligned addresses the memblock_limit will be set to arm_lowmem_limit. Since arm_lowmem_limit can be affected by the vmalloc early parameter, the value of arm_lowmem_limit may not be pmd-aligned. This commit corrects this oversight such that memblock_limit is always rounded down to pmd-alignment. Fixes: 965278dcb8ab ("ARM: 8356/1: mm: handle non-pmd-aligned end of RAM") Signed-off-by: Doug Berger Suggested-by: Mark Rutland Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/mm/mmu.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 4867f5daf82c..e47cffd25c6c 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -1184,15 +1184,15 @@ void __init sanity_check_meminfo(void) high_memory = __va(arm_lowmem_limit - 1) + 1; + if (!memblock_limit) + memblock_limit = arm_lowmem_limit; + /* * Round the memblock limit down to a pmd size. This * helps to ensure that we will allocate memory from the * last full pmd, which should be mapped. */ - if (memblock_limit) - memblock_limit = round_down(memblock_limit, PMD_SIZE); - if (!memblock_limit) - memblock_limit = arm_lowmem_limit; + memblock_limit = round_down(memblock_limit, PMD_SIZE); memblock_set_current_limit(memblock_limit); } From 6fb3b322307eee9494701d86afc650917621c551 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 6 Apr 2017 16:19:22 +0200 Subject: [PATCH 089/100] x86/mpx: Correctly report do_mpx_bt_fault() failures to user-space commit 5ed386ec09a5d75bcf073967e55e895c2607a5c3 upstream. When this function fails it just sends a SIGSEGV signal to user-space using force_sig(). This signal is missing essential information about the cause, e.g. the trap_nr or an error code. Fix this by propagating the error to the only caller of mpx_handle_bd_fault(), do_bounds(), which sends the correct SIGSEGV signal to the process. Signed-off-by: Joerg Roedel Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: fe3d197f84319 ('x86, mpx: On-demand kernel allocation of bounds tables') Link: http://lkml.kernel.org/r/1491488362-27198-1-git-send-email-joro@8bytes.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/mpx.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index ec12d7aa9cb2..7ed47b1e6f42 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -525,15 +525,7 @@ int mpx_handle_bd_fault(void) if (!kernel_managing_mpx_tables(current->mm)) return -EINVAL; - if (do_mpx_bt_fault()) { - force_sig(SIGSEGV, current); - /* - * The force_sig() is essentially "handling" this - * exception, so we do not pass up the error - * from do_mpx_bt_fault(). - */ - } - return 0; + return do_mpx_bt_fault(); } /* From 5d650fcef901ca30e6f9dd909087294db1f99eb1 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sat, 22 Apr 2017 00:01:22 -0700 Subject: [PATCH 090/100] x86/mm: Fix flush_tlb_page() on Xen commit dbd68d8e84c606673ebbcf15862f8c155fa92326 upstream. flush_tlb_page() passes a bogus range to flush_tlb_others() and expects the latter to fix it up. native_flush_tlb_others() has the fixup but Xen's version doesn't. Move the fixup to flush_tlb_others(). AFAICS the only real effect is that, without this fix, Xen would flush everything instead of just the one page on remote vCPUs in when flush_tlb_page() was called. Signed-off-by: Andy Lutomirski Reviewed-by: Boris Ostrovsky Cc: Andrew Morton Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Konrad Rzeszutek Wilk Cc: Linus Torvalds Cc: Michal Hocko Cc: Nadav Amit Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Fixes: e7b52ffd45a6 ("x86/flush_tlb: try flush_tlb_single one by one in flush_tlb_range") Link: http://lkml.kernel.org/r/10ed0e4dfea64daef10b87fb85df1746999b4dba.1492844372.git.luto@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/tlb.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 5fb6adaaa796..5a760fd66bec 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -134,8 +134,6 @@ void native_flush_tlb_others(const struct cpumask *cpumask, { struct flush_tlb_info info; - if (end == 0) - end = start + PAGE_SIZE; info.flush_mm = mm; info.flush_start = start; info.flush_end = end; @@ -264,7 +262,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long start) } if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) - flush_tlb_others(mm_cpumask(mm), mm, start, 0UL); + flush_tlb_others(mm_cpumask(mm), mm, start, start + PAGE_SIZE); preempt_enable(); } From 404ef3b4bf624a743d2763ad950125c4612aca62 Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Wed, 3 May 2017 14:51:41 -0700 Subject: [PATCH 091/100] ocfs2: o2hb: revert hb threshold to keep compatible commit 33496c3c3d7b88dcbe5e55aa01288b05646c6aca upstream. Configfs is the interface for ocfs2-tools to set configure to kernel and $configfs_dir/cluster/$clustername/heartbeat/dead_threshold is the one used to configure heartbeat dead threshold. Kernel has a default value of it but user can set O2CB_HEARTBEAT_THRESHOLD in /etc/sysconfig/o2cb to override it. Commit 45b997737a80 ("ocfs2/cluster: use per-attribute show and store methods") changed heartbeat dead threshold name while ocfs2-tools did not, so ocfs2-tools won't set this configurable and the default value is always used. So revert it. Fixes: 45b997737a80 ("ocfs2/cluster: use per-attribute show and store methods") Link: http://lkml.kernel.org/r/1490665245-15374-1-git-send-email-junxiao.bi@oracle.com Signed-off-by: Junxiao Bi Acked-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/cluster/heartbeat.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 709fbbd44c65..acebc350e98d 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -2070,13 +2070,13 @@ unlock: spin_unlock(&o2hb_live_lock); } -static ssize_t o2hb_heartbeat_group_threshold_show(struct config_item *item, +static ssize_t o2hb_heartbeat_group_dead_threshold_show(struct config_item *item, char *page) { return sprintf(page, "%u\n", o2hb_dead_threshold); } -static ssize_t o2hb_heartbeat_group_threshold_store(struct config_item *item, +static ssize_t o2hb_heartbeat_group_dead_threshold_store(struct config_item *item, const char *page, size_t count) { unsigned long tmp; @@ -2125,11 +2125,11 @@ static ssize_t o2hb_heartbeat_group_mode_store(struct config_item *item, } -CONFIGFS_ATTR(o2hb_heartbeat_group_, threshold); +CONFIGFS_ATTR(o2hb_heartbeat_group_, dead_threshold); CONFIGFS_ATTR(o2hb_heartbeat_group_, mode); static struct configfs_attribute *o2hb_heartbeat_group_attrs[] = { - &o2hb_heartbeat_group_attr_threshold, + &o2hb_heartbeat_group_attr_dead_threshold, &o2hb_heartbeat_group_attr_mode, NULL, }; From 3de9630abe3b7c1bbf95fcdb3e9d771eaefebff8 Mon Sep 17 00:00:00 2001 From: David Dillow Date: Mon, 30 Jan 2017 19:11:11 -0800 Subject: [PATCH 092/100] iommu/vt-d: Don't over-free page table directories commit f7116e115acdd74bc75a4daf6492b11d43505125 upstream. dma_pte_free_level() recurses down the IOMMU page tables and frees directory pages that are entirely contained in the given PFN range. Unfortunately, it incorrectly calculates the starting address covered by the PTE under consideration, which can lead to it clearing an entry that is still in use. This occurs if we have a scatterlist with an entry that has a length greater than 1026 MB and is aligned to 2 MB for both the IOMMU and physical addresses. For example, if __domain_mapping() is asked to map a two-entry scatterlist with 2 MB and 1028 MB segments to PFN 0xffff80000, it will ask if dma_pte_free_pagetable() is asked to PFNs from 0xffff80200 to 0xffffc05ff, it will also incorrectly clear the PFNs from 0xffff80000 to 0xffff801ff because of this issue. The current code will set level_pfn to 0xffff80200, and 0xffff80200-0xffffc01ff fits inside the range being cleared. Properly setting the level_pfn for the current level under consideration catches that this PTE is outside of the range being cleared. This patch also changes the value passed into dma_pte_free_level() when it recurses. This only affects the first PTE of the range being cleared, and is handled by the existing code that ensures we start our cursor no lower than start_pfn. This was found when using dma_map_sg() to map large chunks of contiguous memory, which immediatedly led to faults on the first access of the erroneously-deleted mappings. Fixes: 3269ee0bd668 ("intel-iommu: Fix leaks in pagetable freeing") Reviewed-by: Benjamin Serebrin Signed-off-by: David Dillow Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/intel-iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index b92b8a724efb..f9711aceef54 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1137,7 +1137,7 @@ static void dma_pte_free_level(struct dmar_domain *domain, int level, if (!dma_pte_present(pte) || dma_pte_superpage(pte)) goto next; - level_pfn = pfn & level_mask(level - 1); + level_pfn = pfn & level_mask(level); level_pte = phys_to_virt(dma_pte_addr(pte)); if (level > 2) From 48952c6d517adb6204392fe493f638ce8899911a Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 16 Jan 2017 12:58:07 +0000 Subject: [PATCH 093/100] iommu: Handle default domain attach failure commit 797a8b4d768c58caac58ee3e8cb36a164d1b7751 upstream. We wouldn't normally expect ops->attach_dev() to fail, but on IOMMUs with limited hardware resources, or generally misconfigured systems, it is certainly possible. We report failure correctly from the external iommu_attach_device() interface, but do not do so in iommu_group_add() when attaching to the default domain. The result of failure there is that the device, group and domain all get left in a broken, part-configured state which leads to weird errors and misbehaviour down the line when IOMMU API calls sort-of-but-don't-quite work. Check the return value of __iommu_attach_device() on the default domain, and refactor the error handling paths to cope with its failure and clean up correctly in such cases. Fixes: e39cb8a3aa98 ("iommu: Make sure a device is always attached to a domain") Reported-by: Punit Agrawal Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/iommu.c | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 515bb8b80952..a070fa39521a 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -391,36 +391,30 @@ int iommu_group_add_device(struct iommu_group *group, struct device *dev) device->dev = dev; ret = sysfs_create_link(&dev->kobj, &group->kobj, "iommu_group"); - if (ret) { - kfree(device); - return ret; - } + if (ret) + goto err_free_device; device->name = kasprintf(GFP_KERNEL, "%s", kobject_name(&dev->kobj)); rename: if (!device->name) { - sysfs_remove_link(&dev->kobj, "iommu_group"); - kfree(device); - return -ENOMEM; + ret = -ENOMEM; + goto err_remove_link; } ret = sysfs_create_link_nowarn(group->devices_kobj, &dev->kobj, device->name); if (ret) { - kfree(device->name); if (ret == -EEXIST && i >= 0) { /* * Account for the slim chance of collision * and append an instance to the name. */ + kfree(device->name); device->name = kasprintf(GFP_KERNEL, "%s.%d", kobject_name(&dev->kobj), i++); goto rename; } - - sysfs_remove_link(&dev->kobj, "iommu_group"); - kfree(device); - return ret; + goto err_free_name; } kobject_get(group->devices_kobj); @@ -432,8 +426,10 @@ rename: mutex_lock(&group->mutex); list_add_tail(&device->list, &group->devices); if (group->domain) - __iommu_attach_device(group->domain, dev); + ret = __iommu_attach_device(group->domain, dev); mutex_unlock(&group->mutex); + if (ret) + goto err_put_group; /* Notify any listeners about change to group. */ blocking_notifier_call_chain(&group->notifier, @@ -444,6 +440,21 @@ rename: pr_info("Adding device %s to group %d\n", dev_name(dev), group->id); return 0; + +err_put_group: + mutex_lock(&group->mutex); + list_del(&device->list); + mutex_unlock(&group->mutex); + dev->iommu_group = NULL; + kobject_put(group->devices_kobj); +err_free_name: + kfree(device->name); +err_remove_link: + sysfs_remove_link(&dev->kobj, "iommu_group"); +err_free_device: + kfree(device); + pr_err("Failed to add device %s to group %d: %d\n", dev_name(dev), group->id, ret); + return ret; } EXPORT_SYMBOL_GPL(iommu_group_add_device); From aad7041e721e2bb7aa7755d8ffca0fbc2ee1e17f Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Sun, 23 Apr 2017 18:23:21 +0800 Subject: [PATCH 094/100] iommu/amd: Fix incorrect error handling in amd_iommu_bind_pasid() commit 73dbd4a4230216b6a5540a362edceae0c9b4876b upstream. In function amd_iommu_bind_pasid(), the control flow jumps to label out_free when pasid_state->mm and mm is NULL. And mmput(mm) is called. In function mmput(mm), mm is referenced without validation. This will result in a NULL dereference bug. This patch fixes the bug. Signed-off-by: Pan Bian Fixes: f0aac63b873b ('iommu/amd: Don't hold a reference to mm_struct') Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/amd_iommu_v2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c index 4831eb910fc7..22160e481794 100644 --- a/drivers/iommu/amd_iommu_v2.c +++ b/drivers/iommu/amd_iommu_v2.c @@ -699,9 +699,9 @@ out_clear_state: out_unregister: mmu_notifier_unregister(&pasid_state->mn, mm); + mmput(mm); out_free: - mmput(mm); free_pasid_state(pasid_state); out: From 3491a0b59fcd933ee6dac6e1ed320d76f16e2311 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 7 Feb 2017 16:19:06 +0300 Subject: [PATCH 095/100] cpufreq: s3c2416: double free on driver init error path commit a69261e4470d680185a15f748d9cdafb37c57a33 upstream. The "goto err_armclk;" error path already does a clk_put(s3c_freq->hclk); so this is a double free. Fixes: 34ee55075265 ([CPUFREQ] Add S3C2416/S3C2450 cpufreq driver) Signed-off-by: Dan Carpenter Reviewed-by: Krzysztof Kozlowski Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/s3c2416-cpufreq.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/cpufreq/s3c2416-cpufreq.c b/drivers/cpufreq/s3c2416-cpufreq.c index d6d425773fa4..5b2db3c6568f 100644 --- a/drivers/cpufreq/s3c2416-cpufreq.c +++ b/drivers/cpufreq/s3c2416-cpufreq.c @@ -400,7 +400,6 @@ static int s3c2416_cpufreq_driver_init(struct cpufreq_policy *policy) rate = clk_get_rate(s3c_freq->hclk); if (rate < 133 * 1000 * 1000) { pr_err("cpufreq: HCLK not at 133MHz\n"); - clk_put(s3c_freq->hclk); ret = -EINVAL; goto err_armclk; } From b9b3eb5c774eee256a33a6c0863ebad43a693353 Mon Sep 17 00:00:00 2001 From: Ladi Prosek Date: Tue, 25 Apr 2017 16:42:44 +0200 Subject: [PATCH 096/100] KVM: x86: fix emulation of RSM and IRET instructions commit 6ed071f051e12cf7baa1b69d3becb8f232fdfb7b upstream. On AMD, the effect of set_nmi_mask called by emulate_iret_real and em_rsm on hflags is reverted later on in x86_emulate_instruction where hflags are overwritten with ctxt->emul_flags (the kvm_set_hflags call). This manifests as a hang when rebooting Windows VMs with QEMU, OVMF, and >1 vcpu. Instead of trying to merge ctxt->emul_flags into vcpu->arch.hflags after an instruction is emulated, this commit deletes emul_flags altogether and makes the emulator access vcpu->arch.hflags using two new accessors. This way all changes, on the emulator side as well as in functions called from the emulator and accessing vcpu state with emul_to_vcpu, are preserved. More details on the bug and its manifestation with Windows and OVMF: It's a KVM bug in the interaction between SMI/SMM and NMI, specific to AMD. I believe that the SMM part explains why we started seeing this only with OVMF. KVM masks and unmasks NMI when entering and leaving SMM. When KVM emulates the RSM instruction in em_rsm, the set_nmi_mask call doesn't stick because later on in x86_emulate_instruction we overwrite arch.hflags with ctxt->emul_flags, effectively reverting the effect of the set_nmi_mask call. The AMD-specific hflag of interest here is HF_NMI_MASK. When rebooting the system, Windows sends an NMI IPI to all but the current cpu to shut them down. Only after all of them are parked in HLT will the initiating cpu finish the restart. If NMI is masked, other cpus never get the memo and the initiating cpu spins forever, waiting for hal!HalpInterruptProcessorsStarted to drop. That's the symptom we observe. Fixes: a584539b24b8 ("KVM: x86: pass the whole hflags field to emulator and back") Signed-off-by: Ladi Prosek Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/kvm_emulate.h | 4 +++- arch/x86/kvm/emulate.c | 16 +++++++++------- arch/x86/kvm/x86.c | 15 ++++++++++++--- 3 files changed, 24 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index e9cd7befcb76..19d14ac23ef9 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -221,6 +221,9 @@ struct x86_emulate_ops { void (*get_cpuid)(struct x86_emulate_ctxt *ctxt, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx); void (*set_nmi_mask)(struct x86_emulate_ctxt *ctxt, bool masked); + + unsigned (*get_hflags)(struct x86_emulate_ctxt *ctxt); + void (*set_hflags)(struct x86_emulate_ctxt *ctxt, unsigned hflags); }; typedef u32 __attribute__((vector_size(16))) sse128_t; @@ -290,7 +293,6 @@ struct x86_emulate_ctxt { /* interruptibility state, as a result of execution of STI or MOV SS */ int interruptibility; - int emul_flags; bool perm_ok; /* do not check permissions if true */ bool ud; /* inject an #UD if host doesn't support insn */ diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 1dcea225977d..04b2f3cad7ba 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2531,7 +2531,7 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt) u64 smbase; int ret; - if ((ctxt->emul_flags & X86EMUL_SMM_MASK) == 0) + if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_MASK) == 0) return emulate_ud(ctxt); /* @@ -2580,11 +2580,11 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt) return X86EMUL_UNHANDLEABLE; } - if ((ctxt->emul_flags & X86EMUL_SMM_INSIDE_NMI_MASK) == 0) + if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_INSIDE_NMI_MASK) == 0) ctxt->ops->set_nmi_mask(ctxt, false); - ctxt->emul_flags &= ~X86EMUL_SMM_INSIDE_NMI_MASK; - ctxt->emul_flags &= ~X86EMUL_SMM_MASK; + ctxt->ops->set_hflags(ctxt, ctxt->ops->get_hflags(ctxt) & + ~(X86EMUL_SMM_INSIDE_NMI_MASK | X86EMUL_SMM_MASK)); return X86EMUL_CONTINUE; } @@ -5296,6 +5296,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) const struct x86_emulate_ops *ops = ctxt->ops; int rc = X86EMUL_CONTINUE; int saved_dst_type = ctxt->dst.type; + unsigned emul_flags; ctxt->mem_read.pos = 0; @@ -5310,6 +5311,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) goto done; } + emul_flags = ctxt->ops->get_hflags(ctxt); if (unlikely(ctxt->d & (No64|Undefined|Sse|Mmx|Intercept|CheckPerm|Priv|Prot|String))) { if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) || @@ -5343,7 +5345,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) fetch_possible_mmx_operand(ctxt, &ctxt->dst); } - if (unlikely(ctxt->emul_flags & X86EMUL_GUEST_MASK) && ctxt->intercept) { + if (unlikely(emul_flags & X86EMUL_GUEST_MASK) && ctxt->intercept) { rc = emulator_check_intercept(ctxt, ctxt->intercept, X86_ICPT_PRE_EXCEPT); if (rc != X86EMUL_CONTINUE) @@ -5372,7 +5374,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) goto done; } - if (unlikely(ctxt->emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) { + if (unlikely(emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) { rc = emulator_check_intercept(ctxt, ctxt->intercept, X86_ICPT_POST_EXCEPT); if (rc != X86EMUL_CONTINUE) @@ -5426,7 +5428,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) special_insn: - if (unlikely(ctxt->emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) { + if (unlikely(emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) { rc = emulator_check_intercept(ctxt, ctxt->intercept, X86_ICPT_POST_MEMACCESS); if (rc != X86EMUL_CONTINUE) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index cba30e791e7d..88f914ea2db7 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4999,6 +4999,16 @@ static void emulator_set_nmi_mask(struct x86_emulate_ctxt *ctxt, bool masked) kvm_x86_ops->set_nmi_mask(emul_to_vcpu(ctxt), masked); } +static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt) +{ + return emul_to_vcpu(ctxt)->arch.hflags; +} + +static void emulator_set_hflags(struct x86_emulate_ctxt *ctxt, unsigned emul_flags) +{ + kvm_set_hflags(emul_to_vcpu(ctxt), emul_flags); +} + static const struct x86_emulate_ops emulate_ops = { .read_gpr = emulator_read_gpr, .write_gpr = emulator_write_gpr, @@ -5038,6 +5048,8 @@ static const struct x86_emulate_ops emulate_ops = { .intercept = emulator_intercept, .get_cpuid = emulator_get_cpuid, .set_nmi_mask = emulator_set_nmi_mask, + .get_hflags = emulator_get_hflags, + .set_hflags = emulator_set_hflags, }; static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask) @@ -5090,7 +5102,6 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu) BUILD_BUG_ON(HF_GUEST_MASK != X86EMUL_GUEST_MASK); BUILD_BUG_ON(HF_SMM_MASK != X86EMUL_SMM_MASK); BUILD_BUG_ON(HF_SMM_INSIDE_NMI_MASK != X86EMUL_SMM_INSIDE_NMI_MASK); - ctxt->emul_flags = vcpu->arch.hflags; init_decode_cache(ctxt); vcpu->arch.emulate_regs_need_sync_from_vcpu = false; @@ -5486,8 +5497,6 @@ restart: unsigned long rflags = kvm_x86_ops->get_rflags(vcpu); toggle_interruptibility(vcpu, ctxt->interruptibility); vcpu->arch.emulate_regs_need_sync_to_vcpu = false; - if (vcpu->arch.hflags != ctxt->emul_flags) - kvm_set_hflags(vcpu, ctxt->emul_flags); kvm_rip_write(vcpu, ctxt->eip); if (r == EMULATE_DONE) kvm_vcpu_check_singlestep(vcpu, rflags, &r); From 3b1609f6c436a83aa812c2632ae21a4345a333cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Thu, 18 May 2017 19:37:31 +0200 Subject: [PATCH 097/100] KVM: x86/vPMU: fix undefined shift in intel_pmu_refresh() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 34b0dadbdf698f9b277a31b2747b625b9a75ea1f upstream. Static analysis noticed that pmu->nr_arch_gp_counters can be 32 (INTEL_PMC_MAX_GENERIC) and therefore cannot be used to shift 'int'. I didn't add BUILD_BUG_ON for it as we have a better checker. Reported-by: Dan Carpenter Fixes: 25462f7f5295 ("KVM: x86/vPMU: Define kvm_pmu_ops to support vPMU function dispatch") Reviewed-by: Paolo Bonzini Reviewed-by: David Hildenbrand Signed-off-by: Radim Krčmář Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/pmu_intel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/pmu_intel.c b/arch/x86/kvm/pmu_intel.c index ab38af4f4947..23a7c7ba377a 100644 --- a/arch/x86/kvm/pmu_intel.c +++ b/arch/x86/kvm/pmu_intel.c @@ -294,7 +294,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) ((u64)1 << edx.split.bit_width_fixed) - 1; } - pmu->global_ctrl = ((1 << pmu->nr_arch_gp_counters) - 1) | + pmu->global_ctrl = ((1ull << pmu->nr_arch_gp_counters) - 1) | (((1ull << pmu->nr_arch_fixed_counters) - 1) << INTEL_PMC_IDX_FIXED); pmu->global_ctrl_mask = ~pmu->global_ctrl; From 77d977dd78b36385e5c284095ce28b3a6b8fe6fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Thu, 18 May 2017 19:37:30 +0200 Subject: [PATCH 098/100] KVM: x86: zero base3 of unusable segments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f0367ee1d64d27fa08be2407df5c125442e885e3 upstream. Static checker noticed that base3 could be used uninitialized if the segment was not present (useable). Random stack values probably would not pass VMCS entry checks. Reported-by: Dan Carpenter Fixes: 1aa366163b8b ("KVM: x86 emulator: consolidate segment accessors") Reviewed-by: Paolo Bonzini Reviewed-by: David Hildenbrand Signed-off-by: Radim Krčmář Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 88f914ea2db7..8e526c6fd784 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4844,6 +4844,8 @@ static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector, if (var.unusable) { memset(desc, 0, sizeof(*desc)); + if (base3) + *base3 = 0; return false; } From be8c39b47061fae1cc260496c41e051c92df924f Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 5 Jun 2017 05:19:09 -0700 Subject: [PATCH 099/100] KVM: nVMX: Fix exception injection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d4912215d1031e4fb3d1038d2e1857218dba0d0a upstream. WARNING: CPU: 3 PID: 2840 at arch/x86/kvm/vmx.c:10966 nested_vmx_vmexit+0xdcd/0xde0 [kvm_intel] CPU: 3 PID: 2840 Comm: qemu-system-x86 Tainted: G OE 4.12.0-rc3+ #23 RIP: 0010:nested_vmx_vmexit+0xdcd/0xde0 [kvm_intel] Call Trace: ? kvm_check_async_pf_completion+0xef/0x120 [kvm] ? rcu_read_lock_sched_held+0x79/0x80 vmx_queue_exception+0x104/0x160 [kvm_intel] ? vmx_queue_exception+0x104/0x160 [kvm_intel] kvm_arch_vcpu_ioctl_run+0x1171/0x1ce0 [kvm] ? kvm_arch_vcpu_load+0x47/0x240 [kvm] ? kvm_arch_vcpu_load+0x62/0x240 [kvm] kvm_vcpu_ioctl+0x384/0x7b0 [kvm] ? kvm_vcpu_ioctl+0x384/0x7b0 [kvm] ? __fget+0xf3/0x210 do_vfs_ioctl+0xa4/0x700 ? __fget+0x114/0x210 SyS_ioctl+0x79/0x90 do_syscall_64+0x81/0x220 entry_SYSCALL64_slow_path+0x25/0x25 This is triggered occasionally by running both win7 and win2016 in L2, in addition, EPT is disabled on both L1 and L2. It can't be reproduced easily. Commit 0b6ac343fc (KVM: nVMX: Correct handling of exception injection) mentioned that "KVM wants to inject page-faults which it got to the guest. This function assumes it is called with the exit reason in vmcs02 being a #PF exception". Commit e011c663 (KVM: nVMX: Check all exceptions for intercept during delivery to L2) allows to check all exceptions for intercept during delivery to L2. However, there is no guarantee the exit reason is exception currently, when there is an external interrupt occurred on host, maybe a time interrupt for host which should not be injected to guest, and somewhere queues an exception, then the function nested_vmx_check_exception() will be called and the vmexit emulation codes will try to emulate the "Acknowledge interrupt on exit" behavior, the warning is triggered. Reusing the exit reason from the L2->L0 vmexit is wrong in this case, the reason must always be EXCEPTION_NMI when injecting an exception into L1 as a nested vmexit. Cc: Paolo Bonzini Cc: Radim Krčmář Signed-off-by: Wanpeng Li Fixes: e011c663b9c7 ("KVM: nVMX: Check all exceptions for intercept during delivery to L2") Signed-off-by: Radim Krčmář Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 50ca8f409a7c..bbaa11f4e74b 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2264,7 +2264,7 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned nr) if (!(vmcs12->exception_bitmap & (1u << nr))) return 0; - nested_vmx_vmexit(vcpu, to_vmx(vcpu)->exit_reason, + nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, vmcs_read32(VM_EXIT_INTR_INFO), vmcs_readl(EXIT_QUALIFICATION)); return 1; From 4282d39575bf17daedc18f2fe01ca349830a6e99 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 5 Jul 2017 14:39:21 +0200 Subject: [PATCH 100/100] Linux 4.4.76 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 36d7be749587..902ab134446e 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 75 +SUBLEVEL = 76 EXTRAVERSION = NAME = Blurry Fish Butt