From 626a83d5d55b90fc4df792bb0aa707df2ecafa4b Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Tue, 19 Jan 2016 10:41:33 +0100 Subject: [PATCH 001/418] af_iucv: Validate socket address length in iucv_sock_bind() [ Upstream commit 52a82e23b9f2a9e1d429c5207f8575784290d008 ] Signed-off-by: Ursula Braun Reported-by: Dmitry Vyukov Reviewed-by: Evgeny Cherkashin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/iucv/af_iucv.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 435608c4306d..20ab7b2ec463 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -708,6 +708,9 @@ static int iucv_sock_bind(struct socket *sock, struct sockaddr *addr, if (!addr || addr->sa_family != AF_IUCV) return -EINVAL; + if (addr_len < sizeof(struct sockaddr_iucv)) + return -EINVAL; + lock_sock(sk); if (sk->sk_state != IUCV_OPEN) { err = -EBADFD; From 306d316588459a0e6ee81098c334f3ef249ab64f Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Wed, 20 Jan 2016 17:59:49 -0800 Subject: [PATCH 002/418] gro: Make GRO aware of lightweight tunnels. [ Upstream commit ce87fc6ce3f9f4488546187e3757cf666d9d4a2a ] GRO is currently not aware of tunnel metadata generated by lightweight tunnels and stored in the dst. This leads to two possible problems: * Incorrectly merging two frames that have different metadata. * Leaking of allocated metadata from merged frames. This avoids those problems by comparing the tunnel information before merging, similar to how we handle other metadata (such as vlan tags), and releasing any state when we are done. Reported-by: John Fixes: 2e15ea39 ("ip_gre: Add support to collect tunnel metadata.") Signed-off-by: Jesse Gross Acked-by: Eric Dumazet Acked-by: Thomas Graf Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/dst_metadata.h | 18 ++++++++++++++++++ net/core/dev.c | 7 +++++-- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index 6816f0fa5693..30a56ab2ccfb 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -44,6 +44,24 @@ static inline bool skb_valid_dst(const struct sk_buff *skb) return dst && !(dst->flags & DST_METADATA); } +static inline int skb_metadata_dst_cmp(const struct sk_buff *skb_a, + const struct sk_buff *skb_b) +{ + const struct metadata_dst *a, *b; + + if (!(skb_a->_skb_refdst | skb_b->_skb_refdst)) + return 0; + + a = (const struct metadata_dst *) skb_dst(skb_a); + b = (const struct metadata_dst *) skb_dst(skb_b); + + if (!a != !b || a->u.tun_info.options_len != b->u.tun_info.options_len) + return 1; + + return memcmp(&a->u.tun_info, &b->u.tun_info, + sizeof(a->u.tun_info) + a->u.tun_info.options_len); +} + struct metadata_dst *metadata_dst_alloc(u8 optslen, gfp_t flags); struct metadata_dst __percpu *metadata_dst_alloc_percpu(u8 optslen, gfp_t flags); diff --git a/net/core/dev.c b/net/core/dev.c index 7f00f2439770..7dc137fba5c1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4145,6 +4145,7 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb) diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev; diffs |= p->vlan_tci ^ skb->vlan_tci; + diffs |= skb_metadata_dst_cmp(p, skb); if (maclen == ETH_HLEN) diffs |= compare_ether_header(skb_mac_header(p), skb_mac_header(skb)); @@ -4342,10 +4343,12 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) break; case GRO_MERGED_FREE: - if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) + if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) { + skb_dst_drop(skb); kmem_cache_free(skbuff_head_cache, skb); - else + } else { __kfree_skb(skb); + } break; case GRO_HELD: From c95b96877efc8b258d24d6da93465fbb89082495 Mon Sep 17 00:00:00 2001 From: Manfred Rudigier Date: Wed, 20 Jan 2016 11:22:28 +0100 Subject: [PATCH 003/418] net: dp83640: Fix tx timestamp overflow handling. [ Upstream commit 81e8f2e930fe76b9814c71b9d87c30760b5eb705 ] PHY status frames are not reliable, the PHY may not be able to send them during heavy receive traffic. This overflow condition is signaled by the PHY in the next status frame, but the driver did not make use of it. Instead it always reported wrong tx timestamps to user space after an overflow happened because it assigned newly received tx timestamps to old packets in the queue. This commit fixes this issue by clearing the tx timestamp queue every time an overflow happens, so that no timestamps are delivered for overflow packets. This way time stamping will continue correctly after an overflow. Signed-off-by: Manfred Rudigier Acked-by: Richard Cochran Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/dp83640.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c index 47b711739ba9..e6cefd0e3262 100644 --- a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@ -845,6 +845,11 @@ static void decode_rxts(struct dp83640_private *dp83640, struct skb_shared_hwtstamps *shhwtstamps = NULL; struct sk_buff *skb; unsigned long flags; + u8 overflow; + + overflow = (phy_rxts->ns_hi >> 14) & 0x3; + if (overflow) + pr_debug("rx timestamp queue overflow, count %d\n", overflow); spin_lock_irqsave(&dp83640->rx_lock, flags); @@ -887,6 +892,7 @@ static void decode_txts(struct dp83640_private *dp83640, struct skb_shared_hwtstamps shhwtstamps; struct sk_buff *skb; u64 ns; + u8 overflow; /* We must already have the skb that triggered this. */ @@ -896,6 +902,17 @@ static void decode_txts(struct dp83640_private *dp83640, pr_debug("have timestamp but tx_queue empty\n"); return; } + + overflow = (phy_txts->ns_hi >> 14) & 0x3; + if (overflow) { + pr_debug("tx timestamp queue overflow, count %d\n", overflow); + while (skb) { + skb_complete_tx_timestamp(skb, NULL); + skb = skb_dequeue(&dp83640->tx_queue); + } + return; + } + ns = phy2txts(phy_txts); memset(&shhwtstamps, 0, sizeof(shhwtstamps)); shhwtstamps.hwtstamp = ns_to_ktime(ns); From aa12fd6d38ce22fc16aaf03f6e53ce8be5de304a Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Wed, 20 Jan 2016 16:22:47 -0800 Subject: [PATCH 004/418] tunnels: Allow IPv6 UDP checksums to be correctly controlled. [ Upstream commit 35e2d1152b22eae99c961affbe85374bef05a775 ] When configuring checksums on UDP tunnels, the flags are different for IPv4 vs. IPv6 (and reversed). However, when lightweight tunnels are enabled the flags used are always the IPv4 versions, which are ignored in the IPv6 code paths. This uses the correct IPv6 flags, so checksums can be controlled appropriately. Fixes: a725e514 ("vxlan: metadata based tunneling for IPv6") Fixes: abe492b4 ("geneve: UDP checksum configuration via netlink") Signed-off-by: Jesse Gross Acked-by: Jiri Benc Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/vxlan.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 405a7b6cca25..e0fcda4ddd55 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1984,11 +1984,6 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, vxlan->cfg.port_max, true); if (info) { - if (info->key.tun_flags & TUNNEL_CSUM) - flags |= VXLAN_F_UDP_CSUM; - else - flags &= ~VXLAN_F_UDP_CSUM; - ttl = info->key.ttl; tos = info->key.tos; @@ -2003,8 +1998,15 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, goto drop; sk = vxlan->vn4_sock->sock->sk; - if (info && (info->key.tun_flags & TUNNEL_DONT_FRAGMENT)) - df = htons(IP_DF); + if (info) { + if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT) + df = htons(IP_DF); + + if (info->key.tun_flags & TUNNEL_CSUM) + flags |= VXLAN_F_UDP_CSUM; + else + flags &= ~VXLAN_F_UDP_CSUM; + } memset(&fl4, 0, sizeof(fl4)); fl4.flowi4_oif = rdst ? rdst->remote_ifindex : 0; @@ -2102,6 +2104,13 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, return; } + if (info) { + if (info->key.tun_flags & TUNNEL_CSUM) + flags &= ~VXLAN_F_UDP_ZERO_CSUM6_TX; + else + flags |= VXLAN_F_UDP_ZERO_CSUM6_TX; + } + ttl = ttl ? : ip6_dst_hoplimit(ndst); err = vxlan6_xmit_skb(ndst, sk, skb, dev, &saddr, &dst->sin6.sin6_addr, 0, ttl, src_port, dst_port, htonl(vni << 8), md, From 176d8f37af972f47360d5c3751a61cd6274924cd Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 17 Feb 2016 19:30:01 +0100 Subject: [PATCH 005/418] lwt: fix rx checksum setting for lwt devices tunneling over ipv6 [ Upstream commit c868ee7063bdb53f3ef9eac7bcec84960980b471 ] the commit 35e2d1152b22 ("tunnels: Allow IPv6 UDP checksums to be correctly controlled.") changed the default xmit checksum setting for lwt vxlan/geneve ipv6 tunnels, so that now the checksum is not set into external UDP header. This commit changes the rx checksum setting for both lwt vxlan/geneve devices created by openvswitch accordingly, so that lwt over ipv6 tunnel pairs are again able to communicate with default values. Signed-off-by: Paolo Abeni Acked-by: Jiri Benc Acked-by: Jesse Gross Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/openvswitch/vport-vxlan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index 1605691d9414..d933cb89efac 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -90,7 +90,7 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms) int err; struct vxlan_config conf = { .no_share = true, - .flags = VXLAN_F_COLLECT_METADATA, + .flags = VXLAN_F_COLLECT_METADATA | VXLAN_F_UDP_ZERO_CSUM6_RX, }; if (!options) { From e5abc10d19b90ab363d8a732bc6cf098249e10c5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 21 Jan 2016 08:02:54 -0800 Subject: [PATCH 006/418] tcp: fix NULL deref in tcp_v4_send_ack() [ Upstream commit e62a123b8ef7c5dc4db2c16383d506860ad21b47 ] Neal reported crashes with this stack trace : RIP: 0010:[] tcp_v4_send_ack+0x41/0x20f ... CR2: 0000000000000018 CR3: 000000044005c000 CR4: 00000000001427e0 ... [] tcp_v4_reqsk_send_ack+0xa5/0xb4 [] tcp_check_req+0x2ea/0x3e0 [] tcp_rcv_state_process+0x850/0x2500 [] tcp_v4_do_rcv+0x141/0x330 [] sk_backlog_rcv+0x21/0x30 [] tcp_recvmsg+0x75d/0xf90 [] inet_recvmsg+0x80/0xa0 [] sock_aio_read+0xee/0x110 [] do_sync_read+0x6f/0xa0 [] SyS_read+0x1e1/0x290 [] system_call_fastpath+0x16/0x1b The problem here is the skb we provide to tcp_v4_send_ack() had to be parked in the backlog of a new TCP fastopen child because this child was owned by the user at the time an out of window packet arrived. Before queuing a packet, TCP has to set skb->dev to NULL as the device could disappear before packet is removed from the queue. Fix this issue by using the net pointer provided by the socket (being a timewait or a request socket). IPv6 is immune to the bug : tcp_v6_send_response() already gets the net pointer from the socket if provided. Fixes: 168a8f58059a ("tcp: TCP Fast Open Server - main code path") Reported-by: Neal Cardwell Signed-off-by: Eric Dumazet Cc: Jerry Chu Cc: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_ipv4.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index d8841a2f1569..46277170e492 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -705,7 +705,8 @@ release_sk1: outside socket context is ugly, certainly. What can I do? */ -static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, +static void tcp_v4_send_ack(struct net *net, + struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, struct tcp_md5sig_key *key, int reply_flags, u8 tos) @@ -720,7 +721,6 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, ]; } rep; struct ip_reply_arg arg; - struct net *net = dev_net(skb_dst(skb)->dev); memset(&rep.th, 0, sizeof(struct tcphdr)); memset(&arg, 0, sizeof(arg)); @@ -782,7 +782,8 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) struct inet_timewait_sock *tw = inet_twsk(sk); struct tcp_timewait_sock *tcptw = tcp_twsk(sk); - tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, + tcp_v4_send_ack(sock_net(sk), skb, + tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcp_time_stamp + tcptw->tw_ts_offset, tcptw->tw_ts_recent, @@ -801,8 +802,10 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV * sk->sk_state == TCP_SYN_RECV -> for Fast Open. */ - tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ? - tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, + u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 : + tcp_sk(sk)->snd_nxt; + + tcp_v4_send_ack(sock_net(sk), skb, seq, tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd, tcp_time_stamp, req->ts_recent, From 39770be4d6ad29c5ab1f21edbbf01db067f13b52 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 24 Jan 2016 13:53:50 -0800 Subject: [PATCH 007/418] af_unix: fix struct pid memory leak [ Upstream commit fa0dc04df259ba2df3ce1920e9690c7842f8fa4b ] Dmitry reported a struct pid leak detected by a syzkaller program. Bug happens in unix_stream_recvmsg() when we break the loop when a signal is pending, without properly releasing scm. Fixes: b3ca9b02b007 ("net: fix multithreaded signal handling in unix recv routines") Reported-by: Dmitry Vyukov Signed-off-by: Eric Dumazet Cc: Rainer Weikusat Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/unix/af_unix.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index e3f85bc8b135..775855ee1ff8 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2332,6 +2332,7 @@ again: if (signal_pending(current)) { err = sock_intr_errno(timeo); + scm_destroy(&scm); goto out; } From cccf9f37854c9ffd5f658c977345d8159ba1073a Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Fri, 22 Jan 2016 01:39:43 +0100 Subject: [PATCH 008/418] pptp: fix illegal memory access caused by multiple bind()s [ Upstream commit 9a368aff9cb370298fa02feeffa861f2db497c18 ] Several times already this has been reported as kasan reports caused by syzkaller and trinity and people always looked at RCU races, but it is much more simple. :) In case we bind a pptp socket multiple times, we simply add it to the callid_sock list but don't remove the old binding. Thus the old socket stays in the bucket with unused call_id indexes and doesn't get cleaned up. This causes various forms of kasan reports which were hard to pinpoint. Simply don't allow multiple binds and correct error handling in pptp_bind. Also keep sk_state bits in place in pptp_connect. Fixes: 00959ade36acad ("PPTP: PPP over IPv4 (Point-to-Point Tunneling Protocol)") Cc: Dmitry Kozlov Cc: Sasha Levin Cc: Dmitry Vyukov Reported-by: Dmitry Vyukov Cc: Dave Jones Reported-by: Dave Jones Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ppp/pptp.c | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c index 597c53e0a2ec..f7e8c79349ad 100644 --- a/drivers/net/ppp/pptp.c +++ b/drivers/net/ppp/pptp.c @@ -129,24 +129,27 @@ static int lookup_chan_dst(u16 call_id, __be32 d_addr) return i < MAX_CALLID; } -static int add_chan(struct pppox_sock *sock) +static int add_chan(struct pppox_sock *sock, + struct pptp_addr *sa) { static int call_id; spin_lock(&chan_lock); - if (!sock->proto.pptp.src_addr.call_id) { + if (!sa->call_id) { call_id = find_next_zero_bit(callid_bitmap, MAX_CALLID, call_id + 1); if (call_id == MAX_CALLID) { call_id = find_next_zero_bit(callid_bitmap, MAX_CALLID, 1); if (call_id == MAX_CALLID) goto out_err; } - sock->proto.pptp.src_addr.call_id = call_id; - } else if (test_bit(sock->proto.pptp.src_addr.call_id, callid_bitmap)) + sa->call_id = call_id; + } else if (test_bit(sa->call_id, callid_bitmap)) { goto out_err; + } - set_bit(sock->proto.pptp.src_addr.call_id, callid_bitmap); - rcu_assign_pointer(callid_sock[sock->proto.pptp.src_addr.call_id], sock); + sock->proto.pptp.src_addr = *sa; + set_bit(sa->call_id, callid_bitmap); + rcu_assign_pointer(callid_sock[sa->call_id], sock); spin_unlock(&chan_lock); return 0; @@ -416,7 +419,6 @@ static int pptp_bind(struct socket *sock, struct sockaddr *uservaddr, struct sock *sk = sock->sk; struct sockaddr_pppox *sp = (struct sockaddr_pppox *) uservaddr; struct pppox_sock *po = pppox_sk(sk); - struct pptp_opt *opt = &po->proto.pptp; int error = 0; if (sockaddr_len < sizeof(struct sockaddr_pppox)) @@ -424,10 +426,22 @@ static int pptp_bind(struct socket *sock, struct sockaddr *uservaddr, lock_sock(sk); - opt->src_addr = sp->sa_addr.pptp; - if (add_chan(po)) - error = -EBUSY; + if (sk->sk_state & PPPOX_DEAD) { + error = -EALREADY; + goto out; + } + if (sk->sk_state & PPPOX_BOUND) { + error = -EBUSY; + goto out; + } + + if (add_chan(po, &sp->sa_addr.pptp)) + error = -EBUSY; + else + sk->sk_state |= PPPOX_BOUND; + +out: release_sock(sk); return error; } @@ -498,7 +512,7 @@ static int pptp_connect(struct socket *sock, struct sockaddr *uservaddr, } opt->dst_addr = sp->sa_addr.pptp; - sk->sk_state = PPPOX_CONNECTED; + sk->sk_state |= PPPOX_CONNECTED; end: release_sock(sk); From 293c41f89596fb17bd3963f9f240000e8ab462ac Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Fri, 22 Jan 2016 18:29:49 -0200 Subject: [PATCH 009/418] sctp: allow setting SCTP_SACK_IMMEDIATELY by the application [ Upstream commit 27f7ed2b11d42ab6d796e96533c2076ec220affc ] This patch extends commit b93d6471748d ("sctp: implement the sender side for SACK-IMMEDIATELY extension") as it didn't white list SCTP_SACK_IMMEDIATELY on sctp_msghdr_parse(), causing it to be understood as an invalid flag and returning -EINVAL to the application. Note that the actual handling of the flag is already there in sctp_datamsg_from_user(). https://tools.ietf.org/html/rfc7053#section-7 Fixes: b93d6471748d ("sctp: implement the sender side for SACK-IMMEDIATELY extension") Signed-off-by: Marcelo Ricardo Leitner Acked-by: Vlad Yasevich Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/socket.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index ef1d90fdc773..06a9cfbc07b2 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -6640,6 +6640,7 @@ static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs) if (cmsgs->srinfo->sinfo_flags & ~(SCTP_UNORDERED | SCTP_ADDR_OVER | + SCTP_SACK_IMMEDIATELY | SCTP_ABORT | SCTP_EOF)) return -EINVAL; break; @@ -6663,6 +6664,7 @@ static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs) if (cmsgs->sinfo->snd_flags & ~(SCTP_UNORDERED | SCTP_ADDR_OVER | + SCTP_SACK_IMMEDIATELY | SCTP_ABORT | SCTP_EOF)) return -EINVAL; break; From 7f76933dbd9bb3a01c060e0a580a4f074c5d0def Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 24 Jan 2016 09:22:05 +0000 Subject: [PATCH 010/418] net: dsa: fix mv88e6xxx switches [ Upstream commit db0e51afa481088e6396f11e02018d64113a6578 ] Since commit 76e398a62712 ("net: dsa: use switchdev obj for VLAN add/del ops"), the Marvell 88E6xxx switch has been unable to pass traffic between ports - any received traffic is discarded by the switch. Taking a port out of bridge mode and configuring a vlan on it also the port to start passing traffic. With the debugfs files re-instated to allow debug of this issue by comparing the register settings between the working and non-working case, the reason becomes clear: GLOBAL GLOBAL2 SERDES 0 1 2 3 4 5 6 - 7: 1111 707f 2001 2 2 2 2 2 0 2 + 7: 1111 707f 2001 1 1 1 1 1 0 1 Register 7 for the ports is the default vlan tag register, and in the non-working setup, it has been set to 2, despite vlan 2 not being configured. This causes the switch to drop all packets coming in to these ports. The working setup has the default vlan tag register set to 1, which is the default vlan when none is configured. Inspection of the code reveals why. The code prior to this commit was: - for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) { ... - if (!err && vlan->flags & BRIDGE_VLAN_INFO_PVID) - err = ds->drv->port_pvid_set(ds, p->port, vid); but the new code is: + for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) { ... + } ... + if (pvid) + err = _mv88e6xxx_port_pvid_set(ds, port, vid); This causes the new code to always set the default vlan to one higher than the old code. Fix this. Fixes: 76e398a62712 ("net: dsa: use switchdev obj for VLAN add/del ops") Cc: Signed-off-by: Russell King Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/mv88e6xxx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c index b06dba05594a..2dea39b5cb0b 100644 --- a/drivers/net/dsa/mv88e6xxx.c +++ b/drivers/net/dsa/mv88e6xxx.c @@ -1519,7 +1519,7 @@ int mv88e6xxx_port_vlan_add(struct dsa_switch *ds, int port, /* no PVID with ranges, otherwise it's a bug */ if (pvid) - err = _mv88e6xxx_port_pvid_set(ds, port, vid); + err = _mv88e6xxx_port_pvid_set(ds, port, vlan->vid_end); unlock: mutex_unlock(&ps->smi_mutex); From c57e51ffd1d910d595ccb3af3ae70eeeb6d423a2 Mon Sep 17 00:00:00 2001 From: Parthasarathy Bhuvaragan Date: Wed, 27 Jan 2016 11:35:59 +0100 Subject: [PATCH 011/418] tipc: fix connection abort during subscription cancel [ Upstream commit 4d5cfcba2f6ec494d8810b9e3c0a7b06255c8067 ] In 'commit 7fe8097cef5f ("tipc: fix nullpointer bug when subscribing to events")', we terminate the connection if the subscription creation fails. In the same commit, the subscription creation result was based on the value of the subscription pointer (set in the function) instead of the return code. Unfortunately, the same function tipc_subscrp_create() handles subscription cancel request. For a subscription cancellation request, the subscription pointer cannot be set. Thus if a subscriber has several subscriptions and cancels any of them, the connection is terminated. In this commit, we terminate the connection based on the return value of tipc_subscrp_create(). Fixes: commit 7fe8097cef5f ("tipc: fix nullpointer bug when subscribing to events") Reviewed-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tipc/subscr.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 350cca33ee0a..69ee2eeef968 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -289,15 +289,14 @@ static void tipc_subscrb_rcv_cb(struct net *net, int conid, struct sockaddr_tipc *addr, void *usr_data, void *buf, size_t len) { - struct tipc_subscriber *subscriber = usr_data; + struct tipc_subscriber *subscrb = usr_data; struct tipc_subscription *sub = NULL; struct tipc_net *tn = net_generic(net, tipc_net_id); - tipc_subscrp_create(net, (struct tipc_subscr *)buf, subscriber, &sub); - if (sub) - tipc_nametbl_subscribe(sub); - else - tipc_conn_terminate(tn->topsrv, subscriber->conid); + if (tipc_subscrp_create(net, (struct tipc_subscr *)buf, subscrb, &sub)) + return tipc_conn_terminate(tn->topsrv, subscrb->conid); + + tipc_nametbl_subscribe(sub); } /* Handle one request to establish a new subscriber */ From 649dc6c32e516972128502ad6a45a16b96098567 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Fri, 22 Jan 2016 15:49:12 -0800 Subject: [PATCH 012/418] inet: frag: Always orphan skbs inside ip_defrag() [ Upstream commit 8282f27449bf15548cb82c77b6e04ee0ab827bdc ] Later parts of the stack (including fragmentation) expect that there is never a socket attached to frag in a frag_list, however this invariant was not enforced on all defrag paths. This could lead to the BUG_ON(skb->sk) during ip_do_fragment(), as per the call stack at the end of this commit message. While the call could be added to openvswitch to fix this particular error, the head and tail of the frags list are already orphaned indirectly inside ip_defrag(), so it seems like the remaining fragments should all be orphaned in all circumstances. kernel BUG at net/ipv4/ip_output.c:586! [...] Call Trace: [] ? do_output.isra.29+0x1b0/0x1b0 [openvswitch] [] ovs_fragment+0xcc/0x214 [openvswitch] [] ? dst_discard_out+0x20/0x20 [] ? dst_ifdown+0x80/0x80 [] ? find_bucket.isra.2+0x62/0x70 [openvswitch] [] ? mod_timer_pending+0x65/0x210 [] ? __lock_acquire+0x3db/0x1b90 [] ? nf_conntrack_in+0x252/0x500 [nf_conntrack] [] ? __lock_is_held+0x54/0x70 [] do_output.isra.29+0xe3/0x1b0 [openvswitch] [] do_execute_actions+0xe11/0x11f0 [openvswitch] [] ? __lock_is_held+0x54/0x70 [] ovs_execute_actions+0x32/0xd0 [openvswitch] [] ovs_dp_process_packet+0x85/0x140 [openvswitch] [] ? __lock_is_held+0x54/0x70 [] ovs_execute_actions+0xb2/0xd0 [openvswitch] [] ovs_dp_process_packet+0x85/0x140 [openvswitch] [] ? ovs_ct_get_labels+0x49/0x80 [openvswitch] [] ovs_vport_receive+0x5d/0xa0 [openvswitch] [] ? __lock_acquire+0x3db/0x1b90 [] ? __lock_acquire+0x3db/0x1b90 [] ? __lock_acquire+0x3db/0x1b90 [] ? internal_dev_xmit+0x5/0x140 [openvswitch] [] internal_dev_xmit+0x6c/0x140 [openvswitch] [] ? internal_dev_xmit+0x5/0x140 [openvswitch] [] dev_hard_start_xmit+0x2b9/0x5e0 [] ? netif_skb_features+0xd1/0x1f0 [] __dev_queue_xmit+0x800/0x930 [] ? __dev_queue_xmit+0x50/0x930 [] ? mark_held_locks+0x71/0x90 [] ? neigh_resolve_output+0x106/0x220 [] dev_queue_xmit+0x10/0x20 [] neigh_resolve_output+0x178/0x220 [] ? ip_finish_output2+0x1ff/0x590 [] ip_finish_output2+0x1ff/0x590 [] ? ip_finish_output2+0x7e/0x590 [] ip_do_fragment+0x831/0x8a0 [] ? ip_copy_metadata+0x1b0/0x1b0 [] ip_fragment.constprop.49+0x43/0x80 [] ip_finish_output+0x17c/0x340 [] ? nf_hook_slow+0xe4/0x190 [] ip_output+0x70/0x110 [] ? ip_fragment.constprop.49+0x80/0x80 [] ip_local_out+0x39/0x70 [] ip_send_skb+0x19/0x40 [] ip_push_pending_frames+0x33/0x40 [] icmp_push_reply+0xea/0x120 [] icmp_reply.constprop.23+0x1ed/0x230 [] icmp_echo.part.21+0x4e/0x50 [] ? __lock_is_held+0x54/0x70 [] ? rcu_read_lock_held+0x5e/0x70 [] icmp_echo+0x36/0x70 [] icmp_rcv+0x271/0x450 [] ip_local_deliver_finish+0x127/0x3a0 [] ? ip_local_deliver_finish+0x41/0x3a0 [] ip_local_deliver+0x60/0xd0 [] ? ip_rcv_finish+0x560/0x560 [] ip_rcv_finish+0xdd/0x560 [] ip_rcv+0x283/0x3e0 [] ? match_held_lock+0x192/0x200 [] ? inet_del_offload+0x40/0x40 [] __netif_receive_skb_core+0x392/0xae0 [] ? process_backlog+0x8e/0x230 [] ? mark_held_locks+0x71/0x90 [] __netif_receive_skb+0x18/0x60 [] process_backlog+0x78/0x230 [] ? process_backlog+0xdd/0x230 [] net_rx_action+0x155/0x400 [] __do_softirq+0xcc/0x420 [] ? ip_finish_output2+0x217/0x590 [] do_softirq_own_stack+0x1c/0x30 [] do_softirq+0x4e/0x60 [] __local_bh_enable_ip+0xa8/0xb0 [] ip_finish_output2+0x240/0x590 [] ? ip_do_fragment+0x831/0x8a0 [] ip_do_fragment+0x831/0x8a0 [] ? ip_copy_metadata+0x1b0/0x1b0 [] ip_fragment.constprop.49+0x43/0x80 [] ip_finish_output+0x17c/0x340 [] ? nf_hook_slow+0xe4/0x190 [] ip_output+0x70/0x110 [] ? ip_fragment.constprop.49+0x80/0x80 [] ip_local_out+0x39/0x70 [] ip_send_skb+0x19/0x40 [] ip_push_pending_frames+0x33/0x40 [] raw_sendmsg+0x7d3/0xc30 [] ? __lock_acquire+0x3db/0x1b90 [] ? inet_sendmsg+0xc7/0x1d0 [] ? __lock_is_held+0x54/0x70 [] inet_sendmsg+0x10a/0x1d0 [] ? inet_sendmsg+0x5/0x1d0 [] sock_sendmsg+0x38/0x50 [] ___sys_sendmsg+0x25f/0x270 [] ? handle_mm_fault+0x8dd/0x1320 [] ? _raw_spin_unlock+0x27/0x40 [] ? __do_page_fault+0x1e2/0x460 [] ? __fget_light+0x66/0x90 [] __sys_sendmsg+0x42/0x80 [] SyS_sendmsg+0x12/0x20 [] entry_SYSCALL_64_fastpath+0x12/0x6f Code: 00 00 44 89 e0 e9 7c fb ff ff 4c 89 ff e8 e7 e7 ff ff 41 8b 9d 80 00 00 00 2b 5d d4 89 d8 c1 f8 03 0f b7 c0 e9 33 ff ff f 66 66 66 2e 0f 1f 84 00 00 00 00 00 66 66 66 66 90 55 48 RIP [] ip_do_fragment+0x892/0x8a0 RSP Fixes: 7f8a436eaa2c ("openvswitch: Add conntrack action") Signed-off-by: Joe Stringer Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_fragment.c | 1 + net/ipv4/netfilter/nf_defrag_ipv4.c | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 1fe55ae81781..b8a0607dab96 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -661,6 +661,7 @@ int ip_defrag(struct net *net, struct sk_buff *skb, u32 user) struct ipq *qp; IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS); + skb_orphan(skb); /* Lookup (or create) queue header */ qp = ip_find(net, ip_hdr(skb), user, vif); diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index 6fb869f646bf..a04dee536b8e 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -27,8 +27,6 @@ static int nf_ct_ipv4_gather_frags(struct net *net, struct sk_buff *skb, { int err; - skb_orphan(skb); - local_bh_disable(); err = ip_defrag(net, skb, user); local_bh_enable(); From ba50e6d96f3daf3db8396cc3529f90339d9ef59a Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 27 Jan 2016 15:16:43 +0100 Subject: [PATCH 013/418] switchdev: Require RTNL mutex to be held when sending FDB notifications [ Upstream commit 4f2c6ae5c64c353fb1b0425e4747e5603feadba1 ] When switchdev drivers process FDB notifications from the underlying device they resolve the netdev to which the entry points to and notify the bridge using the switchdev notifier. However, since the RTNL mutex is not held there is nothing preventing the netdev from disappearing in the middle, which will cause br_switchdev_event() to dereference a non-existing netdev. Make switchdev drivers hold the lock at the beginning of the notification processing session and release it once it ends, after notifying the bridge. Also, remove switchdev_mutex and fdb_lock, as they are no longer needed when RTNL mutex is held. Fixes: 03bf0c281234 ("switchdev: introduce switchdev notifier") Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- .../ethernet/mellanox/mlxsw/spectrum_switchdev.c | 3 +++ drivers/net/ethernet/rocker/rocker.c | 2 ++ net/bridge/br.c | 3 +-- net/switchdev/switchdev.c | 15 ++++++++------- 4 files changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 617fb22b5d81..7dbeafa65934 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include "spectrum.h" @@ -812,6 +813,7 @@ static void mlxsw_sp_fdb_notify_work(struct work_struct *work) mlxsw_sp = container_of(work, struct mlxsw_sp, fdb_notify.dw.work); + rtnl_lock(); do { mlxsw_reg_sfn_pack(sfn_pl); err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(sfn), sfn_pl); @@ -824,6 +826,7 @@ static void mlxsw_sp_fdb_notify_work(struct work_struct *work) mlxsw_sp_fdb_notify_rec_process(mlxsw_sp, sfn_pl, i); } while (num_rec); + rtnl_unlock(); kfree(sfn_pl); mlxsw_sp_fdb_notify_work_schedule(mlxsw_sp); diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index e9f2349e98bc..52ec3d6e056a 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -3531,12 +3531,14 @@ static void rocker_port_fdb_learn_work(struct work_struct *work) info.addr = lw->addr; info.vid = lw->vid; + rtnl_lock(); if (learned && removing) call_switchdev_notifiers(SWITCHDEV_FDB_DEL, lw->rocker_port->dev, &info.info); else if (learned && !removing) call_switchdev_notifiers(SWITCHDEV_FDB_ADD, lw->rocker_port->dev, &info.info); + rtnl_unlock(); rocker_port_kfree(lw->trans, work); } diff --git a/net/bridge/br.c b/net/bridge/br.c index a1abe4936fe1..3addc05b9a16 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -121,6 +121,7 @@ static struct notifier_block br_device_notifier = { .notifier_call = br_device_event }; +/* called with RTNL */ static int br_switchdev_event(struct notifier_block *unused, unsigned long event, void *ptr) { @@ -130,7 +131,6 @@ static int br_switchdev_event(struct notifier_block *unused, struct switchdev_notifier_fdb_info *fdb_info; int err = NOTIFY_DONE; - rtnl_lock(); p = br_port_get_rtnl(dev); if (!p) goto out; @@ -155,7 +155,6 @@ static int br_switchdev_event(struct notifier_block *unused, } out: - rtnl_unlock(); return err; } diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index f34e535e93bd..d5d7132ac847 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -565,7 +566,6 @@ int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj, } EXPORT_SYMBOL_GPL(switchdev_port_obj_dump); -static DEFINE_MUTEX(switchdev_mutex); static RAW_NOTIFIER_HEAD(switchdev_notif_chain); /** @@ -580,9 +580,9 @@ int register_switchdev_notifier(struct notifier_block *nb) { int err; - mutex_lock(&switchdev_mutex); + rtnl_lock(); err = raw_notifier_chain_register(&switchdev_notif_chain, nb); - mutex_unlock(&switchdev_mutex); + rtnl_unlock(); return err; } EXPORT_SYMBOL_GPL(register_switchdev_notifier); @@ -598,9 +598,9 @@ int unregister_switchdev_notifier(struct notifier_block *nb) { int err; - mutex_lock(&switchdev_mutex); + rtnl_lock(); err = raw_notifier_chain_unregister(&switchdev_notif_chain, nb); - mutex_unlock(&switchdev_mutex); + rtnl_unlock(); return err; } EXPORT_SYMBOL_GPL(unregister_switchdev_notifier); @@ -614,16 +614,17 @@ EXPORT_SYMBOL_GPL(unregister_switchdev_notifier); * Call all network notifier blocks. This should be called by driver * when it needs to propagate hardware event. * Return values are same as for atomic_notifier_call_chain(). + * rtnl_lock must be held. */ int call_switchdev_notifiers(unsigned long val, struct net_device *dev, struct switchdev_notifier_info *info) { int err; + ASSERT_RTNL(); + info->dev = dev; - mutex_lock(&switchdev_mutex); err = raw_notifier_call_chain(&switchdev_notif_chain, val, info); - mutex_unlock(&switchdev_mutex); return err; } EXPORT_SYMBOL_GPL(call_switchdev_notifiers); From 87e40d8d8ba965003611a8bdbe18019d344d9d58 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 27 Jan 2016 10:52:43 -0800 Subject: [PATCH 014/418] tcp: beware of alignments in tcp_get_info() [ Upstream commit ff5d749772018602c47509bdc0093ff72acd82ec ] With some combinations of user provided flags in netlink command, it is possible to call tcp_get_info() with a buffer that is not 8-bytes aligned. It does matter on some arches, so we need to use put_unaligned() to store the u64 fields. Current iproute2 package does not trigger this particular issue. Fixes: 0df48c26d841 ("tcp: add tcpi_bytes_acked to tcp_info") Fixes: 977cb0ecf82e ("tcp: add pacing_rate information into tcp_info") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c82cca18c90f..9c1241e3230d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -279,6 +279,7 @@ #include #include +#include #include int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; @@ -2637,6 +2638,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) const struct inet_connection_sock *icsk = inet_csk(sk); u32 now = tcp_time_stamp; unsigned int start; + u64 rate64; u32 rate; memset(info, 0, sizeof(*info)); @@ -2702,15 +2704,17 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_total_retrans = tp->total_retrans; rate = READ_ONCE(sk->sk_pacing_rate); - info->tcpi_pacing_rate = rate != ~0U ? rate : ~0ULL; + rate64 = rate != ~0U ? rate : ~0ULL; + put_unaligned(rate64, &info->tcpi_pacing_rate); rate = READ_ONCE(sk->sk_max_pacing_rate); - info->tcpi_max_pacing_rate = rate != ~0U ? rate : ~0ULL; + rate64 = rate != ~0U ? rate : ~0ULL; + put_unaligned(rate64, &info->tcpi_max_pacing_rate); do { start = u64_stats_fetch_begin_irq(&tp->syncp); - info->tcpi_bytes_acked = tp->bytes_acked; - info->tcpi_bytes_received = tp->bytes_received; + put_unaligned(tp->bytes_acked, &info->tcpi_bytes_acked); + put_unaligned(tp->bytes_received, &info->tcpi_bytes_received); } while (u64_stats_fetch_retry_irq(&tp->syncp, start)); info->tcpi_segs_out = tp->segs_out; info->tcpi_segs_in = tp->segs_in; From e8e729ccd2fee9f0445c4949a1aa7dd610bfa8bd Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 29 Jan 2016 12:30:19 +0100 Subject: [PATCH 015/418] ipv6: enforce flowi6_oif usage in ip6_dst_lookup_tail() [ Upstream commit 6f21c96a78b835259546d8f3fb4edff0f651d478 ] The current implementation of ip6_dst_lookup_tail basically ignore the egress ifindex match: if the saddr is set, ip6_route_output() purposefully ignores flowi6_oif, due to the commit d46a9d678e4c ("net: ipv6: Dont add RT6_LOOKUP_F_IFACE flag if saddr set"), if the saddr is 'any' the first route lookup in ip6_dst_lookup_tail fails, but upon failure a second lookup will be performed with saddr set, thus ignoring the ifindex constraint. This commit adds an output route lookup function variant, which allows the caller to specify lookup flags, and modify ip6_dst_lookup_tail() to enforce the ifindex match on the second lookup via said helper. ip6_route_output() becames now a static inline function build on top of ip6_route_output_flags(); as a side effect, out-of-tree modules need now a GPL license to access the output route lookup functionality. Signed-off-by: Paolo Abeni Acked-by: Hannes Frederic Sowa Acked-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/ip6_route.h | 12 ++++++++++-- net/ipv6/ip6_output.c | 6 +++++- net/ipv6/route.c | 7 +++---- 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 877f682989b8..295d291269e2 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -64,8 +64,16 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr) void ip6_route_input(struct sk_buff *skb); -struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk, - struct flowi6 *fl6); +struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk, + struct flowi6 *fl6, int flags); + +static inline struct dst_entry *ip6_route_output(struct net *net, + const struct sock *sk, + struct flowi6 *fl6) +{ + return ip6_route_output_flags(net, sk, fl6, 0); +} + struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6, int flags); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 6473889f1736..31144c486c52 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -909,6 +909,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, struct rt6_info *rt; #endif int err; + int flags = 0; /* The correct way to handle this would be to do * ip6_route_get_saddr, and then ip6_route_output; however, @@ -940,10 +941,13 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, dst_release(*dst); *dst = NULL; } + + if (fl6->flowi6_oif) + flags |= RT6_LOOKUP_F_IFACE; } if (!*dst) - *dst = ip6_route_output(net, sk, fl6); + *dst = ip6_route_output_flags(net, sk, fl6, flags); err = (*dst)->error; if (err) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 826e6aa44f8d..3f164d3aaee2 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1174,11 +1174,10 @@ static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags); } -struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk, - struct flowi6 *fl6) +struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk, + struct flowi6 *fl6, int flags) { struct dst_entry *dst; - int flags = 0; bool any_src; dst = l3mdev_rt6_dst_by_oif(net, fl6); @@ -1199,7 +1198,7 @@ struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk, return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output); } -EXPORT_SYMBOL(ip6_route_output); +EXPORT_SYMBOL_GPL(ip6_route_output_flags); struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig) { From e1c4e14bf12eb0a7179eedde41e3a94877a08802 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 29 Jan 2016 12:30:20 +0100 Subject: [PATCH 016/418] ipv6/udp: use sticky pktinfo egress ifindex on connect() [ Upstream commit 1cdda91871470f15e79375991bd2eddc6e86ddb1 ] Currently, the egress interface index specified via IPV6_PKTINFO is ignored by __ip6_datagram_connect(), so that RFC 3542 section 6.7 can be subverted when the user space application calls connect() before sendmsg(). Fix it by initializing properly flowi6_oif in connect() before performing the route lookup. Signed-off-by: Paolo Abeni Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/datagram.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 517c55b01ba8..428162155280 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -162,6 +162,9 @@ ipv4_connected: fl6.fl6_dport = inet->inet_dport; fl6.fl6_sport = inet->inet_sport; + if (!fl6.flowi6_oif) + fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; + if (!fl6.flowi6_oif && (addr_type&IPV6_ADDR_MULTICAST)) fl6.flowi6_oif = np->mcast_oif; From cdbc66828d250b463c0ca3a1e715965b57f2c1a3 Mon Sep 17 00:00:00 2001 From: "subashab@codeaurora.org" Date: Tue, 2 Feb 2016 02:11:10 +0000 Subject: [PATCH 017/418] ipv6: addrconf: Fix recursive spin lock call [ Upstream commit 16186a82de1fdd868255448274e64ae2616e2640 ] A rcu stall with the following backtrace was seen on a system with forwarding, optimistic_dad and use_optimistic set. To reproduce, set these flags and allow ipv6 autoconf. This occurs because the device write_lock is acquired while already holding the read_lock. Back trace below - INFO: rcu_preempt self-detected stall on CPU { 1} (t=2100 jiffies g=3992 c=3991 q=4471) <6> Task dump for CPU 1: <2> kworker/1:0 R running task 12168 15 2 0x00000002 <2> Workqueue: ipv6_addrconf addrconf_dad_work <6> Call trace: <2> [] el1_irq+0x68/0xdc <2> [] _raw_write_lock_bh+0x20/0x30 <2> [] __ipv6_dev_ac_inc+0x64/0x1b4 <2> [] addrconf_join_anycast+0x9c/0xc4 <2> [] __ipv6_ifa_notify+0x160/0x29c <2> [] ipv6_ifa_notify+0x50/0x70 <2> [] addrconf_dad_work+0x314/0x334 <2> [] process_one_work+0x244/0x3fc <2> [] worker_thread+0x2f8/0x418 <2> [] kthread+0xe0/0xec v2: do addrconf_dad_kick inside read lock and then acquire write lock for ipv6_ifa_notify as suggested by Eric Fixes: 7fd2561e4ebdd ("net: ipv6: Add a sysctl to make optimistic addresses useful candidates") Cc: Eric Dumazet Cc: Erik Kline Cc: Hannes Frederic Sowa Signed-off-by: Subash Abhinov Kasiviswanathan Acked-by: Hannes Frederic Sowa Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/addrconf.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 1f21087accab..828ab28854c8 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3506,6 +3506,7 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp) { struct inet6_dev *idev = ifp->idev; struct net_device *dev = idev->dev; + bool notify = false; addrconf_join_solict(dev, &ifp->addr); @@ -3551,7 +3552,7 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp) /* Because optimistic nodes can use this address, * notify listeners. If DAD fails, RTM_DELADDR is sent. */ - ipv6_ifa_notify(RTM_NEWADDR, ifp); + notify = true; } } @@ -3559,6 +3560,8 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp) out: spin_unlock(&ifp->lock); read_unlock_bh(&idev->lock); + if (notify) + ipv6_ifa_notify(RTM_NEWADDR, ifp); } static void addrconf_dad_start(struct inet6_ifaddr *ifp) From 4c890233fdee1312190b0db8b664baf2ff9a4ad8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 2 Feb 2016 17:55:01 -0800 Subject: [PATCH 018/418] ipv6: fix a lockdep splat [ Upstream commit 44c3d0c1c0a880354e9de5d94175742e2c7c9683 ] Silence lockdep false positive about rcu_dereference() being used in the wrong context. First one should use rcu_dereference_protected() as we own the spinlock. Second one should be a normal assignation, as no barrier is needed. Fixes: 18367681a10bd ("ipv6 flowlabel: Convert np->ipv6_fl_list to RCU.") Reported-by: Dave Jones Signed-off-by: Eric Dumazet Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_flowlabel.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 1f9ebe3cbb4a..dc2db4f7b182 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -540,12 +540,13 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) } spin_lock_bh(&ip6_sk_fl_lock); for (sflp = &np->ipv6_fl_list; - (sfl = rcu_dereference(*sflp)) != NULL; + (sfl = rcu_dereference_protected(*sflp, + lockdep_is_held(&ip6_sk_fl_lock))) != NULL; sflp = &sfl->next) { if (sfl->fl->label == freq.flr_label) { if (freq.flr_label == (np->flow_label&IPV6_FLOWLABEL_MASK)) np->flow_label &= ~IPV6_FLOWLABEL_MASK; - *sflp = rcu_dereference(sfl->next); + *sflp = sfl->next; spin_unlock_bh(&ip6_sk_fl_lock); fl_release(sfl->fl); kfree_rcu(sfl, rcu); From 3ba9b9f2409168fb50d0a0758b922508e7885f48 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Wed, 3 Feb 2016 02:11:03 +0100 Subject: [PATCH 019/418] unix: correctly track in-flight fds in sending process user_struct [ Upstream commit 415e3d3e90ce9e18727e8843ae343eda5a58fad6 ] The commit referenced in the Fixes tag incorrectly accounted the number of in-flight fds over a unix domain socket to the original opener of the file-descriptor. This allows another process to arbitrary deplete the original file-openers resource limit for the maximum of open files. Instead the sending processes and its struct cred should be credited. To do so, we add a reference counted struct user_struct pointer to the scm_fp_list and use it to account for the number of inflight unix fds. Fixes: 712f4aad406bb1 ("unix: properly account for FDs passed over unix sockets") Reported-by: David Herrmann Cc: David Herrmann Cc: Willy Tarreau Cc: Linus Torvalds Suggested-by: Linus Torvalds Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/af_unix.h | 4 ++-- include/net/scm.h | 1 + net/core/scm.c | 7 +++++++ net/unix/af_unix.c | 4 ++-- net/unix/garbage.c | 8 ++++---- 5 files changed, 16 insertions(+), 8 deletions(-) diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 2a91a0561a47..9b4c418bebd8 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -6,8 +6,8 @@ #include #include -void unix_inflight(struct file *fp); -void unix_notinflight(struct file *fp); +void unix_inflight(struct user_struct *user, struct file *fp); +void unix_notinflight(struct user_struct *user, struct file *fp); void unix_gc(void); void wait_for_unix_gc(void); struct sock *unix_get_socket(struct file *filp); diff --git a/include/net/scm.h b/include/net/scm.h index 262532d111f5..59fa93c01d2a 100644 --- a/include/net/scm.h +++ b/include/net/scm.h @@ -21,6 +21,7 @@ struct scm_creds { struct scm_fp_list { short count; short max; + struct user_struct *user; struct file *fp[SCM_MAX_FD]; }; diff --git a/net/core/scm.c b/net/core/scm.c index 8a1741b14302..dce0acb929f1 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -87,6 +87,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp) *fplp = fpl; fpl->count = 0; fpl->max = SCM_MAX_FD; + fpl->user = NULL; } fpp = &fpl->fp[fpl->count]; @@ -107,6 +108,10 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp) *fpp++ = file; fpl->count++; } + + if (!fpl->user) + fpl->user = get_uid(current_user()); + return num; } @@ -119,6 +124,7 @@ void __scm_destroy(struct scm_cookie *scm) scm->fp = NULL; for (i=fpl->count-1; i>=0; i--) fput(fpl->fp[i]); + free_uid(fpl->user); kfree(fpl); } } @@ -336,6 +342,7 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl) for (i = 0; i < fpl->count; i++) get_file(fpl->fp[i]); new_fpl->max = new_fpl->count; + new_fpl->user = get_uid(fpl->user); } return new_fpl; } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 775855ee1ff8..6de41c33a9db 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1496,7 +1496,7 @@ static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb) UNIXCB(skb).fp = NULL; for (i = scm->fp->count-1; i >= 0; i--) - unix_notinflight(scm->fp->fp[i]); + unix_notinflight(scm->fp->user, scm->fp->fp[i]); } static void unix_destruct_scm(struct sk_buff *skb) @@ -1561,7 +1561,7 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) return -ENOMEM; for (i = scm->fp->count - 1; i >= 0; i--) - unix_inflight(scm->fp->fp[i]); + unix_inflight(scm->fp->user, scm->fp->fp[i]); return max_level; } diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 8fcdc2283af5..6a0d48525fcf 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -116,7 +116,7 @@ struct sock *unix_get_socket(struct file *filp) * descriptor if it is for an AF_UNIX socket. */ -void unix_inflight(struct file *fp) +void unix_inflight(struct user_struct *user, struct file *fp) { struct sock *s = unix_get_socket(fp); @@ -133,11 +133,11 @@ void unix_inflight(struct file *fp) } unix_tot_inflight++; } - fp->f_cred->user->unix_inflight++; + user->unix_inflight++; spin_unlock(&unix_gc_lock); } -void unix_notinflight(struct file *fp) +void unix_notinflight(struct user_struct *user, struct file *fp) { struct sock *s = unix_get_socket(fp); @@ -152,7 +152,7 @@ void unix_notinflight(struct file *fp) list_del_init(&u->link); unix_tot_inflight--; } - fp->f_cred->user->unix_inflight--; + user->unix_inflight--; spin_unlock(&unix_gc_lock); } From 2679161c7712b175053d49536409c4450398067f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 2 Feb 2016 19:31:12 -0800 Subject: [PATCH 020/418] tcp: do not drop syn_recv on all icmp reports [ Upstream commit 9cf7490360bf2c46a16b7525f899e4970c5fc144 ] Petr Novopashenniy reported that ICMP redirects on SYN_RECV sockets were leading to RST. This is of course incorrect. A specific list of ICMP messages should be able to drop a SYN_RECV. For instance, a REDIRECT on SYN_RECV shall be ignored, as we do not hold a dst per SYN_RECV pseudo request. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=111751 Fixes: 079096f103fa ("tcp/dccp: install syn_recv requests into ehash table") Reported-by: Petr Novopashenniy Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/tcp.h | 2 +- net/ipv4/tcp_ipv4.c | 11 ++++++++--- net/ipv6/tcp_ipv6.c | 5 +++-- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index f80e74c5ad18..414d822bc1db 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -449,7 +449,7 @@ const u8 *tcp_parse_md5sig_option(const struct tcphdr *th); void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb); void tcp_v4_mtu_reduced(struct sock *sk); -void tcp_req_err(struct sock *sk, u32 seq); +void tcp_req_err(struct sock *sk, u32 seq, bool abort); int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb); struct sock *tcp_create_openreq_child(const struct sock *sk, struct request_sock *req, diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 46277170e492..f66696c51c8c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -312,7 +312,7 @@ static void do_redirect(struct sk_buff *skb, struct sock *sk) /* handle ICMP messages on TCP_NEW_SYN_RECV request sockets */ -void tcp_req_err(struct sock *sk, u32 seq) +void tcp_req_err(struct sock *sk, u32 seq, bool abort) { struct request_sock *req = inet_reqsk(sk); struct net *net = sock_net(sk); @@ -324,7 +324,7 @@ void tcp_req_err(struct sock *sk, u32 seq) if (seq != tcp_rsk(req)->snt_isn) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); - } else { + } else if (abort) { /* * Still in SYN_RECV, just remove it silently. * There is no good way to pass the error to the newly @@ -384,7 +384,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) } seq = ntohl(th->seq); if (sk->sk_state == TCP_NEW_SYN_RECV) - return tcp_req_err(sk, seq); + return tcp_req_err(sk, seq, + type == ICMP_PARAMETERPROB || + type == ICMP_TIME_EXCEEDED || + (type == ICMP_DEST_UNREACH && + (code == ICMP_NET_UNREACH || + code == ICMP_HOST_UNREACH))); bh_lock_sock(sk); /* If too many ICMPs get dropped on busy diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index bd100b47c717..aea071eca370 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -328,6 +328,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct tcp_sock *tp; __u32 seq, snd_una; struct sock *sk; + bool fatal; int err; sk = __inet6_lookup_established(net, &tcp_hashinfo, @@ -346,8 +347,9 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return; } seq = ntohl(th->seq); + fatal = icmpv6_err_convert(type, code, &err); if (sk->sk_state == TCP_NEW_SYN_RECV) - return tcp_req_err(sk, seq); + return tcp_req_err(sk, seq, fatal); bh_lock_sock(sk); if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG) @@ -401,7 +403,6 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, goto out; } - icmpv6_err_convert(type, code, &err); /* Might be for an request_sock */ switch (sk->sk_state) { From 1bec5f40664ccef7efa54102c5e29ac52c7db634 Mon Sep 17 00:00:00 2001 From: Hans Westgaard Ry Date: Wed, 3 Feb 2016 09:26:57 +0100 Subject: [PATCH 021/418] net:Add sysctl_max_skb_frags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 5f74f82ea34c0da80ea0b49192bb5ea06e063593 ] Devices may have limits on the number of fragments in an skb they support. Current codebase uses a constant as maximum for number of fragments one skb can hold and use. When enabling scatter/gather and running traffic with many small messages the codebase uses the maximum number of fragments and may thereby violate the max for certain devices. The patch introduces a global variable as max number of fragments. Signed-off-by: Hans Westgaard Ry Reviewed-by: Håkon Bugge Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/skbuff.h | 1 + net/core/skbuff.c | 2 ++ net/core/sysctl_net_core.c | 10 ++++++++++ net/ipv4/tcp.c | 4 ++-- 4 files changed, 15 insertions(+), 2 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 9147f9f34cbe..75f136a22a5e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -219,6 +219,7 @@ struct sk_buff; #else #define MAX_SKB_FRAGS (65536/PAGE_SIZE + 1) #endif +extern int sysctl_max_skb_frags; typedef struct skb_frag_struct skb_frag_t; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index b2df375ec9c2..5bf88f58bee7 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -79,6 +79,8 @@ struct kmem_cache *skbuff_head_cache __read_mostly; static struct kmem_cache *skbuff_fclone_cache __read_mostly; +int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS; +EXPORT_SYMBOL(sysctl_max_skb_frags); /** * skb_panic - private function for out-of-line support diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 95b6139d710c..a6beb7b6ae55 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -26,6 +26,7 @@ static int zero = 0; static int one = 1; static int min_sndbuf = SOCK_MIN_SNDBUF; static int min_rcvbuf = SOCK_MIN_RCVBUF; +static int max_skb_frags = MAX_SKB_FRAGS; static int net_msg_warn; /* Unused, but still a sysctl */ @@ -392,6 +393,15 @@ static struct ctl_table net_core_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "max_skb_frags", + .data = &sysctl_max_skb_frags, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, + .extra2 = &max_skb_frags, + }, { } }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 9c1241e3230d..036a76ba2ac2 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -939,7 +939,7 @@ new_segment: i = skb_shinfo(skb)->nr_frags; can_coalesce = skb_can_coalesce(skb, i, page, offset); - if (!can_coalesce && i >= MAX_SKB_FRAGS) { + if (!can_coalesce && i >= sysctl_max_skb_frags) { tcp_mark_push(tp, skb); goto new_segment; } @@ -1212,7 +1212,7 @@ new_segment: if (!skb_can_coalesce(skb, i, pfrag->page, pfrag->offset)) { - if (i == MAX_SKB_FRAGS || !sg) { + if (i == sysctl_max_skb_frags || !sg) { tcp_mark_push(tp, skb); goto new_segment; } From 98673eb04022285598ae9b62aa314d5ea65e2c27 Mon Sep 17 00:00:00 2001 From: Siva Reddy Kallam Date: Wed, 3 Feb 2016 14:09:38 +0530 Subject: [PATCH 022/418] tg3: Fix for tg3 transmit queue 0 timed out when too many gso_segs [ Upstream commit b7d987295c74500b733a0ba07f9a9bcc4074fa83 ] tg3_tso_bug() can hit a condition where the entire tx ring is not big enough to segment the GSO packet. For example, if MSS is very small, gso_segs can exceed the tx ring size. When we hit the condition, it will cause tx timeout. tg3_tso_bug() is called to handle TSO and DMA hardware bugs. For TSO bugs, if tg3_tso_bug() cannot succeed, we have to drop the packet. For DMA bugs, we can still fall back to linearize the SKB and let the hardware transmit the TSO packet. This patch adds a function tg3_tso_bug_gso_check() to check if there are enough tx descriptors for GSO before calling tg3_tso_bug(). The caller will then handle the error appropriately - drop or lineraize the SKB. v2: Corrected patch description to avoid confusion. Signed-off-by: Siva Reddy Kallam Signed-off-by: Michael Chan Acked-by: Prashant Sreedharan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/tg3.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 79789d8e52da..ca5ac5d6f4e6 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -7833,6 +7833,14 @@ static int tigon3_dma_hwbug_workaround(struct tg3_napi *tnapi, return ret; } +static bool tg3_tso_bug_gso_check(struct tg3_napi *tnapi, struct sk_buff *skb) +{ + /* Check if we will never have enough descriptors, + * as gso_segs can be more than current ring size + */ + return skb_shinfo(skb)->gso_segs < tnapi->tx_pending / 3; +} + static netdev_tx_t tg3_start_xmit(struct sk_buff *, struct net_device *); /* Use GSO to workaround all TSO packets that meet HW bug conditions @@ -7936,14 +7944,19 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) * vlan encapsulated. */ if (skb->protocol == htons(ETH_P_8021Q) || - skb->protocol == htons(ETH_P_8021AD)) - return tg3_tso_bug(tp, tnapi, txq, skb); + skb->protocol == htons(ETH_P_8021AD)) { + if (tg3_tso_bug_gso_check(tnapi, skb)) + return tg3_tso_bug(tp, tnapi, txq, skb); + goto drop; + } if (!skb_is_gso_v6(skb)) { if (unlikely((ETH_HLEN + hdr_len) > 80) && - tg3_flag(tp, TSO_BUG)) - return tg3_tso_bug(tp, tnapi, txq, skb); - + tg3_flag(tp, TSO_BUG)) { + if (tg3_tso_bug_gso_check(tnapi, skb)) + return tg3_tso_bug(tp, tnapi, txq, skb); + goto drop; + } ip_csum = iph->check; ip_tot_len = iph->tot_len; iph->check = 0; @@ -8075,7 +8088,7 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) if (would_hit_hwbug) { tg3_tx_skb_unmap(tnapi, tnapi->tx_prod, i); - if (mss) { + if (mss && tg3_tso_bug_gso_check(tnapi, skb)) { /* If it's a TSO packet, do GSO instead of * allocating and copying to a large linear SKB */ From ff9140071bfb29199504563697056f51f9778f9f Mon Sep 17 00:00:00 2001 From: Sandeep Pillai Date: Wed, 3 Feb 2016 14:40:44 +0530 Subject: [PATCH 023/418] enic: increment devcmd2 result ring in case of timeout [ Upstream commit ca7f41a4957b872577807169bd7464b36aae9b9c ] Firmware posts the devcmd result in result ring. In case of timeout, driver does not increment the current result pointer and firmware could post the result after timeout has occurred. During next devcmd, driver would be reading the result of previous devcmd. Fix this by incrementing result even in case of timeout. Fixes: 373fb0873d43 ("enic: add devcmd2") Signed-off-by: Sandeep Pillai Signed-off-by: Govindarajulu Varadarajan <_govind@gmx.com> Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/cisco/enic/enic.h | 2 +- drivers/net/ethernet/cisco/enic/vnic_dev.c | 19 ++++++++++++------- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/cisco/enic/enic.h b/drivers/net/ethernet/cisco/enic/enic.h index 1671fa3332c2..7ba6d530b0c0 100644 --- a/drivers/net/ethernet/cisco/enic/enic.h +++ b/drivers/net/ethernet/cisco/enic/enic.h @@ -33,7 +33,7 @@ #define DRV_NAME "enic" #define DRV_DESCRIPTION "Cisco VIC Ethernet NIC Driver" -#define DRV_VERSION "2.3.0.12" +#define DRV_VERSION "2.3.0.20" #define DRV_COPYRIGHT "Copyright 2008-2013 Cisco Systems, Inc" #define ENIC_BARS_MAX 6 diff --git a/drivers/net/ethernet/cisco/enic/vnic_dev.c b/drivers/net/ethernet/cisco/enic/vnic_dev.c index 1ffd1050860b..1fdf5fe12a95 100644 --- a/drivers/net/ethernet/cisco/enic/vnic_dev.c +++ b/drivers/net/ethernet/cisco/enic/vnic_dev.c @@ -298,7 +298,8 @@ static int _vnic_dev_cmd2(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd, int wait) { struct devcmd2_controller *dc2c = vdev->devcmd2; - struct devcmd2_result *result = dc2c->result + dc2c->next_result; + struct devcmd2_result *result; + u8 color; unsigned int i; int delay, err; u32 fetch_index, new_posted; @@ -336,13 +337,17 @@ static int _vnic_dev_cmd2(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd, if (dc2c->cmd_ring[posted].flags & DEVCMD2_FNORESULT) return 0; + result = dc2c->result + dc2c->next_result; + color = dc2c->color; + + dc2c->next_result++; + if (dc2c->next_result == dc2c->result_size) { + dc2c->next_result = 0; + dc2c->color = dc2c->color ? 0 : 1; + } + for (delay = 0; delay < wait; delay++) { - if (result->color == dc2c->color) { - dc2c->next_result++; - if (dc2c->next_result == dc2c->result_size) { - dc2c->next_result = 0; - dc2c->color = dc2c->color ? 0 : 1; - } + if (result->color == color) { if (result->error) { err = result->error; if (err != ERR_ECMDUNKNOWN || From 2038fb6f957cb3367117ba8c91f2372dd28753aa Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 3 Feb 2016 23:33:30 +0800 Subject: [PATCH 024/418] sctp: translate network order to host order when users get a hmacid [ Upstream commit 7a84bd46647ff181eb2659fdc99590e6f16e501d ] Commit ed5a377d87dc ("sctp: translate host order to network order when setting a hmacid") corrected the hmacid byte-order when setting a hmacid. but the same issue also exists on getting a hmacid. We fix it by changing hmacids to host order when users get them with getsockopt. Fixes: Commit ed5a377d87dc ("sctp: translate host order to network order when setting a hmacid") Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/socket.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 06a9cfbc07b2..be1489fc3234 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -5542,6 +5542,7 @@ static int sctp_getsockopt_hmac_ident(struct sock *sk, int len, struct sctp_hmac_algo_param *hmacs; __u16 data_len = 0; u32 num_idents; + int i; if (!ep->auth_enable) return -EACCES; @@ -5559,8 +5560,12 @@ static int sctp_getsockopt_hmac_ident(struct sock *sk, int len, return -EFAULT; if (put_user(num_idents, &p->shmac_num_idents)) return -EFAULT; - if (copy_to_user(p->shmac_idents, hmacs->hmac_ids, data_len)) - return -EFAULT; + for (i = 0; i < num_idents; i++) { + __u16 hmacid = ntohs(hmacs->hmac_ids[i]); + + if (copy_to_user(&p->shmac_idents[i], &hmacid, sizeof(__u16))) + return -EFAULT; + } return 0; } From e3865b8bb55897cef3ebb23da090a2b756ddf9d6 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 9 Feb 2016 06:14:43 -0800 Subject: [PATCH 025/418] net: Copy inner L3 and L4 headers as unaligned on GRE TEB [ Upstream commit 78565208d73ca9b654fb9a6b142214d52eeedfd1 ] This patch corrects the unaligned accesses seen on GRE TEB tunnels when generating hash keys. Specifically what this patch does is make it so that we force the use of skb_copy_bits when the GRE inner headers will be unaligned due to NET_IP_ALIGNED being a non-zero value. Signed-off-by: Alexander Duyck Acked-by: Tom Herbert Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/flow_dissector.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index d79699c9d1b9..b258f457dac8 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -396,6 +396,13 @@ ip_proto_again: goto out_bad; proto = eth->h_proto; nhoff += sizeof(*eth); + + /* Cap headers that we access via pointers at the + * end of the Ethernet header as our maximum alignment + * at that point is only 2 bytes. + */ + if (NET_IP_ALIGN) + hlen = nhoff; } key_control->flags |= FLOW_DIS_ENCAPSULATION; From b083b36ce3c91abe5ef7b51393d8af66c19fb594 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 9 Feb 2016 02:49:54 -0800 Subject: [PATCH 026/418] flow_dissector: Fix unaligned access in __skb_flow_dissector when used by eth_get_headlen [ Upstream commit 461547f3158978c180d74484d58e82be9b8e7357 ] This patch fixes an issue with unaligned accesses when using eth_get_headlen on a page that was DMA aligned instead of being IP aligned. The fact is when trying to check the length we don't need to be looking at the flow label so we can reorder the checks to first check if we are supposed to gather the flow label and then make the call to actually get it. v2: Updated path so that either STOP_AT_FLOW_LABEL or KEY_FLOW_LABEL can cause us to check for the flow label. Reported-by: Sowmini Varadhan Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/flow_dissector.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index b258f457dac8..12e700332010 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -208,7 +208,6 @@ ip: case htons(ETH_P_IPV6): { const struct ipv6hdr *iph; struct ipv6hdr _iph; - __be32 flow_label; ipv6: iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph); @@ -230,8 +229,12 @@ ipv6: key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; } - flow_label = ip6_flowlabel(iph); - if (flow_label) { + if ((dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_FLOW_LABEL) || + (flags & FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL)) && + ip6_flowlabel(iph)) { + __be32 flow_label = ip6_flowlabel(iph); + if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_FLOW_LABEL)) { key_tags = skb_flow_dissector_target(flow_dissector, From a34f2f9f2034f7984f9529002c6fffe9cb63189d Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 10 Feb 2016 16:47:11 +0100 Subject: [PATCH 027/418] bpf: fix branch offset adjustment on backjumps after patching ctx expansion [ Upstream commit a1b14d27ed0965838350f1377ff97c93ee383492 ] When ctx access is used, the kernel often needs to expand/rewrite instructions, so after that patching, branch offsets have to be adjusted for both forward and backward jumps in the new eBPF program, but for backward jumps it fails to account the delta. Meaning, for example, if the expansion happens exactly on the insn that sits at the jump target, it doesn't fix up the back jump offset. Analysis on what the check in adjust_branches() is currently doing: /* adjust offset of jmps if necessary */ if (i < pos && i + insn->off + 1 > pos) insn->off += delta; else if (i > pos && i + insn->off + 1 < pos) insn->off -= delta; First condition (forward jumps): Before: After: insns[0] insns[0] insns[1] <--- i/insn insns[1] <--- i/insn insns[2] <--- pos insns[P] <--- pos insns[3] insns[P] `------| delta insns[4] <--- target_X insns[P] `-----| insns[5] insns[3] insns[4] <--- target_X insns[5] First case is if we cross pos-boundary and the jump instruction was before pos. This is handeled correctly. I.e. if i == pos, then this would mean our jump that we currently check was the patchlet itself that we just injected. Since such patchlets are self-contained and have no awareness of any insns before or after the patched one, the delta is correctly not adjusted. Also, for the second condition in case of i + insn->off + 1 == pos, means we jump to that newly patched instruction, so no offset adjustment are needed. That part is correct. Second condition (backward jumps): Before: After: insns[0] insns[0] insns[1] <--- target_X insns[1] <--- target_X insns[2] <--- pos <-- target_Y insns[P] <--- pos <-- target_Y insns[3] insns[P] `------| delta insns[4] <--- i/insn insns[P] `-----| insns[5] insns[3] insns[4] <--- i/insn insns[5] Second interesting case is where we cross pos-boundary and the jump instruction was after pos. Backward jump with i == pos would be impossible and pose a bug somewhere in the patchlet, so the first condition checking i > pos is okay only by itself. However, i + insn->off + 1 < pos does not always work as intended to trigger the adjustment. It works when jump targets would be far off where the delta wouldn't matter. But, for example, where the fixed insn->off before pointed to pos (target_Y), it now points to pos + delta, so that additional room needs to be taken into account for the check. This means that i) both tests here need to be adjusted into pos + delta, and ii) for the second condition, the test needs to be <= as pos itself can be a target in the backjump, too. Fixes: 9bac3d6d548e ("bpf: allow extended BPF programs access skb fields") Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index d1d3e8f57de9..2e7f7ab739e4 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2082,7 +2082,7 @@ static void adjust_branches(struct bpf_prog *prog, int pos, int delta) /* adjust offset of jmps if necessary */ if (i < pos && i + insn->off + 1 > pos) insn->off += delta; - else if (i > pos && i + insn->off + 1 < pos) + else if (i > pos + delta && i + insn->off + 1 <= pos + delta) insn->off -= delta; } } From 07cc96fb15fecbd7f17c1d49eafcf127d3f5709f Mon Sep 17 00:00:00 2001 From: Jay Vosburgh Date: Tue, 2 Feb 2016 13:35:56 -0800 Subject: [PATCH 028/418] bonding: Fix ARP monitor validation [ Upstream commit 21a75f0915dde8674708b39abfcda113911c49b1 ] The current logic in bond_arp_rcv will accept an incoming ARP for validation if (a) the receiving slave is either "active" (which includes the currently active slave, or the current ARP slave) or, (b) there is a currently active slave, and it has received an ARP since it became active. For case (b), the receiving slave isn't the currently active slave, and is receiving the original broadcast ARP request, not an ARP reply from the target. This logic can fail if there is no currently active slave. In this situation, the ARP probe logic cycles through all slaves, assigning each in turn as the "current_arp_slave" for one arp_interval, then setting that one as "active," and sending an ARP probe from that slave. The current logic expects the ARP reply to arrive on the sending current_arp_slave, however, due to switch FDB updating delays, the reply may be directed to another slave. This can arise if the bonding slaves and switch are working, but the ARP target is not responding. When the ARP target recovers, a condition may result wherein the ARP target host replies faster than the switch can update its forwarding table, causing each ARP reply to be sent to the previous current_arp_slave. This will never pass the logic in bond_arp_rcv, as neither of the above conditions (a) or (b) are met. Some experimentation on a LAN shows ARP reply round trips in the 200 usec range, but my available switches never update their FDB in less than 4000 usec. This patch changes the logic in bond_arp_rcv to additionally accept an ARP reply for validation on any slave if there is a current ARP slave and it sent an ARP probe during the previous arp_interval. Fixes: aeea64ac717a ("bonding: don't trust arp requests unless active slave really works") Cc: Veaceslav Falico Cc: Andy Gospodarek Signed-off-by: Jay Vosburgh Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/bonding/bond_main.c | 39 +++++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index f1692e418fe4..28bbca0af238 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -214,6 +214,8 @@ static void bond_uninit(struct net_device *bond_dev); static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev, struct rtnl_link_stats64 *stats); static void bond_slave_arr_handler(struct work_struct *work); +static bool bond_time_in_interval(struct bonding *bond, unsigned long last_act, + int mod); /*---------------------------- General routines -----------------------------*/ @@ -2418,7 +2420,7 @@ int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond, struct slave *slave) { struct arphdr *arp = (struct arphdr *)skb->data; - struct slave *curr_active_slave; + struct slave *curr_active_slave, *curr_arp_slave; unsigned char *arp_ptr; __be32 sip, tip; int alen, is_arp = skb->protocol == __cpu_to_be16(ETH_P_ARP); @@ -2465,26 +2467,41 @@ int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond, &sip, &tip); curr_active_slave = rcu_dereference(bond->curr_active_slave); + curr_arp_slave = rcu_dereference(bond->current_arp_slave); - /* Backup slaves won't see the ARP reply, but do come through - * here for each ARP probe (so we swap the sip/tip to validate - * the probe). In a "redundant switch, common router" type of - * configuration, the ARP probe will (hopefully) travel from - * the active, through one switch, the router, then the other - * switch before reaching the backup. + /* We 'trust' the received ARP enough to validate it if: * - * We 'trust' the arp requests if there is an active slave and - * it received valid arp reply(s) after it became active. This - * is done to avoid endless looping when we can't reach the + * (a) the slave receiving the ARP is active (which includes the + * current ARP slave, if any), or + * + * (b) the receiving slave isn't active, but there is a currently + * active slave and it received valid arp reply(s) after it became + * the currently active slave, or + * + * (c) there is an ARP slave that sent an ARP during the prior ARP + * interval, and we receive an ARP reply on any slave. We accept + * these because switch FDB update delays may deliver the ARP + * reply to a slave other than the sender of the ARP request. + * + * Note: for (b), backup slaves are receiving the broadcast ARP + * request, not a reply. This request passes from the sending + * slave through the L2 switch(es) to the receiving slave. Since + * this is checking the request, sip/tip are swapped for + * validation. + * + * This is done to avoid endless looping when we can't reach the * arp_ip_target and fool ourselves with our own arp requests. */ - if (bond_is_active_slave(slave)) bond_validate_arp(bond, slave, sip, tip); else if (curr_active_slave && time_after(slave_last_rx(bond, curr_active_slave), curr_active_slave->last_link_up)) bond_validate_arp(bond, slave, tip, sip); + else if (curr_arp_slave && (arp->ar_op == htons(ARPOP_REPLY)) && + bond_time_in_interval(bond, + dev_trans_start(curr_arp_slave->dev), 1)) + bond_validate_arp(bond, slave, sip, tip); out_unlock: if (arp != (struct arphdr *)skb->data) From 6b567a1abd72a4deb95126f164afcf62a3e091a1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 4 Feb 2016 06:23:28 -0800 Subject: [PATCH 029/418] ipv4: fix memory leaks in ip_cmsg_send() callers [ Upstream commit 919483096bfe75dda338e98d56da91a263746a0a ] Dmitry reported memory leaks of IP options allocated in ip_cmsg_send() when/if this function returns an error. Callers are responsible for the freeing. Many thanks to Dmitry for the report and diagnostic. Reported-by: Dmitry Vyukov Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_sockglue.c | 2 ++ net/ipv4/ping.c | 4 +++- net/ipv4/raw.c | 4 +++- net/ipv4/udp.c | 4 +++- 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 5f73a7c03e27..a50124260f5a 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -249,6 +249,8 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc, switch (cmsg->cmsg_type) { case IP_RETOPTS: err = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr)); + + /* Our caller is responsible for freeing ipc->opt */ err = ip_options_get(net, &ipc->opt, CMSG_DATA(cmsg), err < 40 ? err : 40); if (err) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index e89094ab5ddb..aa67e0e64b69 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -746,8 +746,10 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (msg->msg_controllen) { err = ip_cmsg_send(sock_net(sk), msg, &ipc, false); - if (err) + if (unlikely(err)) { + kfree(ipc.opt); return err; + } if (ipc.opt) free = 1; } diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index bc35f1842512..7113bae4e6a0 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -547,8 +547,10 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (msg->msg_controllen) { err = ip_cmsg_send(net, msg, &ipc, false); - if (err) + if (unlikely(err)) { + kfree(ipc.opt); goto out; + } if (ipc.opt) free = 1; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index c43890848641..7f8ab46adf61 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -966,8 +966,10 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (msg->msg_controllen) { err = ip_cmsg_send(sock_net(sk), msg, &ipc, sk->sk_family == AF_INET6); - if (err) + if (unlikely(err)) { + kfree(ipc.opt); return err; + } if (ipc.opt) free = 1; connected = 0; From 2f46f069ccfb28e6fdaa6798544fd30b72835b04 Mon Sep 17 00:00:00 2001 From: Rainer Weikusat Date: Mon, 8 Feb 2016 18:47:19 +0000 Subject: [PATCH 030/418] af_unix: Don't set err in unix_stream_read_generic unless there was an error [ Upstream commit 1b92ee3d03af6643df395300ba7748f19ecdb0c5 ] The present unix_stream_read_generic contains various code sequences of the form err = -EDISASTER; if () goto out; This has the unfortunate side effect of possibly causing the error code to bleed through to the final out: return copied ? : err; and then to be wrongly returned if no data was copied because the caller didn't supply a data buffer, as demonstrated by the program available at http://pad.lv/1540731 Change it such that err is only set if an error condition was detected. Fixes: 3822b5c2fc62 ("af_unix: Revert 'lock_interruptible' in stream receive code") Reported-by: Joseph Salisbury Signed-off-by: Rainer Weikusat Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/unix/af_unix.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 6de41c33a9db..265412c95d94 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2270,13 +2270,15 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state) size_t size = state->size; unsigned int last_len; - err = -EINVAL; - if (sk->sk_state != TCP_ESTABLISHED) + if (unlikely(sk->sk_state != TCP_ESTABLISHED)) { + err = -EINVAL; goto out; + } - err = -EOPNOTSUPP; - if (flags & MSG_OOB) + if (unlikely(flags & MSG_OOB)) { + err = -EOPNOTSUPP; goto out; + } target = sock_rcvlowat(sk, flags & MSG_WAITALL, size); timeo = sock_rcvtimeo(sk, noblock); @@ -2322,9 +2324,11 @@ again: goto unlock; unix_state_unlock(sk); - err = -EAGAIN; - if (!timeo) + if (!timeo) { + err = -EAGAIN; break; + } + mutex_unlock(&u->readlock); timeo = unix_stream_data_wait(sk, timeo, last, From 1bd367857b7f884db302703c624f88adfd8eb171 Mon Sep 17 00:00:00 2001 From: Rainer Weikusat Date: Thu, 11 Feb 2016 19:37:27 +0000 Subject: [PATCH 031/418] af_unix: Guard against other == sk in unix_dgram_sendmsg [ Upstream commit a5527dda344fff0514b7989ef7a755729769daa1 ] The unix_dgram_sendmsg routine use the following test if (unlikely(unix_peer(other) != sk && unix_recvq_full(other))) { to determine if sk and other are in an n:1 association (either established via connect or by using sendto to send messages to an unrelated socket identified by address). This isn't correct as the specified address could have been bound to the sending socket itself or because this socket could have been connected to itself by the time of the unix_peer_get but disconnected before the unix_state_lock(other). In both cases, the if-block would be entered despite other == sk which might either block the sender unintentionally or lead to trying to unlock the same spin lock twice for a non-blocking send. Add a other != sk check to guard against this. Fixes: 7d267278a9ec ("unix: avoid use-after-free in ep_remove_wait_queue") Reported-By: Philipp Hahn Signed-off-by: Rainer Weikusat Tested-by: Philipp Hahn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/unix/af_unix.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 265412c95d94..898a53a562b8 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1781,7 +1781,12 @@ restart_locked: goto out_unlock; } - if (unlikely(unix_peer(other) != sk && unix_recvq_full(other))) { + /* other == sk && unix_peer(other) != sk if + * - unix_peer(sk) == NULL, destination address bound to sk + * - unix_peer(sk) == sk by time of get but disconnected before lock + */ + if (other != sk && + unlikely(unix_peer(other) != sk && unix_recvq_full(other))) { if (timeo) { timeo = unix_wait_for_peer(other, timeo); From 692925fe2d42092a99d3532cb03932c8fda57786 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 10 Feb 2016 16:14:57 -0500 Subject: [PATCH 032/418] tipc: fix premature addition of node to lookup table [ Upstream commit d5c91fb72f1652ea3026925240a0998a42ddb16b ] In commit 5266698661401a ("tipc: let broadcast packet reception use new link receive function") we introduced a new per-node broadcast reception link instance. This link is created at the moment the node itself is created. Unfortunately, the allocation is done after the node instance has already been added to the node lookup hash table. This creates a potential race condition, where arriving broadcast packets are able to find and access the node before it has been fully initialized, and before the above mentioned link has been created. The result is occasional crashes in the function tipc_bcast_rcv(), which is trying to access the not-yet existing link. We fix this by deferring the addition of the node instance until after it has been fully initialized in the function tipc_node_create(). Acked-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tipc/node.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/tipc/node.c b/net/tipc/node.c index 20cddec0a43c..3926b561f873 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -168,12 +168,6 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities) skb_queue_head_init(&n_ptr->bc_entry.inputq1); __skb_queue_head_init(&n_ptr->bc_entry.arrvq); skb_queue_head_init(&n_ptr->bc_entry.inputq2); - hlist_add_head_rcu(&n_ptr->hash, &tn->node_htable[tipc_hashfn(addr)]); - list_for_each_entry_rcu(temp_node, &tn->node_list, list) { - if (n_ptr->addr < temp_node->addr) - break; - } - list_add_tail_rcu(&n_ptr->list, &temp_node->list); n_ptr->state = SELF_DOWN_PEER_LEAVING; n_ptr->signature = INVALID_NODE_SIG; n_ptr->active_links[0] = INVALID_BEARER_ID; @@ -193,6 +187,12 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities) tipc_node_get(n_ptr); setup_timer(&n_ptr->timer, tipc_node_timeout, (unsigned long)n_ptr); n_ptr->keepalive_intv = U32_MAX; + hlist_add_head_rcu(&n_ptr->hash, &tn->node_htable[tipc_hashfn(addr)]); + list_for_each_entry_rcu(temp_node, &tn->node_list, list) { + if (n_ptr->addr < temp_node->addr) + break; + } + list_add_tail_rcu(&n_ptr->list, &temp_node->list); exit: spin_unlock_bh(&tn->node_list_lock); return n_ptr; From a4b84d5efbb2aabfb728d717c851eb1e6f521bec Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 11 Feb 2016 22:50:29 -0800 Subject: [PATCH 033/418] tcp: md5: release request socket instead of listener [ Upstream commit 729235554d805c63e5e274fcc6a98e71015dd847 ] If tcp_v4_inbound_md5_hash() returns an error, we must release the refcount on the request socket, not on the listener. The bug was added for IPv4 only. Fixes: 079096f103fac ("tcp/dccp: install syn_recv requests into ehash table") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_ipv4.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index f66696c51c8c..bfcddee60897 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1597,8 +1597,10 @@ process: struct sock *nsk = NULL; sk = req->rsk_listener; - if (tcp_v4_inbound_md5_hash(sk, skb)) - goto discard_and_relse; + if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) { + reqsk_put(req); + goto discard_it; + } if (likely(sk->sk_state == TCP_LISTEN)) { nsk = tcp_check_req(sk, skb, req, false); } else { From 617d22ddf6b7ddf3860ac3d70e0e73b453e2afb9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Fri, 12 Feb 2016 16:42:14 +0100 Subject: [PATCH 034/418] qmi_wwan: add "4G LTE usb-modem U901" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit aac8d3c282e024c344c5b86dc1eab7af88bb9716 ] Thomas reports: T: Bus=01 Lev=01 Prnt=01 Port=03 Cnt=01 Dev#= 4 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=05c6 ProdID=6001 Rev=00.00 S: Manufacturer=USB Modem S: Product=USB Modem S: SerialNumber=1234567890ABCDEF C: #Ifs= 5 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan I: If#= 4 Alt= 0 #EPs= 2 Cls=08(stor.) Sub=06 Prot=50 Driver=usb-storage Reported-by: Thomas Schäfer Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 5fccc5a8153f..982e0acd1a36 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -492,6 +492,7 @@ static const struct usb_device_id products[] = { /* 3. Combined interface devices matching on interface number */ {QMI_FIXED_INTF(0x0408, 0xea42, 4)}, /* Yota / Megafon M100-1 */ + {QMI_FIXED_INTF(0x05c6, 0x6001, 3)}, /* 4G LTE usb-modem U901 */ {QMI_FIXED_INTF(0x05c6, 0x7000, 0)}, {QMI_FIXED_INTF(0x05c6, 0x7001, 1)}, {QMI_FIXED_INTF(0x05c6, 0x7002, 1)}, From 3b7abf72216b4ac632724eda3b99f6ebad5a6518 Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Wed, 17 Feb 2016 17:24:22 +0200 Subject: [PATCH 035/418] net/mlx4_en: Count HW buffer overrun only once [ Upstream commit 281e8b2fdf8e4ef366b899453cae50e09b577ada ] RdropOvflw counts overrun of HW buffer, therefore should be used for rx_fifo_errors only. Currently RdropOvflw counter is mistakenly also set into rx_missed_errors and rx_over_errors too, which makes the device total dropped packets accounting to show wrong results. Fix that. Use it for rx_fifo_errors only. Fixes: c27a02cd94d6 ('mlx4_en: Add driver for Mellanox ConnectX 10GbE NIC') Signed-off-by: Amir Vadai Signed-off-by: Eugenia Emantayev Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx4/en_port.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_port.c b/drivers/net/ethernet/mellanox/mlx4/en_port.c index ee99e67187f5..3904b5fc0b7c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_port.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_port.c @@ -238,11 +238,11 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) stats->collisions = 0; stats->rx_dropped = be32_to_cpu(mlx4_en_stats->RDROP); stats->rx_length_errors = be32_to_cpu(mlx4_en_stats->RdropLength); - stats->rx_over_errors = be32_to_cpu(mlx4_en_stats->RdropOvflw); + stats->rx_over_errors = 0; stats->rx_crc_errors = be32_to_cpu(mlx4_en_stats->RCRC); stats->rx_frame_errors = 0; stats->rx_fifo_errors = be32_to_cpu(mlx4_en_stats->RdropOvflw); - stats->rx_missed_errors = be32_to_cpu(mlx4_en_stats->RdropOvflw); + stats->rx_missed_errors = 0; stats->tx_aborted_errors = 0; stats->tx_carrier_errors = 0; stats->tx_fifo_errors = 0; From 7675c3c6bdf9f59ad994334ce05c839ca27dd6a8 Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Wed, 17 Feb 2016 17:24:23 +0200 Subject: [PATCH 036/418] net/mlx4_en: Choose time-stamping shift value according to HW frequency [ Upstream commit 31c128b66e5b28f468076e4f3ca3025c35342041 ] Previously, the shift value used for time-stamping was constant and didn't depend on the HW chip frequency. Change that to take the frequency into account and calculate the maximal value in cycles per wraparound of ten seconds. This time slot was chosen since it gives a good accuracy in time synchronization. Algorithm for shift value calculation: * Round up the maximal value in cycles to nearest power of two * Calculate maximal multiplier by division of all 64 bits set to above result * Then, invert the function clocksource_khz2mult() to get the shift from maximal mult value Fixes: ec693d47010e ('net/mlx4_en: Add HW timestamping (TS) support') Signed-off-by: Eugenia Emantayev Reviewed-by: Matan Barak Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx4/en_clock.c | 25 ++++++++++++++----- 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_clock.c b/drivers/net/ethernet/mellanox/mlx4/en_clock.c index 038f9ce391e6..1494997c4f7e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_clock.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_clock.c @@ -236,6 +236,24 @@ static const struct ptp_clock_info mlx4_en_ptp_clock_info = { .enable = mlx4_en_phc_enable, }; +#define MLX4_EN_WRAP_AROUND_SEC 10ULL + +/* This function calculates the max shift that enables the user range + * of MLX4_EN_WRAP_AROUND_SEC values in the cycles register. + */ +static u32 freq_to_shift(u16 freq) +{ + u32 freq_khz = freq * 1000; + u64 max_val_cycles = freq_khz * 1000 * MLX4_EN_WRAP_AROUND_SEC; + u64 max_val_cycles_rounded = is_power_of_2(max_val_cycles + 1) ? + max_val_cycles : roundup_pow_of_two(max_val_cycles) - 1; + /* calculate max possible multiplier in order to fit in 64bit */ + u64 max_mul = div_u64(0xffffffffffffffffULL, max_val_cycles_rounded); + + /* This comes from the reverse of clocksource_khz2mult */ + return ilog2(div_u64(max_mul * freq_khz, 1000000)); +} + void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev) { struct mlx4_dev *dev = mdev->dev; @@ -254,12 +272,7 @@ void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev) memset(&mdev->cycles, 0, sizeof(mdev->cycles)); mdev->cycles.read = mlx4_en_read_clock; mdev->cycles.mask = CLOCKSOURCE_MASK(48); - /* Using shift to make calculation more accurate. Since current HW - * clock frequency is 427 MHz, and cycles are given using a 48 bits - * register, the biggest shift when calculating using u64, is 14 - * (max_cycles * multiplier < 2^64) - */ - mdev->cycles.shift = 14; + mdev->cycles.shift = freq_to_shift(dev->caps.hca_core_clock); mdev->cycles.mult = clocksource_khz2mult(1000 * dev->caps.hca_core_clock, mdev->cycles.shift); mdev->nominal_c_mult = mdev->cycles.mult; From 1cabc3e35f4b2855c46e4e94fc3b93dd9a307327 Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Wed, 17 Feb 2016 17:24:27 +0200 Subject: [PATCH 037/418] net/mlx4_en: Avoid changing dev->features directly in run-time [ Upstream commit 925ab1aa9394bbaeac47ee5b65d3fdf0fb8135cf ] It's forbidden to manually change dev->features in run-time. Currently, this is done in the driver to make sure that GSO_UDP_TUNNEL is advertized only when VXLAN tunnel is set. However, since the stack actually does features intersection with hw_enc_features, we can safely revert to advertizing features early when registering the netdevice. Fixes: f4a1edd56120 ('net/mlx4_en: Advertize encapsulation offloads [...]') Signed-off-by: Eugenia Emantayev Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 7869f97de5da..67e9633ea9c7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2381,8 +2381,6 @@ out: /* set offloads */ priv->dev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_RXCSUM | NETIF_F_TSO | NETIF_F_GSO_UDP_TUNNEL; - priv->dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL; - priv->dev->features |= NETIF_F_GSO_UDP_TUNNEL; } static void mlx4_en_del_vxlan_offloads(struct work_struct *work) @@ -2393,8 +2391,6 @@ static void mlx4_en_del_vxlan_offloads(struct work_struct *work) /* unset offloads */ priv->dev->hw_enc_features &= ~(NETIF_F_IP_CSUM | NETIF_F_RXCSUM | NETIF_F_TSO | NETIF_F_GSO_UDP_TUNNEL); - priv->dev->hw_features &= ~NETIF_F_GSO_UDP_TUNNEL; - priv->dev->features &= ~NETIF_F_GSO_UDP_TUNNEL; ret = mlx4_SET_PORT_VXLAN(priv->mdev->dev, priv->port, VXLAN_STEER_BY_OUTER_MAC, 0); @@ -3020,6 +3016,11 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, priv->rss_hash_fn = ETH_RSS_HASH_TOP; } + if (mdev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) { + dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL; + dev->features |= NETIF_F_GSO_UDP_TUNNEL; + } + mdev->pndev[port] = dev; mdev->upper[port] = NULL; From e9f13d3fa0781ae13051fa644f715d3556244ac7 Mon Sep 17 00:00:00 2001 From: Mark Tomlinson Date: Mon, 15 Feb 2016 16:24:44 +1300 Subject: [PATCH 038/418] l2tp: Fix error creating L2TP tunnels [ Upstream commit 853effc55b0f975abd6d318cca486a9c1b67e10f ] A previous commit (33f72e6) added notification via netlink for tunnels when created/modified/deleted. If the notification returned an error, this error was returned from the tunnel function. If there were no listeners, the error code ESRCH was returned, even though having no listeners is not an error. Other calls to this and other similar notification functions either ignore the error code, or filter ESRCH. This patch checks for ESRCH and does not flag this as an error. Reviewed-by: Hamish Martin Signed-off-by: Mark Tomlinson Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/l2tp/l2tp_netlink.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index f93c5be612a7..2caaa84ce92d 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -124,8 +124,13 @@ static int l2tp_tunnel_notify(struct genl_family *family, ret = l2tp_nl_tunnel_send(msg, info->snd_portid, info->snd_seq, NLM_F_ACK, tunnel, cmd); - if (ret >= 0) - return genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC); + if (ret >= 0) { + ret = genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC); + /* We don't care if no one is listening */ + if (ret == -ESRCH) + ret = 0; + return ret; + } nlmsg_free(msg); @@ -147,8 +152,13 @@ static int l2tp_session_notify(struct genl_family *family, ret = l2tp_nl_session_send(msg, info->snd_portid, info->snd_seq, NLM_F_ACK, session, cmd); - if (ret >= 0) - return genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC); + if (ret >= 0) { + ret = genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC); + /* We don't care if no one is listening */ + if (ret == -ESRCH) + ret = 0; + return ret; + } nlmsg_free(msg); From 26fd5ed6e8ad66c86eb720cf85d1230c9b45a5f9 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Mon, 15 Feb 2016 17:01:10 +0100 Subject: [PATCH 039/418] pppoe: fix reference counting in PPPoE proxy [ Upstream commit 29e73269aa4d36f92b35610c25f8b01c789b0dc8 ] Drop reference on the relay_po socket when __pppoe_xmit() succeeds. This is already handled correctly in the error path. Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ppp/pppoe.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index 0a37f840fcc5..4e0068e775f9 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -395,6 +395,8 @@ static int pppoe_rcv_core(struct sock *sk, struct sk_buff *skb) if (!__pppoe_xmit(sk_pppox(relay_po), skb)) goto abort_put; + + sock_put(sk_pppox(relay_po)); } else { if (sock_queue_rcv_skb(sk, skb)) goto abort_kfree; From d4775ea09519074dd41039a39dbcb589390ae08f Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Thu, 18 Feb 2016 07:38:04 -0500 Subject: [PATCH 040/418] net_sched fix: reclassification needs to consider ether protocol changes [ Upstream commit 619fe32640b4b01f370574d50344ae0f62689816 ] actions could change the etherproto in particular with ethernet tunnelled data. Typically such actions, after peeling the outer header, will ask for the packet to be reclassified. We then need to restart the classification with the new proto header. Example setup used to catch this: sudo tc qdisc add dev $ETH ingress sudo $TC filter add dev $ETH parent ffff: pref 1 protocol 802.1Q \ u32 match u32 0 0 flowid 1:1 \ action vlan pop reclassify Fixes: 3b3ae880266d ("net: sched: consolidate tc_classify{,_compat}") Signed-off-by: Jamal Hadi Salim Acked-by: Daniel Borkmann Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_api.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index b5c2cf2aa6d4..af1acf009866 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1852,6 +1852,7 @@ reset: } tp = old_tp; + protocol = tc_skb_protocol(skb); goto reclassify; #endif } From 54d77a2201647b826f4be34a95c726dacde1e57b Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 18 Feb 2016 21:21:19 +0800 Subject: [PATCH 041/418] route: check and remove route cache when we get route [ Upstream commit deed49df7390d5239024199e249190328f1651e7 ] Since the gc of ipv4 route was removed, the route cached would has no chance to be removed, and even it has been timeout, it still could be used, cause no code to check it's expires. Fix this issue by checking and removing route cache when we get route. Signed-off-by: Xin Long Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/ip_fib.h | 1 + net/ipv4/route.c | 77 ++++++++++++++++++++++++++++++++++++-------- 2 files changed, 64 insertions(+), 14 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 9f4df68105ab..3f98233388fb 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -61,6 +61,7 @@ struct fib_nh_exception { struct rtable __rcu *fnhe_rth_input; struct rtable __rcu *fnhe_rth_output; unsigned long fnhe_stamp; + struct rcu_head rcu; }; struct fnhe_hash_bucket { diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 85f184e429c6..02c62299d717 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -129,6 +129,7 @@ static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ; static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; static int ip_rt_min_advmss __read_mostly = 256; +static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT; /* * Interface to generic destination cache. */ @@ -755,7 +756,7 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow struct fib_nh *nh = &FIB_RES_NH(res); update_or_create_fnhe(nh, fl4->daddr, new_gw, - 0, 0); + 0, jiffies + ip_rt_gc_timeout); } if (kill_route) rt->dst.obsolete = DST_OBSOLETE_KILL; @@ -1556,6 +1557,36 @@ static void ip_handle_martian_source(struct net_device *dev, #endif } +static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr) +{ + struct fnhe_hash_bucket *hash; + struct fib_nh_exception *fnhe, __rcu **fnhe_p; + u32 hval = fnhe_hashfun(daddr); + + spin_lock_bh(&fnhe_lock); + + hash = rcu_dereference_protected(nh->nh_exceptions, + lockdep_is_held(&fnhe_lock)); + hash += hval; + + fnhe_p = &hash->chain; + fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock)); + while (fnhe) { + if (fnhe->fnhe_daddr == daddr) { + rcu_assign_pointer(*fnhe_p, rcu_dereference_protected( + fnhe->fnhe_next, lockdep_is_held(&fnhe_lock))); + fnhe_flush_routes(fnhe); + kfree_rcu(fnhe, rcu); + break; + } + fnhe_p = &fnhe->fnhe_next; + fnhe = rcu_dereference_protected(fnhe->fnhe_next, + lockdep_is_held(&fnhe_lock)); + } + + spin_unlock_bh(&fnhe_lock); +} + /* called in rcu_read_lock() section */ static int __mkroute_input(struct sk_buff *skb, const struct fib_result *res, @@ -1609,11 +1640,20 @@ static int __mkroute_input(struct sk_buff *skb, fnhe = find_exception(&FIB_RES_NH(*res), daddr); if (do_cache) { - if (fnhe) + if (fnhe) { rth = rcu_dereference(fnhe->fnhe_rth_input); - else - rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); + if (rth && rth->dst.expires && + time_after(jiffies, rth->dst.expires)) { + ip_del_fnhe(&FIB_RES_NH(*res), daddr); + fnhe = NULL; + } else { + goto rt_cache; + } + } + rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); + +rt_cache: if (rt_cache_valid(rth)) { skb_dst_set_noref(skb, &rth->dst); goto out; @@ -2014,19 +2054,29 @@ static struct rtable *__mkroute_output(const struct fib_result *res, struct fib_nh *nh = &FIB_RES_NH(*res); fnhe = find_exception(nh, fl4->daddr); - if (fnhe) + if (fnhe) { prth = &fnhe->fnhe_rth_output; - else { - if (unlikely(fl4->flowi4_flags & - FLOWI_FLAG_KNOWN_NH && - !(nh->nh_gw && - nh->nh_scope == RT_SCOPE_LINK))) { - do_cache = false; - goto add; + rth = rcu_dereference(*prth); + if (rth && rth->dst.expires && + time_after(jiffies, rth->dst.expires)) { + ip_del_fnhe(nh, fl4->daddr); + fnhe = NULL; + } else { + goto rt_cache; } - prth = raw_cpu_ptr(nh->nh_pcpu_rth_output); } + + if (unlikely(fl4->flowi4_flags & + FLOWI_FLAG_KNOWN_NH && + !(nh->nh_gw && + nh->nh_scope == RT_SCOPE_LINK))) { + do_cache = false; + goto add; + } + prth = raw_cpu_ptr(nh->nh_pcpu_rth_output); rth = rcu_dereference(*prth); + +rt_cache: if (rt_cache_valid(rth)) { dst_hold(&rth->dst); return rth; @@ -2569,7 +2619,6 @@ void ip_rt_multicast_event(struct in_device *in_dev) } #ifdef CONFIG_SYSCTL -static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT; static int ip_rt_gc_interval __read_mostly = 60 * HZ; static int ip_rt_gc_min_interval __read_mostly = HZ / 2; static int ip_rt_gc_elasticity __read_mostly = 8; From 9653359eb65cfd56036cded9d41a111d1a9c817a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 18 Feb 2016 05:39:18 -0800 Subject: [PATCH 042/418] tcp/dccp: fix another race at listener dismantle [ Upstream commit 7716682cc58e305e22207d5bb315f26af6b1e243 ] Ilya reported following lockdep splat: kernel: ========================= kernel: [ BUG: held lock freed! ] kernel: 4.5.0-rc1-ceph-00026-g5e0a311 #1 Not tainted kernel: ------------------------- kernel: swapper/5/0 is freeing memory ffff880035c9d200-ffff880035c9dbff, with a lock still held there! kernel: (&(&queue->rskq_lock)->rlock){+.-...}, at: [] inet_csk_reqsk_queue_add+0x28/0xa0 kernel: 4 locks held by swapper/5/0: kernel: #0: (rcu_read_lock){......}, at: [] netif_receive_skb_internal+0x4b/0x1f0 kernel: #1: (rcu_read_lock){......}, at: [] ip_local_deliver_finish+0x3f/0x380 kernel: #2: (slock-AF_INET){+.-...}, at: [] sk_clone_lock+0x19b/0x440 kernel: #3: (&(&queue->rskq_lock)->rlock){+.-...}, at: [] inet_csk_reqsk_queue_add+0x28/0xa0 To properly fix this issue, inet_csk_reqsk_queue_add() needs to return to its callers if the child as been queued into accept queue. We also need to make sure listener is still there before calling sk->sk_data_ready(), by holding a reference on it, since the reference carried by the child can disappear as soon as the child is put on accept queue. Reported-by: Ilya Dryomov Fixes: ebb516af60e1 ("tcp/dccp: fix race at listener dismantle phase") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/inet_connection_sock.h | 5 +++-- net/dccp/ipv4.c | 14 +++++++------- net/dccp/ipv6.c | 14 +++++++------- net/ipv4/inet_connection_sock.c | 14 +++++++------- net/ipv4/tcp_ipv4.c | 14 +++++++------- net/ipv6/tcp_ipv6.c | 14 +++++++------- 6 files changed, 38 insertions(+), 37 deletions(-) diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 481fe1c9044c..49dcad4fe99e 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -270,8 +270,9 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk, struct sock *newsk, const struct request_sock *req); -void inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req, - struct sock *child); +struct sock *inet_csk_reqsk_queue_add(struct sock *sk, + struct request_sock *req, + struct sock *child); void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, unsigned long timeout); struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child, diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 5684e14932bd..902d606324a0 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -824,26 +824,26 @@ lookup: if (sk->sk_state == DCCP_NEW_SYN_RECV) { struct request_sock *req = inet_reqsk(sk); - struct sock *nsk = NULL; + struct sock *nsk; sk = req->rsk_listener; - if (likely(sk->sk_state == DCCP_LISTEN)) { - nsk = dccp_check_req(sk, skb, req); - } else { + if (unlikely(sk->sk_state != DCCP_LISTEN)) { inet_csk_reqsk_queue_drop_and_put(sk, req); goto lookup; } + sock_hold(sk); + nsk = dccp_check_req(sk, skb, req); if (!nsk) { reqsk_put(req); - goto discard_it; + goto discard_and_relse; } if (nsk == sk) { - sock_hold(sk); reqsk_put(req); } else if (dccp_child_process(sk, nsk, skb)) { dccp_v4_ctl_send_reset(sk, skb); - goto discard_it; + goto discard_and_relse; } else { + sock_put(sk); return 0; } } diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 9c6d0508e63a..b8608b71a66d 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -691,26 +691,26 @@ lookup: if (sk->sk_state == DCCP_NEW_SYN_RECV) { struct request_sock *req = inet_reqsk(sk); - struct sock *nsk = NULL; + struct sock *nsk; sk = req->rsk_listener; - if (likely(sk->sk_state == DCCP_LISTEN)) { - nsk = dccp_check_req(sk, skb, req); - } else { + if (unlikely(sk->sk_state != DCCP_LISTEN)) { inet_csk_reqsk_queue_drop_and_put(sk, req); goto lookup; } + sock_hold(sk); + nsk = dccp_check_req(sk, skb, req); if (!nsk) { reqsk_put(req); - goto discard_it; + goto discard_and_relse; } if (nsk == sk) { - sock_hold(sk); reqsk_put(req); } else if (dccp_child_process(sk, nsk, skb)) { dccp_v6_ctl_send_reset(sk, skb); - goto discard_it; + goto discard_and_relse; } else { + sock_put(sk); return 0; } } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 46b9c887bede..64148914803a 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -789,14 +789,16 @@ static void inet_child_forget(struct sock *sk, struct request_sock *req, reqsk_put(req); } -void inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req, - struct sock *child) +struct sock *inet_csk_reqsk_queue_add(struct sock *sk, + struct request_sock *req, + struct sock *child) { struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; spin_lock(&queue->rskq_lock); if (unlikely(sk->sk_state != TCP_LISTEN)) { inet_child_forget(sk, req, child); + child = NULL; } else { req->sk = child; req->dl_next = NULL; @@ -808,6 +810,7 @@ void inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req, sk_acceptq_added(sk); } spin_unlock(&queue->rskq_lock); + return child; } EXPORT_SYMBOL(inet_csk_reqsk_queue_add); @@ -817,11 +820,8 @@ struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child, if (own_req) { inet_csk_reqsk_queue_drop(sk, req); reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req); - inet_csk_reqsk_queue_add(sk, req, child); - /* Warning: caller must not call reqsk_put(req); - * child stole last reference on it. - */ - return child; + if (inet_csk_reqsk_queue_add(sk, req, child)) + return child; } /* Too bad, another child took ownership of the request, undo. */ bh_unlock_sock(child); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index bfcddee60897..8c7e63163e92 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1594,30 +1594,30 @@ process: if (sk->sk_state == TCP_NEW_SYN_RECV) { struct request_sock *req = inet_reqsk(sk); - struct sock *nsk = NULL; + struct sock *nsk; sk = req->rsk_listener; if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) { reqsk_put(req); goto discard_it; } - if (likely(sk->sk_state == TCP_LISTEN)) { - nsk = tcp_check_req(sk, skb, req, false); - } else { + if (unlikely(sk->sk_state != TCP_LISTEN)) { inet_csk_reqsk_queue_drop_and_put(sk, req); goto lookup; } + sock_hold(sk); + nsk = tcp_check_req(sk, skb, req, false); if (!nsk) { reqsk_put(req); - goto discard_it; + goto discard_and_relse; } if (nsk == sk) { - sock_hold(sk); reqsk_put(req); } else if (tcp_child_process(sk, nsk, skb)) { tcp_v4_send_reset(nsk, skb); - goto discard_it; + goto discard_and_relse; } else { + sock_put(sk); return 0; } } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index aea071eca370..b8d405623f4f 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1388,7 +1388,7 @@ process: if (sk->sk_state == TCP_NEW_SYN_RECV) { struct request_sock *req = inet_reqsk(sk); - struct sock *nsk = NULL; + struct sock *nsk; sk = req->rsk_listener; tcp_v6_fill_cb(skb, hdr, th); @@ -1396,24 +1396,24 @@ process: reqsk_put(req); goto discard_it; } - if (likely(sk->sk_state == TCP_LISTEN)) { - nsk = tcp_check_req(sk, skb, req, false); - } else { + if (unlikely(sk->sk_state != TCP_LISTEN)) { inet_csk_reqsk_queue_drop_and_put(sk, req); goto lookup; } + sock_hold(sk); + nsk = tcp_check_req(sk, skb, req, false); if (!nsk) { reqsk_put(req); - goto discard_it; + goto discard_and_relse; } if (nsk == sk) { - sock_hold(sk); reqsk_put(req); tcp_v6_restore_cb(skb); } else if (tcp_child_process(sk, nsk, skb)) { tcp_v6_send_reset(nsk, skb); - goto discard_it; + goto discard_and_relse; } else { + sock_put(sk); return 0; } } From 9b87f63bf814c3c8445d3e2baca204a37753fc92 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Wed, 17 Feb 2016 15:37:43 +0100 Subject: [PATCH 043/418] IFF_NO_QUEUE: Fix for drivers not calling ether_setup() [ Upstream commit a813104d923339144078939175faf4e66aca19b4 ] My implementation around IFF_NO_QUEUE driver flag assumed that leaving tx_queue_len untouched (specifically: not setting it to zero) by drivers would make it possible to assign a regular qdisc to them without having to worry about setting tx_queue_len to a useful value. This was only partially true: I overlooked that some drivers don't call ether_setup() and therefore not initialize tx_queue_len to the default value of 1000. Consequently, removing the workarounds in place for that case in qdisc implementations which cared about it (namely, pfifo, bfifo, gred, htb, plug and sfb) leads to problems with these specific interface types and qdiscs. Luckily, there's already a sanitization point for drivers setting tx_queue_len to zero, which can be reused to assign the fallback value most qdisc implementations used, which is 1. Fixes: 348e3435cbefa ("net: sched: drop all special handling of tx_queue_len == 0") Tested-by: Mathieu Desnoyers Signed-off-by: Phil Sutter Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/dev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 7dc137fba5c1..9efbdb3ff78a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7128,8 +7128,10 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM; setup(dev); - if (!dev->tx_queue_len) + if (!dev->tx_queue_len) { dev->priv_flags |= IFF_NO_QUEUE; + dev->tx_queue_len = 1; + } dev->num_tx_queues = txqs; dev->real_num_tx_queues = txqs; From b7c2e2acc62c3c1a7d2b36032fd14ee53417fa7d Mon Sep 17 00:00:00 2001 From: Anton Protopopov Date: Tue, 16 Feb 2016 21:43:16 -0500 Subject: [PATCH 044/418] rtnl: RTM_GETNETCONF: fix wrong return value [ Upstream commit a97eb33ff225f34a8124774b3373fd244f0e83ce ] An error response from a RTM_GETNETCONF request can return the positive error value EINVAL in the struct nlmsgerr that can mislead userspace. Signed-off-by: Anton Protopopov Acked-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/devinet.c | 2 +- net/ipv6/addrconf.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index cebd9d31e65a..f6303b17546b 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1847,7 +1847,7 @@ static int inet_netconf_get_devconf(struct sk_buff *in_skb, if (err < 0) goto errout; - err = EINVAL; + err = -EINVAL; if (!tb[NETCONFA_IFINDEX]) goto errout; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 828ab28854c8..e8d3da0817d3 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -583,7 +583,7 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb, if (err < 0) goto errout; - err = EINVAL; + err = -EINVAL; if (!tb[NETCONFA_IFINDEX]) goto errout; From 4ac39c3e2c80000a40b11bde33f425a44f1ef55b Mon Sep 17 00:00:00 2001 From: Insu Yun Date: Wed, 17 Feb 2016 11:47:35 -0500 Subject: [PATCH 045/418] tipc: unlock in error path [ Upstream commit b53ce3e7d407aa4196877a48b8601181162ab158 ] tipc_bcast_unlock need to be unlocked in error path. Signed-off-by: Insu Yun Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tipc/bcast.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 9dc239dfe192..92e367a0a5ce 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -399,8 +399,10 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg) hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, NLM_F_MULTI, TIPC_NL_LINK_GET); - if (!hdr) + if (!hdr) { + tipc_bcast_unlock(net); return -EMSGSIZE; + } attrs = nla_nest_start(msg->skb, TIPC_NLA_LINK); if (!attrs) From 82f26aa4a5537b080c0cf71f0f1016c37f01d25e Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Fri, 19 Feb 2016 04:27:48 +0300 Subject: [PATCH 046/418] unix_diag: fix incorrect sign extension in unix_lookup_by_ino [ Upstream commit b5f0549231ffb025337be5a625b0ff9f52b016f0 ] The value passed by unix_diag_get_exact to unix_lookup_by_ino has type __u32, but unix_lookup_by_ino's argument ino has type int, which is not a problem yet. However, when ino is compared with sock_i_ino return value of type unsigned long, ino is sign extended to signed long, and this results to incorrect comparison on 64-bit architectures for inode numbers greater than INT_MAX. This bug was found by strace test suite. Fixes: 5d3cae8bc39d ("unix_diag: Dumping exact socket core") Signed-off-by: Dmitry V. Levin Acked-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/unix/diag.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/unix/diag.c b/net/unix/diag.c index c512f64d5287..4d9679701a6d 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -220,7 +220,7 @@ done: return skb->len; } -static struct sock *unix_lookup_by_ino(int ino) +static struct sock *unix_lookup_by_ino(unsigned int ino) { int i; struct sock *sk; From 1c2efb14a21bfbd445b9bb6204ac4b33adcab785 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Thu, 18 Feb 2016 16:10:57 -0500 Subject: [PATCH 047/418] sctp: Fix port hash table size computation [ Upstream commit d9749fb5942f51555dc9ce1ac0dbb1806960a975 ] Dmitry Vyukov noted recently that the sctp_port_hashtable had an error in its size computation, observing that the current method never guaranteed that the hashsize (measured in number of entries) would be a power of two, which the input hash function for that table requires. The root cause of the problem is that two values need to be computed (one, the allocation order of the storage requries, as passed to __get_free_pages, and two the number of entries for the hash table). Both need to be ^2, but for different reasons, and the existing code is simply computing one order value, and using it as the basis for both, which is wrong (i.e. it assumes that ((1< Reported-by: Dmitry Vyukov CC: Dmitry Vyukov CC: Vladislav Yasevich CC: "David S. Miller" Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/protocol.c | 43 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 36 insertions(+), 7 deletions(-) diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 3d9ea9a48289..8b4ff315695e 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -60,6 +60,8 @@ #include #include +#define MAX_SCTP_PORT_HASH_ENTRIES (64 * 1024) + /* Global data structures. */ struct sctp_globals sctp_globals __read_mostly; @@ -1352,6 +1354,8 @@ static __init int sctp_init(void) unsigned long limit; int max_share; int order; + int num_entries; + int max_entry_order; sock_skb_cb_check_size(sizeof(struct sctp_ulpevent)); @@ -1404,14 +1408,24 @@ static __init int sctp_init(void) /* Size and allocate the association hash table. * The methodology is similar to that of the tcp hash tables. + * Though not identical. Start by getting a goal size */ if (totalram_pages >= (128 * 1024)) goal = totalram_pages >> (22 - PAGE_SHIFT); else goal = totalram_pages >> (24 - PAGE_SHIFT); - for (order = 0; (1UL << order) < goal; order++) - ; + /* Then compute the page order for said goal */ + order = get_order(goal); + + /* Now compute the required page order for the maximum sized table we + * want to create + */ + max_entry_order = get_order(MAX_SCTP_PORT_HASH_ENTRIES * + sizeof(struct sctp_bind_hashbucket)); + + /* Limit the page order by that maximum hash table size */ + order = min(order, max_entry_order); do { sctp_assoc_hashsize = (1UL << order) * PAGE_SIZE / @@ -1445,20 +1459,35 @@ static __init int sctp_init(void) INIT_HLIST_HEAD(&sctp_ep_hashtable[i].chain); } - /* Allocate and initialize the SCTP port hash table. */ + /* Allocate and initialize the SCTP port hash table. + * Note that order is initalized to start at the max sized + * table we want to support. If we can't get that many pages + * reduce the order and try again + */ do { - sctp_port_hashsize = (1UL << order) * PAGE_SIZE / - sizeof(struct sctp_bind_hashbucket); - if ((sctp_port_hashsize > (64 * 1024)) && order > 0) - continue; sctp_port_hashtable = (struct sctp_bind_hashbucket *) __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, order); } while (!sctp_port_hashtable && --order > 0); + if (!sctp_port_hashtable) { pr_err("Failed bind hash alloc\n"); status = -ENOMEM; goto err_bhash_alloc; } + + /* Now compute the number of entries that will fit in the + * port hash space we allocated + */ + num_entries = (1UL << order) * PAGE_SIZE / + sizeof(struct sctp_bind_hashbucket); + + /* And finish by rounding it down to the nearest power of two + * this wastes some memory of course, but its needed because + * the hash function operates based on the assumption that + * that the number of entries is a power of two + */ + sctp_port_hashsize = rounddown_pow_of_two(num_entries); + for (i = 0; i < sctp_port_hashsize; i++) { spin_lock_init(&sctp_port_hashtable[i].lock); INIT_HLIST_HEAD(&sctp_port_hashtable[i].chain); From 7c3d1424dd42356a00e83b06c0a39629c4a7e5e3 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 19 Feb 2016 00:18:25 -0500 Subject: [PATCH 048/418] ext4: fix bh->b_state corruption commit ed8ad83808f009ade97ebbf6519bc3a97fefbc0c upstream. ext4 can update bh->b_state non-atomically in _ext4_get_block() and ext4_da_get_block_prep(). Usually this is fine since bh is just a temporary storage for mapping information on stack but in some cases it can be fully living bh attached to a page. In such case non-atomic update of bh->b_state can race with an atomic update which then gets lost. Usually when we are mapping bh and thus updating bh->b_state non-atomically, nobody else touches the bh and so things work out fine but there is one case to especially worry about: ext4_finish_bio() uses BH_Uptodate_Lock on the first bh in the page to synchronize handling of PageWriteback state. So when blocksize < pagesize, we can be atomically modifying bh->b_state of a buffer that actually isn't under IO and thus can race e.g. with delalloc trying to map that buffer. The result is that we can mistakenly set / clear BH_Uptodate_Lock bit resulting in the corruption of PageWriteback state or missed unlock of BH_Uptodate_Lock. Fix the problem by always updating bh->b_state bits atomically. Reported-by: Nikolay Borisov Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o Signed-off-by: Nikolay Borisov [NB: Backported to 4.4.2] Acked-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/inode.c | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index ea433a7f4bca..06bda0361e7c 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -657,6 +657,34 @@ has_zeroout: return retval; } +/* + * Update EXT4_MAP_FLAGS in bh->b_state. For buffer heads attached to pages + * we have to be careful as someone else may be manipulating b_state as well. + */ +static void ext4_update_bh_state(struct buffer_head *bh, unsigned long flags) +{ + unsigned long old_state; + unsigned long new_state; + + flags &= EXT4_MAP_FLAGS; + + /* Dummy buffer_head? Set non-atomically. */ + if (!bh->b_page) { + bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | flags; + return; + } + /* + * Someone else may be modifying b_state. Be careful! This is ugly but + * once we get rid of using bh as a container for mapping information + * to pass to / from get_block functions, this can go away. + */ + do { + old_state = READ_ONCE(bh->b_state); + new_state = (old_state & ~EXT4_MAP_FLAGS) | flags; + } while (unlikely( + cmpxchg(&bh->b_state, old_state, new_state) != old_state)); +} + /* Maximum number of blocks we map for direct IO at once. */ #define DIO_MAX_BLOCKS 4096 @@ -693,7 +721,7 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock, ext4_io_end_t *io_end = ext4_inode_aio(inode); map_bh(bh, inode->i_sb, map.m_pblk); - bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; + ext4_update_bh_state(bh, map.m_flags); if (IS_DAX(inode) && buffer_unwritten(bh)) { /* * dgc: I suspect unwritten conversion on ext4+DAX is @@ -1669,7 +1697,7 @@ int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, return ret; map_bh(bh, inode->i_sb, map.m_pblk); - bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; + ext4_update_bh_state(bh, map.m_flags); if (buffer_unwritten(bh)) { /* A delayed write to unwritten bh should be marked From bb10b5e76eac0a4ea6cd2350c8c9efdc620233ed Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 18 Jan 2016 10:45:00 +0100 Subject: [PATCH 049/418] ARM: debug-ll: fix BCM63xx entry for multiplatform commit 6c54809977de3c9e2ef9e9934a2c6625f7e161e7 upstream. During my randconfig build testing, I found that a kernel with DEBUG_AT91_UART and ARCH_BCM_63XX fails to build: arch/arm/include/debug/at91.S:18:0: error: "CONFIG_DEBUG_UART_VIRT" redefined [-Werror] It turns out that the DEBUG_UART_BCM63XX option is enabled whenever the ARCH_BCM_63XX is, and that breaks multiplatform kernels because we then end up using the UART address from BCM63XX rather than the one we actually configured (if any). This changes the BCM63XX options to only have one Kconfig option, and only enable that if the user explicitly turns it on. Signed-off-by: Arnd Bergmann Fixes: b51312bebfa4 ("ARM: BCM63XX: add low-level UART debug support") Signed-off-by: Greg Kroah-Hartman --- arch/arm/Kconfig.debug | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index 259c0ca9c99a..ddbb361267d8 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -162,10 +162,9 @@ choice mobile SoCs in the Kona family of chips (e.g. bcm28155, bcm11351, etc...) - config DEBUG_BCM63XX + config DEBUG_BCM63XX_UART bool "Kernel low-level debugging on BCM63XX UART" depends on ARCH_BCM_63XX - select DEBUG_UART_BCM63XX config DEBUG_BERLIN_UART bool "Marvell Berlin SoC Debug UART" @@ -1348,7 +1347,7 @@ config DEBUG_LL_INCLUDE default "debug/vf.S" if DEBUG_VF_UART default "debug/vt8500.S" if DEBUG_VT8500_UART0 default "debug/zynq.S" if DEBUG_ZYNQ_UART0 || DEBUG_ZYNQ_UART1 - default "debug/bcm63xx.S" if DEBUG_UART_BCM63XX + default "debug/bcm63xx.S" if DEBUG_BCM63XX_UART default "debug/digicolor.S" if DEBUG_DIGICOLOR_UA0 default "mach/debug-macro.S" @@ -1364,10 +1363,6 @@ config DEBUG_UART_8250 ARCH_IOP33X || ARCH_IXP4XX || \ ARCH_LPC32XX || ARCH_MV78XX0 || ARCH_ORION5X || ARCH_RPC -# Compatibility options for BCM63xx -config DEBUG_UART_BCM63XX - def_bool ARCH_BCM_63XX - config DEBUG_UART_PHYS hex "Physical base address of debug UART" default 0x00100a00 if DEBUG_NETX_UART @@ -1462,7 +1457,7 @@ config DEBUG_UART_PHYS default 0xfffb0000 if DEBUG_OMAP1UART1 || DEBUG_OMAP7XXUART1 default 0xfffb0800 if DEBUG_OMAP1UART2 || DEBUG_OMAP7XXUART2 default 0xfffb9800 if DEBUG_OMAP1UART3 || DEBUG_OMAP7XXUART3 - default 0xfffe8600 if DEBUG_UART_BCM63XX + default 0xfffe8600 if DEBUG_BCM63XX_UART default 0xfffff700 if ARCH_IOP33X depends on ARCH_EP93XX || \ DEBUG_LL_UART_8250 || DEBUG_LL_UART_PL01X || \ @@ -1474,7 +1469,7 @@ config DEBUG_UART_PHYS DEBUG_RCAR_GEN2_SCIF0 || DEBUG_RCAR_GEN2_SCIF2 || \ DEBUG_RMOBILE_SCIFA0 || DEBUG_RMOBILE_SCIFA1 || \ DEBUG_RMOBILE_SCIFA4 || DEBUG_S3C24XX_UART || \ - DEBUG_UART_BCM63XX || DEBUG_ASM9260_UART || \ + DEBUG_BCM63XX_UART || DEBUG_ASM9260_UART || \ DEBUG_SIRFSOC_UART || DEBUG_DIGICOLOR_UA0 || \ DEBUG_AT91_UART @@ -1515,7 +1510,7 @@ config DEBUG_UART_VIRT default 0xfb10c000 if DEBUG_REALVIEW_PB1176_PORT default 0xfc40ab00 if DEBUG_BRCMSTB_UART default 0xfc705000 if DEBUG_ZTE_ZX - default 0xfcfe8600 if DEBUG_UART_BCM63XX + default 0xfcfe8600 if DEBUG_BCM63XX_UART default 0xfd000000 if ARCH_SPEAR3XX || ARCH_SPEAR6XX default 0xfd000000 if ARCH_SPEAR13XX default 0xfd012000 if ARCH_MV78XX0 @@ -1566,7 +1561,7 @@ config DEBUG_UART_VIRT DEBUG_UART_8250 || DEBUG_UART_PL01X || DEBUG_MESON_UARTAO || \ DEBUG_NETX_UART || \ DEBUG_QCOM_UARTDM || DEBUG_S3C24XX_UART || \ - DEBUG_UART_BCM63XX || DEBUG_ASM9260_UART || \ + DEBUG_BCM63XX_UART || DEBUG_ASM9260_UART || \ DEBUG_SIRFSOC_UART || DEBUG_DIGICOLOR_UA0 config DEBUG_UART_8250_SHIFT From db8bb76c6f9564e18d5dda4d89e4fc70dc83c3fd Mon Sep 17 00:00:00 2001 From: dann frazier Date: Mon, 25 Jan 2016 16:52:16 -0700 Subject: [PATCH 050/418] arm64: errata: Add -mpc-relative-literal-loads to build flags commit 67dfa1751ce71e629aad7c438e1678ad41054677 upstream. GCC6 (and Linaro's 2015.12 snapshot of GCC5) has a new default that uses adrp/ldr or adrp/add to address literal pools. When CONFIG_ARM64_ERRATUM_843419 is enabled, modules built with this toolchain fail to load: module libahci: unsupported RELA relocation: 275 This patch fixes the problem by passing '-mpc-relative-literal-loads' to the compiler. Cc: stable@vger.kernel.org Fixes: df057cc7b4fa ("arm64: errata: add module build workaround for erratum #843419") BugLink: http://bugs.launchpad.net/bugs/1533009 Acked-by: Ard Biesheuvel Suggested-by: Christophe Lyon Signed-off-by: Dann Frazier [will: backport to 4.4-stable] Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index cd822d8454c0..b6c90e5006e4 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -27,6 +27,7 @@ $(warning LSE atomics not supported by binutils) endif KBUILD_CFLAGS += -mgeneral-regs-only $(lseinstr) +KBUILD_CFLAGS += $(call cc-option, -mpc-relative-literal-loads) KBUILD_AFLAGS += $(lseinstr) ifeq ($(CONFIG_CPU_BIG_ENDIAN), y) From aa5f10879d2d745b67a0b21c91d788d8ac6717fe Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 22 Jan 2016 14:55:56 +0100 Subject: [PATCH 051/418] KVM: s390: fix guest fprs memory leak commit 9c7ebb613bffea2feef4ec562ba1dbcaa810942b upstream. fprs is never freed, therefore resulting in a memory leak if kvm_vcpu_init() fails or the vcpu is destroyed. Fixes: 9977e886cbbc ("s390/kernel: lazy restore fpu registers") Reported-by: Eric Farman Signed-off-by: David Hildenbrand Reviewed-by: Eric Farman Signed-off-by: Christian Borntraeger Signed-off-by: Greg Kroah-Hartman --- arch/s390/kvm/kvm-s390.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 846589281b04..408138e716c4 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1202,6 +1202,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) if (vcpu->kvm->arch.use_cmma) kvm_s390_vcpu_unsetup_cmma(vcpu); + kfree(vcpu->arch.guest_fpregs.fprs); free_page((unsigned long)(vcpu->arch.sie_block)); kvm_vcpu_uninit(vcpu); @@ -1516,12 +1517,14 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, rc = kvm_vcpu_init(vcpu, kvm, id); if (rc) - goto out_free_sie_block; + goto out_free_fprs; VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu, vcpu->arch.sie_block); trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block); return vcpu; +out_free_fprs: + kfree(vcpu->arch.guest_fpregs.fprs); out_free_sie_block: free_page((unsigned long)(vcpu->arch.sie_block)); out_free_cpu: From bb7d70e1c3cefe4785a291af49c48ff70cf1525c Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Sat, 20 Feb 2016 14:32:24 -0800 Subject: [PATCH 052/418] devm_memremap: Fix error value when memremap failed commit 93f834df9c2d4e362dfdc4b05daa0a4e18814836 upstream. devm_memremap() returns an ERR_PTR() value in case of error. However, it returns NULL when memremap() failed. This causes the caller, such as the pmem driver, to proceed and oops later. Change devm_memremap() to return ERR_PTR(-ENXIO) when memremap() failed. Signed-off-by: Toshi Kani Cc: Andrew Morton Reviewed-by: Ross Zwisler Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- kernel/memremap.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/memremap.c b/kernel/memremap.c index 7a4e473cea4d..25ced161ebeb 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -133,8 +133,10 @@ void *devm_memremap(struct device *dev, resource_size_t offset, if (addr) { *ptr = addr; devres_add(dev, ptr); - } else + } else { devres_free(ptr); + return ERR_PTR(-ENXIO); + } return addr; } From ab3b00b70e5552d6060118952312f9c201218dc4 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 23 Nov 2015 10:32:49 +0100 Subject: [PATCH 053/418] drm/gma500: Use correct unref in the gem bo create function commit d3e376f52d095103ca51dbda4d6ff8aaf488f98f upstream. This is called without dev->struct_mutex held, we need to use the _unlocked variant. Never caught in the wild since you'd need an evil userspace which races a gem_close ioctl call with the in-progress open. Cc: Patrik Jakobsson Acked-by: Patrik Jakobsson Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1448271183-20523-17-git-send-email-daniel.vetter@ffwll.ch Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/gma500/gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/gma500/gem.c b/drivers/gpu/drm/gma500/gem.c index c707fa6fca85..e3bdc8b1c32c 100644 --- a/drivers/gpu/drm/gma500/gem.c +++ b/drivers/gpu/drm/gma500/gem.c @@ -130,7 +130,7 @@ int psb_gem_create(struct drm_file *file, struct drm_device *dev, u64 size, return ret; } /* We have the initial and handle reference but need only one now */ - drm_gem_object_unreference(&r->gem); + drm_gem_object_unreference_unlocked(&r->gem); *handlep = handle; return 0; } From 9453c72919e57ee0227c2d1c18538d85cc62346d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 19 Nov 2015 15:03:57 +0100 Subject: [PATCH 054/418] ARM: 8457/1: psci-smp is built only for SMP commit be95485a0b8288a93402705730d3ea32f9f812b9 upstream. The PSCI SMP implementation is built only when both CONFIG_SMP and CONFIG_ARM_PSCI are set, so a configuration that has the latter but not the former can get a link error when it tries to call psci_smp_available(). arch/arm/mach-tegra/built-in.o: In function `tegra114_cpuidle_init': cpuidle-tegra114.c:(.init.text+0x52a): undefined reference to `psci_smp_available' This corrects the #ifdef in the psci.h header file to match the Makefile conditional we have for building that function. Signed-off-by: Arnd Bergmann Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/psci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/include/asm/psci.h b/arch/arm/include/asm/psci.h index 68ee3ce17b82..b4c6d99364f1 100644 --- a/arch/arm/include/asm/psci.h +++ b/arch/arm/include/asm/psci.h @@ -16,7 +16,7 @@ extern struct smp_operations psci_smp_ops; -#ifdef CONFIG_ARM_PSCI +#if defined(CONFIG_SMP) && defined(CONFIG_ARM_PSCI) bool psci_smp_available(void); #else static inline bool psci_smp_available(void) { return false; } From e7f2a86c9106eb9417f4bc7b97f1dfa63140277a Mon Sep 17 00:00:00 2001 From: Peter Jones Date: Mon, 8 Feb 2016 14:48:11 -0500 Subject: [PATCH 055/418] lib/ucs2_string: Add ucs2 -> utf8 helper functions commit 73500267c930baadadb0d02284909731baf151f7 upstream. This adds ucs2_utf8size(), which tells us how big our ucs2 string is in bytes, and ucs2_as_utf8, which translates from ucs2 to utf8.. Signed-off-by: Peter Jones Tested-by: Lee, Chun-Yi Acked-by: Matthew Garrett Signed-off-by: Matt Fleming Signed-off-by: Greg Kroah-Hartman --- include/linux/ucs2_string.h | 4 +++ lib/ucs2_string.c | 62 +++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) diff --git a/include/linux/ucs2_string.h b/include/linux/ucs2_string.h index cbb20afdbc01..bb679b48f408 100644 --- a/include/linux/ucs2_string.h +++ b/include/linux/ucs2_string.h @@ -11,4 +11,8 @@ unsigned long ucs2_strlen(const ucs2_char_t *s); unsigned long ucs2_strsize(const ucs2_char_t *data, unsigned long maxlength); int ucs2_strncmp(const ucs2_char_t *a, const ucs2_char_t *b, size_t len); +unsigned long ucs2_utf8size(const ucs2_char_t *src); +unsigned long ucs2_as_utf8(u8 *dest, const ucs2_char_t *src, + unsigned long maxlength); + #endif /* _LINUX_UCS2_STRING_H_ */ diff --git a/lib/ucs2_string.c b/lib/ucs2_string.c index 6f500ef2301d..17dd74e21ef9 100644 --- a/lib/ucs2_string.c +++ b/lib/ucs2_string.c @@ -49,3 +49,65 @@ ucs2_strncmp(const ucs2_char_t *a, const ucs2_char_t *b, size_t len) } } EXPORT_SYMBOL(ucs2_strncmp); + +unsigned long +ucs2_utf8size(const ucs2_char_t *src) +{ + unsigned long i; + unsigned long j = 0; + + for (i = 0; i < ucs2_strlen(src); i++) { + u16 c = src[i]; + + if (c > 0x800) + j += 3; + else if (c > 0x80) + j += 2; + else + j += 1; + } + + return j; +} +EXPORT_SYMBOL(ucs2_utf8size); + +/* + * copy at most maxlength bytes of whole utf8 characters to dest from the + * ucs2 string src. + * + * The return value is the number of characters copied, not including the + * final NUL character. + */ +unsigned long +ucs2_as_utf8(u8 *dest, const ucs2_char_t *src, unsigned long maxlength) +{ + unsigned int i; + unsigned long j = 0; + unsigned long limit = ucs2_strnlen(src, maxlength); + + for (i = 0; maxlength && i < limit; i++) { + u16 c = src[i]; + + if (c > 0x800) { + if (maxlength < 3) + break; + maxlength -= 3; + dest[j++] = 0xe0 | (c & 0xf000) >> 12; + dest[j++] = 0x80 | (c & 0x0fc0) >> 8; + dest[j++] = 0x80 | (c & 0x003f); + } else if (c > 0x80) { + if (maxlength < 2) + break; + maxlength -= 2; + dest[j++] = 0xc0 | (c & 0xfe0) >> 5; + dest[j++] = 0x80 | (c & 0x01f); + } else { + maxlength -= 1; + dest[j++] = c & 0x7f; + } + } + if (maxlength) + dest[j] = '\0'; + return j; +} +EXPORT_SYMBOL(ucs2_as_utf8); From 8df7c6d3bcef0f1711f1e775b2ffe0215f32011f Mon Sep 17 00:00:00 2001 From: Peter Jones Date: Mon, 8 Feb 2016 14:48:12 -0500 Subject: [PATCH 056/418] efi: Use ucs2_as_utf8 in efivarfs instead of open coding a bad version commit e0d64e6a880e64545ad7d55786aa84ab76bac475 upstream. Translate EFI's UCS-2 variable names to UTF-8 instead of just assuming all variable names fit in ASCII. Signed-off-by: Peter Jones Acked-by: Matthew Garrett Tested-by: Lee, Chun-Yi Signed-off-by: Matt Fleming Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/efivars.c | 30 +++++++++++------------------- fs/efivarfs/super.c | 7 +++---- 2 files changed, 14 insertions(+), 23 deletions(-) diff --git a/drivers/firmware/efi/efivars.c b/drivers/firmware/efi/efivars.c index 756eca8c4cf8..f4ff8abc5f3e 100644 --- a/drivers/firmware/efi/efivars.c +++ b/drivers/firmware/efi/efivars.c @@ -540,38 +540,30 @@ static ssize_t efivar_delete(struct file *filp, struct kobject *kobj, static int efivar_create_sysfs_entry(struct efivar_entry *new_var) { - int i, short_name_size; + int short_name_size; char *short_name; - unsigned long variable_name_size; - efi_char16_t *variable_name; + unsigned long utf8_name_size; + efi_char16_t *variable_name = new_var->var.VariableName; int ret; - variable_name = new_var->var.VariableName; - variable_name_size = ucs2_strlen(variable_name) * sizeof(efi_char16_t); - /* - * Length of the variable bytes in ASCII, plus the '-' separator, + * Length of the variable bytes in UTF8, plus the '-' separator, * plus the GUID, plus trailing NUL */ - short_name_size = variable_name_size / sizeof(efi_char16_t) - + 1 + EFI_VARIABLE_GUID_LEN + 1; - - short_name = kzalloc(short_name_size, GFP_KERNEL); + utf8_name_size = ucs2_utf8size(variable_name); + short_name_size = utf8_name_size + 1 + EFI_VARIABLE_GUID_LEN + 1; + short_name = kmalloc(short_name_size, GFP_KERNEL); if (!short_name) return -ENOMEM; - /* Convert Unicode to normal chars (assume top bits are 0), - ala UTF-8 */ - for (i=0; i < (int)(variable_name_size / sizeof(efi_char16_t)); i++) { - short_name[i] = variable_name[i] & 0xFF; - } + ucs2_as_utf8(short_name, variable_name, short_name_size); + /* This is ugly, but necessary to separate one vendor's private variables from another's. */ - - *(short_name + strlen(short_name)) = '-'; + short_name[utf8_name_size] = '-'; efi_guid_to_str(&new_var->var.VendorGuid, - short_name + strlen(short_name)); + short_name + utf8_name_size + 1); new_var->kobj.kset = efivars_kset; diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index 86a2121828c3..0eb73793b737 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -118,7 +118,7 @@ static int efivarfs_callback(efi_char16_t *name16, efi_guid_t vendor, struct dentry *dentry, *root = sb->s_root; unsigned long size = 0; char *name; - int len, i; + int len; int err = -ENOMEM; entry = kzalloc(sizeof(*entry), GFP_KERNEL); @@ -128,15 +128,14 @@ static int efivarfs_callback(efi_char16_t *name16, efi_guid_t vendor, memcpy(entry->var.VariableName, name16, name_size); memcpy(&(entry->var.VendorGuid), &vendor, sizeof(efi_guid_t)); - len = ucs2_strlen(entry->var.VariableName); + len = ucs2_utf8size(entry->var.VariableName); /* name, plus '-', plus GUID, plus NUL*/ name = kmalloc(len + 1 + EFI_VARIABLE_GUID_LEN + 1, GFP_KERNEL); if (!name) goto fail; - for (i = 0; i < len; i++) - name[i] = entry->var.VariableName[i] & 0xFF; + ucs2_as_utf8(name, entry->var.VariableName, len); name[len] = '-'; From 542f954e2d5d4b22cb68fa4cd6b5dcdfd880fccd Mon Sep 17 00:00:00 2001 From: Peter Jones Date: Mon, 8 Feb 2016 14:48:13 -0500 Subject: [PATCH 057/418] efi: Do variable name validation tests in utf8 commit 3dcb1f55dfc7631695e69df4a0d589ce5274bd07 upstream. Actually translate from ucs2 to utf8 before doing the test, and then test against our other utf8 data, instead of fudging it. Signed-off-by: Peter Jones Acked-by: Matthew Garrett Tested-by: Lee, Chun-Yi Signed-off-by: Matt Fleming Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/vars.c | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c index 70a0fb10517f..5c5fde3e6c37 100644 --- a/drivers/firmware/efi/vars.c +++ b/drivers/firmware/efi/vars.c @@ -189,10 +189,19 @@ static const struct variable_validate variable_validate[] = { }; bool -efivar_validate(efi_char16_t *var_name, u8 *data, unsigned long len) +efivar_validate(efi_char16_t *var_name, u8 *data, unsigned long data_size) { int i; - u16 *unicode_name = var_name; + unsigned long utf8_size; + u8 *utf8_name; + + utf8_size = ucs2_utf8size(var_name); + utf8_name = kmalloc(utf8_size + 1, GFP_KERNEL); + if (!utf8_name) + return false; + + ucs2_as_utf8(utf8_name, var_name, utf8_size); + utf8_name[utf8_size] = '\0'; for (i = 0; variable_validate[i].validate != NULL; i++) { const char *name = variable_validate[i].name; @@ -200,28 +209,29 @@ efivar_validate(efi_char16_t *var_name, u8 *data, unsigned long len) for (match = 0; ; match++) { char c = name[match]; - u16 u = unicode_name[match]; - - /* All special variables are plain ascii */ - if (u > 127) - return true; + char u = utf8_name[match]; /* Wildcard in the matching name means we've matched */ - if (c == '*') + if (c == '*') { + kfree(utf8_name); return variable_validate[i].validate(var_name, - match, data, len); + match, data, data_size); + } /* Case sensitive match */ if (c != u) break; /* Reached the end of the string while matching */ - if (!c) + if (!c) { + kfree(utf8_name); return variable_validate[i].validate(var_name, - match, data, len); + match, data, data_size); + } } } + kfree(utf8_name); return true; } EXPORT_SYMBOL_GPL(efivar_validate); From 5134c82b53ddd6f95317f159f62827d9cb7843e1 Mon Sep 17 00:00:00 2001 From: Peter Jones Date: Mon, 8 Feb 2016 14:48:14 -0500 Subject: [PATCH 058/418] efi: Make our variable validation list include the guid commit 8282f5d9c17fe15a9e658c06e3f343efae1a2a2f upstream. All the variables in this list so far are defined to be in the global namespace in the UEFI spec, so this just further ensures we're validating the variables we think we are. Including the guid for entries will become more important in future patches when we decide whether or not to allow deletion of variables based on presence in this list. Signed-off-by: Peter Jones Tested-by: Lee, Chun-Yi Acked-by: Matthew Garrett Signed-off-by: Matt Fleming Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/efivars.c | 5 ++-- drivers/firmware/efi/vars.c | 52 +++++++++++++++++++++------------- include/linux/efi.h | 3 +- 3 files changed, 38 insertions(+), 22 deletions(-) diff --git a/drivers/firmware/efi/efivars.c b/drivers/firmware/efi/efivars.c index f4ff8abc5f3e..10e6774ab2a2 100644 --- a/drivers/firmware/efi/efivars.c +++ b/drivers/firmware/efi/efivars.c @@ -221,7 +221,7 @@ sanity_check(struct efi_variable *var, efi_char16_t *name, efi_guid_t vendor, } if ((attributes & ~EFI_VARIABLE_MASK) != 0 || - efivar_validate(name, data, size) == false) { + efivar_validate(vendor, name, data, size) == false) { printk(KERN_ERR "efivars: Malformed variable content\n"); return -EINVAL; } @@ -447,7 +447,8 @@ static ssize_t efivar_create(struct file *filp, struct kobject *kobj, } if ((attributes & ~EFI_VARIABLE_MASK) != 0 || - efivar_validate(name, data, size) == false) { + efivar_validate(new_var->VendorGuid, name, data, + size) == false) { printk(KERN_ERR "efivars: Malformed variable content\n"); return -EINVAL; } diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c index 5c5fde3e6c37..9a53da21e7b6 100644 --- a/drivers/firmware/efi/vars.c +++ b/drivers/firmware/efi/vars.c @@ -165,31 +165,42 @@ validate_ascii_string(efi_char16_t *var_name, int match, u8 *buffer, } struct variable_validate { + efi_guid_t vendor; char *name; bool (*validate)(efi_char16_t *var_name, int match, u8 *data, unsigned long len); }; +/* + * This is the list of variables we need to validate. + * + * If it has a validate() method that's not NULL, it'll go into the + * validation routine. If not, it is assumed valid. + * + * Note that it's sorted by {vendor,name}, but globbed names must come after + * any other name with the same prefix. + */ static const struct variable_validate variable_validate[] = { - { "BootNext", validate_uint16 }, - { "BootOrder", validate_boot_order }, - { "DriverOrder", validate_boot_order }, - { "Boot*", validate_load_option }, - { "Driver*", validate_load_option }, - { "ConIn", validate_device_path }, - { "ConInDev", validate_device_path }, - { "ConOut", validate_device_path }, - { "ConOutDev", validate_device_path }, - { "ErrOut", validate_device_path }, - { "ErrOutDev", validate_device_path }, - { "Timeout", validate_uint16 }, - { "Lang", validate_ascii_string }, - { "PlatformLang", validate_ascii_string }, - { "", NULL }, + { EFI_GLOBAL_VARIABLE_GUID, "BootNext", validate_uint16 }, + { EFI_GLOBAL_VARIABLE_GUID, "BootOrder", validate_boot_order }, + { EFI_GLOBAL_VARIABLE_GUID, "Boot*", validate_load_option }, + { EFI_GLOBAL_VARIABLE_GUID, "DriverOrder", validate_boot_order }, + { EFI_GLOBAL_VARIABLE_GUID, "Driver*", validate_load_option }, + { EFI_GLOBAL_VARIABLE_GUID, "ConIn", validate_device_path }, + { EFI_GLOBAL_VARIABLE_GUID, "ConInDev", validate_device_path }, + { EFI_GLOBAL_VARIABLE_GUID, "ConOut", validate_device_path }, + { EFI_GLOBAL_VARIABLE_GUID, "ConOutDev", validate_device_path }, + { EFI_GLOBAL_VARIABLE_GUID, "ErrOut", validate_device_path }, + { EFI_GLOBAL_VARIABLE_GUID, "ErrOutDev", validate_device_path }, + { EFI_GLOBAL_VARIABLE_GUID, "Lang", validate_ascii_string }, + { EFI_GLOBAL_VARIABLE_GUID, "PlatformLang", validate_ascii_string }, + { EFI_GLOBAL_VARIABLE_GUID, "Timeout", validate_uint16 }, + { NULL_GUID, "", NULL }, }; bool -efivar_validate(efi_char16_t *var_name, u8 *data, unsigned long data_size) +efivar_validate(efi_guid_t vendor, efi_char16_t *var_name, u8 *data, + unsigned long data_size) { int i; unsigned long utf8_size; @@ -203,9 +214,12 @@ efivar_validate(efi_char16_t *var_name, u8 *data, unsigned long data_size) ucs2_as_utf8(utf8_name, var_name, utf8_size); utf8_name[utf8_size] = '\0'; - for (i = 0; variable_validate[i].validate != NULL; i++) { + for (i = 0; variable_validate[i].name[0] != '\0'; i++) { const char *name = variable_validate[i].name; - int match; + int match = 0; + + if (efi_guidcmp(vendor, variable_validate[i].vendor)) + continue; for (match = 0; ; match++) { char c = name[match]; @@ -862,7 +876,7 @@ int efivar_entry_set_get_size(struct efivar_entry *entry, u32 attributes, *set = false; - if (efivar_validate(name, data, *size) == false) + if (efivar_validate(*vendor, name, data, *size) == false) return -EINVAL; /* diff --git a/include/linux/efi.h b/include/linux/efi.h index 569b5a866bb1..16ca611aabc8 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1199,7 +1199,8 @@ int efivar_entry_iter(int (*func)(struct efivar_entry *, void *), struct efivar_entry *efivar_entry_find(efi_char16_t *name, efi_guid_t guid, struct list_head *head, bool remove); -bool efivar_validate(efi_char16_t *var_name, u8 *data, unsigned long len); +bool efivar_validate(efi_guid_t vendor, efi_char16_t *var_name, u8 *data, + unsigned long data_size); extern struct work_struct efivar_work; void efivar_run_worker(void); From 05913989c8892f6dc1726d03b0d8e680aec3c1a5 Mon Sep 17 00:00:00 2001 From: Peter Jones Date: Mon, 8 Feb 2016 14:48:15 -0500 Subject: [PATCH 059/418] efi: Make efivarfs entries immutable by default commit ed8b0de5a33d2a2557dce7f9429dca8cb5bc5879 upstream. "rm -rf" is bricking some peoples' laptops because of variables being used to store non-reinitializable firmware driver data that's required to POST the hardware. These are 100% bugs, and they need to be fixed, but in the mean time it shouldn't be easy to *accidentally* brick machines. We have to have delete working, and picking which variables do and don't work for deletion is quite intractable, so instead make everything immutable by default (except for a whitelist), and make tools that aren't quite so broad-spectrum unset the immutable flag. Signed-off-by: Peter Jones Tested-by: Lee, Chun-Yi Acked-by: Matthew Garrett Signed-off-by: Matt Fleming Signed-off-by: Greg Kroah-Hartman --- Documentation/filesystems/efivarfs.txt | 7 ++ drivers/firmware/efi/vars.c | 87 ++++++++++++++----- fs/efivarfs/file.c | 70 +++++++++++++++ fs/efivarfs/inode.c | 30 ++++--- fs/efivarfs/internal.h | 3 +- fs/efivarfs/super.c | 9 +- include/linux/efi.h | 2 + tools/testing/selftests/efivarfs/efivarfs.sh | 19 +++- .../testing/selftests/efivarfs/open-unlink.c | 72 ++++++++++++++- 9 files changed, 258 insertions(+), 41 deletions(-) diff --git a/Documentation/filesystems/efivarfs.txt b/Documentation/filesystems/efivarfs.txt index c477af086e65..686a64bba775 100644 --- a/Documentation/filesystems/efivarfs.txt +++ b/Documentation/filesystems/efivarfs.txt @@ -14,3 +14,10 @@ filesystem. efivarfs is typically mounted like this, mount -t efivarfs none /sys/firmware/efi/efivars + +Due to the presence of numerous firmware bugs where removing non-standard +UEFI variables causes the system firmware to fail to POST, efivarfs +files that are not well-known standardized variables are created +as immutable files. This doesn't prevent removal - "chattr -i" will work - +but it does prevent this kind of failure from being accomplished +accidentally. diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c index 9a53da21e7b6..50f10bad2604 100644 --- a/drivers/firmware/efi/vars.c +++ b/drivers/firmware/efi/vars.c @@ -172,10 +172,12 @@ struct variable_validate { }; /* - * This is the list of variables we need to validate. + * This is the list of variables we need to validate, as well as the + * whitelist for what we think is safe not to default to immutable. * * If it has a validate() method that's not NULL, it'll go into the - * validation routine. If not, it is assumed valid. + * validation routine. If not, it is assumed valid, but still used for + * whitelisting. * * Note that it's sorted by {vendor,name}, but globbed names must come after * any other name with the same prefix. @@ -193,11 +195,37 @@ static const struct variable_validate variable_validate[] = { { EFI_GLOBAL_VARIABLE_GUID, "ErrOut", validate_device_path }, { EFI_GLOBAL_VARIABLE_GUID, "ErrOutDev", validate_device_path }, { EFI_GLOBAL_VARIABLE_GUID, "Lang", validate_ascii_string }, + { EFI_GLOBAL_VARIABLE_GUID, "OsIndications", NULL }, { EFI_GLOBAL_VARIABLE_GUID, "PlatformLang", validate_ascii_string }, { EFI_GLOBAL_VARIABLE_GUID, "Timeout", validate_uint16 }, { NULL_GUID, "", NULL }, }; +static bool +variable_matches(const char *var_name, size_t len, const char *match_name, + int *match) +{ + for (*match = 0; ; (*match)++) { + char c = match_name[*match]; + char u = var_name[*match]; + + /* Wildcard in the matching name means we've matched */ + if (c == '*') + return true; + + /* Case sensitive match */ + if (!c && *match == len) + return true; + + if (c != u) + return false; + + if (!c) + return true; + } + return true; +} + bool efivar_validate(efi_guid_t vendor, efi_char16_t *var_name, u8 *data, unsigned long data_size) @@ -221,35 +249,48 @@ efivar_validate(efi_guid_t vendor, efi_char16_t *var_name, u8 *data, if (efi_guidcmp(vendor, variable_validate[i].vendor)) continue; - for (match = 0; ; match++) { - char c = name[match]; - char u = utf8_name[match]; - - /* Wildcard in the matching name means we've matched */ - if (c == '*') { - kfree(utf8_name); - return variable_validate[i].validate(var_name, - match, data, data_size); - } - - /* Case sensitive match */ - if (c != u) + if (variable_matches(utf8_name, utf8_size+1, name, &match)) { + if (variable_validate[i].validate == NULL) break; - - /* Reached the end of the string while matching */ - if (!c) { - kfree(utf8_name); - return variable_validate[i].validate(var_name, - match, data, data_size); - } + kfree(utf8_name); + return variable_validate[i].validate(var_name, match, + data, data_size); } } - kfree(utf8_name); return true; } EXPORT_SYMBOL_GPL(efivar_validate); +bool +efivar_variable_is_removable(efi_guid_t vendor, const char *var_name, + size_t len) +{ + int i; + bool found = false; + int match = 0; + + /* + * Check if our variable is in the validated variables list + */ + for (i = 0; variable_validate[i].name[0] != '\0'; i++) { + if (efi_guidcmp(variable_validate[i].vendor, vendor)) + continue; + + if (variable_matches(var_name, len, + variable_validate[i].name, &match)) { + found = true; + break; + } + } + + /* + * If it's in our list, it is removable. + */ + return found; +} +EXPORT_SYMBOL_GPL(efivar_variable_is_removable); + static efi_status_t check_var_size(u32 attributes, unsigned long size) { diff --git a/fs/efivarfs/file.c b/fs/efivarfs/file.c index 90001da9abfd..66842e55c48c 100644 --- a/fs/efivarfs/file.c +++ b/fs/efivarfs/file.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "internal.h" @@ -103,9 +104,78 @@ out_free: return size; } +static int +efivarfs_ioc_getxflags(struct file *file, void __user *arg) +{ + struct inode *inode = file->f_mapping->host; + unsigned int i_flags; + unsigned int flags = 0; + + i_flags = inode->i_flags; + if (i_flags & S_IMMUTABLE) + flags |= FS_IMMUTABLE_FL; + + if (copy_to_user(arg, &flags, sizeof(flags))) + return -EFAULT; + return 0; +} + +static int +efivarfs_ioc_setxflags(struct file *file, void __user *arg) +{ + struct inode *inode = file->f_mapping->host; + unsigned int flags; + unsigned int i_flags = 0; + int error; + + if (!inode_owner_or_capable(inode)) + return -EACCES; + + if (copy_from_user(&flags, arg, sizeof(flags))) + return -EFAULT; + + if (flags & ~FS_IMMUTABLE_FL) + return -EOPNOTSUPP; + + if (!capable(CAP_LINUX_IMMUTABLE)) + return -EPERM; + + if (flags & FS_IMMUTABLE_FL) + i_flags |= S_IMMUTABLE; + + + error = mnt_want_write_file(file); + if (error) + return error; + + mutex_lock(&inode->i_mutex); + inode_set_flags(inode, i_flags, S_IMMUTABLE); + mutex_unlock(&inode->i_mutex); + + mnt_drop_write_file(file); + + return 0; +} + +long +efivarfs_file_ioctl(struct file *file, unsigned int cmd, unsigned long p) +{ + void __user *arg = (void __user *)p; + + switch (cmd) { + case FS_IOC_GETFLAGS: + return efivarfs_ioc_getxflags(file, arg); + case FS_IOC_SETFLAGS: + return efivarfs_ioc_setxflags(file, arg); + } + + return -ENOTTY; +} + const struct file_operations efivarfs_file_operations = { .open = simple_open, .read = efivarfs_file_read, .write = efivarfs_file_write, .llseek = no_llseek, + .unlocked_ioctl = efivarfs_file_ioctl, }; diff --git a/fs/efivarfs/inode.c b/fs/efivarfs/inode.c index 3381b9da9ee6..e2ab6d0497f2 100644 --- a/fs/efivarfs/inode.c +++ b/fs/efivarfs/inode.c @@ -15,7 +15,8 @@ #include "internal.h" struct inode *efivarfs_get_inode(struct super_block *sb, - const struct inode *dir, int mode, dev_t dev) + const struct inode *dir, int mode, + dev_t dev, bool is_removable) { struct inode *inode = new_inode(sb); @@ -23,6 +24,7 @@ struct inode *efivarfs_get_inode(struct super_block *sb, inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + inode->i_flags = is_removable ? 0 : S_IMMUTABLE; switch (mode & S_IFMT) { case S_IFREG: inode->i_fop = &efivarfs_file_operations; @@ -102,22 +104,17 @@ static void efivarfs_hex_to_guid(const char *str, efi_guid_t *guid) static int efivarfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { - struct inode *inode; + struct inode *inode = NULL; struct efivar_entry *var; int namelen, i = 0, err = 0; + bool is_removable = false; if (!efivarfs_valid_name(dentry->d_name.name, dentry->d_name.len)) return -EINVAL; - inode = efivarfs_get_inode(dir->i_sb, dir, mode, 0); - if (!inode) - return -ENOMEM; - var = kzalloc(sizeof(struct efivar_entry), GFP_KERNEL); - if (!var) { - err = -ENOMEM; - goto out; - } + if (!var) + return -ENOMEM; /* length of the variable name itself: remove GUID and separator */ namelen = dentry->d_name.len - EFI_VARIABLE_GUID_LEN - 1; @@ -125,6 +122,16 @@ static int efivarfs_create(struct inode *dir, struct dentry *dentry, efivarfs_hex_to_guid(dentry->d_name.name + namelen + 1, &var->var.VendorGuid); + if (efivar_variable_is_removable(var->var.VendorGuid, + dentry->d_name.name, namelen)) + is_removable = true; + + inode = efivarfs_get_inode(dir->i_sb, dir, mode, 0, is_removable); + if (!inode) { + err = -ENOMEM; + goto out; + } + for (i = 0; i < namelen; i++) var->var.VariableName[i] = dentry->d_name.name[i]; @@ -138,7 +145,8 @@ static int efivarfs_create(struct inode *dir, struct dentry *dentry, out: if (err) { kfree(var); - iput(inode); + if (inode) + iput(inode); } return err; } diff --git a/fs/efivarfs/internal.h b/fs/efivarfs/internal.h index b5ff16addb7c..b4505188e799 100644 --- a/fs/efivarfs/internal.h +++ b/fs/efivarfs/internal.h @@ -15,7 +15,8 @@ extern const struct file_operations efivarfs_file_operations; extern const struct inode_operations efivarfs_dir_inode_operations; extern bool efivarfs_valid_name(const char *str, int len); extern struct inode *efivarfs_get_inode(struct super_block *sb, - const struct inode *dir, int mode, dev_t dev); + const struct inode *dir, int mode, dev_t dev, + bool is_removable); extern struct list_head efivarfs_list; diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index 0eb73793b737..abb244b06024 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -120,6 +120,7 @@ static int efivarfs_callback(efi_char16_t *name16, efi_guid_t vendor, char *name; int len; int err = -ENOMEM; + bool is_removable = false; entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (!entry) @@ -137,13 +138,17 @@ static int efivarfs_callback(efi_char16_t *name16, efi_guid_t vendor, ucs2_as_utf8(name, entry->var.VariableName, len); + if (efivar_variable_is_removable(entry->var.VendorGuid, name, len)) + is_removable = true; + name[len] = '-'; efi_guid_to_str(&entry->var.VendorGuid, name + len + 1); name[len + EFI_VARIABLE_GUID_LEN+1] = '\0'; - inode = efivarfs_get_inode(sb, d_inode(root), S_IFREG | 0644, 0); + inode = efivarfs_get_inode(sb, d_inode(root), S_IFREG | 0644, 0, + is_removable); if (!inode) goto fail_name; @@ -199,7 +204,7 @@ static int efivarfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_d_op = &efivarfs_d_ops; sb->s_time_gran = 1; - inode = efivarfs_get_inode(sb, NULL, S_IFDIR | 0755, 0); + inode = efivarfs_get_inode(sb, NULL, S_IFDIR | 0755, 0, true); if (!inode) return -ENOMEM; inode->i_op = &efivarfs_dir_inode_operations; diff --git a/include/linux/efi.h b/include/linux/efi.h index 16ca611aabc8..47be3ad7d3e5 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1201,6 +1201,8 @@ struct efivar_entry *efivar_entry_find(efi_char16_t *name, efi_guid_t guid, bool efivar_validate(efi_guid_t vendor, efi_char16_t *var_name, u8 *data, unsigned long data_size); +bool efivar_variable_is_removable(efi_guid_t vendor, const char *name, + size_t len); extern struct work_struct efivar_work; void efivar_run_worker(void); diff --git a/tools/testing/selftests/efivarfs/efivarfs.sh b/tools/testing/selftests/efivarfs/efivarfs.sh index 77edcdcc016b..057278448515 100755 --- a/tools/testing/selftests/efivarfs/efivarfs.sh +++ b/tools/testing/selftests/efivarfs/efivarfs.sh @@ -88,7 +88,11 @@ test_delete() exit 1 fi - rm $file + rm $file 2>/dev/null + if [ $? -ne 0 ]; then + chattr -i $file + rm $file + fi if [ -e $file ]; then echo "$file couldn't be deleted" >&2 @@ -111,6 +115,7 @@ test_zero_size_delete() exit 1 fi + chattr -i $file printf "$attrs" > $file if [ -e $file ]; then @@ -141,7 +146,11 @@ test_valid_filenames() echo "$file could not be created" >&2 ret=1 else - rm $file + rm $file 2>/dev/null + if [ $? -ne 0 ]; then + chattr -i $file + rm $file + fi fi done @@ -174,7 +183,11 @@ test_invalid_filenames() if [ -e $file ]; then echo "Creating $file should have failed" >&2 - rm $file + rm $file 2>/dev/null + if [ $? -ne 0 ]; then + chattr -i $file + rm $file + fi ret=1 fi done diff --git a/tools/testing/selftests/efivarfs/open-unlink.c b/tools/testing/selftests/efivarfs/open-unlink.c index 8c0764407b3c..4af74f733036 100644 --- a/tools/testing/selftests/efivarfs/open-unlink.c +++ b/tools/testing/selftests/efivarfs/open-unlink.c @@ -1,10 +1,68 @@ +#include #include #include #include #include +#include #include #include #include +#include + +static int set_immutable(const char *path, int immutable) +{ + unsigned int flags; + int fd; + int rc; + int error; + + fd = open(path, O_RDONLY); + if (fd < 0) + return fd; + + rc = ioctl(fd, FS_IOC_GETFLAGS, &flags); + if (rc < 0) { + error = errno; + close(fd); + errno = error; + return rc; + } + + if (immutable) + flags |= FS_IMMUTABLE_FL; + else + flags &= ~FS_IMMUTABLE_FL; + + rc = ioctl(fd, FS_IOC_SETFLAGS, &flags); + error = errno; + close(fd); + errno = error; + return rc; +} + +static int get_immutable(const char *path) +{ + unsigned int flags; + int fd; + int rc; + int error; + + fd = open(path, O_RDONLY); + if (fd < 0) + return fd; + + rc = ioctl(fd, FS_IOC_GETFLAGS, &flags); + if (rc < 0) { + error = errno; + close(fd); + errno = error; + return rc; + } + close(fd); + if (flags & FS_IMMUTABLE_FL) + return 1; + return 0; +} int main(int argc, char **argv) { @@ -27,7 +85,7 @@ int main(int argc, char **argv) buf[4] = 0; /* create a test variable */ - fd = open(path, O_WRONLY | O_CREAT); + fd = open(path, O_WRONLY | O_CREAT, 0600); if (fd < 0) { perror("open(O_WRONLY)"); return EXIT_FAILURE; @@ -41,6 +99,18 @@ int main(int argc, char **argv) close(fd); + rc = get_immutable(path); + if (rc < 0) { + perror("ioctl(FS_IOC_GETFLAGS)"); + return EXIT_FAILURE; + } else if (rc) { + rc = set_immutable(path, 0); + if (rc < 0) { + perror("ioctl(FS_IOC_SETFLAGS)"); + return EXIT_FAILURE; + } + } + fd = open(path, O_RDONLY); if (fd < 0) { perror("open"); From 9168b9b4cd91c442041a5c3a3d9f9a5b1f8676f5 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Mon, 15 Feb 2016 10:34:05 +0000 Subject: [PATCH 060/418] efi: Add pstore variables to the deletion whitelist commit e246eb568bc4cbbdd8a30a3c11151ff9b7ca7312 upstream. Laszlo explains why this is a good idea, 'This is because the pstore filesystem can be backed by UEFI variables, and (for example) a crash might dump the last kilobytes of the dmesg into a number of pstore entries, each entry backed by a separate UEFI variable in the above GUID namespace, and with a variable name according to the above pattern. Please see "drivers/firmware/efi/efi-pstore.c". While this patch series will not prevent the user from deleting those UEFI variables via the pstore filesystem (i.e., deleting a pstore fs entry will continue to delete the backing UEFI variable), I think it would be nice to preserve the possibility for the sysadmin to delete Linux-created UEFI variables that carry portions of the crash log, *without* having to mount the pstore filesystem.' There's also no chance of causing machines to become bricked by deleting these variables, which is the whole purpose of excluding things from the whitelist. Use the LINUX_EFI_CRASH_GUID guid and a wildcard '*' for the match so that we don't have to update the string in the future if new variable name formats are created for crash dump variables. Reported-by: Laszlo Ersek Acked-by: Peter Jones Tested-by: Peter Jones Cc: Matthew Garrett Cc: "Lee, Chun-Yi" Signed-off-by: Matt Fleming Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/vars.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c index 50f10bad2604..7f2ea21c730d 100644 --- a/drivers/firmware/efi/vars.c +++ b/drivers/firmware/efi/vars.c @@ -198,6 +198,7 @@ static const struct variable_validate variable_validate[] = { { EFI_GLOBAL_VARIABLE_GUID, "OsIndications", NULL }, { EFI_GLOBAL_VARIABLE_GUID, "PlatformLang", validate_ascii_string }, { EFI_GLOBAL_VARIABLE_GUID, "Timeout", validate_uint16 }, + { LINUX_EFI_CRASH_GUID, "*", NULL }, { NULL_GUID, "", NULL }, }; From 9e8afc94ffae6f7ab9ba77308a9ab53aaf10335e Mon Sep 17 00:00:00 2001 From: Jason Andryuk Date: Fri, 12 Feb 2016 23:13:33 +0000 Subject: [PATCH 061/418] lib/ucs2_string: Correct ucs2 -> utf8 conversion commit a68075908a37850918ad96b056acc9ac4ce1bd90 upstream. The comparisons should be >= since 0x800 and 0x80 require an additional bit to store. For the 3 byte case, the existing shift would drop off 2 more bits than intended. For the 2 byte case, there should be 5 bits bits in byte 1, and 6 bits in byte 2. Signed-off-by: Jason Andryuk Reviewed-by: Laszlo Ersek Cc: Peter Jones Cc: Matthew Garrett Cc: "Lee, Chun-Yi" Signed-off-by: Matt Fleming Signed-off-by: Greg Kroah-Hartman --- lib/ucs2_string.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/lib/ucs2_string.c b/lib/ucs2_string.c index 17dd74e21ef9..f0b323abb4c6 100644 --- a/lib/ucs2_string.c +++ b/lib/ucs2_string.c @@ -59,9 +59,9 @@ ucs2_utf8size(const ucs2_char_t *src) for (i = 0; i < ucs2_strlen(src); i++) { u16 c = src[i]; - if (c > 0x800) + if (c >= 0x800) j += 3; - else if (c > 0x80) + else if (c >= 0x80) j += 2; else j += 1; @@ -88,19 +88,19 @@ ucs2_as_utf8(u8 *dest, const ucs2_char_t *src, unsigned long maxlength) for (i = 0; maxlength && i < limit; i++) { u16 c = src[i]; - if (c > 0x800) { + if (c >= 0x800) { if (maxlength < 3) break; maxlength -= 3; dest[j++] = 0xe0 | (c & 0xf000) >> 12; - dest[j++] = 0x80 | (c & 0x0fc0) >> 8; + dest[j++] = 0x80 | (c & 0x0fc0) >> 6; dest[j++] = 0x80 | (c & 0x003f); - } else if (c > 0x80) { + } else if (c >= 0x80) { if (maxlength < 2) break; maxlength -= 2; - dest[j++] = 0xc0 | (c & 0xfe0) >> 5; - dest[j++] = 0x80 | (c & 0x01f); + dest[j++] = 0xc0 | (c & 0x7c0) >> 6; + dest[j++] = 0x80 | (c & 0x03f); } else { maxlength -= 1; dest[j++] = c & 0x7f; From 69ccf81eb6cf68f4533e97e355215e0796d7834b Mon Sep 17 00:00:00 2001 From: Zheng Liu Date: Sun, 29 Nov 2015 17:17:05 -0800 Subject: [PATCH 062/418] bcache: fix a livelock when we cause a huge number of cache misses commit 2ef9ccbfcb90cf84bdba320a571b18b05c41101b upstream. Subject : [PATCH v2] bcache: fix a livelock in btree lock Date : Wed, 25 Feb 2015 20:32:09 +0800 (02/25/2015 04:32:09 AM) This commit tries to fix a livelock in bcache. This livelock might happen when we causes a huge number of cache misses simultaneously. When we get a cache miss, bcache will execute the following path. ->cached_dev_make_request() ->cached_dev_read() ->cached_lookup() ->bch->btree_map_keys() ->btree_root() <------------------------ ->bch_btree_map_keys_recurse() | ->cache_lookup_fn() | ->cached_dev_cache_miss() | ->bch_btree_insert_check_key() -| [If btree->seq is not equal to seq + 1, we should return EINTR and traverse btree again.] In bch_btree_insert_check_key() function we first need to check upgrade flag (op->lock == -1), and when this flag is true we need to release read btree->lock and try to take write btree->lock. During taking and releasing this write lock, btree->seq will be monotone increased in order to prevent other threads modify this in cache miss (see btree.h:74). But if there are some cache misses caused by some requested, we could meet a livelock because btree->seq is always changed by others. Thus no one can make progress. This commit will try to take write btree->lock if it encounters a race when we traverse btree. Although it sacrifice the scalability but we can ensure that only one can modify the btree. Signed-off-by: Zheng Liu Tested-by: Joshua Schmid Tested-by: Eric Wheeler Cc: Joshua Schmid Cc: Zhu Yanhai Cc: Kent Overstreet Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/btree.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 83392f856dfd..4a1179cbc52e 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -2162,8 +2162,10 @@ int bch_btree_insert_check_key(struct btree *b, struct btree_op *op, rw_lock(true, b, b->level); if (b->key.ptr[0] != btree_ptr || - b->seq != seq + 1) + b->seq != seq + 1) { + op->lock = b->level; goto out; + } } SET_KEY_PTRS(check_key, 1); From f4c87d7c2e5d6d20fb35066f3ee51e4bdc467d74 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 29 Nov 2015 17:18:33 -0800 Subject: [PATCH 063/418] bcache: Add a cond_resched() call to gc commit c5f1e5adf956e3ba82d204c7c141a75da9fa449a upstream. Signed-off-by: Takashi Iwai Tested-by: Eric Wheeler Cc: Kent Overstreet Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/btree.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 4a1179cbc52e..22b9e34ceb75 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -1741,6 +1741,7 @@ static void bch_btree_gc(struct cache_set *c) do { ret = btree_root(gc_root, c, &op, &writes, &stats); closure_sync(&writes); + cond_resched(); if (ret && ret != -EAGAIN) pr_warn("gc failed!"); From 1ebc85013f58e40ee47538481be5236671761c16 Mon Sep 17 00:00:00 2001 From: Zheng Liu Date: Sun, 29 Nov 2015 17:19:32 -0800 Subject: [PATCH 064/418] bcache: clear BCACHE_DEV_UNLINK_DONE flag when attaching a backing device commit fecaee6f20ee122ad75402c53d8278f9bb142ddc upstream. This bug can be reproduced by the following script: #!/bin/bash bcache_sysfs="/sys/fs/bcache" function clear_cache() { if [ ! -e $bcache_sysfs ]; then echo "no bcache sysfs" exit fi cset_uuid=$(ls -l $bcache_sysfs|head -n 2|tail -n 1|awk '{print $9}') sudo sh -c "echo $cset_uuid > /sys/block/sdb/sdb1/bcache/detach" sleep 5 sudo sh -c "echo $cset_uuid > /sys/block/sdb/sdb1/bcache/attach" } for ((i=0;i<10;i++)); do clear_cache done The warning messages look like below: [ 275.948611] ------------[ cut here ]------------ [ 275.963840] WARNING: at fs/sysfs/dir.c:512 sysfs_add_one+0xb8/0xd0() (Tainted: P W --------------- ) [ 275.979253] Hardware name: Tecal RH2285 [ 275.994106] sysfs: cannot create duplicate filename '/devices/pci0000:00/0000:00:09.0/0000:08:00.0/host4/target4:2:1/4:2:1:0/block/sdb/sdb1/bcache/cache' [ 276.024105] Modules linked in: bcache tcp_diag inet_diag ipmi_devintf ipmi_si ipmi_msghandler bonding 8021q garp stp llc ipv6 ext3 jbd loop sg iomemory_vsl(P) bnx2 microcode serio_raw i2c_i801 i2c_core iTCO_wdt iTCO_vendor_support i7core_edac edac_core shpchp ext4 jbd2 mbcache megaraid_sas pata_acpi ata_generic ata_piix dm_mod [last unloaded: scsi_wait_scan] [ 276.072643] Pid: 2765, comm: sh Tainted: P W --------------- 2.6.32 #1 [ 276.089315] Call Trace: [ 276.105801] [] ? warn_slowpath_common+0x87/0xc0 [ 276.122650] [] ? warn_slowpath_fmt+0x46/0x50 [ 276.139361] [] ? sysfs_add_one+0xb8/0xd0 [ 276.156012] [] ? sysfs_do_create_link+0x12b/0x170 [ 276.172682] [] ? sysfs_create_link+0x13/0x20 [ 276.189282] [] ? bcache_device_link+0xc1/0x110 [bcache] [ 276.205993] [] ? bch_cached_dev_attach+0x478/0x4f0 [bcache] [ 276.222794] [] ? bch_cached_dev_store+0x627/0x780 [bcache] [ 276.239680] [] ? alloc_pages_current+0xaa/0x110 [ 276.256594] [] ? sysfs_write_file+0xe5/0x170 [ 276.273364] [] ? vfs_write+0xb8/0x1a0 [ 276.290133] [] ? sys_write+0x51/0x90 [ 276.306368] [] ? system_call_fastpath+0x16/0x1b [ 276.322301] ---[ end trace 9f5d4fcdd0c3edfb ]--- [ 276.338241] ------------[ cut here ]------------ [ 276.354109] WARNING: at /home/wenqing.lz/bcache/bcache/super.c:720 bcache_device_link+0xdf/0x110 [bcache]() (Tainted: P W --------------- ) [ 276.386017] Hardware name: Tecal RH2285 [ 276.401430] Couldn't create device <-> cache set symlinks [ 276.401759] Modules linked in: bcache tcp_diag inet_diag ipmi_devintf ipmi_si ipmi_msghandler bonding 8021q garp stp llc ipv6 ext3 jbd loop sg iomemory_vsl(P) bnx2 microcode serio_raw i2c_i801 i2c_core iTCO_wdt iTCO_vendor_support i7core_edac edac_core shpchp ext4 jbd2 mbcache megaraid_sas pata_acpi ata_generic ata_piix dm_mod [last unloaded: scsi_wait_scan] [ 276.465477] Pid: 2765, comm: sh Tainted: P W --------------- 2.6.32 #1 [ 276.482169] Call Trace: [ 276.498610] [] ? warn_slowpath_common+0x87/0xc0 [ 276.515405] [] ? warn_slowpath_fmt+0x46/0x50 [ 276.532059] [] ? bcache_device_link+0xdf/0x110 [bcache] [ 276.548808] [] ? bch_cached_dev_attach+0x478/0x4f0 [bcache] [ 276.565569] [] ? bch_cached_dev_store+0x627/0x780 [bcache] [ 276.582418] [] ? alloc_pages_current+0xaa/0x110 [ 276.599341] [] ? sysfs_write_file+0xe5/0x170 [ 276.616142] [] ? vfs_write+0xb8/0x1a0 [ 276.632607] [] ? sys_write+0x51/0x90 [ 276.648671] [] ? system_call_fastpath+0x16/0x1b [ 276.664756] ---[ end trace 9f5d4fcdd0c3edfc ]--- We forget to clear BCACHE_DEV_UNLINK_DONE flag in bcache_device_attach() function when we attach a backing device first time. After detaching this backing device, this flag will be true and sysfs_remove_link() isn't called in bcache_device_unlink(). Then when we attach this backing device again, sysfs_create_link() will return EEXIST error in bcache_device_link(). So the fix is trival and we clear this flag in bcache_device_link(). Signed-off-by: Zheng Liu Tested-by: Joshua Schmid Tested-by: Eric Wheeler Cc: Kent Overstreet Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/super.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 679a093a3bf6..383f06028a0a 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -685,6 +685,8 @@ static void bcache_device_link(struct bcache_device *d, struct cache_set *c, WARN(sysfs_create_link(&d->kobj, &c->kobj, "cache") || sysfs_create_link(&c->kobj, &d->kobj, d->name), "Couldn't create device <-> cache set symlinks"); + + clear_bit(BCACHE_DEV_UNLINK_DONE, &d->flags); } static void bcache_device_detach(struct bcache_device *d) From 8e086f9f3cf528eb27708618cc45ee3485d84cf2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 29 Nov 2015 17:20:59 -0800 Subject: [PATCH 065/418] bcache: fix a leak in bch_cached_dev_run() commit 4d4d8573a8451acc9f01cbea24b7e55f04a252fe upstream. Signed-off-by: Al Viro Tested-by: Joshua Schmid Tested-by: Eric Wheeler Cc: Kent Overstreet Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/super.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 383f06028a0a..43e911e4e8d0 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -849,8 +849,11 @@ void bch_cached_dev_run(struct cached_dev *dc) buf[SB_LABEL_SIZE] = '\0'; env[2] = kasprintf(GFP_KERNEL, "CACHED_LABEL=%s", buf); - if (atomic_xchg(&dc->running, 1)) + if (atomic_xchg(&dc->running, 1)) { + kfree(env[1]); + kfree(env[2]); return; + } if (!d->c && BDEV_STATE(&dc->sb) != BDEV_STATE_NONE) { From d81b4c865e68a2db5997a6311dfe5c007dc16b7a Mon Sep 17 00:00:00 2001 From: Zheng Liu Date: Sun, 29 Nov 2015 17:21:57 -0800 Subject: [PATCH 066/418] bcache: unregister reboot notifier if bcache fails to unregister device commit 2ecf0cdb2b437402110ab57546e02abfa68a716b upstream. In bcache_init() function it forgot to unregister reboot notifier if bcache fails to unregister a block device. This commit fixes this. Signed-off-by: Zheng Liu Tested-by: Joshua Schmid Tested-by: Eric Wheeler Cc: Kent Overstreet Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/super.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 43e911e4e8d0..18f14a246d70 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -2071,8 +2071,10 @@ static int __init bcache_init(void) closure_debug_init(); bcache_major = register_blkdev(0, "bcache"); - if (bcache_major < 0) + if (bcache_major < 0) { + unregister_reboot_notifier(&reboot); return bcache_major; + } if (!(bcache_wq = create_workqueue("bcache")) || !(bcache_kobj = kobject_create_and_add("bcache", fs_kobj)) || From b38798df6cca121e0df4c6a500408070eba24264 Mon Sep 17 00:00:00 2001 From: Gabriel de Perthuis Date: Sun, 29 Nov 2015 18:40:23 -0800 Subject: [PATCH 067/418] bcache: allows use of register in udev to avoid "device_busy" error. commit d7076f21629f8f329bca4a44dc408d94670f49e2 upstream. Allows to use register, not register_quiet in udev to avoid "device_busy" error. The initial patch proposed at https://lkml.org/lkml/2013/8/26/549 by Gabriel de Perthuis does not unlock the mutex and hangs the kernel. See http://thread.gmane.org/gmane.linux.kernel.bcache.devel/2594 for the discussion. Cc: Denis Bychkov Cc: Kent Overstreet Cc: Eric Wheeler Cc: Gabriel de Perthuis Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/super.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 18f14a246d70..8d0ead98eb6e 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1938,6 +1938,8 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, else err = "device busy"; mutex_unlock(&bch_register_lock); + if (attr == &ksysfs_register_quiet) + goto out; } goto err; } @@ -1976,8 +1978,7 @@ out: err_close: blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); err: - if (attr != &ksysfs_register_quiet) - pr_info("error opening %s: %s", path, err); + pr_info("error opening %s: %s", path, err); ret = -EINVAL; goto out; } From 9e761c1b436d04823baa161c294fddc524750015 Mon Sep 17 00:00:00 2001 From: Stefan Bader Date: Sun, 29 Nov 2015 18:44:49 -0800 Subject: [PATCH 068/418] bcache: prevent crash on changing writeback_running commit 8d16ce540c94c9d366eb36fc91b7154d92d6397b upstream. Added a safeguard in the shutdown case. At least while not being attached it is also possible to trigger a kernel bug by writing into writeback_running. This change adds the same check before trying to wake up the thread for that case. Signed-off-by: Stefan Bader Cc: Kent Overstreet Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/writeback.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h index 0a9dab187b79..073a042aed24 100644 --- a/drivers/md/bcache/writeback.h +++ b/drivers/md/bcache/writeback.h @@ -63,7 +63,8 @@ static inline bool should_writeback(struct cached_dev *dc, struct bio *bio, static inline void bch_writeback_queue(struct cached_dev *dc) { - wake_up_process(dc->writeback_thread); + if (!IS_ERR_OR_NULL(dc->writeback_thread)) + wake_up_process(dc->writeback_thread); } static inline void bch_writeback_add(struct cached_dev *dc) From a556b804dfa654f054f3d304c2c4d274ffe81f92 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 29 Nov 2015 18:47:01 -0800 Subject: [PATCH 069/418] bcache: Change refill_dirty() to always scan entire disk if necessary commit 627ccd20b4ad3ba836472468208e2ac4dfadbf03 upstream. Previously, it would only scan the entire disk if it was starting from the very start of the disk - i.e. if the previous scan got to the end. This was broken by refill_full_stripes(), which updates last_scanned so that refill_dirty was never triggering the searched_from_start path. But if we change refill_dirty() to always scan the entire disk if necessary, regardless of what last_scanned was, the code gets cleaner and we fix that bug too. Signed-off-by: Kent Overstreet Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/writeback.c | 37 ++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index b23f88d9f18c..b9346cd9cda1 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -323,6 +323,10 @@ void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned inode, static bool dirty_pred(struct keybuf *buf, struct bkey *k) { + struct cached_dev *dc = container_of(buf, struct cached_dev, writeback_keys); + + BUG_ON(KEY_INODE(k) != dc->disk.id); + return KEY_DIRTY(k); } @@ -372,11 +376,24 @@ next: } } +/* + * Returns true if we scanned the entire disk + */ static bool refill_dirty(struct cached_dev *dc) { struct keybuf *buf = &dc->writeback_keys; + struct bkey start = KEY(dc->disk.id, 0, 0); struct bkey end = KEY(dc->disk.id, MAX_KEY_OFFSET, 0); - bool searched_from_start = false; + struct bkey start_pos; + + /* + * make sure keybuf pos is inside the range for this disk - at bringup + * we might not be attached yet so this disk's inode nr isn't + * initialized then + */ + if (bkey_cmp(&buf->last_scanned, &start) < 0 || + bkey_cmp(&buf->last_scanned, &end) > 0) + buf->last_scanned = start; if (dc->partial_stripes_expensive) { refill_full_stripes(dc); @@ -384,14 +401,20 @@ static bool refill_dirty(struct cached_dev *dc) return false; } - if (bkey_cmp(&buf->last_scanned, &end) >= 0) { - buf->last_scanned = KEY(dc->disk.id, 0, 0); - searched_from_start = true; - } - + start_pos = buf->last_scanned; bch_refill_keybuf(dc->disk.c, buf, &end, dirty_pred); - return bkey_cmp(&buf->last_scanned, &end) >= 0 && searched_from_start; + if (bkey_cmp(&buf->last_scanned, &end) < 0) + return false; + + /* + * If we get to the end start scanning again from the beginning, and + * only scan up to where we initially started scanning from: + */ + buf->last_scanned = start; + bch_refill_keybuf(dc->disk.c, buf, &start_pos, dirty_pred); + + return bkey_cmp(&buf->last_scanned, &start_pos) >= 0; } static int bch_writeback_thread(void *arg) From 5c8a03a351257ae175d57c9c06f57ab95fc99db3 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Thu, 17 Dec 2015 18:03:35 +0200 Subject: [PATCH 070/418] dm thin: fix race condition when destroying thin pool workqueue commit 18d03e8c25f173f4107a40d0b8c24defb6ed69f3 upstream. When a thin pool is being destroyed delayed work items are cancelled using cancel_delayed_work(), which doesn't guarantee that on return the delayed item isn't running. This can cause the work item to requeue itself on an already destroyed workqueue. Fix this by using cancel_delayed_work_sync() which guarantees that on return the work item is not running anymore. Fixes: 905e51b39a555 ("dm thin: commit outstanding data every second") Fixes: 85ad643b7e7e5 ("dm thin: add timeout to stop out-of-data-space mode holding IO forever") Signed-off-by: Nikolay Borisov Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-thin.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 63903a5a5d9e..a1cc797fe88f 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -3453,8 +3453,8 @@ static void pool_postsuspend(struct dm_target *ti) struct pool_c *pt = ti->private; struct pool *pool = pt->pool; - cancel_delayed_work(&pool->waker); - cancel_delayed_work(&pool->no_space_timeout); + cancel_delayed_work_sync(&pool->waker); + cancel_delayed_work_sync(&pool->no_space_timeout); flush_workqueue(pool->wq); (void) commit(pool); } From f847ff0db6da0b42acb7447f8629dd2a33ddfe24 Mon Sep 17 00:00:00 2001 From: Gerhard Uttenthaler Date: Tue, 22 Dec 2015 17:29:16 +0100 Subject: [PATCH 071/418] can: ems_usb: Fix possible tx overflow commit 90cfde46586d2286488d8ed636929e936c0c9ab2 upstream. This patch fixes the problem that more CAN messages could be sent to the interface as could be send on the CAN bus. This was more likely for slow baud rates. The sleeping _start_xmit was woken up in the _write_bulk_callback. Under heavy TX load this produced another bulk transfer without checking the free_slots variable and hence caused the overflow in the interface. Signed-off-by: Gerhard Uttenthaler Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/usb/ems_usb.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c index fc5b75675cd8..eb7192fab593 100644 --- a/drivers/net/can/usb/ems_usb.c +++ b/drivers/net/can/usb/ems_usb.c @@ -117,6 +117,9 @@ MODULE_LICENSE("GPL v2"); */ #define EMS_USB_ARM7_CLOCK 8000000 +#define CPC_TX_QUEUE_TRIGGER_LOW 25 +#define CPC_TX_QUEUE_TRIGGER_HIGH 35 + /* * CAN-Message representation in a CPC_MSG. Message object type is * CPC_MSG_TYPE_CAN_FRAME or CPC_MSG_TYPE_RTR_FRAME or @@ -278,6 +281,11 @@ static void ems_usb_read_interrupt_callback(struct urb *urb) switch (urb->status) { case 0: dev->free_slots = dev->intr_in_buffer[1]; + if(dev->free_slots > CPC_TX_QUEUE_TRIGGER_HIGH){ + if (netif_queue_stopped(netdev)){ + netif_wake_queue(netdev); + } + } break; case -ECONNRESET: /* unlink */ @@ -526,8 +534,6 @@ static void ems_usb_write_bulk_callback(struct urb *urb) /* Release context */ context->echo_index = MAX_TX_URBS; - if (netif_queue_stopped(netdev)) - netif_wake_queue(netdev); } /* @@ -587,7 +593,7 @@ static int ems_usb_start(struct ems_usb *dev) int err, i; dev->intr_in_buffer[0] = 0; - dev->free_slots = 15; /* initial size */ + dev->free_slots = 50; /* initial size */ for (i = 0; i < MAX_RX_URBS; i++) { struct urb *urb = NULL; @@ -835,7 +841,7 @@ static netdev_tx_t ems_usb_start_xmit(struct sk_buff *skb, struct net_device *ne /* Slow down tx path */ if (atomic_read(&dev->active_tx_urbs) >= MAX_TX_URBS || - dev->free_slots < 5) { + dev->free_slots < CPC_TX_QUEUE_TRIGGER_LOW) { netif_stop_queue(netdev); } } From e8d634243df782ea93f650588d0abdfb63cd95f8 Mon Sep 17 00:00:00 2001 From: John Youn Date: Tue, 16 Feb 2016 20:10:53 -0800 Subject: [PATCH 072/418] usb: dwc3: Fix assignment of EP transfer resources commit c450960187f45d4260db87c7dd4fc0bceb5565d8 upstream. The assignement of EP transfer resources was not handled properly in the dwc3 driver. Commit aebda6187181 ("usb: dwc3: Reset the transfer resource index on SET_INTERFACE") previously fixed one aspect of this where resources may be exhausted with multiple calls to SET_INTERFACE. However, it introduced an issue where composite devices with multiple interfaces can be assigned the same transfer resources for different endpoints. This patch solves both issues. The assignment of transfer resources cannot perfectly follow the data book due to the fact that the controller driver does not have all knowledge of the configuration in advance. It is given this information piecemeal by the composite gadget framework after every SET_CONFIGURATION and SET_INTERFACE. Trying to follow the databook programming model in this scenario can cause errors. For two reasons: 1) The databook says to do DEPSTARTCFG for every SET_CONFIGURATION and SET_INTERFACE (8.1.5). This is incorrect in the scenario of multiple interfaces. 2) The databook does not mention doing more DEPXFERCFG for new endpoint on alt setting (8.1.6). The following simplified method is used instead: All hardware endpoints can be assigned a transfer resource and this setting will stay persistent until either a core reset or hibernation. So whenever we do a DEPSTARTCFG(0) we can go ahead and do DEPXFERCFG for every hardware endpoint as well. We are guaranteed that there are as many transfer resources as endpoints. This patch triggers off of the calling dwc3_gadget_start_config() for EP0-out, which always happens first, and which should only happen in one of the above conditions. Fixes: aebda6187181 ("usb: dwc3: Reset the transfer resource index on SET_INTERFACE") Reported-by: Ravi Babu Signed-off-by: John Youn Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.h | 1 - drivers/usb/dwc3/ep0.c | 5 --- drivers/usb/dwc3/gadget.c | 70 +++++++++++++++++++++++++++++---------- 3 files changed, 52 insertions(+), 24 deletions(-) diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index 36f1cb74588c..78be201d81f4 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -853,7 +853,6 @@ struct dwc3 { unsigned pullups_connected:1; unsigned resize_fifos:1; unsigned setup_packet_pending:1; - unsigned start_config_issued:1; unsigned three_stage_setup:1; unsigned usb3_lpm_capable:1; diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c index 5320e939e090..b13912d5fa99 100644 --- a/drivers/usb/dwc3/ep0.c +++ b/drivers/usb/dwc3/ep0.c @@ -555,7 +555,6 @@ static int dwc3_ep0_set_config(struct dwc3 *dwc, struct usb_ctrlrequest *ctrl) int ret; u32 reg; - dwc->start_config_issued = false; cfg = le16_to_cpu(ctrl->wValue); switch (state) { @@ -737,10 +736,6 @@ static int dwc3_ep0_std_request(struct dwc3 *dwc, struct usb_ctrlrequest *ctrl) dwc3_trace(trace_dwc3_ep0, "USB_REQ_SET_ISOCH_DELAY"); ret = dwc3_ep0_set_isoch_delay(dwc, ctrl); break; - case USB_REQ_SET_INTERFACE: - dwc3_trace(trace_dwc3_ep0, "USB_REQ_SET_INTERFACE"); - dwc->start_config_issued = false; - /* Fall through */ default: dwc3_trace(trace_dwc3_ep0, "Forwarding to gadget driver"); ret = dwc3_ep0_delegate_req(dwc, ctrl); diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index a58376fd65fe..69ffe6e8d77f 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -388,24 +388,66 @@ static void dwc3_free_trb_pool(struct dwc3_ep *dep) dep->trb_pool_dma = 0; } +static int dwc3_gadget_set_xfer_resource(struct dwc3 *dwc, struct dwc3_ep *dep); + +/** + * dwc3_gadget_start_config - Configure EP resources + * @dwc: pointer to our controller context structure + * @dep: endpoint that is being enabled + * + * The assignment of transfer resources cannot perfectly follow the + * data book due to the fact that the controller driver does not have + * all knowledge of the configuration in advance. It is given this + * information piecemeal by the composite gadget framework after every + * SET_CONFIGURATION and SET_INTERFACE. Trying to follow the databook + * programming model in this scenario can cause errors. For two + * reasons: + * + * 1) The databook says to do DEPSTARTCFG for every SET_CONFIGURATION + * and SET_INTERFACE (8.1.5). This is incorrect in the scenario of + * multiple interfaces. + * + * 2) The databook does not mention doing more DEPXFERCFG for new + * endpoint on alt setting (8.1.6). + * + * The following simplified method is used instead: + * + * All hardware endpoints can be assigned a transfer resource and this + * setting will stay persistent until either a core reset or + * hibernation. So whenever we do a DEPSTARTCFG(0) we can go ahead and + * do DEPXFERCFG for every hardware endpoint as well. We are + * guaranteed that there are as many transfer resources as endpoints. + * + * This function is called for each endpoint when it is being enabled + * but is triggered only when called for EP0-out, which always happens + * first, and which should only happen in one of the above conditions. + */ static int dwc3_gadget_start_config(struct dwc3 *dwc, struct dwc3_ep *dep) { struct dwc3_gadget_ep_cmd_params params; u32 cmd; + int i; + int ret; + + if (dep->number) + return 0; memset(¶ms, 0x00, sizeof(params)); + cmd = DWC3_DEPCMD_DEPSTARTCFG; - if (dep->number != 1) { - cmd = DWC3_DEPCMD_DEPSTARTCFG; - /* XferRscIdx == 0 for ep0 and 2 for the remaining */ - if (dep->number > 1) { - if (dwc->start_config_issued) - return 0; - dwc->start_config_issued = true; - cmd |= DWC3_DEPCMD_PARAM(2); - } + ret = dwc3_send_gadget_ep_cmd(dwc, 0, cmd, ¶ms); + if (ret) + return ret; - return dwc3_send_gadget_ep_cmd(dwc, 0, cmd, ¶ms); + for (i = 0; i < DWC3_ENDPOINTS_NUM; i++) { + struct dwc3_ep *dep = dwc->eps[i]; + + if (!dep) + continue; + + ret = dwc3_gadget_set_xfer_resource(dwc, dep); + if (ret) + return ret; } return 0; @@ -519,10 +561,6 @@ static int __dwc3_gadget_ep_enable(struct dwc3_ep *dep, struct dwc3_trb *trb_st_hw; struct dwc3_trb *trb_link; - ret = dwc3_gadget_set_xfer_resource(dwc, dep); - if (ret) - return ret; - dep->endpoint.desc = desc; dep->comp_desc = comp_desc; dep->type = usb_endpoint_type(desc); @@ -1604,8 +1642,6 @@ static int dwc3_gadget_start(struct usb_gadget *g, } dwc3_writel(dwc->regs, DWC3_DCFG, reg); - dwc->start_config_issued = false; - /* Start with SuperSpeed Default */ dwc3_gadget_ep0_desc.wMaxPacketSize = cpu_to_le16(512); @@ -2202,7 +2238,6 @@ static void dwc3_gadget_disconnect_interrupt(struct dwc3 *dwc) dwc3_writel(dwc->regs, DWC3_DCTL, reg); dwc3_disconnect_gadget(dwc); - dwc->start_config_issued = false; dwc->gadget.speed = USB_SPEED_UNKNOWN; dwc->setup_packet_pending = false; @@ -2253,7 +2288,6 @@ static void dwc3_gadget_reset_interrupt(struct dwc3 *dwc) dwc3_stop_active_transfers(dwc); dwc3_clear_stall_all_ep(dwc); - dwc->start_config_issued = false; /* Reset device address to zero */ reg = dwc3_readl(dwc->regs, DWC3_DCFG); From a9d2586256f30fe0c4074fd7e845d517e0398df5 Mon Sep 17 00:00:00 2001 From: Ken Lin Date: Mon, 1 Feb 2016 14:57:25 -0500 Subject: [PATCH 073/418] USB: cp210x: add IDs for GE B650V3 and B850V3 boards commit 6627ae19385283b89356a199d7f03c75ba35fb29 upstream. Add USB ID for cp2104/5 devices on GE B650v3 and B850v3 boards. Signed-off-by: Ken Lin Signed-off-by: Akshay Bhat Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/cp210x.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index 1dd9919081f8..a7caf53d8b5e 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -162,6 +162,8 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x1843, 0x0200) }, /* Vaisala USB Instrument Cable */ { USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */ { USB_DEVICE(0x18EF, 0xE025) }, /* ELV Marble Sound Board 1 */ + { USB_DEVICE(0x1901, 0x0190) }, /* GE B850 CP2105 Recorder interface */ + { USB_DEVICE(0x1901, 0x0193) }, /* GE B650 CP2104 PMC interface */ { USB_DEVICE(0x1ADB, 0x0001) }, /* Schweitzer Engineering C662 Cable */ { USB_DEVICE(0x1B1C, 0x1C00) }, /* Corsair USB Dongle */ { USB_DEVICE(0x1BA4, 0x0002) }, /* Silicon Labs 358x factory default */ From aed4333616e0eaabef465dd36bd7fef89ba23390 Mon Sep 17 00:00:00 2001 From: Andrey Skvortsov Date: Fri, 29 Jan 2016 00:07:30 +0300 Subject: [PATCH 074/418] USB: option: add support for SIM7100E commit 3158a8d416f4e1b79dcc867d67cb50013140772c upstream. $ lsusb: Bus 001 Device 101: ID 1e0e:9001 Qualcomm / Option $ usb-devices: T: Bus=01 Lev=02 Prnt=02 Port=00 Cnt=01 Dev#=101 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 2 P: Vendor=1e0e ProdID=9001 Rev= 2.32 S: Manufacturer=SimTech, Incorporated S: Product=SimTech, Incorporated S: SerialNumber=0123456789ABCDEF C:* #Ifs= 7 Cfg#= 1 Atr=80 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I:* If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I:* If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option I:* If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan I:* If#= 6 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=(none) The last interface (6) is used for Android Composite ADB interface. Serial port layout: 0: QCDM/DIAG 1: NMEA 2: AT 3: AT/PPP 4: audio Signed-off-by: Andrey Skvortsov Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index db86e512e0fc..e7eb08d56bae 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -315,6 +315,7 @@ static void option_instat_callback(struct urb *urb); #define TOSHIBA_PRODUCT_G450 0x0d45 #define ALINK_VENDOR_ID 0x1e0e +#define SIMCOM_PRODUCT_SIM7100E 0x9001 /* Yes, ALINK_VENDOR_ID */ #define ALINK_PRODUCT_PH300 0x9100 #define ALINK_PRODUCT_3GU 0x9200 @@ -607,6 +608,10 @@ static const struct option_blacklist_info zte_1255_blacklist = { .reserved = BIT(3) | BIT(4), }; +static const struct option_blacklist_info simcom_sim7100e_blacklist = { + .reserved = BIT(5) | BIT(6), +}; + static const struct option_blacklist_info telit_le910_blacklist = { .sendsetup = BIT(0), .reserved = BIT(1) | BIT(2), @@ -1645,6 +1650,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(ALINK_VENDOR_ID, 0x9000) }, { USB_DEVICE(ALINK_VENDOR_ID, ALINK_PRODUCT_PH300) }, { USB_DEVICE_AND_INTERFACE_INFO(ALINK_VENDOR_ID, ALINK_PRODUCT_3GU, 0xff, 0xff, 0xff) }, + { USB_DEVICE(ALINK_VENDOR_ID, SIMCOM_PRODUCT_SIM7100E), + .driver_info = (kernel_ulong_t)&simcom_sim7100e_blacklist }, { USB_DEVICE(ALCATEL_VENDOR_ID, ALCATEL_PRODUCT_X060S_X200), .driver_info = (kernel_ulong_t)&alcatel_x200_blacklist }, From 80d0fecf76920ea2e8f1d45522ac27d4d5ead892 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Fri, 12 Feb 2016 16:40:00 +0100 Subject: [PATCH 075/418] USB: option: add "4G LTE usb-modem U901" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d061c1caa31d4d9792cfe48a2c6b309a0e01ef46 upstream. Thomas reports: T: Bus=01 Lev=01 Prnt=01 Port=03 Cnt=01 Dev#= 4 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=05c6 ProdID=6001 Rev=00.00 S: Manufacturer=USB Modem S: Product=USB Modem S: SerialNumber=1234567890ABCDEF C: #Ifs= 5 Cfg#= 1 Atr=e0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan I: If#= 4 Alt= 0 #EPs= 2 Cls=08(stor.) Sub=06 Prot=50 Driver=usb-storage Reported-by: Thomas Schäfer Signed-off-by: Bjørn Mork Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index e7eb08d56bae..8849439a8f18 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1127,6 +1127,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(KYOCERA_VENDOR_ID, KYOCERA_PRODUCT_KPC650) }, { USB_DEVICE(KYOCERA_VENDOR_ID, KYOCERA_PRODUCT_KPC680) }, { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x6000)}, /* ZTE AC8700 */ + { USB_DEVICE_AND_INTERFACE_INFO(QUALCOMM_VENDOR_ID, 0x6001, 0xff, 0xff, 0xff), /* 4G LTE usb-modem U901 */ + .driver_info = (kernel_ulong_t)&net_intf3_blacklist }, { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x6613)}, /* Onda H600/ZTE MF330 */ { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x0023)}, /* ONYX 3G device */ { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x9000)}, /* SIMCom SIM5218 */ From 3e908446627523e106c1edc7cf6fe7706924d6d3 Mon Sep 17 00:00:00 2001 From: Lisa Du Date: Wed, 17 Feb 2016 09:32:52 +0800 Subject: [PATCH 076/418] drivers: android: correct the size of struct binder_uintptr_t for BC_DEAD_BINDER_DONE commit 7a64cd887fdb97f074c3fda03bee0bfb9faceac3 upstream. There's one point was missed in the patch commit da49889deb34 ("staging: binder: Support concurrent 32 bit and 64 bit processes."). When configure BINDER_IPC_32BIT, the size of binder_uintptr_t was 32bits, but size of void * is 64bit on 64bit system. Correct it here. Signed-off-by: Lisa Du Signed-off-by: Nicolas Boichat Fixes: da49889deb34 ("staging: binder: Support concurrent 32 bit and 64 bit processes.") Acked-by: Olof Johansson Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index a39e85f9efa9..7d00b7a015ea 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2074,7 +2074,7 @@ static int binder_thread_write(struct binder_proc *proc, if (get_user(cookie, (binder_uintptr_t __user *)ptr)) return -EFAULT; - ptr += sizeof(void *); + ptr += sizeof(cookie); list_for_each_entry(w, &proc->delivered_death, entry) { struct binder_ref_death *tmp_death = container_of(w, struct binder_ref_death, work); From 50d60403753a0149c82e5b1851e0cd0499ddff4e Mon Sep 17 00:00:00 2001 From: Michael Welling Date: Mon, 30 Nov 2015 09:02:39 -0600 Subject: [PATCH 077/418] spi: omap2-mcspi: Prevent duplicate gpio_request commit 2f538c017e1a8620d19553931199c6d6a6d31bb2 upstream. Occasionally the setup function will be called multiple times. Only request the gpio the first time otherwise -EBUSY will occur on subsequent calls to setup. Reported-by: Joseph Bell Signed-off-by: Michael Welling Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-omap2-mcspi.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/spi/spi-omap2-mcspi.c b/drivers/spi/spi-omap2-mcspi.c index 1f8903d356e5..ed8283e7397a 100644 --- a/drivers/spi/spi-omap2-mcspi.c +++ b/drivers/spi/spi-omap2-mcspi.c @@ -1024,6 +1024,16 @@ static int omap2_mcspi_setup(struct spi_device *spi) spi->controller_state = cs; /* Link this to context save list */ list_add_tail(&cs->node, &ctx->cs); + + if (gpio_is_valid(spi->cs_gpio)) { + ret = gpio_request(spi->cs_gpio, dev_name(&spi->dev)); + if (ret) { + dev_err(&spi->dev, "failed to request gpio\n"); + return ret; + } + gpio_direction_output(spi->cs_gpio, + !(spi->mode & SPI_CS_HIGH)); + } } if (!mcspi_dma->dma_rx || !mcspi_dma->dma_tx) { @@ -1032,15 +1042,6 @@ static int omap2_mcspi_setup(struct spi_device *spi) return ret; } - if (gpio_is_valid(spi->cs_gpio)) { - ret = gpio_request(spi->cs_gpio, dev_name(&spi->dev)); - if (ret) { - dev_err(&spi->dev, "failed to request gpio\n"); - return ret; - } - gpio_direction_output(spi->cs_gpio, !(spi->mode & SPI_CS_HIGH)); - } - ret = pm_runtime_get_sync(mcspi->dev); if (ret < 0) return ret; From 64fb3e29bf47e5db029b81fc99ac40f6cd2620ac Mon Sep 17 00:00:00 2001 From: Hariprasad S Date: Fri, 11 Dec 2015 13:59:17 +0530 Subject: [PATCH 078/418] iw_cxgb3: Fix incorrectly returning error on success commit 67f1aee6f45059fd6b0f5b0ecb2c97ad0451f6b3 upstream. The cxgb3_*_send() functions return NET_XMIT_ values, which are positive integers values. So don't treat positive return values as an error. Signed-off-by: Steve Wise Signed-off-by: Hariprasad Shenai Signed-off-by: Doug Ledford [a pox on developers and maintainers who do not cc: stable for bug fixes like this - gregkh] Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/cxgb3/iwch_cm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index cb78b1e9bcd9..f504ba73e5dc 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -149,7 +149,7 @@ static int iwch_l2t_send(struct t3cdev *tdev, struct sk_buff *skb, struct l2t_en error = l2t_send(tdev, skb, l2e); if (error < 0) kfree_skb(skb); - return error; + return error < 0 ? error : 0; } int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb) @@ -165,7 +165,7 @@ int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb) error = cxgb3_ofld_send(tdev, skb); if (error < 0) kfree_skb(skb); - return error; + return error < 0 ? error : 0; } static void release_tid(struct t3cdev *tdev, u32 hwtid, struct sk_buff *skb) From cc082a1cde269bb3b757162f2af113b134b876a1 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 23 Oct 2015 10:56:12 +0200 Subject: [PATCH 079/418] drm/i915: shut up gen8+ SDE irq dmesg noise commit 97e5ed1111dcc5300a0f59a55248cd243937a8ab upstream. We get tons of cases where the master interrupt handler apparently set a bit, with the SDEIIR disagreeing. No idea what's going on there, but it's consistent on gen8+, no one seems to care about it and it's making CI results flaky. Shut it up. No idea what's going on here, but we've had fun with PCH interrupts before: commit 44498aea293b37af1d463acd9658cdce1ecdf427 Author: Paulo Zanoni Date: Fri Feb 22 17:05:28 2013 -0300 drm/i915: also disable south interrupts when handling them Note that there's a regression report in Bugzilla, and other regression reports on the mailing lists keep croping up. But no ill effects have ever been reported. But for paranoia still keep the message at a debug level as a breadcrumb, just in case. This message was introduced in commit 38cc46d73ed99dd7002f1406002e52d7975d16cc Author: Oscar Mateo Date: Mon Jun 16 16:10:59 2014 +0100 drm/i915/bdw: Ack interrupts before handling them (GEN8) v2: Improve commit message a bit. Cc: Paulo Zanoni Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1445590572-23631-2-git-send-email-daniel.vetter@ffwll.ch Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92084 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=80896 Acked-by: Mika Kuoppala Signed-off-by: Daniel Vetter Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_irq.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 0d228f909dcb..0f42a2782afc 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2354,9 +2354,13 @@ static irqreturn_t gen8_irq_handler(int irq, void *arg) spt_irq_handler(dev, pch_iir); else cpt_irq_handler(dev, pch_iir); - } else - DRM_ERROR("The master control interrupt lied (SDE)!\n"); - + } else { + /* + * Like on previous PCH there seems to be something + * fishy going on with forwarding PCH interrupts. + */ + DRM_DEBUG_DRIVER("The master control interrupt lied (SDE)!\n"); + } } I915_WRITE_FW(GEN8_MASTER_IRQ, GEN8_MASTER_IRQ_CONTROL); From c88edc09363243dd84d6e8ac7a36773213b26ac8 Mon Sep 17 00:00:00 2001 From: Guozhonghua Date: Fri, 26 Feb 2016 15:19:40 -0800 Subject: [PATCH 080/418] ocfs2: unlock inode if deleting inode from orphan fails commit a4a8481ff68a8a324a878e281bc37f18665224f7 upstream. When doing append direct io cleanup, if deleting inode fails, it goes out without unlocking inode, which will cause the inode deadlock. This issue was introduced by commit cf1776a9e834 ("ocfs2: fix a tiny race when truncate dio orohaned entry"). Signed-off-by: Guozhonghua Signed-off-by: Joseph Qi Reviewed-by: Gang He Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/aops.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 7f604727f487..e6795c7c76a8 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -956,6 +956,7 @@ clean_orphan: tmp_ret = ocfs2_del_inode_from_orphan(osb, inode, di_bh, update_isize, end); if (tmp_ret < 0) { + ocfs2_inode_unlock(inode, 1); ret = tmp_ret; mlog_errno(ret); brelse(di_bh); From 915d02457e74344bcd99fe64b159de2f6074b2c6 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Fri, 26 Feb 2016 15:19:28 -0800 Subject: [PATCH 081/418] mm: thp: fix SMP race condition between THP page fault and MADV_DONTNEED commit ad33bb04b2a6cee6c1f99fabb15cddbf93ff0433 upstream. pmd_trans_unstable()/pmd_none_or_trans_huge_or_clear_bad() were introduced to locklessy (but atomically) detect when a pmd is a regular (stable) pmd or when the pmd is unstable and can infinitely transition from pmd_none() and pmd_trans_huge() from under us, while only holding the mmap_sem for reading (for writing not). While holding the mmap_sem only for reading, MADV_DONTNEED can run from under us and so before we can assume the pmd to be a regular stable pmd we need to compare it against pmd_none() and pmd_trans_huge() in an atomic way, with pmd_trans_unstable(). The old pmd_trans_huge() left a tiny window for a race. Useful applications are unlikely to notice the difference as doing MADV_DONTNEED concurrently with a page fault would lead to undefined behavior. [akpm@linux-foundation.org: tidy up comment grammar/layout] Signed-off-by: Andrea Arcangeli Reported-by: Kirill A. Shutemov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/memory.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index c387430f06c3..b80bf4746b67 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3399,8 +3399,18 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, if (unlikely(pmd_none(*pmd)) && unlikely(__pte_alloc(mm, vma, pmd, address))) return VM_FAULT_OOM; - /* if an huge pmd materialized from under us just retry later */ - if (unlikely(pmd_trans_huge(*pmd))) + /* + * If a huge pmd materialized under us just retry later. Use + * pmd_trans_unstable() instead of pmd_trans_huge() to ensure the pmd + * didn't become pmd_trans_huge under us and then back to pmd_none, as + * a result of MADV_DONTNEED running immediately after a huge pmd fault + * in a different thread of this mm, in turn leading to a misleading + * pmd_trans_huge() retval. All we have to ensure is that it is a + * regular pmd that we can walk with pte_offset_map() and we can do that + * through an atomic read in C, which is what pmd_trans_unstable() + * provides. + */ + if (unlikely(pmd_trans_unstable(pmd))) return 0; /* * A regular pmd is established and it can't morph into a huge pmd From 18b75e0bdc6c1413a44db3d37b6d1d82d02eb84b Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 26 Feb 2016 15:19:31 -0800 Subject: [PATCH 082/418] mm: numa: quickly fail allocations for NUMA balancing on full nodes commit 8479eba7781fa9ffb28268840de6facfc12c35a7 upstream. Commit 4167e9b2cf10 ("mm: remove GFP_THISNODE") removed the GFP_THISNODE flag combination due to confusing semantics. It noted that alloc_misplaced_dst_page() was one such user after changes made by commit e97ca8e5b864 ("mm: fix GFP_THISNODE callers and clarify"). Unfortunately when GFP_THISNODE was removed, users of alloc_misplaced_dst_page() started waking kswapd and entering direct reclaim because the wrong GFP flags are cleared. The consequence is that workloads that used to fit into memory now get reclaimed which is addressed by this patch. The problem can be demonstrated with "mutilate" that exercises memcached which is software dedicated to memory object caching. The configuration uses 80% of memory and is run 3 times for varying numbers of clients. The results on a 4-socket NUMA box are mutilate 4.4.0 4.4.0 vanilla numaswap-v1 Hmean 1 8394.71 ( 0.00%) 8395.32 ( 0.01%) Hmean 4 30024.62 ( 0.00%) 34513.54 ( 14.95%) Hmean 7 32821.08 ( 0.00%) 70542.96 (114.93%) Hmean 12 55229.67 ( 0.00%) 93866.34 ( 69.96%) Hmean 21 39438.96 ( 0.00%) 85749.21 (117.42%) Hmean 30 37796.10 ( 0.00%) 50231.49 ( 32.90%) Hmean 47 18070.91 ( 0.00%) 38530.13 (113.22%) The metric is queries/second with the more the better. The results are way outside of the noise and the reason for the improvement is obvious from some of the vmstats 4.4.0 4.4.0 vanillanumaswap-v1r1 Minor Faults 1929399272 2146148218 Major Faults 19746529 3567 Swap Ins 57307366 9913 Swap Outs 50623229 17094 Allocation stalls 35909 443 DMA allocs 0 0 DMA32 allocs 72976349 170567396 Normal allocs 5306640898 5310651252 Movable allocs 0 0 Direct pages scanned 404130893 799577 Kswapd pages scanned 160230174 0 Kswapd pages reclaimed 55928786 0 Direct pages reclaimed 1843936 41921 Page writes file 2391 0 Page writes anon 50623229 17094 The vanilla kernel is swapping like crazy with large amounts of direct reclaim and kswapd activity. The figures are aggregate but it's known that the bad activity is throughout the entire test. Note that simple streaming anon/file memory consumers also see this problem but it's not as obvious. In those cases, kswapd is awake when it should not be. As there are at least two reclaim-related bugs out there, it's worth spelling out the user-visible impact. This patch only addresses bugs related to excessive reclaim on NUMA hardware when the working set is larger than a NUMA node. There is a bug related to high kswapd CPU usage but the reports are against laptops and other UMA hardware and is not addressed by this patch. Signed-off-by: Mel Gorman Cc: Vlastimil Babka Cc: Johannes Weiner Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/migrate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/migrate.c b/mm/migrate.c index 7890d0bb5e23..6d17e0ab42d4 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1578,7 +1578,7 @@ static struct page *alloc_misplaced_dst_page(struct page *page, (GFP_HIGHUSER_MOVABLE | __GFP_THISNODE | __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN) & - ~(__GFP_IO | __GFP_FS), 0); + ~__GFP_RECLAIM, 0); return newpage; } From 827685a637e0074e0d61d84eac4a50481c7e1b61 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Jan 2016 14:07:25 +0100 Subject: [PATCH 083/418] genirq: Validate action before dereferencing it in handle_irq_event_percpu() commit 570540d50710ed192e98e2f7f74578c9486b6b05 upstream. commit 71f64340fc0e changed the handling of irq_desc->action from CPU 0 CPU 1 free_irq() lock(desc) lock(desc) handle_edge_irq() if (desc->action) { handle_irq_event() action = desc->action unlock(desc) desc->action = NULL handle_irq_event_percpu(desc, action) action->xxx to CPU 0 CPU 1 free_irq() lock(desc) lock(desc) handle_edge_irq() if (desc->action) { handle_irq_event() unlock(desc) desc->action = NULL handle_irq_event_percpu(desc, action) action = desc->action action->xxx So if free_irq manages to set the action to NULL between the unlock and before the readout, we happily dereference a null pointer. We could simply revert 71f64340fc0e, but we want to preserve the better code generation. A simple solution is to change the action loop from a do {} while to a while {} loop. This is safe because we either see a valid desc->action or NULL. If the action is about to be removed it is still valid as free_irq() is blocked on synchronize_irq(). CPU 0 CPU 1 free_irq() lock(desc) lock(desc) handle_edge_irq() handle_irq_event(desc) set(INPROGRESS) unlock(desc) handle_irq_event_percpu(desc) action = desc->action desc->action = NULL while (action) { action->xxx ... action = action->next; sychronize_irq() while(INPROGRESS); lock(desc) clr(INPROGRESS) free(action) That's basically the same mechanism as we have for shared interrupts. action->next can become NULL while handle_irq_event_percpu() runs. Either it sees the action or NULL. It does not matter, because action itself cannot go away before the interrupt in progress flag has been cleared. Fixes: commit 71f64340fc0e "genirq: Remove the second parameter from handle_irq_event_percpu()" Reported-by: zyjzyj2000@gmail.com Signed-off-by: Thomas Gleixner Cc: Huang Shijie Cc: Jiang Liu Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1601131224190.3575@nanos Signed-off-by: Greg Kroah-Hartman --- kernel/irq/handle.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index a302cf9a2126..57bff7857e87 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -138,7 +138,8 @@ irqreturn_t handle_irq_event_percpu(struct irq_desc *desc) unsigned int flags = 0, irq = desc->irq_data.irq; struct irqaction *action = desc->action; - do { + /* action might have become NULL since we dropped the lock */ + while (action) { irqreturn_t res; trace_irq_handler_entry(irq, action); @@ -173,7 +174,7 @@ irqreturn_t handle_irq_event_percpu(struct irq_desc *desc) retval |= res; action = action->next; - } while (action); + } add_interrupt_randomness(irq, flags); From 142ad71dd6de7368cc8cc5c52288b628062f391d Mon Sep 17 00:00:00 2001 From: Roman Volkov Date: Fri, 1 Jan 2016 16:24:41 +0300 Subject: [PATCH 084/418] clocksource/drivers/vt8500: Increase the minimum delta commit f9eccf24615672896dc13251410c3f2f33a14f95 upstream. The vt8500 clocksource driver declares itself as capable to handle the minimum delay of 4 cycles by passing the value into clockevents_config_and_register(). The vt8500_timer_set_next_event() requires the passed cycles value to be at least 16. The impact is that userspace hangs in nanosleep() calls with small delay intervals. This problem is reproducible in Linux 4.2 starting from: c6eb3f70d448 ('hrtimer: Get rid of hrtimer softirq') From Russell King, more detailed explanation: "It's a speciality of the StrongARM/PXA hardware. It takes a certain number of OSCR cycles for the value written to hit the compare registers. So, if a very small delta is written (eg, the compare register is written with a value of OSCR + 1), the OSCR will have incremented past this value before it hits the underlying hardware. The result is, that you end up waiting a very long time for the OSCR to wrap before the event fires. So, we introduce a check in set_next_event() to detect this and return -ETIME if the calculated delta is too small, which causes the generic clockevents code to retry after adding the min_delta specified in clockevents_config_and_register() to the current time value. min_delta must be sufficient that we don't re-trip the -ETIME check - if we do, we will return -ETIME, forward the next event time, try to set it, return -ETIME again, and basically lock the system up. So, min_delta must be larger than the check inside set_next_event(). A factor of two was chosen to ensure that this situation would never occur. The PXA code worked on PXA systems for years, and I'd suggest no one changes this mechanism without access to a wide range of PXA systems, otherwise they're risking breakage." Cc: Russell King Acked-by: Alexey Charkov Signed-off-by: Roman Volkov Signed-off-by: Daniel Lezcano Signed-off-by: Greg Kroah-Hartman --- drivers/clocksource/vt8500_timer.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/clocksource/vt8500_timer.c b/drivers/clocksource/vt8500_timer.c index a92e94b40b5b..dfc3bb410b00 100644 --- a/drivers/clocksource/vt8500_timer.c +++ b/drivers/clocksource/vt8500_timer.c @@ -50,6 +50,8 @@ #define msecs_to_loops(t) (loops_per_jiffy / 1000 * HZ * t) +#define MIN_OSCR_DELTA 16 + static void __iomem *regbase; static cycle_t vt8500_timer_read(struct clocksource *cs) @@ -80,7 +82,7 @@ static int vt8500_timer_set_next_event(unsigned long cycles, cpu_relax(); writel((unsigned long)alarm, regbase + TIMER_MATCH_VAL); - if ((signed)(alarm - clocksource.read(&clocksource)) <= 16) + if ((signed)(alarm - clocksource.read(&clocksource)) <= MIN_OSCR_DELTA) return -ETIME; writel(1, regbase + TIMER_IER_VAL); @@ -151,7 +153,7 @@ static void __init vt8500_timer_init(struct device_node *np) pr_err("%s: setup_irq failed for %s\n", __func__, clockevent.name); clockevents_config_and_register(&clockevent, VT8500_TIMER_HZ, - 4, 0xf0000000); + MIN_OSCR_DELTA * 2, 0xf0000000); } CLOCKSOURCE_OF_DECLARE(vt8500, "via,vt8500-timer", vt8500_timer_init); From b5e7257409d78623717550d0f18bcf30003b4b2c Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 23 Oct 2015 09:02:32 +0200 Subject: [PATCH 085/418] s390/kvm: remove dependency on struct save_area definition commit d9a3a09af54d01ab8b0c320580f4f95328d4a7ac upstream. Replace the offsets based on the struct area_area with the offset constants from asm-offsets.c based on the struct _lowcore. Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/asm-offsets.c | 1 + arch/s390/kvm/kvm-s390.c | 30 +++++++++++++++--------------- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 9cd248f637c7..dc6c9c604543 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -181,6 +181,7 @@ int main(void) OFFSET(__LC_PSW_SAVE_AREA, _lowcore, psw_save_area); OFFSET(__LC_PREFIX_SAVE_AREA, _lowcore, prefixreg_save_area); OFFSET(__LC_FP_CREG_SAVE_AREA, _lowcore, fpt_creg_save_area); + OFFSET(__LC_TOD_PROGREG_SAVE_AREA, _lowcore, tod_progreg_save_area); OFFSET(__LC_CPU_TIMER_SAVE_AREA, _lowcore, cpu_timer_save_area); OFFSET(__LC_CLOCK_COMP_SAVE_AREA, _lowcore, clock_comp_save_area); OFFSET(__LC_AREGS_SAVE_AREA, _lowcore, access_regs_save_area); diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 408138e716c4..db7cca5a6094 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2273,37 +2273,37 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa) u64 clkcomp; int rc; + px = kvm_s390_get_prefix(vcpu); if (gpa == KVM_S390_STORE_STATUS_NOADDR) { if (write_guest_abs(vcpu, 163, &archmode, 1)) return -EFAULT; - gpa = SAVE_AREA_BASE; + gpa = 0; } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) { if (write_guest_real(vcpu, 163, &archmode, 1)) return -EFAULT; - gpa = kvm_s390_real_to_abs(vcpu, SAVE_AREA_BASE); - } - rc = write_guest_abs(vcpu, gpa + offsetof(struct save_area, fp_regs), + gpa = px; + } else + gpa -= __LC_FPREGS_SAVE_AREA; + rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA, vcpu->arch.guest_fpregs.fprs, 128); - rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, gp_regs), + rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA, vcpu->run->s.regs.gprs, 128); - rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, psw), + rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA, &vcpu->arch.sie_block->gpsw, 16); - px = kvm_s390_get_prefix(vcpu); - rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, pref_reg), + rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA, &px, 4); - rc |= write_guest_abs(vcpu, - gpa + offsetof(struct save_area, fp_ctrl_reg), + rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA, &vcpu->arch.guest_fpregs.fpc, 4); - rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, tod_reg), + rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA, &vcpu->arch.sie_block->todpr, 4); - rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, timer), + rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA, &vcpu->arch.sie_block->cputm, 8); clkcomp = vcpu->arch.sie_block->ckc >> 8; - rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, clk_cmp), + rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA, &clkcomp, 8); - rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, acc_regs), + rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA, &vcpu->run->s.regs.acrs, 64); - rc |= write_guest_abs(vcpu, gpa + offsetof(struct save_area, ctrl_regs), + rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA, &vcpu->arch.sie_block->gcr, 128); return rc ? -EFAULT : 0; } From 3824f7874a7521961642287b4b93f14afbc9565a Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 14 Jan 2016 22:12:47 +0100 Subject: [PATCH 086/418] KVM: s390: fix memory overwrites when vx is disabled commit 9abc2a08a7d665b02bdde974fd6c44aae86e923e upstream. The kernel now always uses vector registers when available, however KVM has special logic if support is really enabled for a guest. If support is disabled, guest_fpregs.fregs will only contain memory for the fpu. The kernel, however, will store vector registers into that area, resulting in crazy memory overwrites. Simply extending that area is not enough, because the format of the registers also changes. We would have to do additional conversions, making the code even more complex. Therefore let's directly use one place for the vector/fpu registers + fpc (in kvm_run). We just have to convert the data properly when accessing it. This makes current code much easier. Please note that vector/fpu registers are now always stored to vcpu->run->s.regs.vrs. Although this data is visible to QEMU and used for migration, we only guarantee valid values to user space when KVM_SYNC_VRS is set. As that is only the case when we have vector register support, we are on the safe side. Fixes: b5510d9b68c3 ("s390/fpu: always enable the vector facility if it is available") Cc: stable@vger.kernel.org # v4.4 d9a3a09af54d s390/kvm: remove dependency on struct save_area definition Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger [adopt to d9a3a09af54d] Signed-off-by: Greg Kroah-Hartman --- arch/s390/include/asm/kvm_host.h | 1 - arch/s390/kvm/kvm-s390.c | 128 +++++++++++-------------------- 2 files changed, 43 insertions(+), 86 deletions(-) diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index efaac2c3bb77..e9a983f40a24 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -506,7 +506,6 @@ struct kvm_vcpu_arch { struct kvm_s390_sie_block *sie_block; unsigned int host_acrs[NUM_ACRS]; struct fpu host_fpregs; - struct fpu guest_fpregs; struct kvm_s390_local_interrupt local_int; struct hrtimer ckc_timer; struct kvm_s390_pgm_info pgm; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index db7cca5a6094..a08d0afd5ff6 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1202,7 +1202,6 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) if (vcpu->kvm->arch.use_cmma) kvm_s390_vcpu_unsetup_cmma(vcpu); - kfree(vcpu->arch.guest_fpregs.fprs); free_page((unsigned long)(vcpu->arch.sie_block)); kvm_vcpu_uninit(vcpu); @@ -1269,44 +1268,18 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) return 0; } -/* - * Backs up the current FP/VX register save area on a particular - * destination. Used to switch between different register save - * areas. - */ -static inline void save_fpu_to(struct fpu *dst) -{ - dst->fpc = current->thread.fpu.fpc; - dst->regs = current->thread.fpu.regs; -} - -/* - * Switches the FP/VX register save area from which to lazy - * restore register contents. - */ -static inline void load_fpu_from(struct fpu *from) -{ - current->thread.fpu.fpc = from->fpc; - current->thread.fpu.regs = from->regs; -} - void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { /* Save host register state */ save_fpu_regs(); - save_fpu_to(&vcpu->arch.host_fpregs); - - if (test_kvm_facility(vcpu->kvm, 129)) { - current->thread.fpu.fpc = vcpu->run->s.regs.fpc; - /* - * Use the register save area in the SIE-control block - * for register restore and save in kvm_arch_vcpu_put() - */ - current->thread.fpu.vxrs = - (__vector128 *)&vcpu->run->s.regs.vrs; - } else - load_fpu_from(&vcpu->arch.guest_fpregs); + vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc; + vcpu->arch.host_fpregs.regs = current->thread.fpu.regs; + /* Depending on MACHINE_HAS_VX, data stored to vrs either + * has vector register or floating point register format. + */ + current->thread.fpu.regs = vcpu->run->s.regs.vrs; + current->thread.fpu.fpc = vcpu->run->s.regs.fpc; if (test_fp_ctl(current->thread.fpu.fpc)) /* User space provided an invalid FPC, let's clear it */ current->thread.fpu.fpc = 0; @@ -1322,19 +1295,13 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); gmap_disable(vcpu->arch.gmap); + /* Save guest register state */ save_fpu_regs(); + vcpu->run->s.regs.fpc = current->thread.fpu.fpc; - if (test_kvm_facility(vcpu->kvm, 129)) - /* - * kvm_arch_vcpu_load() set up the register save area to - * the &vcpu->run->s.regs.vrs and, thus, the vector registers - * are already saved. Only the floating-point control must be - * copied. - */ - vcpu->run->s.regs.fpc = current->thread.fpu.fpc; - else - save_fpu_to(&vcpu->arch.guest_fpregs); - load_fpu_from(&vcpu->arch.host_fpregs); + /* Restore host register state */ + current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc; + current->thread.fpu.regs = vcpu->arch.host_fpregs.regs; save_access_regs(vcpu->run->s.regs.acrs); restore_access_regs(vcpu->arch.host_acrs); @@ -1352,8 +1319,9 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64)); vcpu->arch.sie_block->gcr[0] = 0xE0UL; vcpu->arch.sie_block->gcr[14] = 0xC2000000UL; - vcpu->arch.guest_fpregs.fpc = 0; - asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc)); + /* make sure the new fpc will be lazily loaded */ + save_fpu_regs(); + current->thread.fpu.fpc = 0; vcpu->arch.sie_block->gbea = 1; vcpu->arch.sie_block->pp = 0; vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; @@ -1502,29 +1470,14 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, vcpu->arch.local_int.wq = &vcpu->wq; vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags; - /* - * Allocate a save area for floating-point registers. If the vector - * extension is available, register contents are saved in the SIE - * control block. The allocated save area is still required in - * particular places, for example, in kvm_s390_vcpu_store_status(). - */ - vcpu->arch.guest_fpregs.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS, - GFP_KERNEL); - if (!vcpu->arch.guest_fpregs.fprs) { - rc = -ENOMEM; - goto out_free_sie_block; - } - rc = kvm_vcpu_init(vcpu, kvm, id); if (rc) - goto out_free_fprs; + goto out_free_sie_block; VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu, vcpu->arch.sie_block); trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block); return vcpu; -out_free_fprs: - kfree(vcpu->arch.guest_fpregs.fprs); out_free_sie_block: free_page((unsigned long)(vcpu->arch.sie_block)); out_free_cpu: @@ -1737,19 +1690,27 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { + /* make sure the new values will be lazily loaded */ + save_fpu_regs(); if (test_fp_ctl(fpu->fpc)) return -EINVAL; - memcpy(vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs)); - vcpu->arch.guest_fpregs.fpc = fpu->fpc; - save_fpu_regs(); - load_fpu_from(&vcpu->arch.guest_fpregs); + current->thread.fpu.fpc = fpu->fpc; + if (MACHINE_HAS_VX) + convert_fp_to_vx(current->thread.fpu.vxrs, (freg_t *)fpu->fprs); + else + memcpy(current->thread.fpu.fprs, &fpu->fprs, sizeof(fpu->fprs)); return 0; } int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { - memcpy(&fpu->fprs, vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs)); - fpu->fpc = vcpu->arch.guest_fpregs.fpc; + /* make sure we have the latest values */ + save_fpu_regs(); + if (MACHINE_HAS_VX) + convert_vx_to_fp((freg_t *)fpu->fprs, current->thread.fpu.vxrs); + else + memcpy(fpu->fprs, current->thread.fpu.fprs, sizeof(fpu->fprs)); + fpu->fpc = current->thread.fpu.fpc; return 0; } @@ -2269,6 +2230,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa) { unsigned char archmode = 1; + freg_t fprs[NUM_FPRS]; unsigned int px; u64 clkcomp; int rc; @@ -2284,8 +2246,16 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa) gpa = px; } else gpa -= __LC_FPREGS_SAVE_AREA; - rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA, - vcpu->arch.guest_fpregs.fprs, 128); + + /* manually convert vector registers if necessary */ + if (MACHINE_HAS_VX) { + convert_vx_to_fp(fprs, current->thread.fpu.vxrs); + rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA, + fprs, 128); + } else { + rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA, + vcpu->run->s.regs.vrs, 128); + } rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA, vcpu->run->s.regs.gprs, 128); rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA, @@ -2293,7 +2263,7 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa) rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA, &px, 4); rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA, - &vcpu->arch.guest_fpregs.fpc, 4); + &vcpu->run->s.regs.fpc, 4); rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA, &vcpu->arch.sie_block->todpr, 4); rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA, @@ -2316,19 +2286,7 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) * it into the save area */ save_fpu_regs(); - if (test_kvm_facility(vcpu->kvm, 129)) { - /* - * If the vector extension is available, the vector registers - * which overlaps with floating-point registers are saved in - * the SIE-control block. Hence, extract the floating-point - * registers and the FPC value and store them in the - * guest_fpregs structure. - */ - vcpu->arch.guest_fpregs.fpc = current->thread.fpu.fpc; - convert_vx_to_fp(vcpu->arch.guest_fpregs.fprs, - current->thread.fpu.vxrs); - } else - save_fpu_to(&vcpu->arch.guest_fpregs); + vcpu->run->s.regs.fpc = current->thread.fpu.fpc; save_access_regs(vcpu->run->s.regs.acrs); return kvm_s390_store_status_unloaded(vcpu, addr); From 98d41a063d6584acc62ba1429ae074eac5d9d127 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Wed, 7 Oct 2015 17:23:23 +0800 Subject: [PATCH 087/418] Btrfs: add missing brelse when superblock checksum fails commit b2acdddfad13c38a1e8b927d83c3cf321f63601a upstream. Looks like oversight, call brelse() when checksum fails. Further down the code, in the non error path, we do call brelse() and so we don't see brelse() in the goto error paths. Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/disk-io.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0ddca6734494..0bbd69ed9961 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2667,6 +2667,7 @@ int open_ctree(struct super_block *sb, if (btrfs_check_super_csum(bh->b_data)) { printk(KERN_ERR "BTRFS: superblock checksum mismatch\n"); err = -EINVAL; + brelse(bh); goto fail_alloc; } From 1106639221ef3eb80f3436785948c36e63d45b62 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 22 Oct 2015 15:05:09 -0400 Subject: [PATCH 088/418] Btrfs: igrab inode in writepage commit be7bd730841e69fe8f70120098596f648cd1f3ff upstream. We hit this panic on a few of our boxes this week where we have an ordered_extent with an NULL inode. We do an igrab() of the inode in writepages, but weren't doing it in writepage which can be called directly from the VM on dirty pages. If the inode has been unlinked then we could have I_FREEING set which means igrab() would return NULL and we get this panic. Fix this by trying to igrab in btrfs_writepage, and if it returns NULL then just redirty the page and return AOP_WRITEPAGE_ACTIVATE; so the VM knows it wasn't successful. Thanks, Signed-off-by: Josef Bacik Reviewed-by: Liu Bo Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 54b5f0de623b..bbf081de2ab1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -8548,15 +8548,28 @@ int btrfs_readpage(struct file *file, struct page *page) static int btrfs_writepage(struct page *page, struct writeback_control *wbc) { struct extent_io_tree *tree; - + struct inode *inode = page->mapping->host; + int ret; if (current->flags & PF_MEMALLOC) { redirty_page_for_writepage(wbc, page); unlock_page(page); return 0; } + + /* + * If we are under memory pressure we will call this directly from the + * VM, we need to make sure we have the inode referenced for the ordered + * extent. If not just return like we didn't do anything. + */ + if (!igrab(inode)) { + redirty_page_for_writepage(wbc, page); + return AOP_WRITEPAGE_ACTIVATE; + } tree = &BTRFS_I(page->mapping->host)->io_tree; - return extent_write_full_page(tree, page, btrfs_get_extent, wbc); + ret = extent_write_full_page(tree, page, btrfs_get_extent, wbc); + btrfs_add_delayed_iput(inode); + return ret; } static int btrfs_writepages(struct address_space *mapping, From ee695402b83a6c2f03a94aa4c7510adf1971e080 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Sat, 10 Oct 2015 17:59:53 +0200 Subject: [PATCH 089/418] btrfs: statfs: report zero available if metadata are exhausted commit ca8a51b3a979d57b082b14eda38602b7f52d81d1 upstream. There is one ENOSPC case that's very confusing. There's Available greater than zero but no file operation succeds (besides removing files). This happens when the metadata are exhausted and there's no possibility to allocate another chunk. In this scenario it's normal that there's still some space in the data chunk and the calculation in df reflects that in the Avail value. To at least give some clue about the ENOSPC situation, let statfs report zero value in Avail, even if there's still data space available. Current: /dev/sdb1 4.0G 3.3G 719M 83% /mnt/test New: /dev/sdb1 4.0G 3.3G 0 100% /mnt/test We calculate the remaining metadata space minus global reserve. If this is (supposedly) smaller than zero, there's no space. But this does not hold in practice, the exhausted state happens where's still some positive delta. So we apply some guesswork and compare the delta to a 4M threshold. (Practically observed delta was 2M.) We probably cannot calculate the exact threshold value because this depends on the internal reservations requested by various operations, so some operations that consume a few metadata will succeed even if the Avail is zero. But this is better than the other way around. Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/super.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 24154e422945..fe609b81dd1b 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1956,6 +1956,8 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes) * there are other factors that may change the result (like a new metadata * chunk). * + * If metadata is exhausted, f_bavail will be 0. + * * FIXME: not accurate for mixed block groups, total and free/used are ok, * available appears slightly larger. */ @@ -1967,11 +1969,13 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) struct btrfs_space_info *found; u64 total_used = 0; u64 total_free_data = 0; + u64 total_free_meta = 0; int bits = dentry->d_sb->s_blocksize_bits; __be32 *fsid = (__be32 *)fs_info->fsid; unsigned factor = 1; struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; int ret; + u64 thresh = 0; /* * holding chunk_muext to avoid allocating new chunks, holding @@ -1997,6 +2001,8 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) } } } + if (found->flags & BTRFS_BLOCK_GROUP_METADATA) + total_free_meta += found->disk_total - found->disk_used; total_used += found->disk_used; } @@ -2019,6 +2025,24 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_bavail += div_u64(total_free_data, factor); buf->f_bavail = buf->f_bavail >> bits; + /* + * We calculate the remaining metadata space minus global reserve. If + * this is (supposedly) smaller than zero, there's no space. But this + * does not hold in practice, the exhausted state happens where's still + * some positive delta. So we apply some guesswork and compare the + * delta to a 4M threshold. (Practically observed delta was ~2M.) + * + * We probably cannot calculate the exact threshold value because this + * depends on the internal reservations requested by various + * operations, so some operations that consume a few metadata will + * succeed even if the Avail is zero. But this is better than the other + * way around. + */ + thresh = 4 * 1024 * 1024; + + if (total_free_meta - thresh < block_rsv->size) + buf->f_bavail = 0; + buf->f_type = BTRFS_SUPER_MAGIC; buf->f_bsize = dentry->d_sb->s_blocksize; buf->f_namelen = BTRFS_NAME_LEN; From d70554d661868f8fb87a5eda236a69b1e9b0e615 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 31 Dec 2015 18:07:59 +0000 Subject: [PATCH 090/418] Btrfs: send, don't BUG_ON() when an empty symlink is found commit a879719b8c90e15c9e7fa7266d5e3c0ca962f9df upstream. When a symlink is successfully created it always has an inline extent containing the source path. However if an error happens when creating the symlink, we can leave in the subvolume's tree a symlink inode without any such inline extent item - this happens if after btrfs_symlink() calls btrfs_end_transaction() and before it calls the inode eviction handler (through the final iput() call), the transaction gets committed and a crash happens before the eviction handler gets called, or if a snapshot of the subvolume is made before the eviction handler gets called. Sadly we can't just avoid this by making btrfs_symlink() call btrfs_end_transaction() after it calls the eviction handler, because the later can commit the current transaction before it removes any items from the subvolume tree (if it encounters ENOSPC errors while reserving space for removing all the items). So make send fail more gracefully, with an -EIO error, and print a message to dmesg/syslog informing that there's an empty symlink inode, so that the user can delete the empty symlink or do something else about it. Reported-by: Stephen R. van den Berg Signed-off-by: Filipe Manana Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/send.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 355a458cba1a..63a6152be04b 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -1469,7 +1469,21 @@ static int read_symlink(struct btrfs_root *root, ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto out; - BUG_ON(ret); + if (ret) { + /* + * An empty symlink inode. Can happen in rare error paths when + * creating a symlink (transaction committed before the inode + * eviction handler removed the symlink inode items and a crash + * happened in between or the subvol was snapshoted in between). + * Print an informative message to dmesg/syslog so that the user + * can delete the symlink. + */ + btrfs_err(root->fs_info, + "Found empty symlink inode %llu at root %llu", + ino, root->root_key.objectid); + ret = -EIO; + goto out; + } ei = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_file_extent_item); From c6362a1e3e428e101c0fc089b496f30e06b7e3c5 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 31 Dec 2015 18:16:29 +0000 Subject: [PATCH 091/418] Btrfs: fix number of transaction units required to create symlink commit 9269d12b2d57d9e3d13036bb750762d1110d425c upstream. We weren't accounting for the insertion of an inline extent item for the symlink inode nor that we need to update the parent inode item (through the call to btrfs_add_nondir()). So fix this by including two more transaction units. Signed-off-by: Filipe Manana Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index bbf081de2ab1..a93e36343688 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9663,9 +9663,11 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, /* * 2 items for inode item and ref * 2 items for dir items + * 1 item for updating parent inode item + * 1 item for the inline extent item * 1 item for xattr if selinux is on */ - trans = btrfs_start_transaction(root, 5); + trans = btrfs_start_transaction(root, 7); if (IS_ERR(trans)) return PTR_ERR(trans); From e6edd99cfa44512924be856cdf68c974d52629ca Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 5 Jan 2016 16:24:05 +0000 Subject: [PATCH 092/418] Btrfs: fix transaction handle leak on failure to create hard link commit 271dba4521aed0c37c063548f876b49f5cd64b2e upstream. If we failed to create a hard link we were not always releasing the the transaction handle we got before, resulting in a memory leak and preventing any other tasks from being able to commit the current transaction. Fix this by always releasing our transaction handle. Signed-off-by: Filipe Manana Reviewed-by: Liu Bo Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a93e36343688..52fc1b5e9f03 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6493,7 +6493,7 @@ out_unlock_inode: static int btrfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { - struct btrfs_trans_handle *trans; + struct btrfs_trans_handle *trans = NULL; struct btrfs_root *root = BTRFS_I(dir)->root; struct inode *inode = d_inode(old_dentry); u64 index; @@ -6519,6 +6519,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, trans = btrfs_start_transaction(root, 5); if (IS_ERR(trans)) { err = PTR_ERR(trans); + trans = NULL; goto fail; } @@ -6552,9 +6553,10 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, btrfs_log_new_name(trans, inode, NULL, parent); } - btrfs_end_transaction(trans, root); btrfs_balance_delayed_items(root); fail: + if (trans) + btrfs_end_transaction(trans, root); if (drop_inode) { inode_dec_link_count(inode); iput(inode); From 8102f96ea48daf86a5f55c07699e926d8f9dee1f Mon Sep 17 00:00:00 2001 From: Chandan Rajendra Date: Thu, 7 Jan 2016 18:56:59 +0530 Subject: [PATCH 093/418] Btrfs: Initialize btrfs_root->highest_objectid when loading tree root and subvolume roots commit f32e48e925964c4f8ab917850788a87e1cef3bad upstream. The following call trace is seen when btrfs/031 test is executed in a loop, [ 158.661848] ------------[ cut here ]------------ [ 158.662634] WARNING: CPU: 2 PID: 890 at /home/chandan/repos/linux/fs/btrfs/ioctl.c:558 create_subvol+0x3d1/0x6ea() [ 158.664102] BTRFS: Transaction aborted (error -2) [ 158.664774] Modules linked in: [ 158.665266] CPU: 2 PID: 890 Comm: btrfs Not tainted 4.4.0-rc6-g511711a #2 [ 158.666251] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 [ 158.667392] ffffffff81c0a6b0 ffff8806c7c4f8e8 ffffffff81431fc8 ffff8806c7c4f930 [ 158.668515] ffff8806c7c4f920 ffffffff81051aa1 ffff880c85aff000 ffff8800bb44d000 [ 158.669647] ffff8808863b5c98 0000000000000000 00000000fffffffe ffff8806c7c4f980 [ 158.670769] Call Trace: [ 158.671153] [] dump_stack+0x44/0x5c [ 158.671884] [] warn_slowpath_common+0x81/0xc0 [ 158.672769] [] warn_slowpath_fmt+0x47/0x50 [ 158.673620] [] create_subvol+0x3d1/0x6ea [ 158.674440] [] btrfs_mksubvol.isra.30+0x369/0x520 [ 158.675376] [] ? percpu_down_read+0x1a/0x50 [ 158.676235] [] btrfs_ioctl_snap_create_transid+0x101/0x180 [ 158.677268] [] btrfs_ioctl_snap_create+0x52/0x70 [ 158.678183] [] btrfs_ioctl+0x474/0x2f90 [ 158.678975] [] ? vma_merge+0xee/0x300 [ 158.679751] [] ? alloc_pages_vma+0x91/0x170 [ 158.680599] [] ? lru_cache_add_active_or_unevictable+0x22/0x70 [ 158.681686] [] ? selinux_file_ioctl+0xff/0x1d0 [ 158.682581] [] do_vfs_ioctl+0x2c1/0x490 [ 158.683399] [] ? security_file_ioctl+0x3e/0x60 [ 158.684297] [] SyS_ioctl+0x74/0x80 [ 158.685051] [] entry_SYSCALL_64_fastpath+0x12/0x6a [ 158.685958] ---[ end trace 4b63312de5a2cb76 ]--- [ 158.686647] BTRFS: error (device loop0) in create_subvol:558: errno=-2 No such entry [ 158.709508] BTRFS info (device loop0): forced readonly [ 158.737113] BTRFS info (device loop0): disk space caching is enabled [ 158.738096] BTRFS error (device loop0): Remounting read-write after error is not allowed [ 158.851303] BTRFS error (device loop0): cleaner transaction attach returned -30 This occurs because, Mount filesystem Create subvol with ID 257 Unmount filesystem Mount filesystem Delete subvol with ID 257 btrfs_drop_snapshot() Add root corresponding to subvol 257 into btrfs_transaction->dropped_roots list Create new subvol (i.e. create_subvol()) 257 is returned as the next free objectid btrfs_read_fs_root_no_name() Finds the btrfs_root instance corresponding to the old subvol with ID 257 in btrfs_fs_info->fs_roots_radix. Returns error since btrfs_root_item->refs has the value of 0. To fix the issue the commit initializes tree root's and subvolume root's highest_objectid when loading the roots from disk. Signed-off-by: Chandan Rajendra Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/disk-io.c | 27 +++++++++++++++++++++++++++ fs/btrfs/inode-map.c | 9 +-------- fs/btrfs/inode-map.h | 1 + fs/btrfs/ioctl.c | 4 ++++ 4 files changed, 33 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0bbd69ed9961..4958360a44f7 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1582,8 +1582,23 @@ int btrfs_init_fs_root(struct btrfs_root *root) ret = get_anon_bdev(&root->anon_dev); if (ret) goto free_writers; + + mutex_lock(&root->objectid_mutex); + ret = btrfs_find_highest_objectid(root, + &root->highest_objectid); + if (ret) { + mutex_unlock(&root->objectid_mutex); + goto free_root_dev; + } + + ASSERT(root->highest_objectid <= BTRFS_LAST_FREE_OBJECTID); + + mutex_unlock(&root->objectid_mutex); + return 0; +free_root_dev: + free_anon_bdev(root->anon_dev); free_writers: btrfs_free_subvolume_writers(root->subv_writers); fail: @@ -2900,6 +2915,18 @@ retry_root_backup: tree_root->commit_root = btrfs_root_node(tree_root); btrfs_set_root_refs(&tree_root->root_item, 1); + mutex_lock(&tree_root->objectid_mutex); + ret = btrfs_find_highest_objectid(tree_root, + &tree_root->highest_objectid); + if (ret) { + mutex_unlock(&tree_root->objectid_mutex); + goto recovery_tree_root; + } + + ASSERT(tree_root->highest_objectid <= BTRFS_LAST_FREE_OBJECTID); + + mutex_unlock(&tree_root->objectid_mutex); + ret = btrfs_read_roots(fs_info, tree_root); if (ret) goto recovery_tree_root; diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 767a6056ac45..07573dc1614a 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -515,7 +515,7 @@ out: return ret; } -static int btrfs_find_highest_objectid(struct btrfs_root *root, u64 *objectid) +int btrfs_find_highest_objectid(struct btrfs_root *root, u64 *objectid) { struct btrfs_path *path; int ret; @@ -555,13 +555,6 @@ int btrfs_find_free_objectid(struct btrfs_root *root, u64 *objectid) int ret; mutex_lock(&root->objectid_mutex); - if (unlikely(root->highest_objectid < BTRFS_FIRST_FREE_OBJECTID)) { - ret = btrfs_find_highest_objectid(root, - &root->highest_objectid); - if (ret) - goto out; - } - if (unlikely(root->highest_objectid >= BTRFS_LAST_FREE_OBJECTID)) { ret = -ENOSPC; goto out; diff --git a/fs/btrfs/inode-map.h b/fs/btrfs/inode-map.h index ddb347bfee23..c8e864b2d530 100644 --- a/fs/btrfs/inode-map.h +++ b/fs/btrfs/inode-map.h @@ -9,5 +9,6 @@ int btrfs_save_ino_cache(struct btrfs_root *root, struct btrfs_trans_handle *trans); int btrfs_find_free_objectid(struct btrfs_root *root, u64 *objectid); +int btrfs_find_highest_objectid(struct btrfs_root *root, u64 *objectid); #endif diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 08fd3f0f34fd..f07d01bc4875 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -568,6 +568,10 @@ static noinline int create_subvol(struct inode *dir, goto fail; } + mutex_lock(&new_root->objectid_mutex); + new_root->highest_objectid = new_dirid; + mutex_unlock(&new_root->objectid_mutex); + /* * insert the directory item */ From e59ba555b270e1ab41efb6d632642dd3e5443303 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 15 Jan 2016 14:37:15 +0100 Subject: [PATCH 094/418] btrfs: initialize the seq counter in struct btrfs_device commit 546bed631203344611f42b2af1d224d2eedb4e6b upstream. I managed to trigger this: | INFO: trying to register non-static key. | the code is fine but needs lockdep annotation. | turning off the locking correctness validator. | CPU: 1 PID: 781 Comm: systemd-gpt-aut Not tainted 4.4.0-rt2+ #14 | Hardware name: ARM-Versatile Express | [<80307cec>] (dump_stack) | [<80070e98>] (__lock_acquire) | [<8007184c>] (lock_acquire) | [<80287800>] (btrfs_ioctl) | [<8012a8d4>] (do_vfs_ioctl) | [<8012ac14>] (SyS_ioctl) so I think that btrfs_device_data_ordered_init() is not invoked behind a macro somewhere. Fixes: 7cc8e58d53cd ("Btrfs: fix unprotected device's variants on 32bits machine") Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/volumes.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 9e084477d320..9c62a6f9757a 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -232,6 +232,7 @@ static struct btrfs_device *__alloc_device(void) spin_lock_init(&dev->reada_lock); atomic_set(&dev->reada_in_flight, 0); atomic_set(&dev->dev_stats_ccnt, 0); + btrfs_device_data_ordered_init(dev); INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM); INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM); From ee17e6e3ec5cf4b3cf23c3925e6969f520d4e10a Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 1 Jan 2016 13:39:22 +0100 Subject: [PATCH 095/418] s390: fix normalization bug in exception table sorting commit bcb7825a77f41c7dd91da6f7ac10b928156a322e upstream. The normalization pass in the sorting routine of the relative exception table serves two purposes: - it ensures that the address fields of the exception table entries are fully ordered, so that no ambiguities arise between entries with identical instruction offsets (i.e., when two instructions that are exactly 8 bytes apart each have an exception table entry associated with them) - it ensures that the offsets of both the instruction and the fixup fields of each entry are relative to their final location after sorting. Commit eb608fb366de ("s390/exceptions: switch to relative exception table entries") ported the relative exception table format from x86, but modified the sorting routine to only normalize the instruction offset field and not the fixup offset field. The result is that the fixup offset of each entry will be relative to the original location of the entry before sorting, likely leading to crashes when those entries are dereferenced. Fixes: eb608fb366de ("s390/exceptions: switch to relative exception table entries") Signed-off-by: Ard Biesheuvel Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/mm/extable.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c index 4d1ee88864e8..18c8b819b0aa 100644 --- a/arch/s390/mm/extable.c +++ b/arch/s390/mm/extable.c @@ -52,12 +52,16 @@ void sort_extable(struct exception_table_entry *start, int i; /* Normalize entries to being relative to the start of the section */ - for (p = start, i = 0; p < finish; p++, i += 8) + for (p = start, i = 0; p < finish; p++, i += 8) { p->insn += i; + p->fixup += i + 4; + } sort(start, finish - start, sizeof(*start), cmp_ex, NULL); /* Denormalize all entries */ - for (p = start, i = 0; p < finish; p++, i += 8) + for (p = start, i = 0; p < finish; p++, i += 8) { p->insn -= i; + p->fixup -= i + 4; + } } #ifdef CONFIG_MODULES From ecf5ebf51d9f006ca9c6a93f38847af8d8b0818c Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Tue, 15 Dec 2015 10:16:43 +0100 Subject: [PATCH 096/418] s390/dasd: prevent incorrect length error under z/VM after PAV changes commit 020bf042e5b397479c1174081b935d0ff15d1a64 upstream. The channel checks the specified length and the provided amount of data for CCWs and provides an incorrect length error if the size does not match. Under z/VM with simulation activated the length may get changed. Having the suppress length indication bit set is stated as good CCW coding practice and avoids errors under z/VM. Signed-off-by: Stefan Haberland Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- drivers/s390/block/dasd_alias.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/block/dasd_alias.c b/drivers/s390/block/dasd_alias.c index 184b1dbeb554..4ff914c0f13c 100644 --- a/drivers/s390/block/dasd_alias.c +++ b/drivers/s390/block/dasd_alias.c @@ -723,7 +723,7 @@ static int reset_summary_unit_check(struct alias_lcu *lcu, ASCEBC((char *) &cqr->magic, 4); ccw = cqr->cpaddr; ccw->cmd_code = DASD_ECKD_CCW_RSCK; - ccw->flags = 0 ; + ccw->flags = CCW_FLAG_SLI; ccw->count = 16; ccw->cda = (__u32)(addr_t) cqr->data; ((char *)cqr->data)[0] = reason; From 2e80d52bc8fb39da0208d73bb2a2ef7f10b7db2e Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Tue, 15 Dec 2015 10:45:05 +0100 Subject: [PATCH 097/418] s390/dasd: fix refcount for PAV reassignment commit 9d862ababb609439c5d6987f6d3ddd09e703aa0b upstream. Add refcount to the DASD device when a summary unit check worker is scheduled. This prevents that the device is set offline with worker in place. Signed-off-by: Stefan Haberland Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- drivers/s390/block/dasd_alias.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/s390/block/dasd_alias.c b/drivers/s390/block/dasd_alias.c index 4ff914c0f13c..286782c60da4 100644 --- a/drivers/s390/block/dasd_alias.c +++ b/drivers/s390/block/dasd_alias.c @@ -264,8 +264,10 @@ void dasd_alias_disconnect_device_from_lcu(struct dasd_device *device) spin_unlock_irqrestore(&lcu->lock, flags); cancel_work_sync(&lcu->suc_data.worker); spin_lock_irqsave(&lcu->lock, flags); - if (device == lcu->suc_data.device) + if (device == lcu->suc_data.device) { + dasd_put_device(device); lcu->suc_data.device = NULL; + } } was_pending = 0; if (device == lcu->ruac_data.device) { @@ -273,8 +275,10 @@ void dasd_alias_disconnect_device_from_lcu(struct dasd_device *device) was_pending = 1; cancel_delayed_work_sync(&lcu->ruac_data.dwork); spin_lock_irqsave(&lcu->lock, flags); - if (device == lcu->ruac_data.device) + if (device == lcu->ruac_data.device) { + dasd_put_device(device); lcu->ruac_data.device = NULL; + } } private->lcu = NULL; spin_unlock_irqrestore(&lcu->lock, flags); @@ -549,8 +553,10 @@ static void lcu_update_work(struct work_struct *work) if ((rc && (rc != -EOPNOTSUPP)) || (lcu->flags & NEED_UAC_UPDATE)) { DBF_DEV_EVENT(DBF_WARNING, device, "could not update" " alias data in lcu (rc = %d), retry later", rc); - schedule_delayed_work(&lcu->ruac_data.dwork, 30*HZ); + if (!schedule_delayed_work(&lcu->ruac_data.dwork, 30*HZ)) + dasd_put_device(device); } else { + dasd_put_device(device); lcu->ruac_data.device = NULL; lcu->flags &= ~UPDATE_PENDING; } @@ -593,8 +599,10 @@ static int _schedule_lcu_update(struct alias_lcu *lcu, */ if (!usedev) return -EINVAL; + dasd_get_device(usedev); lcu->ruac_data.device = usedev; - schedule_delayed_work(&lcu->ruac_data.dwork, 0); + if (!schedule_delayed_work(&lcu->ruac_data.dwork, 0)) + dasd_put_device(usedev); return 0; } @@ -930,6 +938,7 @@ static void summary_unit_check_handling_work(struct work_struct *work) /* 3. read new alias configuration */ _schedule_lcu_update(lcu, device); lcu->suc_data.device = NULL; + dasd_put_device(device); spin_unlock_irqrestore(&lcu->lock, flags); } @@ -989,6 +998,8 @@ void dasd_alias_handle_summary_unit_check(struct dasd_device *device, } lcu->suc_data.reason = reason; lcu->suc_data.device = device; + dasd_get_device(device); spin_unlock(&lcu->lock); - schedule_work(&lcu->suc_data.worker); + if (!schedule_work(&lcu->suc_data.worker)) + dasd_put_device(device); }; From a90ad5c546fa28f28f5870d884603a7822e9c82a Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Fri, 12 Feb 2016 14:50:52 +0100 Subject: [PATCH 098/418] s390/dasd: fix performance drop commit 12d319b920fa673a4d5e7c1785c5dc82dcd15257 upstream. Commit ca369d51b ("sd: Fix device-imposed transfer length limits") introduced a new queue limit max_dev_sectors which limits the maximum sectors for requests. The default value leads to small dasd requests and therefor to a performance drop. Set the max_dev_sectors value to the same value as the max_hw_sectors to use the maximum available request size for DASD devices. Signed-off-by: Stefan Haberland Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- drivers/s390/block/dasd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index a263c10359e1..4abfbdb285ec 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -3031,6 +3031,7 @@ static void dasd_setup_queue(struct dasd_block *block) max = block->base->discipline->max_blocks << block->s2b_shift; } queue_flag_set_unlocked(QUEUE_FLAG_NONROT, block->request_queue); + block->request_queue->limits.max_dev_sectors = max; blk_queue_logical_block_size(block->request_queue, block->bp_block); blk_queue_max_hw_sectors(block->request_queue, max); From d70325085f56a5aa544afe5a4765e7ec5c72ccc2 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 19 Feb 2016 14:44:14 +0100 Subject: [PATCH 099/418] s390/compat: correct restore of high gprs on signal return commit 342300cc9cd3428bc6bfe5809bfcc1b9a0f06702 upstream. git commit 8070361799ae1e3f4ef347bd10f0a508ac10acfb "s390: add support for vector extension" broke 31-bit compat processes in regard to signal handling. The restore_sigregs_ext32() function is used to restore the additional elements from the user space signal frame. Among the additional elements are the upper registers halves for 64-bit register support for 31-bit processes. The copy_from_user that is used to retrieve the high-gprs array from the user stack uses an incorrect length, 8 bytes instead of 64 bytes. This causes incorrect upper register halves to get loaded. Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/compat_signal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index 66c94417c0ba..4af60374eba0 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -271,7 +271,7 @@ static int restore_sigregs_ext32(struct pt_regs *regs, /* Restore high gprs from signal stack */ if (__copy_from_user(&gprs_high, &sregs_ext->gprs_high, - sizeof(&sregs_ext->gprs_high))) + sizeof(sregs_ext->gprs_high))) return -EFAULT; for (i = 0; i < NUM_GPRS; i++) *(__u32 *)®s->gprs[i] = gprs_high[i]; From caa2a2b56b7313b0f5c8373facc44768d3055f3c Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 19 Feb 2016 15:29:05 +0100 Subject: [PATCH 100/418] s390/fpu: signals vs. floating point control register commit 1b17cb796f5d40ffa239c6926385abd83a77a49b upstream. git commit 904818e2f229f3d94ec95f6932a6358c81e73d78 "s390/kernel: introduce fpu-internal.h with fpu helper functions" introduced the fpregs_store / fp_regs_load helper. These function fail to save and restore the floating pointer control registers. The effect is that the FPC is not correctly handled on signal delivery and signal return. Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/include/asm/fpu/internal.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/s390/include/asm/fpu/internal.h b/arch/s390/include/asm/fpu/internal.h index 2559b16da525..17d9dcd29d45 100644 --- a/arch/s390/include/asm/fpu/internal.h +++ b/arch/s390/include/asm/fpu/internal.h @@ -48,6 +48,7 @@ static inline void convert_fp_to_vx(__vector128 *vxrs, freg_t *fprs) static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu) { fpregs->pad = 0; + fpregs->fpc = fpu->fpc; if (MACHINE_HAS_VX) convert_vx_to_fp((freg_t *)&fpregs->fprs, fpu->vxrs); else @@ -57,6 +58,7 @@ static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu) static inline void fpregs_load(_s390_fp_regs *fpregs, struct fpu *fpu) { + fpu->fpc = fpregs->fpc; if (MACHINE_HAS_VX) convert_fp_to_vx(fpu->vxrs, (freg_t *)&fpregs->fprs); else From 1f39f82654a34d136bbdb984f0e092946d60625f Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Fri, 18 Dec 2015 21:28:53 +0100 Subject: [PATCH 101/418] uml: flush stdout before forking commit 0754fb298f2f2719f0393491d010d46cfb25d043 upstream. I was seeing some really weird behaviour where piping UML's output somewhere would cause output to get duplicated: $ ./vmlinux | head -n 40 Checking that ptrace can change system call numbers...Core dump limits : soft - 0 hard - NONE OK Checking syscall emulation patch for ptrace...Core dump limits : soft - 0 hard - NONE OK Checking advanced syscall emulation patch for ptrace...Core dump limits : soft - 0 hard - NONE OK Core dump limits : soft - 0 hard - NONE This is because these tests do a fork() which duplicates the non-empty stdout buffer, then glibc flushes the duplicated buffer as each child exits. A simple workaround is to flush before forking. Signed-off-by: Vegard Nossum Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- arch/um/os-Linux/start_up.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c index 47f1ff056a54..22a358ef1b0c 100644 --- a/arch/um/os-Linux/start_up.c +++ b/arch/um/os-Linux/start_up.c @@ -94,6 +94,8 @@ static int start_ptraced_child(void) { int pid, n, status; + fflush(stdout); + pid = fork(); if (pid == 0) ptrace_child(); From cfcae394f6889cfe502f23433278a303948bdd39 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Wed, 16 Dec 2015 21:59:56 +0100 Subject: [PATCH 102/418] uml: fix hostfs mknod() commit 9f2dfda2f2f1c6181c3732c16b85c59ab2d195e0 upstream. An inverted return value check in hostfs_mknod() caused the function to return success after handling it as an error (and cleaning up). It resulted in the following segfault when trying to bind() a named unix socket: Pid: 198, comm: a.out Not tainted 4.4.0-rc4 RIP: 0033:[<0000000061077df6>] RSP: 00000000daae5d60 EFLAGS: 00010202 RAX: 0000000000000000 RBX: 000000006092a460 RCX: 00000000dfc54208 RDX: 0000000061073ef1 RSI: 0000000000000070 RDI: 00000000e027d600 RBP: 00000000daae5de0 R08: 00000000da980ac0 R09: 0000000000000000 R10: 0000000000000003 R11: 00007fb1ae08f72a R12: 0000000000000000 R13: 000000006092a460 R14: 00000000daaa97c0 R15: 00000000daaa9a88 Kernel panic - not syncing: Kernel mode fault at addr 0x40, ip 0x61077df6 CPU: 0 PID: 198 Comm: a.out Not tainted 4.4.0-rc4 #1 Stack: e027d620 dfc54208 0000006f da981398 61bee000 0000c1ed daae5de0 0000006e e027d620 dfcd4208 00000005 6092a460 Call Trace: [<60dedc67>] SyS_bind+0xf7/0x110 [<600587be>] handle_syscall+0x7e/0x80 [<60066ad7>] userspace+0x3e7/0x4e0 [<6006321f>] ? save_registers+0x1f/0x40 [<6006c88e>] ? arch_prctl+0x1be/0x1f0 [<60054985>] fork_handler+0x85/0x90 Let's also get rid of the "cosmic ray protection" while we're at it. Fixes: e9193059b1b3 "hostfs: fix races in dentry_name() and inode_name()" Signed-off-by: Vegard Nossum Cc: Jeff Dike Cc: Al Viro Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- fs/hostfs/hostfs_kern.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 2ac99db3750e..5a7b3229b956 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -730,15 +730,13 @@ static int hostfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, init_special_inode(inode, mode, dev); err = do_mknod(name, mode, MAJOR(dev), MINOR(dev)); - if (!err) + if (err) goto out_free; err = read_name(inode, name); __putname(name); if (err) goto out_put; - if (err) - goto out_put; d_instantiate(dentry, inode); return 0; From 1b4d88b5de8641d570600c5c83afe1761fa8d1fd Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Thu, 31 Dec 2015 17:06:17 +0100 Subject: [PATCH 103/418] um: link with -lpthread commit a7df4716d19594b7b3f106f0bc0ca1c548e508e6 upstream. Similarly to commit fb1770aa78a43530940d0c2dd161e77bc705bdac, with gcc 5 on Ubuntu and CONFIG_STATIC_LINK=y I was seeing these linker errors: /usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/librt.a(timer_create.o): In function `__timer_create_new': (.text+0xcd): undefined reference to `pthread_once' /usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/librt.a(timer_create.o): In function `__timer_create_new': (.text+0x126): undefined reference to `pthread_attr_init' /usr/lib/gcc/x86_64-linux-gnu/5/../../../x86_64-linux-gnu/librt.a(timer_create.o): In function `__timer_create_new': (.text+0x168): undefined reference to `pthread_attr_setdetachstate' [...] Obviously we also need -lpthread for librt.a. Signed-off-by: Vegard Nossum Signed-off-by: Richard Weinberger Signed-off-by: Greg Kroah-Hartman --- scripts/link-vmlinux.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index dacf71a43ad4..ba6c34ea5429 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -62,7 +62,7 @@ vmlinux_link() -Wl,--start-group \ ${KBUILD_VMLINUX_MAIN} \ -Wl,--end-group \ - -lutil -lrt ${1} + -lutil -lrt -lpthread ${1} rm -f linux fi } From 6d88b00ede29eef3f0b7cbb7d5e769ad5c11b9a0 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 7 Jan 2016 16:38:10 -0500 Subject: [PATCH 104/418] locks: fix unlock when fcntl_setlk races with a close commit 7f3697e24dc3820b10f445a4a7d914fc356012d1 upstream. Dmitry reported that he was able to reproduce the WARN_ON_ONCE that fires in locks_free_lock_context when the flc_posix list isn't empty. The problem turns out to be that we're basically rebuilding the file_lock from scratch in fcntl_setlk when we discover that the setlk has raced with a close. If the l_whence field is SEEK_CUR or SEEK_END, then we may end up with fl_start and fl_end values that differ from when the lock was initially set, if the file position or length of the file has changed in the interim. Fix this by just reusing the same lock request structure, and simply override fl_type value with F_UNLCK as appropriate. That ensures that we really are unlocking the lock that was initially set. While we're there, make sure that we do pop a WARN_ON_ONCE if the removal ever fails. Also return -EBADF in this event, since that's what we would have returned if the close had happened earlier. Cc: Alexander Viro Fixes: c293621bbf67 (stale POSIX lock handling) Reported-by: Dmitry Vyukov Signed-off-by: Jeff Layton Acked-by: "J. Bruce Fields" Signed-off-by: Greg Kroah-Hartman --- fs/locks.c | 51 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 30 insertions(+), 21 deletions(-) diff --git a/fs/locks.c b/fs/locks.c index 0d2b3267e2a3..6333263b7bc8 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -2182,7 +2182,6 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd, goto out; } -again: error = flock_to_posix_lock(filp, file_lock, &flock); if (error) goto out; @@ -2224,19 +2223,22 @@ again: * Attempt to detect a close/fcntl race and recover by * releasing the lock that was just acquired. */ - /* - * we need that spin_lock here - it prevents reordering between - * update of i_flctx->flc_posix and check for it done in close(). - * rcu_read_lock() wouldn't do. - */ - spin_lock(¤t->files->file_lock); - f = fcheck(fd); - spin_unlock(¤t->files->file_lock); - if (!error && f != filp && flock.l_type != F_UNLCK) { - flock.l_type = F_UNLCK; - goto again; + if (!error && file_lock->fl_type != F_UNLCK) { + /* + * We need that spin_lock here - it prevents reordering between + * update of i_flctx->flc_posix and check for it done in + * close(). rcu_read_lock() wouldn't do. + */ + spin_lock(¤t->files->file_lock); + f = fcheck(fd); + spin_unlock(¤t->files->file_lock); + if (f != filp) { + file_lock->fl_type = F_UNLCK; + error = do_lock_file_wait(filp, cmd, file_lock); + WARN_ON_ONCE(error); + error = -EBADF; + } } - out: locks_free_lock(file_lock); return error; @@ -2322,7 +2324,6 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd, goto out; } -again: error = flock64_to_posix_lock(filp, file_lock, &flock); if (error) goto out; @@ -2364,14 +2365,22 @@ again: * Attempt to detect a close/fcntl race and recover by * releasing the lock that was just acquired. */ - spin_lock(¤t->files->file_lock); - f = fcheck(fd); - spin_unlock(¤t->files->file_lock); - if (!error && f != filp && flock.l_type != F_UNLCK) { - flock.l_type = F_UNLCK; - goto again; + if (!error && file_lock->fl_type != F_UNLCK) { + /* + * We need that spin_lock here - it prevents reordering between + * update of i_flctx->flc_posix and check for it done in + * close(). rcu_read_lock() wouldn't do. + */ + spin_lock(¤t->files->file_lock); + f = fcheck(fd); + spin_unlock(¤t->files->file_lock); + if (f != filp) { + file_lock->fl_type = F_UNLCK; + error = do_lock_file_wait(filp, cmd, file_lock); + WARN_ON_ONCE(error); + error = -EBADF; + } } - out: locks_free_lock(file_lock); return error; From 78899f827ef13fd7b841acd023a12a2b740ca54e Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Mon, 21 Dec 2015 17:05:08 -0600 Subject: [PATCH 105/418] rtlwifi: rtl_pci: Fix kernel panic commit f99551a2d39dc26ea03dc6761be11ac913eb2d57 upstream. In commit 38506ecefab9 (rtlwifi: rtl_pci: Start modification for new drivers), a bug was introduced that causes a NULL pointer dereference. As this bug only affects the infrequently used RTL8192EE and only under low-memory conditions, it has taken a long time for the bug to show up. The bug was reported on the linux-wireless mailing list and also at https://bugs.launchpad.net/ubuntu/+source/ubuntu-release-upgrader/ as bug #1527603 (kernel crashes due to rtl8192ee driver on ubuntu 15.10). Fixes: 38506ecefab9 ("rtlwifi: rtl_pci: Start modification for new drivers") Signed-off-by: Larry Finger Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/realtek/rtlwifi/pci.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.c b/drivers/net/wireless/realtek/rtlwifi/pci.c index f46c9d7f6528..7f471bff435c 100644 --- a/drivers/net/wireless/realtek/rtlwifi/pci.c +++ b/drivers/net/wireless/realtek/rtlwifi/pci.c @@ -801,7 +801,9 @@ static void _rtl_pci_rx_interrupt(struct ieee80211_hw *hw) hw_queue); if (rx_remained_cnt == 0) return; - + buffer_desc = &rtlpci->rx_ring[rxring_idx].buffer_desc[ + rtlpci->rx_ring[rxring_idx].idx]; + pdesc = (struct rtl_rx_desc *)skb->data; } else { /* rx descriptor */ pdesc = &rtlpci->rx_ring[rxring_idx].desc[ rtlpci->rx_ring[rxring_idx].idx]; @@ -824,13 +826,6 @@ static void _rtl_pci_rx_interrupt(struct ieee80211_hw *hw) new_skb = dev_alloc_skb(rtlpci->rxbuffersize); if (unlikely(!new_skb)) goto no_new; - if (rtlpriv->use_new_trx_flow) { - buffer_desc = - &rtlpci->rx_ring[rxring_idx].buffer_desc - [rtlpci->rx_ring[rxring_idx].idx]; - /*means rx wifi info*/ - pdesc = (struct rtl_rx_desc *)skb->data; - } memset(&rx_status , 0 , sizeof(rx_status)); rtlpriv->cfg->ops->query_rx_desc(hw, &stats, &rx_status, (u8 *)pdesc, skb); From 766e2109218a47821227d12552c174b75931d96e Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Mon, 14 Dec 2015 16:34:38 -0600 Subject: [PATCH 106/418] rtlwifi: rtl8192cu: Add missing parameter setup commit b68d0ae7e58624c33f2eddab471fee55db27dbf9 upstream. This driver fails to copy the module parameter for software encryption to the locations used by the main code. Signed-off-by: Larry Finger Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c index fd4a5353d216..7c6f7f0d18c6 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c @@ -65,6 +65,8 @@ static int rtl92cu_init_sw_vars(struct ieee80211_hw *hw) rtlpriv->dm.disable_framebursting = false; rtlpriv->dm.thermalvalue = 0; rtlpriv->dbg.global_debuglevel = rtlpriv->cfg->mod_params->debug; + rtlpriv->cfg->mod_params->sw_crypto = + rtlpriv->cfg->mod_params->sw_crypto; /* for firmware buf */ rtlpriv->rtlhal.pfirmware = vzalloc(0x4000); From 8c2265e9fa89927e9a13cbb26f5fac7dac138333 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Mon, 14 Dec 2015 16:34:37 -0600 Subject: [PATCH 107/418] rtlwifi: rtl8192ce: Fix handling of module parameters commit b24f19f16b9e43f54218c07609b783ea8625406a upstream. The module parameter for software encryption was never transferred to the location used by the driver. Signed-off-by: Larry Finger Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c index de6cb6c3a48c..4780bdc63b2b 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c @@ -139,6 +139,8 @@ int rtl92c_init_sw_vars(struct ieee80211_hw *hw) rtlpriv->psc.inactiveps = rtlpriv->cfg->mod_params->inactiveps; rtlpriv->psc.swctrl_lps = rtlpriv->cfg->mod_params->swctrl_lps; rtlpriv->psc.fwctrl_lps = rtlpriv->cfg->mod_params->fwctrl_lps; + rtlpriv->cfg->mod_params->sw_crypto = + rtlpriv->cfg->mod_params->sw_crypto; if (!rtlpriv->psc.inactiveps) pr_info("rtl8192ce: Power Save off (module option)\n"); if (!rtlpriv->psc.fwctrl_lps) From b7e9aec7b4c684d4f7af15ede8b23e540bcbfc78 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Mon, 14 Dec 2015 16:34:35 -0600 Subject: [PATCH 108/418] rtlwifi: rtl8192de: Fix incorrect module parameter descriptions commit d4d60b4caaa5926e1b243070770968f05656107a upstream. Two of the module parameters are listed with incorrect default values. Signed-off-by: Larry Finger Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c index b19d0398215f..c6e09a19de1a 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c @@ -376,8 +376,8 @@ module_param_named(swlps, rtl92de_mod_params.swctrl_lps, bool, 0444); module_param_named(fwlps, rtl92de_mod_params.fwctrl_lps, bool, 0444); MODULE_PARM_DESC(swenc, "Set to 1 for software crypto (default 0)\n"); MODULE_PARM_DESC(ips, "Set to 0 to not use link power save (default 1)\n"); -MODULE_PARM_DESC(swlps, "Set to 1 to use SW control power save (default 0)\n"); -MODULE_PARM_DESC(fwlps, "Set to 1 to use FW control power save (default 1)\n"); +MODULE_PARM_DESC(swlps, "Set to 1 to use SW control power save (default 1)\n"); +MODULE_PARM_DESC(fwlps, "Set to 1 to use FW control power save (default 0)\n"); MODULE_PARM_DESC(debug, "Set debug level (0-5) (default 0)"); static SIMPLE_DEV_PM_OPS(rtlwifi_pm_ops, rtl_pci_suspend, rtl_pci_resume); From 626f652ac51b51008d9523db3442284eff35cbeb Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Mon, 14 Dec 2015 16:34:32 -0600 Subject: [PATCH 109/418] rtlwifi: rtl8723ae: Fix initialization of module parameters commit 793b09994211fbe0b5cd5f7b6dd8cc9b6256c3c6 upstream. This driver has some errors in the handling of module parameters. These include missing initialization for parameters msi_support and disable_watchdog. In addition, neither of these parameters nor sw_crypto are transferred into the locations used by the driver. A final fix is adding parameter msi to the module named and description macros. Signed-off-by: Larry Finger Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c index 3859b3e3d158..ff49a8c0ff61 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c @@ -150,6 +150,11 @@ int rtl8723e_init_sw_vars(struct ieee80211_hw *hw) rtlpriv->psc.inactiveps = rtlpriv->cfg->mod_params->inactiveps; rtlpriv->psc.swctrl_lps = rtlpriv->cfg->mod_params->swctrl_lps; rtlpriv->psc.fwctrl_lps = rtlpriv->cfg->mod_params->fwctrl_lps; + rtlpci->msi_support = rtlpriv->cfg->mod_params->msi_support; + rtlpriv->cfg->mod_params->sw_crypto = + rtlpriv->cfg->mod_params->sw_crypto; + rtlpriv->cfg->mod_params->disable_watchdog = + rtlpriv->cfg->mod_params->disable_watchdog; if (rtlpriv->cfg->mod_params->disable_watchdog) pr_info("watchdog disabled\n"); rtlpriv->psc.reg_fwctrl_lps = 3; @@ -267,6 +272,8 @@ static struct rtl_mod_params rtl8723e_mod_params = { .swctrl_lps = false, .fwctrl_lps = true, .debug = DBG_EMERG, + .msi_support = false, + .disable_watchdog = false, }; static struct rtl_hal_cfg rtl8723e_hal_cfg = { @@ -383,12 +390,14 @@ module_param_named(debug, rtl8723e_mod_params.debug, int, 0444); module_param_named(ips, rtl8723e_mod_params.inactiveps, bool, 0444); module_param_named(swlps, rtl8723e_mod_params.swctrl_lps, bool, 0444); module_param_named(fwlps, rtl8723e_mod_params.fwctrl_lps, bool, 0444); +module_param_named(msi, rtl8723e_mod_params.msi_support, bool, 0444); module_param_named(disable_watchdog, rtl8723e_mod_params.disable_watchdog, bool, 0444); MODULE_PARM_DESC(swenc, "Set to 1 for software crypto (default 0)\n"); MODULE_PARM_DESC(ips, "Set to 0 to not use link power save (default 1)\n"); MODULE_PARM_DESC(swlps, "Set to 1 to use SW control power save (default 0)\n"); MODULE_PARM_DESC(fwlps, "Set to 1 to use FW control power save (default 1)\n"); +MODULE_PARM_DESC(msi, "Set to 1 to use MSI interrupts mode (default 0)\n"); MODULE_PARM_DESC(debug, "Set debug level (0-5) (default 0)"); MODULE_PARM_DESC(disable_watchdog, "Set to 1 to disable the watchdog (default 0)\n"); From da5c2c01da0cfccde6f2cedf35d8074d2bc19d10 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Mon, 14 Dec 2015 16:34:36 -0600 Subject: [PATCH 110/418] rtlwifi: rtl8192se: Fix module parameter initialization commit 7503efbd82c15c4070adffff1344e5169d3634b4 upstream. Two of the module parameter descriptions show incorrect default values. In addition the value for software encryption is not transferred to the locations used by the driver. Signed-off-by: Larry Finger Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c index e1fd27c888bf..31baca41ac2f 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c @@ -187,6 +187,8 @@ static int rtl92s_init_sw_vars(struct ieee80211_hw *hw) rtlpriv->psc.inactiveps = rtlpriv->cfg->mod_params->inactiveps; rtlpriv->psc.swctrl_lps = rtlpriv->cfg->mod_params->swctrl_lps; rtlpriv->psc.fwctrl_lps = rtlpriv->cfg->mod_params->fwctrl_lps; + rtlpriv->cfg->mod_params->sw_crypto = + rtlpriv->cfg->mod_params->sw_crypto; if (!rtlpriv->psc.inactiveps) pr_info("Power Save off (module option)\n"); if (!rtlpriv->psc.fwctrl_lps) @@ -425,8 +427,8 @@ module_param_named(swlps, rtl92se_mod_params.swctrl_lps, bool, 0444); module_param_named(fwlps, rtl92se_mod_params.fwctrl_lps, bool, 0444); MODULE_PARM_DESC(swenc, "Set to 1 for software crypto (default 0)\n"); MODULE_PARM_DESC(ips, "Set to 0 to not use link power save (default 1)\n"); -MODULE_PARM_DESC(swlps, "Set to 1 to use SW control power save (default 0)\n"); -MODULE_PARM_DESC(fwlps, "Set to 1 to use FW control power save (default 1)\n"); +MODULE_PARM_DESC(swlps, "Set to 1 to use SW control power save (default 1)\n"); +MODULE_PARM_DESC(fwlps, "Set to 1 to use FW control power save (default 0)\n"); MODULE_PARM_DESC(debug, "Set debug level (0-5) (default 0)"); static SIMPLE_DEV_PM_OPS(rtlwifi_pm_ops, rtl_pci_suspend, rtl_pci_resume); From e1b08009fb532a2785b8b38adec825488abc46fa Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Mon, 14 Dec 2015 16:34:34 -0600 Subject: [PATCH 111/418] rtlwifi: rtl8188ee: Fix module parameter initialization commit 06f34572c6110e2e2d5e653a957f1d74db9e3f2b upstream. In this driver, parameters disable_watchdog and sw_crypto are never copied into the locations used in the main code. While modifying the parameter handling, the copying of parameter msi_support is moved to be with the rest. Signed-off-by: Larry Finger Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c index 11344121c55e..47e32cb0ec1a 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c @@ -88,8 +88,6 @@ int rtl88e_init_sw_vars(struct ieee80211_hw *hw) u8 tid; rtl8188ee_bt_reg_init(hw); - rtlpci->msi_support = rtlpriv->cfg->mod_params->msi_support; - rtlpriv->dm.dm_initialgain_enable = 1; rtlpriv->dm.dm_flag = 0; rtlpriv->dm.disable_framebursting = 0; @@ -138,6 +136,11 @@ int rtl88e_init_sw_vars(struct ieee80211_hw *hw) rtlpriv->psc.inactiveps = rtlpriv->cfg->mod_params->inactiveps; rtlpriv->psc.swctrl_lps = rtlpriv->cfg->mod_params->swctrl_lps; rtlpriv->psc.fwctrl_lps = rtlpriv->cfg->mod_params->fwctrl_lps; + rtlpci->msi_support = rtlpriv->cfg->mod_params->msi_support; + rtlpriv->cfg->mod_params->sw_crypto = + rtlpriv->cfg->mod_params->sw_crypto; + rtlpriv->cfg->mod_params->disable_watchdog = + rtlpriv->cfg->mod_params->disable_watchdog; if (rtlpriv->cfg->mod_params->disable_watchdog) pr_info("watchdog disabled\n"); if (!rtlpriv->psc.inactiveps) From f037c35161f04459ff804eaa197a81f66f973379 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Mon, 14 Dec 2015 16:34:31 -0600 Subject: [PATCH 112/418] rtlwifi: rtl8723be: Fix module parameter initialization commit 7079604ddb83f428359feace3aeaf8a9f435be4a upstream. This driver has a number of errors in the module initialization. These include the following: Parameter msi_support is stored in two places - one is removed. Paramters sw_crypto and disable_watchdog were never stored in the final locations, nor were they initialized properly. Signed-off-by: Larry Finger Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c index d091f1d5f91e..a78eaeda0008 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c @@ -93,7 +93,6 @@ int rtl8723be_init_sw_vars(struct ieee80211_hw *hw) struct rtl_mac *mac = rtl_mac(rtl_priv(hw)); rtl8723be_bt_reg_init(hw); - rtlpci->msi_support = rtlpriv->cfg->mod_params->msi_support; rtlpriv->btcoexist.btc_ops = rtl_btc_get_ops_pointer(); rtlpriv->dm.dm_initialgain_enable = 1; @@ -151,6 +150,10 @@ int rtl8723be_init_sw_vars(struct ieee80211_hw *hw) rtlpriv->psc.swctrl_lps = rtlpriv->cfg->mod_params->swctrl_lps; rtlpriv->psc.fwctrl_lps = rtlpriv->cfg->mod_params->fwctrl_lps; rtlpci->msi_support = rtlpriv->cfg->mod_params->msi_support; + rtlpriv->cfg->mod_params->sw_crypto = + rtlpriv->cfg->mod_params->sw_crypto; + rtlpriv->cfg->mod_params->disable_watchdog = + rtlpriv->cfg->mod_params->disable_watchdog; if (rtlpriv->cfg->mod_params->disable_watchdog) pr_info("watchdog disabled\n"); rtlpriv->psc.reg_fwctrl_lps = 3; @@ -267,6 +270,9 @@ static struct rtl_mod_params rtl8723be_mod_params = { .inactiveps = true, .swctrl_lps = false, .fwctrl_lps = true, + .msi_support = false, + .disable_watchdog = false, + .debug = DBG_EMERG, }; static struct rtl_hal_cfg rtl8723be_hal_cfg = { From 6472e6b6afc6e3fb7ec4d2890b4eed0d25f71ae9 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Thu, 7 Jan 2016 14:46:38 +0200 Subject: [PATCH 113/418] mei: fix fasync return value on error commit ed6dc538e5a36a331b6256d54f435c80f6715460 upstream. fasync should return a negative value on error and not poll mask POLLERR. Cc: Al Viro Reported-by: Al Viro Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/misc/mei/main.c b/drivers/misc/mei/main.c index b2f2486b3d75..677d0362f334 100644 --- a/drivers/misc/mei/main.c +++ b/drivers/misc/mei/main.c @@ -657,7 +657,9 @@ out: * @file: pointer to file structure * @band: band bitmap * - * Return: poll mask + * Return: negative on error, + * 0 if it did no changes, + * and positive a process was added or deleted */ static int mei_fasync(int fd, struct file *file, int band) { @@ -665,7 +667,7 @@ static int mei_fasync(int fd, struct file *file, int band) struct mei_cl *cl = file->private_data; if (!mei_cl_is_connected(cl)) - return POLLERR; + return -ENODEV; return fasync_helper(fd, file, band, &cl->ev_async); } From a007fc3d6569b620ec5bca1b4636d952ebdd2627 Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Sun, 17 Jan 2016 12:25:01 +0200 Subject: [PATCH 114/418] mei: validate request value in client notify request ioctl commit 7326fffb712f09a315bc73cc1ee63843f59b8bd4 upstream. This patch address a possible security issue: The request field in client notify request ioctl comes from user space as u32 and is downcasted to u8 with out validation. Check request field to have approved values MEI_HBM_NOTIFICATION_STAR/STOP Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/main.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/misc/mei/main.c b/drivers/misc/mei/main.c index 677d0362f334..80f9afcb1382 100644 --- a/drivers/misc/mei/main.c +++ b/drivers/misc/mei/main.c @@ -458,7 +458,11 @@ static int mei_ioctl_client_notify_request(struct file *file, u32 request) { struct mei_cl *cl = file->private_data; - return mei_cl_notify_request(cl, file, request); + if (request != MEI_HBM_NOTIFICATION_START && + request != MEI_HBM_NOTIFICATION_STOP) + return -EINVAL; + + return mei_cl_notify_request(cl, file, (u8)request); } /** From aaf1569ce4575ed163839002f27da2cde4d10a77 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 27 Feb 2016 19:23:16 -0500 Subject: [PATCH 115/418] namei: ->d_inode of a pinned dentry is stable only for positives commit d4565649b6d6923369112758212b851adc407f0c upstream. both do_last() and walk_component() risk picking a NULL inode out of dentry about to become positive, *then* checking its flags and seeing that it's not negative anymore and using (already stale by then) value they'd fetched earlier. Usually ends up oopsing soon after that... Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/namei.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 0c3974cd3ecd..c9eb7e84d05f 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1742,11 +1742,11 @@ static int walk_component(struct nameidata *nd, int flags) if (err < 0) return err; - inode = d_backing_inode(path.dentry); seq = 0; /* we are already out of RCU mode */ err = -ENOENT; if (d_is_negative(path.dentry)) goto out_path_put; + inode = d_backing_inode(path.dentry); } if (flags & WALK_PUT) @@ -3130,12 +3130,12 @@ retry_lookup: return error; BUG_ON(nd->flags & LOOKUP_RCU); - inode = d_backing_inode(path.dentry); seq = 0; /* out of RCU mode, so the value doesn't matter */ if (unlikely(d_is_negative(path.dentry))) { path_to_nameidata(&path, nd); return -ENOENT; } + inode = d_backing_inode(path.dentry); finish_lookup: if (nd->depth) put_link(nd); From 3214cf3b5fbe69b0274cb18a4147f62f926bd7fd Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Tue, 22 Dec 2015 02:27:35 -0200 Subject: [PATCH 116/418] rc: sunxi-cir: Initialize the spinlock properly commit 768acf46e1320d6c41ed1b7c4952bab41c1cde79 upstream. The driver allocates the spinlock but fails to initialize it correctly. The kernel reports a BUG indicating bad spinlock magic when spinlock debugging is enabled. Call spin_lock_init() on it to initialize it correctly. Fixes: b4e3e59fb59c ("[media] rc: add sunxi-ir driver") Signed-off-by: Chen-Yu Tsai Acked-by: Hans de Goede Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/rc/sunxi-cir.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/media/rc/sunxi-cir.c b/drivers/media/rc/sunxi-cir.c index 7830aef3db45..40f77685cc4a 100644 --- a/drivers/media/rc/sunxi-cir.c +++ b/drivers/media/rc/sunxi-cir.c @@ -153,6 +153,8 @@ static int sunxi_ir_probe(struct platform_device *pdev) if (!ir) return -ENOMEM; + spin_lock_init(&ir->ir_lock); + if (of_device_is_compatible(dn, "allwinner,sun5i-a13-ir")) ir->fifo_size = 64; else From aecba569dc08bc746885aeae6086886cd62db7eb Mon Sep 17 00:00:00 2001 From: Malcolm Priestley Date: Mon, 31 Aug 2015 06:13:45 -0300 Subject: [PATCH 117/418] media: dvb-core: Don't force CAN_INVERSION_AUTO in oneshot mode commit c9d57de6103e343f2d4e04ea8d9e417e10a24da7 upstream. When in FE_TUNE_MODE_ONESHOT the frontend must report the actual capabilities so user can take appropriate action. With frontends that can't do auto inversion this is done by dvb-core automatically so CAN_INVERSION_AUTO is valid. However, when in FE_TUNE_MODE_ONESHOT this is not true. So only set FE_CAN_INVERSION_AUTO in modes other than FE_TUNE_MODE_ONESHOT Signed-off-by: Malcolm Priestley Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/dvb-core/dvb_frontend.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/media/dvb-core/dvb_frontend.c b/drivers/media/dvb-core/dvb_frontend.c index c38ef1a72b4a..e2a3833170e3 100644 --- a/drivers/media/dvb-core/dvb_frontend.c +++ b/drivers/media/dvb-core/dvb_frontend.c @@ -2313,9 +2313,9 @@ static int dvb_frontend_ioctl_legacy(struct file *file, dev_dbg(fe->dvb->device, "%s: current delivery system on cache: %d, V3 type: %d\n", __func__, c->delivery_system, fe->ops.info.type); - /* Force the CAN_INVERSION_AUTO bit on. If the frontend doesn't - * do it, it is done for it. */ - info->caps |= FE_CAN_INVERSION_AUTO; + /* Set CAN_INVERSION_AUTO bit on in other than oneshot mode */ + if (!(fepriv->tune_mode_flags & FE_TUNE_MODE_ONESHOT)) + info->caps |= FE_CAN_INVERSION_AUTO; err = 0; break; } From c44a410e1680345b13a2e7c1011a3a470beb76d8 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Mon, 5 Oct 2015 19:33:29 -0300 Subject: [PATCH 118/418] si2157: return -EINVAL if firmware blob is too big commit d2cc2f0b35465951eaaf0387fd55e29835ed7ea6 upstream. A previous patch added a check if the firmware is too big, but it didn't set the return error code with the right value. [mchehab@osg.samsung.com: I ended by applying a v1 of Laura's patch, without the proper return code. This patch contains the difference between v2 and v1 of the Laura's "si2157: Bounds check firmware" patch] Signed-off-by: Laura Abbott Reviewed-by: Olli Salonen Tested-by: Olli Salonen Signed-off-by: Greg Kroah-Hartman Signed-off-by: Mauro Carvalho Chehab --- drivers/media/tuners/si2157.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/media/tuners/si2157.c b/drivers/media/tuners/si2157.c index ce157edd45fa..0e1ca2b00e61 100644 --- a/drivers/media/tuners/si2157.c +++ b/drivers/media/tuners/si2157.c @@ -168,6 +168,7 @@ static int si2157_init(struct dvb_frontend *fe) len = fw->data[fw->size - remaining]; if (len > SI2157_ARGLEN) { dev_err(&client->dev, "Bad firmware length\n"); + ret = -EINVAL; goto err_release_firmware; } memcpy(cmd.args, &fw->data[(fw->size - remaining) + 1], len); From f51c5d31c88d47a93135b89cf541aeb194f62908 Mon Sep 17 00:00:00 2001 From: Antonio Ospite Date: Fri, 2 Oct 2015 17:33:13 -0300 Subject: [PATCH 119/418] gspca: ov534/topro: prevent a division by 0 commit dcc7fdbec53a960588f2c40232db2c6466c09917 upstream. v4l2-compliance sends a zeroed struct v4l2_streamparm in v4l2-test-formats.cpp::testParmType(), and this results in a division by 0 in some gspca subdrivers: divide error: 0000 [#1] SMP Modules linked in: gspca_ov534 gspca_main ... CPU: 0 PID: 17201 Comm: v4l2-compliance Not tainted 4.3.0-rc2-ao2 #1 Hardware name: System manufacturer System Product Name/M2N-E SLI, BIOS ASUS M2N-E SLI ACPI BIOS Revision 1301 09/16/2010 task: ffff8800818306c0 ti: ffff880095c4c000 task.ti: ffff880095c4c000 RIP: 0010:[] [] sd_set_streamparm+0x12/0x60 [gspca_ov534] RSP: 0018:ffff880095c4fce8 EFLAGS: 00010296 RAX: 0000000000000000 RBX: ffff8800c9522000 RCX: ffffffffa077a140 RDX: 0000000000000000 RSI: ffff880095e0c100 RDI: ffff8800c9522000 RBP: ffff880095e0c100 R08: ffffffffa077a100 R09: 00000000000000cc R10: ffff880067ec7740 R11: 0000000000000016 R12: ffffffffa07bb400 R13: 0000000000000000 R14: ffff880081b6a800 R15: 0000000000000000 FS: 00007fda0de78740(0000) GS:ffff88012fc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000014630f8 CR3: 00000000cf349000 CR4: 00000000000006f0 Stack: ffffffffa07a6431 ffff8800c9522000 ffffffffa077656e 00000000c0cc5616 ffff8800c9522000 ffffffffa07a5e20 ffff880095e0c100 0000000000000000 ffff880067ec7740 ffffffffa077a140 ffff880067ec7740 0000000000000016 Call Trace: [] ? v4l_s_parm+0x21/0x50 [videodev] [] ? vidioc_s_parm+0x4e/0x60 [gspca_main] [] ? __video_do_ioctl+0x280/0x2f0 [videodev] [] ? video_ioctl2+0x20/0x20 [videodev] [] ? video_usercopy+0x319/0x4e0 [videodev] [] ? page_add_new_anon_rmap+0x71/0xa0 [] ? mem_cgroup_commit_charge+0x52/0x90 [] ? handle_mm_fault+0xc18/0x1680 [] ? v4l2_ioctl+0xac/0xd0 [videodev] [] ? do_vfs_ioctl+0x28f/0x480 [] ? SyS_ioctl+0x74/0x80 [] ? entry_SYSCALL_64_fastpath+0x16/0x75 Code: c7 93 d9 79 a0 5b 5d e9 f1 f3 9a e0 0f 1f 00 66 2e 0f 1f 84 00 00 00 00 00 66 66 66 66 90 53 31 d2 48 89 fb 48 83 ec 08 8b 46 10 76 0c 80 bf ac 0c 00 00 00 88 87 4e 0e 00 00 74 09 80 bf 4f RIP [] sd_set_streamparm+0x12/0x60 [gspca_ov534] RSP ---[ end trace 279710c2c6c72080 ]--- Following what the doc says about a zeroed timeperframe (see http://www.linuxtv.org/downloads/v4l-dvb-apis/vidioc-g-parm.html): ... To reset manually applications can just set this field to zero. fix the issue by resetting the frame rate to a default value in case of an unusable timeperframe. The fix is done in the subdrivers instead of gspca.c because only the subdrivers have notion of a default frame rate to reset the camera to. Signed-off-by: Antonio Ospite Reviewed-by: Hans de Goede Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/usb/gspca/ov534.c | 9 +++++++-- drivers/media/usb/gspca/topro.c | 6 +++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/media/usb/gspca/ov534.c b/drivers/media/usb/gspca/ov534.c index 146071b8e116..bfff1d1c70ab 100644 --- a/drivers/media/usb/gspca/ov534.c +++ b/drivers/media/usb/gspca/ov534.c @@ -1491,8 +1491,13 @@ static void sd_set_streamparm(struct gspca_dev *gspca_dev, struct v4l2_fract *tpf = &cp->timeperframe; struct sd *sd = (struct sd *) gspca_dev; - /* Set requested framerate */ - sd->frame_rate = tpf->denominator / tpf->numerator; + if (tpf->numerator == 0 || tpf->denominator == 0) + /* Set default framerate */ + sd->frame_rate = 30; + else + /* Set requested framerate */ + sd->frame_rate = tpf->denominator / tpf->numerator; + if (gspca_dev->streaming) set_frame_rate(gspca_dev); diff --git a/drivers/media/usb/gspca/topro.c b/drivers/media/usb/gspca/topro.c index c70ff406b07a..c028a5c2438e 100644 --- a/drivers/media/usb/gspca/topro.c +++ b/drivers/media/usb/gspca/topro.c @@ -4802,7 +4802,11 @@ static void sd_set_streamparm(struct gspca_dev *gspca_dev, struct v4l2_fract *tpf = &cp->timeperframe; int fr, i; - sd->framerate = tpf->denominator / tpf->numerator; + if (tpf->numerator == 0 || tpf->denominator == 0) + sd->framerate = 30; + else + sd->framerate = tpf->denominator / tpf->numerator; + if (gspca_dev->streaming) setframerate(gspca_dev, v4l2_ctrl_g_ctrl(gspca_dev->exposure)); From 368e554ba69f8cd9752c74adc7b6bd1f5f7c630e Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Thu, 29 Oct 2015 03:02:06 -0200 Subject: [PATCH 120/418] vb2: fix a regression in poll() behavior for output,streams commit 4623e5967448444a4ea1e77beb58898c4af48693 upstream. In the 3.17 kernel the poll() behavior changed for output streams: as long as not all buffers were queued up poll() would return that userspace can write. This is fine for the write() call, but when using stream I/O this changed the behavior since the expectation was that it would wait for buffers to become available for dequeuing. This patch only enables the check whether you can queue buffers for file I/O only, and skips it for stream I/O. Signed-off-by: Hans Verkuil Acked-by: Laurent Pinchart Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/v4l2-core/videobuf2-v4l2.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/media/v4l2-core/videobuf2-v4l2.c b/drivers/media/v4l2-core/videobuf2-v4l2.c index 27b4b9e7c0c2..502984c724ff 100644 --- a/drivers/media/v4l2-core/videobuf2-v4l2.c +++ b/drivers/media/v4l2-core/videobuf2-v4l2.c @@ -822,10 +822,10 @@ unsigned int vb2_poll(struct vb2_queue *q, struct file *file, poll_table *wait) return res | POLLERR; /* - * For output streams you can write as long as there are fewer buffers - * queued than there are buffers available. + * For output streams you can call write() as long as there are fewer + * buffers queued than there are buffers available. */ - if (q->is_output && q->queued_count < q->num_buffers) + if (q->is_output && q->fileio && q->queued_count < q->num_buffers) return res | POLLOUT | POLLWRNORM; if (list_empty(&q->done_list)) { From e862efec7272eab9950ca329eceda7fec272fd20 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 3 Feb 2016 17:33:48 -0200 Subject: [PATCH 121/418] tda1004x: only update the frontend properties if locked commit e8beb02343e7582980c6705816cd957cf4f74c7a upstream. The tda1004x was updating the properties cache before locking. If the device is not locked, the data at the registers are just random values with no real meaning. This caused the driver to fail with libdvbv5, as such library calls GET_PROPERTY from time to time, in order to return the DVB stats. Tested with a saa7134 card 78: ASUSTeK P7131 Dual, vendor PCI ID: 1043:4862 Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/dvb-frontends/tda1004x.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/media/dvb-frontends/tda1004x.c b/drivers/media/dvb-frontends/tda1004x.c index 0e209b56c76c..c6abeb4fba9d 100644 --- a/drivers/media/dvb-frontends/tda1004x.c +++ b/drivers/media/dvb-frontends/tda1004x.c @@ -903,9 +903,18 @@ static int tda1004x_get_fe(struct dvb_frontend *fe) { struct dtv_frontend_properties *fe_params = &fe->dtv_property_cache; struct tda1004x_state* state = fe->demodulator_priv; + int status; dprintk("%s\n", __func__); + status = tda1004x_read_byte(state, TDA1004X_STATUS_CD); + if (status == -1) + return -EIO; + + /* Only update the properties cache if device is locked */ + if (!(status & 8)) + return 0; + // inversion status fe_params->inversion = INVERSION_OFF; if (tda1004x_read_byte(state, TDA1004X_CONFC1) & 0x20) From d7a853e6f7882e91fbdfb56663ed89355463ddef Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 14 Dec 2015 09:26:01 -0500 Subject: [PATCH 122/418] dm space map metadata: remove unused variable in brb_pop() commit 512167788a6fe9481a33a3cce5f80b684631a1bb upstream. Remove the unused struct block_op pointer that was inadvertantly introduced, via cut-and-paste of previous brb_op() code, as part of commit 50dd842ad. (Cc'ing stable@ because commit 50dd842ad did) Fixes: 50dd842ad ("dm space map metadata: fix ref counting bug when bootstrapping a new space map") Reported-by: David Binderman Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/persistent-data/dm-space-map-metadata.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c index fca6dbcf9a47..7e44005595c1 100644 --- a/drivers/md/persistent-data/dm-space-map-metadata.c +++ b/drivers/md/persistent-data/dm-space-map-metadata.c @@ -152,12 +152,9 @@ static int brb_peek(struct bop_ring_buffer *brb, struct block_op *result) static int brb_pop(struct bop_ring_buffer *brb) { - struct block_op *bop; - if (brb_empty(brb)) return -ENODATA; - bop = brb->bops + brb->begin; brb->begin = brb_next(brb, brb->begin); return 0; From 1264fbd72f85446aa75bc3895e13589c1ed2c6df Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 8 Jan 2016 19:07:55 -0500 Subject: [PATCH 123/418] dm snapshot: fix hung bios when copy error occurs commit 385277bfb57faac44e92497104ba542cdd82d5fe upstream. When there is an error copying a chunk dm-snapshot can incorrectly hold associated bios indefinitely, resulting in hung IO. The function copy_callback sets pe->error if there was error copying the chunk, and then calls complete_exception. complete_exception calls pending_complete on error, otherwise it calls commit_exception with commit_callback (and commit_callback calls complete_exception). The persistent exception store (dm-snap-persistent.c) assumes that calls to prepare_exception and commit_exception are paired. persistent_prepare_exception increases ps->pending_count and persistent_commit_exception decreases it. If there is a copy error, persistent_prepare_exception is called but persistent_commit_exception is not. This results in the variable ps->pending_count never returning to zero and that causes some pending exceptions (and their associated bios) to be held forever. Fix this by unconditionally calling commit_exception regardless of whether the copy was successful. A new "valid" parameter is added to commit_exception -- when the copy fails this parameter is set to zero so that the chunk that failed to copy (and all following chunks) is not recorded in the snapshot store. Also, remove commit_callback now that it is merely a wrapper around pending_complete. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-exception-store.h | 2 +- drivers/md/dm-snap-persistent.c | 5 ++++- drivers/md/dm-snap-transient.c | 4 ++-- drivers/md/dm-snap.c | 20 +++++--------------- 4 files changed, 12 insertions(+), 19 deletions(-) diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h index fae34e7a0b1e..12b5216c2cfe 100644 --- a/drivers/md/dm-exception-store.h +++ b/drivers/md/dm-exception-store.h @@ -69,7 +69,7 @@ struct dm_exception_store_type { * Update the metadata with this exception. */ void (*commit_exception) (struct dm_exception_store *store, - struct dm_exception *e, + struct dm_exception *e, int valid, void (*callback) (void *, int success), void *callback_context); diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index 3164b8bce294..4d3909393f2c 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -695,7 +695,7 @@ static int persistent_prepare_exception(struct dm_exception_store *store, } static void persistent_commit_exception(struct dm_exception_store *store, - struct dm_exception *e, + struct dm_exception *e, int valid, void (*callback) (void *, int success), void *callback_context) { @@ -704,6 +704,9 @@ static void persistent_commit_exception(struct dm_exception_store *store, struct core_exception ce; struct commit_callback *cb; + if (!valid) + ps->valid = 0; + ce.old_chunk = e->old_chunk; ce.new_chunk = e->new_chunk; write_exception(ps, ps->current_committed++, &ce); diff --git a/drivers/md/dm-snap-transient.c b/drivers/md/dm-snap-transient.c index 9b7c8c8049d6..4d50a12cf00c 100644 --- a/drivers/md/dm-snap-transient.c +++ b/drivers/md/dm-snap-transient.c @@ -52,12 +52,12 @@ static int transient_prepare_exception(struct dm_exception_store *store, } static void transient_commit_exception(struct dm_exception_store *store, - struct dm_exception *e, + struct dm_exception *e, int valid, void (*callback) (void *, int success), void *callback_context) { /* Just succeed */ - callback(callback_context, 1); + callback(callback_context, valid); } static void transient_usage(struct dm_exception_store *store, diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index c06b74e91cd6..61f184ad081c 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1438,8 +1438,9 @@ static void __invalidate_snapshot(struct dm_snapshot *s, int err) dm_table_event(s->ti->table); } -static void pending_complete(struct dm_snap_pending_exception *pe, int success) +static void pending_complete(void *context, int success) { + struct dm_snap_pending_exception *pe = context; struct dm_exception *e; struct dm_snapshot *s = pe->snap; struct bio *origin_bios = NULL; @@ -1509,24 +1510,13 @@ out: free_pending_exception(pe); } -static void commit_callback(void *context, int success) -{ - struct dm_snap_pending_exception *pe = context; - - pending_complete(pe, success); -} - static void complete_exception(struct dm_snap_pending_exception *pe) { struct dm_snapshot *s = pe->snap; - if (unlikely(pe->copy_error)) - pending_complete(pe, 0); - - else - /* Update the metadata if we are persistent */ - s->store->type->commit_exception(s->store, &pe->e, - commit_callback, pe); + /* Update the metadata if we are persistent */ + s->store->type->commit_exception(s->store, &pe->e, !pe->copy_error, + pending_complete, pe); } /* From 5c6f66674274958e0ea2d2d6c7a905e4ea73881e Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Sun, 21 Feb 2016 19:09:22 -0500 Subject: [PATCH 124/418] dm: fix dm_rq_target_io leak on faults with .request_fn DM w/ blk-mq paths commit 4328daa2e79ed904a42ce00a9f38b9c36b44b21a upstream. Using request-based DM mpath configured with the following stacking (.request_fn DM mpath ontop of scsi-mq paths): echo Y > /sys/module/scsi_mod/parameters/use_blk_mq echo N > /sys/module/dm_mod/parameters/use_blk_mq 'struct dm_rq_target_io' would leak if a request is requeued before a blk-mq clone is allocated (or fails to allocate). free_rq_tio() wasn't being called. kmemleak reported: unreferenced object 0xffff8800b90b98c0 (size 112): comm "kworker/7:1H", pid 5692, jiffies 4295056109 (age 78.589s) hex dump (first 32 bytes): 00 d0 5c 2c 03 88 ff ff 40 00 bf 01 00 c9 ff ff ..\,....@....... e0 d9 b1 34 00 88 ff ff 00 00 00 00 00 00 00 00 ...4............ backtrace: [] kmemleak_alloc+0x4e/0xb0 [] kmem_cache_alloc+0xc3/0x1e0 [] mempool_alloc_slab+0x15/0x20 [] mempool_alloc+0x6e/0x170 [] dm_old_prep_fn+0x3c/0x180 [dm_mod] [] blk_peek_request+0x168/0x290 [] dm_request_fn+0xb2/0x1b0 [dm_mod] [] __blk_run_queue+0x33/0x40 [] blk_delay_work+0x25/0x40 [] process_one_work+0x14f/0x3d0 [] worker_thread+0x125/0x4b0 [] kthread+0xd8/0xf0 [] ret_from_fork+0x3f/0x70 [] 0xffffffffffffffff crash> struct -o dm_rq_target_io struct dm_rq_target_io { ... } SIZE: 112 Fixes: e5863d9ad7 ("dm: allocate requests in target when stacking on blk-mq devices") Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 5df40480228b..dd834927bc66 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1191,6 +1191,8 @@ static void dm_unprep_request(struct request *rq) if (clone) free_rq_clone(clone); + else if (!tio->md->queue->mq_ops) + free_rq_tio(tio); } /* From 53cc688d090bbd461e92ab28ecf4bde8b0c2a6da Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Thu, 17 Dec 2015 08:47:02 -0700 Subject: [PATCH 125/418] coresight: checking for NULL string in coresight_name_match() commit fadf3a44e974b030e7145218ad1ab25e3ef91738 upstream. Connection child names associated to ports can sometimes be NULL, which is the case when booting a system on QEMU or when the Coresight power domain isn't switched on. This patch is adding a check to make sure a NULL string isn't fed to strcmp(), something that avoid crashing the system. Reported-by: Tyler Baker Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/coresight/coresight.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c index e25492137d8b..93738dfbf631 100644 --- a/drivers/hwtracing/coresight/coresight.c +++ b/drivers/hwtracing/coresight/coresight.c @@ -548,7 +548,7 @@ static int coresight_name_match(struct device *dev, void *data) to_match = data; i_csdev = to_coresight_device(dev); - if (!strcmp(to_match, dev_name(&i_csdev->dev))) + if (to_match && !strcmp(to_match, dev_name(&i_csdev->dev))) return 1; return 0; From 61836242f8a64deec07cfe4d4be3d6ed23875bea Mon Sep 17 00:00:00 2001 From: Sekhar Nori Date: Tue, 15 Dec 2015 19:56:12 +0530 Subject: [PATCH 126/418] irqchip/omap-intc: Add support for spurious irq handling commit d3b421cd07e4c0d4d6c0bbd55ca169c054fc081d upstream. Under some conditions, irq sorting procedure used by INTC can go wrong resulting in a spurious irq getting reported. If this condition is not handled, it results in endless stream of: unexpected IRQ trap at vector 00 messages from ack_bad_irq() Handle the spurious interrupt condition in omap-intc driver to prevent this. Measurements using kernel function profiler on AM335x EVM running at 720MHz show that after this patch omap_intc_handle_irq() takes about 37.4us against 34us before this patch. Signed-off-by: Sekhar Nori Acked-by: Tony Lindgren Cc: John Ogness Cc: Felipe Balbi Cc: Jason Cooper Cc: Marc Zyngier Link: http://lkml.kernel.org/r/9c78a6db02ac55f7af7371b417b6e414d2c3095b.1450188128.git.nsekhar@ti.com Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- drivers/irqchip/irq-omap-intc.c | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-omap-intc.c b/drivers/irqchip/irq-omap-intc.c index 8587d0f8d8c0..f6cb1b8bb981 100644 --- a/drivers/irqchip/irq-omap-intc.c +++ b/drivers/irqchip/irq-omap-intc.c @@ -47,6 +47,7 @@ #define INTC_ILR0 0x0100 #define ACTIVEIRQ_MASK 0x7f /* omap2/3 active interrupt bits */ +#define SPURIOUSIRQ_MASK (0x1ffffff << 7) #define INTCPS_NR_ILR_REGS 128 #define INTCPS_NR_MIR_REGS 4 @@ -330,11 +331,35 @@ static int __init omap_init_irq(u32 base, struct device_node *node) static asmlinkage void __exception_irq_entry omap_intc_handle_irq(struct pt_regs *regs) { + extern unsigned long irq_err_count; u32 irqnr; irqnr = intc_readl(INTC_SIR); + + /* + * A spurious IRQ can result if interrupt that triggered the + * sorting is no longer active during the sorting (10 INTC + * functional clock cycles after interrupt assertion). Or a + * change in interrupt mask affected the result during sorting + * time. There is no special handling required except ignoring + * the SIR register value just read and retrying. + * See section 6.2.5 of AM335x TRM Literature Number: SPRUH73K + * + * Many a times, a spurious interrupt situation has been fixed + * by adding a flush for the posted write acking the IRQ in + * the device driver. Typically, this is going be the device + * driver whose interrupt was handled just before the spurious + * IRQ occurred. Pay attention to those device drivers if you + * run into hitting the spurious IRQ condition below. + */ + if (unlikely((irqnr & SPURIOUSIRQ_MASK) == SPURIOUSIRQ_MASK)) { + pr_err_once("%s: spurious irq!\n", __func__); + irq_err_count++; + omap_ack_irq(NULL); + return; + } + irqnr &= ACTIVEIRQ_MASK; - WARN_ONCE(!irqnr, "Spurious IRQ ?\n"); handle_domain_irq(domain, irqnr, regs); } From 4778f99a2c60a826146000c31472fc79935255c1 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 29 Jan 2016 10:57:53 +0100 Subject: [PATCH 127/418] irqchip/mxs: Add missing set_handle_irq() commit c5b635203032462603c503ecce91a7021c1ad44a upstream. The rework of the driver missed to move the call to set_handle_irq() into asm9260_of_init(). As a consequence no interrupt entry point is installed and no interrupts are delivered Solution is simple: Install the interrupt entry handler. Fixes: 7e4ac676ee ("irqchip/mxs: Add Alphascale ASM9260 support") Signed-off-by: Oleksij Rempel Cc: kernel@pengutronix.de Cc: jason@lakedaemon.net Cc: marc.zyngier@arm.com Link: http://lkml.kernel.org/r/1454061473-24957-1-git-send-email-linux@rempel-privat.de Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- drivers/irqchip/irq-mxs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/irqchip/irq-mxs.c b/drivers/irqchip/irq-mxs.c index c22e2d40cb30..efe50845939d 100644 --- a/drivers/irqchip/irq-mxs.c +++ b/drivers/irqchip/irq-mxs.c @@ -241,6 +241,7 @@ static int __init asm9260_of_init(struct device_node *np, writel(0, icoll_priv.intr + i); icoll_add_domain(np, ASM9260_NUM_IRQS); + set_handle_irq(icoll_handle_irq); return 0; } From 2edd7c99031950e5a66e1254b787b9a70aaa61f1 Mon Sep 17 00:00:00 2001 From: Milo Kim Date: Wed, 13 Jan 2016 16:19:50 +0900 Subject: [PATCH 128/418] irqchip/atmel-aic: Fix wrong bit operation for IRQ priority commit 49f34134aea74f19ca016f055d25ee55ec359dee upstream. Atmel AIC has common structure for SMR (Source Mode Register). bit[6:5] Interrupt source type bit[2:0] Priority level Other bits are unused. To update new priority value, bit[2:0] should be cleared first and then new priority level can be written. However, aic_common_set_priority() helper clears source type bits instead of priority bits. This patch fixes wrong mask bit operation. Fixes: b1479ebb7720 "irqchip: atmel-aic: Add atmel AIC/AIC5 drivers" Signed-off-by: Milo Kim Acked-by: Boris Brezillon Cc: Jason Cooper Cc: Marc Zyngier Cc: Ludovic Desroches Cc: Nicholas Ferre Link: http://lkml.kernel.org/r/1452669592-3401-2-git-send-email-milo.kim@ti.com Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- drivers/irqchip/irq-atmel-aic-common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-atmel-aic-common.c b/drivers/irqchip/irq-atmel-aic-common.c index b12a5d58546f..37199b9b2cfa 100644 --- a/drivers/irqchip/irq-atmel-aic-common.c +++ b/drivers/irqchip/irq-atmel-aic-common.c @@ -86,7 +86,7 @@ int aic_common_set_priority(int priority, unsigned *val) priority > AT91_AIC_IRQ_MAX_PRIORITY) return -EINVAL; - *val &= AT91_AIC_PRIOR; + *val &= ~AT91_AIC_PRIOR; *val |= priority; return 0; From b966c761afe6d8560b463fe695525c883ce3fea6 Mon Sep 17 00:00:00 2001 From: Ashok Kumar Date: Thu, 11 Feb 2016 05:38:53 -0800 Subject: [PATCH 129/418] irqchip/gic-v3-its: Fix double ICC_EOIR write for LPI in EOImode==1 commit 004fa08d7aba2a13974446bf212a48c0b3b0d9fd upstream. When the GIC is using EOImode==1, the EOI is done immediately, leaving the deactivation to be performed when the EOI was previously done. Unfortunately, the ITS is not aware of the EOImode at all, and blindly EOIs the interrupt again. On most systems, this is ignored (despite being a programming error), but some others do raise a SError exception as there is no priority drop to perform for this interrupt. The fix is to stop trying to be clever, and always call into the underlying GIC to perform the right access, irrespective of the more we're in. [Marc: Reworked commit message] Fixes: 0b996fd35957a ("irqchip/GICv3: Convert to EOImode == 1") Acked-by: Marc Zyngier Signed-off-by: Ashok Kumar Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- drivers/irqchip/irq-gic-v3-its.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index e23d1d18f9d6..a159529f9d53 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -597,11 +597,6 @@ static void its_unmask_irq(struct irq_data *d) lpi_set_config(d, true); } -static void its_eoi_irq(struct irq_data *d) -{ - gic_write_eoir(d->hwirq); -} - static int its_set_affinity(struct irq_data *d, const struct cpumask *mask_val, bool force) { @@ -638,7 +633,7 @@ static struct irq_chip its_irq_chip = { .name = "ITS", .irq_mask = its_mask_irq, .irq_unmask = its_unmask_irq, - .irq_eoi = its_eoi_irq, + .irq_eoi = irq_chip_eoi_parent, .irq_set_affinity = its_set_affinity, .irq_compose_msi_msg = its_irq_compose_msi_msg, }; From b8175b171fde996b5306fd113bd865506166df19 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Tue, 22 Dec 2015 22:19:58 +0100 Subject: [PATCH 130/418] posix-clock: Fix return code on the poll method's error path commit 1b9f23727abb92c5e58f139e7d180befcaa06fe0 upstream. The posix_clock_poll function is supposed to return a bit mask of POLLxxx values. However, in case the hardware has disappeared (due to hot plugging for example) this code returns -ENODEV in a futile attempt to throw an error at the file descriptor level. The kernel's file_operations interface does not accept such error codes from the poll method. Instead, this function aught to return POLLERR. The value -ENODEV does, in fact, contain the POLLERR bit (and almost all the other POLLxxx bits as well), but only by chance. This patch fixes code to return a proper bit mask. Credit goes to Markus Elfring for pointing out the suspicious signed/unsigned mismatch. Reported-by: Markus Elfring igned-off-by: Richard Cochran Cc: John Stultz Cc: Julia Lawall Link: http://lkml.kernel.org/r/1450819198-17420-1-git-send-email-richardcochran@gmail.com Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- kernel/time/posix-clock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c index ce033c7aa2e8..9cff0ab82b63 100644 --- a/kernel/time/posix-clock.c +++ b/kernel/time/posix-clock.c @@ -69,10 +69,10 @@ static ssize_t posix_clock_read(struct file *fp, char __user *buf, static unsigned int posix_clock_poll(struct file *fp, poll_table *wait) { struct posix_clock *clk = get_posix_clock(fp); - int result = 0; + unsigned int result = 0; if (!clk) - return -ENODEV; + return POLLERR; if (clk->ops.poll) result = clk->ops.poll(clk, fp, wait); From c0cbcf2cb59c076f4ff6422153534dd2762c3d44 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Fri, 15 Jan 2016 11:34:21 +0100 Subject: [PATCH 131/418] clockevents/tcb_clksrc: Prevent disabling an already disabled clock commit f02b4b72d12cbae7020a959e2ed0410a464b4cc4 upstream. clockevents_exchange_device is calling clockevents_shutdown() on the new clockenvents device but it may have never been enabled in the first place. This results in the tcb clock being disabled without being enabled first: ------------[ cut here ]------------ WARNING: CPU: 0 PID: 1 at drivers/clk/clk.c:680 clk_disable+0x28/0x34() Modules linked in: CPU: 0 PID: 1 Comm: swapper Not tainted 4.4.0+ #6 Hardware name: Atmel AT91SAM9 [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (warn_slowpath_common+0x78/0xa0) [] (warn_slowpath_common) from [] (warn_slowpath_null+0x18/0x20) [] (warn_slowpath_null) from [] (clk_disable+0x28/0x34) [] (clk_disable) from [] (tc_shutdown+0x38/0x4c) [] (tc_shutdown) from [] (clockevents_switch_state+0x38/0x6c) [] (clockevents_switch_state) from [] (clockevents_shutdown+0x10/0x24) [] (clockevents_shutdown) from [] (tick_check_new_device+0x84/0xac) [] (tick_check_new_device) from [] (clockevents_register_device+0x7c/0x108) [] (clockevents_register_device) from [] (tcb_clksrc_init+0x390/0x3e8) [] (tcb_clksrc_init) from [] (do_one_initcall+0x114/0x1d4) [] (do_one_initcall) from [] (kernel_init_freeable+0xfc/0x1b8) [] (kernel_init_freeable) from [] (kernel_init+0x8/0xe0) [] (kernel_init) from [] (ret_from_fork+0x14/0x24) ---[ end trace 0000000000000001 ]--- Check what state we were in before trying to disable the clock. Fixes: cf4541c101ea ("clockevents/drivers/tcb_clksrc: Migrate to new 'set-state' interface") Signed-off-by: Alexandre Belloni Cc: Nicolas Ferre Cc: Boris Brezillon Cc: linux-arm-kernel@lists.infradead.org Cc: Daniel Lezcano Link: http://lkml.kernel.org/r/1452854061-30370-1-git-send-email-alexandre.belloni@free-electrons.com Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- drivers/clocksource/tcb_clksrc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/clocksource/tcb_clksrc.c b/drivers/clocksource/tcb_clksrc.c index 6ee91401918e..4da2af9694a2 100644 --- a/drivers/clocksource/tcb_clksrc.c +++ b/drivers/clocksource/tcb_clksrc.c @@ -98,7 +98,8 @@ static int tc_shutdown(struct clock_event_device *d) __raw_writel(0xff, regs + ATMEL_TC_REG(2, IDR)); __raw_writel(ATMEL_TC_CLKDIS, regs + ATMEL_TC_REG(2, CCR)); - clk_disable(tcd->clk); + if (!clockevent_state_detached(d)) + clk_disable(tcd->clk); return 0; } From 84bad9db0d8cfa0d84ab988c40226413cb34e2b3 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Fri, 27 Nov 2015 12:59:11 +0100 Subject: [PATCH 132/418] mmc: usdhi6rol0: handle NULL data in timeout commit 05caee939f8d58d81e962071da85761e1e3a4c73 upstream. Commit bb08a7d489bd ("mmc: usdhi6rol0: fix NULL pointer deref in debug print") fixed one NULL pointer dereference but unfortunately introduced another. "data" may be NULL if this is a command timeout for a command without any data, so we should only use it if we're actually waiting for data. Fixes: bb08a7d489bd ("mmc: usdhi6rol0: fix NULL pointer deref in debug print") Signed-off-by: Rabin Vincent Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/usdhi6rol0.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/usdhi6rol0.c b/drivers/mmc/host/usdhi6rol0.c index 4498e92116b8..b47122d3e8d8 100644 --- a/drivers/mmc/host/usdhi6rol0.c +++ b/drivers/mmc/host/usdhi6rol0.c @@ -1634,7 +1634,7 @@ static void usdhi6_timeout_work(struct work_struct *work) struct usdhi6_host *host = container_of(d, struct usdhi6_host, timeout_work); struct mmc_request *mrq = host->mrq; struct mmc_data *data = mrq ? mrq->data : NULL; - struct scatterlist *sg = host->sg ?: data->sg; + struct scatterlist *sg; dev_warn(mmc_dev(host->mmc), "%s timeout wait %u CMD%d: IRQ 0x%08x:0x%08x, last IRQ 0x%08x\n", @@ -1666,6 +1666,7 @@ static void usdhi6_timeout_work(struct work_struct *work) case USDHI6_WAIT_FOR_MWRITE: case USDHI6_WAIT_FOR_READ: case USDHI6_WAIT_FOR_WRITE: + sg = host->sg ?: data->sg; dev_dbg(mmc_dev(host->mmc), "%c: page #%u @ +0x%zx %ux%u in SG%u. Current SG %u bytes @ %u\n", data->flags & MMC_DATA_READ ? 'R' : 'W', host->page_idx, From 4a3d7e16b0f4729151f41061f5fc9cf91279b997 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 26 Nov 2015 14:00:45 +0200 Subject: [PATCH 133/418] mmc: sdhci-pci: Do not default to 33 Ohm driver strength for Intel SPT commit 1ca896856281d3f1ad4f6f7d4e32e2943452de23 upstream. In some cases, the stronger 33 Ohm driver strength must not be used so it is not a suitable default. Change it to the standard default 50 Ohm value. The patch applies to v4.2+ except the file name changed. It is drivers/mmc/host/sdhci-pci.c prior to v.4.4. Signed-off-by: Adrian Hunter Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-pci-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c index cf7ad458b4f4..08f4a9fe8550 100644 --- a/drivers/mmc/host/sdhci-pci-core.c +++ b/drivers/mmc/host/sdhci-pci-core.c @@ -277,7 +277,7 @@ static int spt_select_drive_strength(struct sdhci_host *host, if (sdhci_pci_spt_drive_strength > 0) drive_strength = sdhci_pci_spt_drive_strength & 0xf; else - drive_strength = 1; /* 33-ohm */ + drive_strength = 0; /* Default 50-ohm */ if ((mmc_driver_type_mask(drive_strength) & card_drv) == 0) drive_strength = 0; /* Default 50-ohm */ From 14772905465177b47ff0980eea5acf745ce7f625 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 26 Nov 2015 14:00:48 +0200 Subject: [PATCH 134/418] mmc: sdhci: Fix DMA descriptor with zero data length commit 347ea32dc118326c4f2636928239a29d192cc9b8 upstream. SDHCI has built-in DMA called ADMA2. ADMA2 uses a descriptor table to define DMA scatter-gather. Each desciptor can specify a data length up to 65536 bytes, however the length field is only 16-bits so zero means 65536. Consequently, putting zero when the size is zero must not be allowed. This patch fixes one case where zero data length could be set inadvertently. The problem happens because unaligned data gets split and the code did not consider that the remaining aligned portion might be zero length. That case really only happens for SDIO because SD and eMMC cards transfer blocks that are invariably sector- aligned. For SDIO, access to function registers is done by data transfer (CMD53) when the register is bigger than 1 byte. Generally registers are 4 bytes but 2-byte registers are possible. So DMA of 4 bytes or less can happen. When 32-bit DMA is used, the data alignment must be 4, so 4-byte transfers won't casue a problem, but a 2-byte transfer could. However with the introduction of 64-bit DMA, the data alignment for 64-bit DMA was made 8 bytes, so all 4-byte transfers not on 8-byte boundaries get "split" into a 4-byte chunk and a 0-byte chunk, thereby hitting the bug. In fact, a closer look at the SDHCI specs indicates that only the descriptor table requires 8-byte alignment for 64-bit DMA. That will be dealt with in a separate patch, but the potential for a 2-byte access remains, so this fix is needed anyway. Signed-off-by: Adrian Hunter Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index b48565ed5616..7e58a1723c9b 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -540,9 +540,12 @@ static int sdhci_adma_table_pre(struct sdhci_host *host, BUG_ON(len > 65536); - /* tran, valid */ - sdhci_adma_write_desc(host, desc, addr, len, ADMA2_TRAN_VALID); - desc += host->desc_sz; + if (len) { + /* tran, valid */ + sdhci_adma_write_desc(host, desc, addr, len, + ADMA2_TRAN_VALID); + desc += host->desc_sz; + } /* * If this triggers then we have a calculation bug From 8ae7192d91c8ca19fd0b919b594bc317d33fbede Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 26 Nov 2015 14:00:47 +0200 Subject: [PATCH 135/418] mmc: sdio: Fix invalid vdd in voltage switch power cycle commit d9bfbb95ed598a09cf336adb0f190ee0ff802f0d upstream. The 'ocr' parameter passed to mmc_set_signal_voltage() defines the power-on voltage used when power cycling after a failure to set the voltage. However, in the case of mmc_sdio_init_card(), the value passed has the R4_18V_PRESENT flag set which is not valid for power-on and results in an invalid vdd. Fix by passing the card's ocr value which does not have the flag. Signed-off-by: Adrian Hunter Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/core/sdio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c index 16d838e6d623..d61ba1a0495e 100644 --- a/drivers/mmc/core/sdio.c +++ b/drivers/mmc/core/sdio.c @@ -630,7 +630,7 @@ try_again: */ if (!powered_resume && (rocr & ocr & R4_18V_PRESENT)) { err = mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_180, - ocr); + ocr_card); if (err == -EAGAIN) { sdio_reset(host); mmc_go_idle(host); From 6eec26b7b07ece01ad547fce01f0229c733f6462 Mon Sep 17 00:00:00 2001 From: Wenkai Du Date: Thu, 26 Nov 2015 14:00:44 +0200 Subject: [PATCH 136/418] mmc: mmc: Fix incorrect use of driver strength switching HS200 and HS400 commit adb24d42a516bca8b9741ed21206509daaab5b13 upstream. Commit cc4f414c885c ("mmc: mmc: Add driver strength selection") added driver strength selection for eMMC HS200 and HS400 modes. That patch also set the driver stength when transitioning through High Speed mode to HS200/HS400, but driver strength is not defined for High Speed mode. While the JEDEC specification is not clear on this point it has been observed to cause problems for some eMMC, and removing the driver strength setting in this case makes it consistent with the normal use of High Speed mode. Signed-off-by: Wenkai Du Signed-off-by: Adrian Hunter Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/core/mmc.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 3a9a79ec4343..3d5087b03999 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -1076,8 +1076,7 @@ static int mmc_select_hs400(struct mmc_card *card) mmc_set_clock(host, max_dtr); /* Switch card to HS mode */ - val = EXT_CSD_TIMING_HS | - card->drive_strength << EXT_CSD_DRV_STR_SHIFT; + val = EXT_CSD_TIMING_HS; err = __mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, EXT_CSD_HS_TIMING, val, card->ext_csd.generic_cmd6_time, @@ -1160,8 +1159,7 @@ int mmc_hs400_to_hs200(struct mmc_card *card) mmc_set_clock(host, max_dtr); /* Switch HS400 to HS DDR */ - val = EXT_CSD_TIMING_HS | - card->drive_strength << EXT_CSD_DRV_STR_SHIFT; + val = EXT_CSD_TIMING_HS; err = __mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, EXT_CSD_HS_TIMING, val, card->ext_csd.generic_cmd6_time, true, send_status, true); From fca2d31d02fd13878137ebba7073ba3d9df9862e Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 26 Nov 2015 14:00:50 +0200 Subject: [PATCH 137/418] mmc: sdhci: Fix sdhci_runtime_pm_bus_on/off() commit 5c671c410c8704800f4f1673b6f572137e7e6ddd upstream. sdhci has a legacy facility to prevent runtime suspend if the bus power is on. This is needed in cases where the power to the card is dependent on the bus power. It is controlled by a pair of functions: sdhci_runtime_pm_bus_on() and sdhci_runtime_pm_bus_off(). These functions use a boolean variable 'bus_on' to ensure changes are always paired. There is an additional check for 'runtime_suspended' which is the problem. In fact, its use is ill-conceived as the only requirement for the logic is that 'on' and 'off' are paired, which is actually broken by the check, for example if the bus power is turned on during runtime resume. So remove the check. Signed-off-by: Adrian Hunter Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 7e58a1723c9b..161ae1f2ec45 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -2763,7 +2763,7 @@ static int sdhci_runtime_pm_put(struct sdhci_host *host) static void sdhci_runtime_pm_bus_on(struct sdhci_host *host) { - if (host->runtime_suspended || host->bus_on) + if (host->bus_on) return; host->bus_on = true; pm_runtime_get_noresume(host->mmc->parent); @@ -2771,7 +2771,7 @@ static void sdhci_runtime_pm_bus_on(struct sdhci_host *host) static void sdhci_runtime_pm_bus_off(struct sdhci_host *host) { - if (host->runtime_suspended || !host->bus_on) + if (!host->bus_on) return; host->bus_on = false; pm_runtime_put_noidle(host->mmc->parent); From 3cafb70cabf27616ac3da795a92a9b651fb28733 Mon Sep 17 00:00:00 2001 From: Carlo Caione Date: Wed, 13 Jan 2016 09:36:55 +0100 Subject: [PATCH 138/418] mmc: core: Enable tuning according to the actual timing commit e10c321977091f163eceedec0650e0ef4b3cf4bb upstream. While in sdhci_execute_tuning() the choice whether or not to enable the tuning is done on the actual timing, in the mmc_sdio_init_uhs_card() the check is done on the capability of the card. This difference is causing some issues with some SDIO cards in DDR50 mode where the CDM19 is wrongly issued. With this patch we modify the check in both mmc_(sd|sdio)_init_uhs_card() functions to take the proper decision only according to the actual timing specification. Signed-off-by: Carlo Caione Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/core/sd.c | 8 ++++---- drivers/mmc/core/sdio.c | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index 141eaa923e18..967535d76e34 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -626,9 +626,9 @@ static int mmc_sd_init_uhs_card(struct mmc_card *card) * SDR104 mode SD-cards. Note that tuning is mandatory for SDR104. */ if (!mmc_host_is_spi(card->host) && - (card->sd_bus_speed == UHS_SDR50_BUS_SPEED || - card->sd_bus_speed == UHS_DDR50_BUS_SPEED || - card->sd_bus_speed == UHS_SDR104_BUS_SPEED)) { + (card->host->ios.timing == MMC_TIMING_UHS_SDR50 || + card->host->ios.timing == MMC_TIMING_UHS_DDR50 || + card->host->ios.timing == MMC_TIMING_UHS_SDR104)) { err = mmc_execute_tuning(card); /* @@ -638,7 +638,7 @@ static int mmc_sd_init_uhs_card(struct mmc_card *card) * difference between v3.00 and 3.01 spec means that CMD19 * tuning is also available for DDR50 mode. */ - if (err && card->sd_bus_speed == UHS_DDR50_BUS_SPEED) { + if (err && card->host->ios.timing == MMC_TIMING_UHS_DDR50) { pr_warn("%s: ddr50 tuning failed\n", mmc_hostname(card->host)); err = 0; diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c index d61ba1a0495e..467b3cf80c44 100644 --- a/drivers/mmc/core/sdio.c +++ b/drivers/mmc/core/sdio.c @@ -535,8 +535,8 @@ static int mmc_sdio_init_uhs_card(struct mmc_card *card) * SDR104 mode SD-cards. Note that tuning is mandatory for SDR104. */ if (!mmc_host_is_spi(card->host) && - ((card->sw_caps.sd3_bus_mode & SD_MODE_UHS_SDR50) || - (card->sw_caps.sd3_bus_mode & SD_MODE_UHS_SDR104))) + ((card->host->ios.timing == MMC_TIMING_UHS_SDR50) || + (card->host->ios.timing == MMC_TIMING_UHS_SDR104))) err = mmc_execute_tuning(card); out: return err; From 2bfda88879216667b7f60d91851606f165244b0a Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 4 Jan 2016 02:21:55 +0100 Subject: [PATCH 139/418] mmc: mmci: fix an ages old detection error commit 0bcb7efdff63564e80fe84dd36a9fbdfbf6697a4 upstream. commit 4956e10903fd ("ARM: 6244/1: mmci: add variant data and default MCICLOCK support") added variant data for ARM, U300 and Ux500 variants. The Nomadik NHK8815/8820 variant was erroneously labeled as a U300 variant, and when the proper Nomadik variant was later introduced in commit 34fd421349ff ("ARM: 7378/1: mmci: add support for the Nomadik MMCI variant") this was not fixes. Let's say this fixes the latter commit as there was no proper Nomadik support until then. Fixes: 34fd421349ff ("ARM: 7378/1: mmci: add support for the Nomadik...") Signed-off-by: Linus Walleij Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/mmci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c index fb266745f824..acece3299756 100644 --- a/drivers/mmc/host/mmci.c +++ b/drivers/mmc/host/mmci.c @@ -1886,7 +1886,7 @@ static struct amba_id mmci_ids[] = { { .id = 0x00280180, .mask = 0x00ffffff, - .data = &variant_u300, + .data = &variant_nomadik, }, { .id = 0x00480180, From 43919d0ca15d5e93efd40467c63502b33b7acf5c Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 9 Feb 2016 16:12:38 +0200 Subject: [PATCH 140/418] mmc: sdhci-acpi: Fix card detect race for Intel BXT/APL commit 6a645dd87a5a506779810d7d9935c25fc2ef4687 upstream. Intel BXT/APL use a card detect GPIO however the host controller will not enable bus power unless it's card detect also reflects the presence of a card. Unfortunately those 2 things race which can result in commands not starting, after which the controller does nothing and there is a 10 second wait for the driver's 10-second timer to timeout. That is fixed by having the driver look also at the present state register to determine if the card is present. Consequently, provide a 'get_cd' mmc host operation for BXT/APL that does that. Signed-off-by: Adrian Hunter Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-acpi.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c index f6047fc94062..a5cda926d38e 100644 --- a/drivers/mmc/host/sdhci-acpi.c +++ b/drivers/mmc/host/sdhci-acpi.c @@ -146,6 +146,33 @@ static const struct sdhci_acpi_chip sdhci_acpi_chip_int = { .ops = &sdhci_acpi_ops_int, }; +static int bxt_get_cd(struct mmc_host *mmc) +{ + int gpio_cd = mmc_gpio_get_cd(mmc); + struct sdhci_host *host = mmc_priv(mmc); + unsigned long flags; + int ret = 0; + + if (!gpio_cd) + return 0; + + pm_runtime_get_sync(mmc->parent); + + spin_lock_irqsave(&host->lock, flags); + + if (host->flags & SDHCI_DEVICE_DEAD) + goto out; + + ret = !!(sdhci_readl(host, SDHCI_PRESENT_STATE) & SDHCI_CARD_PRESENT); +out: + spin_unlock_irqrestore(&host->lock, flags); + + pm_runtime_mark_last_busy(mmc->parent); + pm_runtime_put_autosuspend(mmc->parent); + + return ret; +} + static int sdhci_acpi_emmc_probe_slot(struct platform_device *pdev, const char *hid, const char *uid) { @@ -196,6 +223,9 @@ static int sdhci_acpi_sd_probe_slot(struct platform_device *pdev, /* Platform specific code during sd probe slot goes here */ + if (hid && !strcmp(hid, "80865ACA")) + host->mmc_host_ops.get_cd = bxt_get_cd; + return 0; } From b7f7a5e2e914e9f5332ea56d10e0db4be3363375 Mon Sep 17 00:00:00 2001 From: Robert Jarzmik Date: Fri, 29 Jan 2016 00:21:26 +0100 Subject: [PATCH 141/418] mmc: pxamci: fix again read-only gpio detection polarity commit 41c89159a6ae5472d39ed8bded5b3b4e07a37944 upstream. The commit fixing the conversion of pxamci to slot-gpio API fixed the inverted the logic of the read-only gpio. Unfortunately, the commit was tested on a non-inverted gpio, and not on the inverted one. And the fix did work partially, by luck. This is the remaining missing part of the fix, trivial but still necessary. Fixes: Fixes: 26d49fe71953 ("mmc: pxamci: fix read-only gpio detection polarity") Reported-by: Andrea Adami Tested-by: Andrea Adami Signed-off-by: Robert Jarzmik Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/pxamci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/pxamci.c b/drivers/mmc/host/pxamci.c index ce08896b9d69..28a057fae0a1 100644 --- a/drivers/mmc/host/pxamci.c +++ b/drivers/mmc/host/pxamci.c @@ -804,7 +804,7 @@ static int pxamci_probe(struct platform_device *pdev) dev_err(&pdev->dev, "Failed requesting gpio_ro %d\n", gpio_ro); goto out; } else { - mmc->caps |= host->pdata->gpio_card_ro_invert ? + mmc->caps2 |= host->pdata->gpio_card_ro_invert ? 0 : MMC_CAP2_RO_ACTIVE_HIGH; } From dfca191876436fd33c6ad18258116a2bf5506618 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 9 Feb 2016 16:12:37 +0200 Subject: [PATCH 142/418] mmc: sdhci-pci: Fix card detect race for Intel BXT/APL commit 163cbe31e5163459908a41a2b4e0d33a28fd557a upstream. Intel BXT/APL use a card detect GPIO however the host controller will not enable bus power unless it's card detect also reflects the presence of a card. Unfortunately those 2 things race which can result in commands not starting, after which the controller does nothing and there is a 10 second wait for the driver's 10-second timer to timeout. That is fixed by having the driver look also at the present state register to determine if the card is present. Consequently, provide a 'get_cd' mmc host operation for BXT/APL that does that. Signed-off-by: Adrian Hunter Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-pci-core.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c index 08f4a9fe8550..45ee07d3a761 100644 --- a/drivers/mmc/host/sdhci-pci-core.c +++ b/drivers/mmc/host/sdhci-pci-core.c @@ -330,6 +330,33 @@ static void spt_read_drive_strength(struct sdhci_host *host) sdhci_pci_spt_drive_strength = 0x10 | ((val >> 12) & 0xf); } +static int bxt_get_cd(struct mmc_host *mmc) +{ + int gpio_cd = mmc_gpio_get_cd(mmc); + struct sdhci_host *host = mmc_priv(mmc); + unsigned long flags; + int ret = 0; + + if (!gpio_cd) + return 0; + + pm_runtime_get_sync(mmc->parent); + + spin_lock_irqsave(&host->lock, flags); + + if (host->flags & SDHCI_DEVICE_DEAD) + goto out; + + ret = !!(sdhci_readl(host, SDHCI_PRESENT_STATE) & SDHCI_CARD_PRESENT); +out: + spin_unlock_irqrestore(&host->lock, flags); + + pm_runtime_mark_last_busy(mmc->parent); + pm_runtime_put_autosuspend(mmc->parent); + + return ret; +} + static int byt_emmc_probe_slot(struct sdhci_pci_slot *slot) { slot->host->mmc->caps |= MMC_CAP_8_BIT_DATA | MMC_CAP_NONREMOVABLE | @@ -362,6 +389,10 @@ static int byt_sd_probe_slot(struct sdhci_pci_slot *slot) slot->cd_con_id = NULL; slot->cd_idx = 0; slot->cd_override_level = true; + if (slot->chip->pdev->device == PCI_DEVICE_ID_INTEL_BXT_SD || + slot->chip->pdev->device == PCI_DEVICE_ID_INTEL_APL_SD) + slot->host->mmc_host_ops.get_cd = bxt_get_cd; + return 0; } From bafb71422d791e7c074dadbab5bf8e421a1ef97b Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 9 Feb 2016 16:12:35 +0200 Subject: [PATCH 143/418] mmc: sdhci: Allow override of mmc host operations commit bf60e592a1af4d6f65dd54593250183f14360eed upstream. In the past, fixes for specific hardware devices were implemented in sdhci using quirks. That approach is no longer accepted because the growing number of quirks was starting to make the code difficult to understand and maintain. One alternative to quirks, is to allow drivers to override the default mmc host operations. This patch makes it easy to do that, and it is needed for a subsequent bug fix, for which separate patches are provided. Signed-off-by: Adrian Hunter Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci.c | 3 ++- drivers/mmc/host/sdhci.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 161ae1f2ec45..eeea73de9f7b 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -2864,6 +2864,8 @@ struct sdhci_host *sdhci_alloc_host(struct device *dev, host = mmc_priv(mmc); host->mmc = mmc; + host->mmc_host_ops = sdhci_ops; + mmc->ops = &host->mmc_host_ops; return host; } @@ -3060,7 +3062,6 @@ int sdhci_add_host(struct sdhci_host *host) /* * Set host parameters. */ - mmc->ops = &sdhci_ops; max_clk = host->max_clk; if (host->ops->get_min_clock) diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index 9d4aa31b683a..9c331ac5ad6b 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -425,6 +425,7 @@ struct sdhci_host { /* Internal data */ struct mmc_host *mmc; /* MMC structure */ + struct mmc_host_ops mmc_host_ops; /* MMC host ops */ u64 dma_mask; /* custom DMA mask */ #if defined(CONFIG_LEDS_CLASS) || defined(CONFIG_LEDS_CLASS_MODULE) From 61b9408bfd06779c8b2073829bf2f73a81531a99 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 9 Feb 2016 16:12:36 +0200 Subject: [PATCH 144/418] mmc: sdhci: Allow override of get_cd() called from sdhci_request() commit 8d28b7a72fe18bcdcdb047243ba8fec36b149955 upstream. Drivers may need to provide their own get_cd() mmc host op, but currently the internals of the current op (sdhci_get_cd()) are provided by sdhci_do_get_cd() which is also called from sdhci_request(). To allow override of the get_cd functionality, change sdhci_request() to call ->get_cd() instead of sdhci_do_get_cd(). Note, in the future the call to ->get_cd() will likely be removed from sdhci_request() since most drivers don't need actually it. However this change is being done now to facilitate a subsequent bug fix. Signed-off-by: Adrian Hunter Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index eeea73de9f7b..8814eb6b83bf 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -1367,7 +1367,7 @@ static void sdhci_request(struct mmc_host *mmc, struct mmc_request *mrq) sdhci_runtime_pm_get(host); /* Firstly check card presence */ - present = sdhci_do_get_cd(host); + present = mmc->ops->get_cd(mmc); spin_lock_irqsave(&host->lock, flags); From 7db755f538b093335e43b8ca81667f898ca51ede Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Mon, 14 Dec 2015 16:01:58 -0800 Subject: [PATCH 145/418] tools: hv: vss: fix the write()'s argument: error -> vss_msg commit a689d2510f188e75391dbebacbddfd74d42f2a7e upstream. Fix the write()'s argument in the daemon code. Cc: Vitaly Kuznetsov Cc: "K. Y. Srinivasan" Signed-off-by: Dexuan Cui Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- tools/hv/hv_vss_daemon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/hv/hv_vss_daemon.c b/tools/hv/hv_vss_daemon.c index 96234b638249..5d51d6ff08e6 100644 --- a/tools/hv/hv_vss_daemon.c +++ b/tools/hv/hv_vss_daemon.c @@ -254,7 +254,7 @@ int main(int argc, char *argv[]) syslog(LOG_ERR, "Illegal op:%d\n", op); } vss_msg->error = error; - len = write(vss_fd, &error, sizeof(struct hv_vss_msg)); + len = write(vss_fd, vss_msg, sizeof(struct hv_vss_msg)); if (len != sizeof(struct hv_vss_msg)) { syslog(LOG_ERR, "write failed; error: %d %s", errno, strerror(errno)); From 2a383bcc68ad1437315eb4298a84cdde070a69bc Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Mon, 14 Dec 2015 16:01:54 -0800 Subject: [PATCH 146/418] Drivers: hv: vmbus: Fix a Host signaling bug commit 8599846d73997cdbccf63f23394d871cfad1e5e6 upstream. Currently we have two policies for deciding when to signal the host: One based on the ring buffer state and the other based on what the VMBUS client driver wants to do. Consider the case when the client wants to explicitly control when to signal the host. In this case, if the client were to defer signaling, we will not be able to signal the host subsequently when the client does want to signal since the ring buffer state will prevent the signaling. Implement logic to have only one signaling policy in force for a given channel. Signed-off-by: K. Y. Srinivasan Reviewed-by: Haiyang Zhang Tested-by: Haiyang Zhang Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel.c | 18 ++++++++++++++++++ include/linux/hyperv.h | 18 ++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index c4dcab048cb8..9098f13f2f44 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -630,10 +630,19 @@ int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer, * on the ring. We will not signal if more data is * to be placed. * + * Based on the channel signal state, we will decide + * which signaling policy will be applied. + * * If we cannot write to the ring-buffer; signal the host * even if we may not have written anything. This is a rare * enough condition that it should not matter. */ + + if (channel->signal_policy) + signal = true; + else + kick_q = true; + if (((ret == 0) && kick_q && signal) || (ret)) vmbus_setevent(channel); @@ -733,10 +742,19 @@ int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel, * on the ring. We will not signal if more data is * to be placed. * + * Based on the channel signal state, we will decide + * which signaling policy will be applied. + * * If we cannot write to the ring-buffer; signal the host * even if we may not have written anything. This is a rare * enough condition that it should not matter. */ + + if (channel->signal_policy) + signal = true; + else + kick_q = true; + if (((ret == 0) && kick_q && signal) || (ret)) vmbus_setevent(channel); diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 8fdc17b84739..ae6a711dcd1d 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -630,6 +630,11 @@ struct hv_input_signal_event_buffer { struct hv_input_signal_event event; }; +enum hv_signal_policy { + HV_SIGNAL_POLICY_DEFAULT = 0, + HV_SIGNAL_POLICY_EXPLICIT, +}; + struct vmbus_channel { /* Unique channel id */ int id; @@ -757,8 +762,21 @@ struct vmbus_channel { * link up channels based on their CPU affinity. */ struct list_head percpu_list; + /* + * Host signaling policy: The default policy will be + * based on the ring buffer state. We will also support + * a policy where the client driver can have explicit + * signaling control. + */ + enum hv_signal_policy signal_policy; }; +static inline void set_channel_signal_state(struct vmbus_channel *c, + enum hv_signal_policy policy) +{ + c->signal_policy = policy; +} + static inline void set_channel_read_state(struct vmbus_channel *c, bool state) { c->batched_reading = state; From c76633bf38205c9ee640096eb387dce31cb8b8c9 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Thu, 3 Dec 2015 12:45:19 +0200 Subject: [PATCH 147/418] Bluetooth: Use continuous scanning when creating LE connections commit 2f99536a5b34d5b0f54723067d68f6cef3f0fdc6 upstream. All LE connections are now triggered through a preceding passive scan and waiting for a connectable advertising report. This means we've got the best possible guarantee that the device is within range and should be able to request the controller to perform continuous scanning. This way we minimize the risk that we miss out on any advertising packets. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/hci_conn.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 85b82f7adbd2..24e9410923d0 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -722,8 +722,12 @@ static void hci_req_add_le_create_conn(struct hci_request *req, if (hci_update_random_address(req, false, &own_addr_type)) return; + /* Set window to be the same value as the interval to enable + * continuous scanning. + */ cp.scan_interval = cpu_to_le16(hdev->le_scan_interval); - cp.scan_window = cpu_to_le16(hdev->le_scan_window); + cp.scan_window = cp.scan_interval; + bacpy(&cp.peer_addr, &conn->dst); cp.peer_addr_type = conn->dst_type; cp.own_address_type = own_addr_type; From 2b2b55a0d88ef81f33ea77f79cbdeee25b2ed891 Mon Sep 17 00:00:00 2001 From: Dmitry Tunin Date: Sat, 5 Dec 2015 14:09:36 +0300 Subject: [PATCH 148/418] Bluetooth: Add support of Toshiba Broadcom based devices commit 1623d0bf847d3b38d8cf24367b3689ba0e3fe2aa upstream. BugLink: https://bugs.launchpad.net/bugs/1522949 T: Bus=03 Lev=02 Prnt=02 Port=05 Cnt=02 Dev#= 4 Spd=12 MxCh= 0 D: Ver= 2.00 Cls=ff(vend.) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=0930 ProdID=0225 Rev=01.12 S: Manufacturer=Broadcom Corp S: Product=BCM43142A0 S: SerialNumber=4CBB58034671 C: #Ifs= 4 Cfg#= 1 Atr=e0 MxPwr=0mA I: If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=01 Prot=01 Driver=(none) I: If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=01 Prot=01 Driver=(none) I: If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) I: If#= 3 Alt= 0 #EPs= 0 Cls=fe(app. ) Sub=01 Prot=01 Driver=(none) Signed-off-by: Dmitry Tunin Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btusb.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 92f0ee388f9e..968897108c76 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -153,6 +153,10 @@ static const struct usb_device_id btusb_table[] = { { USB_VENDOR_AND_INTERFACE_INFO(0x13d3, 0xff, 0x01, 0x01), .driver_info = BTUSB_BCM_PATCHRAM }, + /* Toshiba Corp - Broadcom based */ + { USB_VENDOR_AND_INTERFACE_INFO(0x0930, 0xff, 0x01, 0x01), + .driver_info = BTUSB_BCM_PATCHRAM }, + /* Intel Bluetooth USB Bootloader (RAM module) */ { USB_DEVICE(0x8087, 0x0a5a), .driver_info = BTUSB_INTEL_BOOT | BTUSB_BROKEN_ISOC }, From 701495c14d09658bffe78e0e605f298752edd36c Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 26 Jan 2016 14:31:31 -0500 Subject: [PATCH 149/418] Bluetooth: Fix incorrect removing of IRKs commit cff10ce7b4f02718ffd25e3914e60559f5ef6ca0 upstream. The commit cad20c278085d893ebd616cd20c0747a8e9d53c7 was supposed to fix handling of devices first using public addresses and then switching to RPAs after pairing. Unfortunately it missed a couple of key places in the code. 1. When evaluating which devices should be removed from the existing white list we also need to consider whether we have an IRK for them or not, i.e. a call to hci_find_irk_by_addr() is needed. 2. In smp_notify_keys() we should not be requiring the knowledge of the RPA, but should simply keep the IRK around if the other conditions require it. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/hci_request.c | 28 ++++++++++++++++++---------- net/bluetooth/smp.c | 16 ---------------- 2 files changed, 18 insertions(+), 26 deletions(-) diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 981f8a202c27..02778c5bc149 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -175,21 +175,29 @@ static u8 update_white_list(struct hci_request *req) * command to remove it from the controller. */ list_for_each_entry(b, &hdev->le_white_list, list) { - struct hci_cp_le_del_from_white_list cp; + /* If the device is neither in pend_le_conns nor + * pend_le_reports then remove it from the whitelist. + */ + if (!hci_pend_le_action_lookup(&hdev->pend_le_conns, + &b->bdaddr, b->bdaddr_type) && + !hci_pend_le_action_lookup(&hdev->pend_le_reports, + &b->bdaddr, b->bdaddr_type)) { + struct hci_cp_le_del_from_white_list cp; - if (hci_pend_le_action_lookup(&hdev->pend_le_conns, - &b->bdaddr, b->bdaddr_type) || - hci_pend_le_action_lookup(&hdev->pend_le_reports, - &b->bdaddr, b->bdaddr_type)) { - white_list_entries++; + cp.bdaddr_type = b->bdaddr_type; + bacpy(&cp.bdaddr, &b->bdaddr); + + hci_req_add(req, HCI_OP_LE_DEL_FROM_WHITE_LIST, + sizeof(cp), &cp); continue; } - cp.bdaddr_type = b->bdaddr_type; - bacpy(&cp.bdaddr, &b->bdaddr); + if (hci_find_irk_by_addr(hdev, &b->bdaddr, b->bdaddr_type)) { + /* White list can not be used with RPAs */ + return 0x00; + } - hci_req_add(req, HCI_OP_LE_DEL_FROM_WHITE_LIST, - sizeof(cp), &cp); + white_list_entries++; } /* Since all no longer valid white list entries have been diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index ffed8a1d4f27..4b175df35184 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -1072,22 +1072,6 @@ static void smp_notify_keys(struct l2cap_conn *conn) hcon->dst_type = smp->remote_irk->addr_type; queue_work(hdev->workqueue, &conn->id_addr_update_work); } - - /* When receiving an indentity resolving key for - * a remote device that does not use a resolvable - * private address, just remove the key so that - * it is possible to use the controller white - * list for scanning. - * - * Userspace will have been told to not store - * this key at this point. So it is safe to - * just remove it. - */ - if (!bacmp(&smp->remote_irk->rpa, BDADDR_ANY)) { - list_del_rcu(&smp->remote_irk->list); - kfree_rcu(smp->remote_irk, rcu); - smp->remote_irk = NULL; - } } if (smp->csrk) { From cf5d2ff495fdf7b7c3936166ca18f7d37d4f6c25 Mon Sep 17 00:00:00 2001 From: Glenn Ruben Bakke Date: Wed, 13 Jan 2016 16:41:42 +0100 Subject: [PATCH 150/418] Bluetooth: 6lowpan: Fix kernel NULL pointer dereferences commit 4c58f3282e3de43d34f8955f8eca676294380bf9 upstream. The fixes provided in this patch assigns a valid net_device structure to skb before dispatching it for further processing. Scenario #1: ============ Bluetooth 6lowpan receives an uncompressed IPv6 header, and dispatches it to netif. The following error occurs: Null pointer dereference error #1 crash log: [ 845.854013] BUG: unable to handle kernel NULL pointer dereference at 0000000000000048 [ 845.855785] IP: [] enqueue_to_backlog+0x56/0x240 ... [ 845.909459] Call Trace: [ 845.911678] [] netif_rx_internal+0x44/0xf0 The first modification fixes the NULL pointer dereference error by assigning dev to the local_skb in order to set a valid net_device before processing the skb by netif_rx_ni(). Scenario #2: ============ Bluetooth 6lowpan receives an UDP compressed message which needs further decompression by nhc_udp. The following error occurs: Null pointer dereference error #2 crash log: [ 63.295149] BUG: unable to handle kernel NULL pointer dereference at 0000000000000840 [ 63.295931] IP: [] udp_uncompress+0x320/0x626 [nhc_udp] The second modification fixes the NULL pointer dereference error by assigning dev to the local_skb in the case of a udp compressed packet. The 6lowpan udp_uncompress function expects that the net_device is set in the skb when checking lltype. Signed-off-by: Glenn Ruben Bakke Signed-off-by: Lukasz Duda Acked-by: Jukka Rissanen Signed-off-by: Johan Hedberg Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/6lowpan.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index 9e9cca3689a0..2b6132fa0faf 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -317,6 +317,7 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev, local_skb->protocol = htons(ETH_P_IPV6); local_skb->pkt_type = PACKET_HOST; + local_skb->dev = dev; skb_set_transport_header(local_skb, sizeof(struct ipv6hdr)); @@ -335,6 +336,8 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev, if (!local_skb) goto drop; + local_skb->dev = dev; + ret = iphc_decompress(local_skb, dev, chan); if (ret < 0) { kfree_skb(local_skb); @@ -343,7 +346,6 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev, local_skb->protocol = htons(ETH_P_IPV6); local_skb->pkt_type = PACKET_HOST; - local_skb->dev = dev; if (give_skb_to_upper(local_skb, dev) != NET_RX_SUCCESS) { From 8cf0f282de522d9f44dc2e0f6f58361fdef4d722 Mon Sep 17 00:00:00 2001 From: Lukasz Duda Date: Wed, 13 Jan 2016 16:57:48 +0100 Subject: [PATCH 151/418] Bluetooth: 6lowpan: Fix handling of uncompressed IPv6 packets commit 87f5fedb3bebbbb566f847dd0c567fcea49a36a6 upstream. This patch fixes incorrect handling of the 6lowpan packets that contain uncompressed IPv6 header. RFC4944 specifies a special dispatch for 6lowpan to carry uncompressed IPv6 header. This dispatch (1 byte long) has to be removed during reception and skb data pointer has to be moved. To correctly point in the beginning of the IPv6 header the dispatch byte has to be pulled off before packet can be processed by netif_rx_in(). Test scenario: IPv6 packets are not correctly interpreted by the network layer when IPv6 header is not compressed (e.g. ICMPv6 Echo Reply is not propagated correctly to the ICMPv6 layer because the extra byte will make the header look corrupted). Similar approach is done for IEEE 802.15.4. Signed-off-by: Lukasz Duda Signed-off-by: Glenn Ruben Bakke Acked-by: Jukka Rissanen Signed-off-by: Johan Hedberg Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/6lowpan.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index 2b6132fa0faf..795ddd8b2f77 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -307,6 +307,9 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev, /* check that it's our buffer */ if (lowpan_is_ipv6(*skb_network_header(skb))) { + /* Pull off the 1-byte of 6lowpan header. */ + skb_pull(skb, 1); + /* Copy the packet so that the IPv6 header is * properly aligned. */ From 972e9e3c7f447bb2becfdcde9931790e78dd43fa Mon Sep 17 00:00:00 2001 From: David Gibson Date: Mon, 30 Nov 2015 12:30:30 +1100 Subject: [PATCH 152/418] time: Avoid signed overflow in timekeeping_get_ns() commit 35a4933a895927990772ae96fdcfd2f806929ee2 upstream. 1e75fa8 "time: Condense timekeeper.xtime into xtime_sec" replaced a call to clocksource_cyc2ns() from timekeeping_get_ns() with an open-coded version of the same logic to avoid keeping a semi-redundant struct timespec in struct timekeeper. However, the commit also introduced a subtle semantic change - where clocksource_cyc2ns() uses purely unsigned math, the new version introduces a signed temporary, meaning that if (delta * tk->mult) has a 63-bit overflow the following shift will still give a negative result. The choice of 'maxsec' in __clocksource_updatefreq_scale() means this will generally happen if there's a ~10 minute pause in examining the clocksource. This can be triggered on a powerpc KVM guest by stopping it from qemu for a bit over 10 minutes. After resuming time has jumped backwards several minutes causing numerous problems (jiffies does not advance, msleep()s can be extended by minutes..). It doesn't happen on x86 KVM guests, because the guest TSC is effectively frozen while the guest is stopped, which is not the case for the powerpc timebase. Obviously an unsigned (64 bit) overflow will only take twice as long as a signed, 63-bit overflow. I don't know the time code well enough to know if that will still cause incorrect calculations, or if a 64-bit overflow is avoided elsewhere. Still, an incorrect forwards clock adjustment will cause less trouble than time going backwards. So, this patch removes the potential for intermediate signed overflow. Suggested-by: Laurent Vivier Tested-by: Laurent Vivier Signed-off-by: David Gibson Signed-off-by: John Stultz Signed-off-by: Greg Kroah-Hartman --- kernel/time/timekeeping.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index d563c1960302..99188ee5d9d0 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -305,8 +305,7 @@ static inline s64 timekeeping_get_ns(struct tk_read_base *tkr) delta = timekeeping_get_delta(tkr); - nsec = delta * tkr->mult + tkr->xtime_nsec; - nsec >>= tkr->shift; + nsec = (delta * tkr->mult + tkr->xtime_nsec) >> tkr->shift; /* If arch requires, add in get_arch_timeoffset() */ return nsec + arch_gettimeoffset(); From 242e16cd87b1d6e19382dc683b6cab121fcfd69c Mon Sep 17 00:00:00 2001 From: zengtao Date: Tue, 2 Feb 2016 11:38:34 +0800 Subject: [PATCH 153/418] cputime: Prevent 32bit overflow in time[val|spec]_to_cputime() commit 0f26922fe5dc5724b1adbbd54b21bad03590b4f3 upstream. The datatype __kernel_time_t is u32 on 32bit platform, so its subject to overflows in the timeval/timespec to cputime conversion. Currently the following functions are affected: 1. setitimer() 2. timer_create/timer_settime() 3. sys_clock_nanosleep This can happen on MIPS32 and ARM32 with "Full dynticks CPU time accounting" enabled, which is required for CONFIG_NO_HZ_FULL. Enforce u64 conversion to prevent the overflow. Fixes: 31c1fc818715 ("ARM: Kconfig: allow full nohz CPU accounting") Signed-off-by: zengtao Reviewed-by: Arnd Bergmann Cc: Link: http://lkml.kernel.org/r/1454384314-154784-1-git-send-email-prime.zeng@huawei.com Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- include/asm-generic/cputime_nsecs.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/include/asm-generic/cputime_nsecs.h b/include/asm-generic/cputime_nsecs.h index 0419485891f2..0f1c6f315cdc 100644 --- a/include/asm-generic/cputime_nsecs.h +++ b/include/asm-generic/cputime_nsecs.h @@ -75,7 +75,7 @@ typedef u64 __nocast cputime64_t; */ static inline cputime_t timespec_to_cputime(const struct timespec *val) { - u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_nsec; + u64 ret = (u64)val->tv_sec * NSEC_PER_SEC + val->tv_nsec; return (__force cputime_t) ret; } static inline void cputime_to_timespec(const cputime_t ct, struct timespec *val) @@ -91,7 +91,8 @@ static inline void cputime_to_timespec(const cputime_t ct, struct timespec *val) */ static inline cputime_t timeval_to_cputime(const struct timeval *val) { - u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_usec * NSEC_PER_USEC; + u64 ret = (u64)val->tv_sec * NSEC_PER_SEC + + val->tv_usec * NSEC_PER_USEC; return (__force cputime_t) ret; } static inline void cputime_to_timeval(const cputime_t ct, struct timeval *val) From 97ff5367a76179adeb2c4003ec9ee83a9a36b242 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 22 Jan 2016 09:20:37 -0800 Subject: [PATCH 154/418] Revert "MIPS: Fix PAGE_MASK definition" commit 800dc4f49cc002879e1e5e6b79926f86b60528e6 upstream. This reverts commit 22b14523994588279ae9c5ccfe64073c1e5b3c00. It was originally sent in an earlier revision of the pfn_t patchset. Besides being broken, the warning is also fixed by PFN_FLAGS_MASK casting the PAGE_MASK to an unsigned long. Reported-by: Manuel Lauss Signed-off-by: Dan Williams Cc: linux-kernel@vger.kernel.org Cc: Linux-MIPS Patchwork: https://patchwork.linux-mips.org/patch/12182/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/include/asm/page.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/include/asm/page.h b/arch/mips/include/asm/page.h index 2046c0230224..21ed7150fec3 100644 --- a/arch/mips/include/asm/page.h +++ b/arch/mips/include/asm/page.h @@ -33,7 +33,7 @@ #define PAGE_SHIFT 16 #endif #define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) -#define PAGE_MASK (~(PAGE_SIZE - 1)) +#define PAGE_MASK (~((1 << PAGE_SHIFT) - 1)) /* * This is used for calculating the real page sizes From 332e40db5154bb9162ad0bb0621876ac48f75e16 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 21 Jan 2016 21:09:49 +0800 Subject: [PATCH 155/418] MIPS: Loongson-3: Fix SMP_ASK_C0COUNT IPI handler commit 5754843225f78ac7cbe142a6899890a9733a5a5d upstream. When Core-0 handle SMP_ASK_C0COUNT IPI, we should make other cores to see the result as soon as possible (especially when Store-Fill-Buffer is enabled). Otherwise, C0_Count syncronization makes no sense. BTW, array is more suitable than per-cpu variable for syncronization, and there is a corner case should be avoid: C0_Count of Core-0 can be really 0. Signed-off-by: Huacai Chen Cc: Aurelien Jarno Cc: Steven J. Hill Cc: Fuxin Zhang Cc: Zhangjin Wu Cc: Huacai Chen Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/12160/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/loongson64/loongson-3/smp.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/arch/mips/loongson64/loongson-3/smp.c b/arch/mips/loongson64/loongson-3/smp.c index 1a4738a8f2d3..509832a9836c 100644 --- a/arch/mips/loongson64/loongson-3/smp.c +++ b/arch/mips/loongson64/loongson-3/smp.c @@ -30,13 +30,13 @@ #include "smp.h" DEFINE_PER_CPU(int, cpu_state); -DEFINE_PER_CPU(uint32_t, core0_c0count); static void *ipi_set0_regs[16]; static void *ipi_clear0_regs[16]; static void *ipi_status0_regs[16]; static void *ipi_en0_regs[16]; static void *ipi_mailbox_buf[16]; +static uint32_t core0_c0count[NR_CPUS]; /* read a 32bit value from ipi register */ #define loongson3_ipi_read32(addr) readl(addr) @@ -275,12 +275,14 @@ void loongson3_ipi_interrupt(struct pt_regs *regs) if (action & SMP_ASK_C0COUNT) { BUG_ON(cpu != 0); c0count = read_c0_count(); - for (i = 1; i < num_possible_cpus(); i++) - per_cpu(core0_c0count, i) = c0count; + c0count = c0count ? c0count : 1; + for (i = 1; i < nr_cpu_ids; i++) + core0_c0count[i] = c0count; + __wbflush(); /* Let others see the result ASAP */ } } -#define MAX_LOOPS 1111 +#define MAX_LOOPS 800 /* * SMP init and finish on secondary CPUs */ @@ -305,16 +307,20 @@ static void loongson3_init_secondary(void) cpu_logical_map(cpu) / loongson_sysconf.cores_per_package; i = 0; - __this_cpu_write(core0_c0count, 0); + core0_c0count[cpu] = 0; loongson3_send_ipi_single(0, SMP_ASK_C0COUNT); - while (!__this_cpu_read(core0_c0count)) { + while (!core0_c0count[cpu]) { i++; cpu_relax(); } if (i > MAX_LOOPS) i = MAX_LOOPS; - initcount = __this_cpu_read(core0_c0count) + i; + if (cpu_data[cpu].package) + initcount = core0_c0count[cpu] + i; + else /* Local access is faster for loops */ + initcount = core0_c0count[cpu] + i/2; + write_c0_count(initcount); } From e76f1004a01d5475d9f09c32aa19c912b71f29db Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 21 Jan 2016 21:09:50 +0800 Subject: [PATCH 156/418] MIPS: hpet: Choose a safe value for the ETIME check commit 5610b1254e3689b6ef8ebe2db260709a74da06c8 upstream. This patch is borrowed from x86 hpet driver and explaind below: Due to the overly intelligent design of HPETs, we need to workaround the problem that the compare value which we write is already behind the actual counter value at the point where the value hits the real compare register. This happens for two reasons: 1) We read out the counter, add the delta and write the result to the compare register. When a NMI hits between the read out and the write then the counter can be ahead of the event already. 2) The write to the compare register is delayed by up to two HPET cycles in AMD chipsets. We can work around this by reading back the compare register to make sure that the written value has hit the hardware. But that is bad performance wise for the normal case where the event is far enough in the future. As we already know that the write can be delayed by up to two cycles we can avoid the read back of the compare register completely if we make the decision whether the delta has elapsed already or not based on the following calculation: cmp = event - actual_count; If cmp is less than 64 HPET clock cycles, then we decide that the event has happened already and return -ETIME. That covers the above #1 and #2 problems which would cause a wait for HPET wraparound (~306 seconds). Signed-off-by: Huacai Chen Cc: Aurelien Jarno Cc: Steven J. Hill Cc: Fuxin Zhang Cc: Zhangjin Wu Cc: Huacai Chen Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/12162/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/loongson64/loongson-3/hpet.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/mips/loongson64/loongson-3/hpet.c b/arch/mips/loongson64/loongson-3/hpet.c index bf9f1a77f0e5..a2631a52ca99 100644 --- a/arch/mips/loongson64/loongson-3/hpet.c +++ b/arch/mips/loongson64/loongson-3/hpet.c @@ -13,6 +13,9 @@ #define SMBUS_PCI_REG64 0x64 #define SMBUS_PCI_REGB4 0xb4 +#define HPET_MIN_CYCLES 64 +#define HPET_MIN_PROG_DELTA (HPET_MIN_CYCLES + (HPET_MIN_CYCLES >> 1)) + static DEFINE_SPINLOCK(hpet_lock); DEFINE_PER_CPU(struct clock_event_device, hpet_clockevent_device); @@ -161,8 +164,9 @@ static int hpet_next_event(unsigned long delta, cnt += delta; hpet_write(HPET_T0_CMP, cnt); - res = ((int)(hpet_read(HPET_COUNTER) - cnt) > 0) ? -ETIME : 0; - return res; + res = (int)(cnt - hpet_read(HPET_COUNTER)); + + return res < HPET_MIN_CYCLES ? -ETIME : 0; } static irqreturn_t hpet_irq_handler(int irq, void *data) @@ -237,7 +241,7 @@ void __init setup_hpet_timer(void) cd->cpumask = cpumask_of(cpu); clockevent_set_clock(cd, HPET_FREQ); cd->max_delta_ns = clockevent_delta2ns(0x7fffffff, cd); - cd->min_delta_ns = 5000; + cd->min_delta_ns = clockevent_delta2ns(HPET_MIN_PROG_DELTA, cd); clockevents_register_device(cd); setup_irq(HPET_T0_IRQ, &hpet_irq); From dbdb0333caf272500f495da2da71125d564146e7 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 21 Jan 2016 21:09:52 +0800 Subject: [PATCH 157/418] MIPS: Fix some missing CONFIG_CPU_MIPSR6 #ifdefs commit 4f33f6c522948fffc345261896042b58dea23754 upstream. Commit be0c37c985eddc4 (MIPS: Rearrange PTE bits into fixed positions.) defines fixed PTE bits for MIPS R2. Then, commit d7b631419b3d230a4d383 (MIPS: pgtable-bits: Fix XPA damage to R6 definitions.) adds the MIPS R6 definitions in the same way as MIPS R2. But some R6 #ifdefs in the later commit are missing, so in this patch I fix that. Signed-off-by: Huacai Chen Cc: Aurelien Jarno Cc: Steven J. Hill Cc: Fuxin Zhang Cc: Zhangjin Wu Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/12164/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/include/asm/pgtable.h | 4 ++-- arch/mips/mm/tlbex.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index 8957f15e21ec..18826aa15a7c 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -353,7 +353,7 @@ static inline pte_t pte_mkdirty(pte_t pte) static inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= _PAGE_ACCESSED; -#ifdef CONFIG_CPU_MIPSR2 +#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) if (!(pte_val(pte) & _PAGE_NO_READ)) pte_val(pte) |= _PAGE_SILENT_READ; else @@ -560,7 +560,7 @@ static inline pmd_t pmd_mkyoung(pmd_t pmd) { pmd_val(pmd) |= _PAGE_ACCESSED; -#ifdef CONFIG_CPU_MIPSR2 +#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) if (!(pmd_val(pmd) & _PAGE_NO_READ)) pmd_val(pmd) |= _PAGE_SILENT_READ; else diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index 32e0be27673f..29f73e00253d 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -242,7 +242,7 @@ static void output_pgtable_bits_defines(void) pr_define("_PAGE_HUGE_SHIFT %d\n", _PAGE_HUGE_SHIFT); pr_define("_PAGE_SPLITTING_SHIFT %d\n", _PAGE_SPLITTING_SHIFT); #endif -#ifdef CONFIG_CPU_MIPSR2 +#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) if (cpu_has_rixi) { #ifdef _PAGE_NO_EXEC_SHIFT pr_define("_PAGE_NO_EXEC_SHIFT %d\n", _PAGE_NO_EXEC_SHIFT); From a869e6b05d4bc17afeed7ade9f4ffc0d1c3bac47 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Mon, 25 Jan 2016 20:32:03 +0000 Subject: [PATCH 158/418] MIPS: Fix buffer overflow in syscall_get_arguments() commit f4dce1ffd2e30fa31756876ef502ce6d2324be35 upstream. Since commit 4c21b8fd8f14 ("MIPS: seccomp: Handle indirect system calls (o32)"), syscall_get_arguments() attempts to handle o32 indirect syscall arguments by incrementing both the start argument number and the number of arguments to fetch. However only the start argument number needs to be incremented. The number of arguments does not change, they're just shifted up by one, and in fact the output array is provided by the caller and is likely only n entries long, so reading more arguments overflows the output buffer. In the case of seccomp, this results in it fetching 7 arguments starting at the 2nd one, which overflows the unsigned long args[6] in populate_seccomp_data(). This clobbers the $s0 register from syscall_trace_enter() which __seccomp_phase1_filter() saved onto the stack, into which syscall_trace_enter() had placed its syscall number argument. This caused Chromium to crash. Credit goes to Milko for tracking it down as far as $s0 being clobbered. Fixes: 4c21b8fd8f14 ("MIPS: seccomp: Handle indirect system calls (o32)") Reported-by: Milko Leporis Signed-off-by: James Hogan Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/12213/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/include/asm/syscall.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/mips/include/asm/syscall.h b/arch/mips/include/asm/syscall.h index 6499d93ae68d..47bc45a67e9b 100644 --- a/arch/mips/include/asm/syscall.h +++ b/arch/mips/include/asm/syscall.h @@ -101,10 +101,8 @@ static inline void syscall_get_arguments(struct task_struct *task, /* O32 ABI syscall() - Either 64-bit with O32 or 32-bit */ if ((config_enabled(CONFIG_32BIT) || test_tsk_thread_flag(task, TIF_32BIT_REGS)) && - (regs->regs[2] == __NR_syscall)) { + (regs->regs[2] == __NR_syscall)) i++; - n++; - } while (n--) ret |= mips_get_syscall_arg(args++, task, regs, i++); From de46e6540360c72943239bf5ce0a2839432204d8 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 27 Nov 2015 10:38:38 +0100 Subject: [PATCH 159/418] EDAC: Robustify workqueues destruction commit fcd5c4dd8201595d4c598c9cca5e54760277d687 upstream. EDAC workqueue destruction is really fragile. We cancel delayed work but if it is still running and requeues itself, we still go ahead and destroy the workqueue and the queued work explodes when workqueue core attempts to run it. Make the destruction more robust by switching op_state to offline so that requeuing stops. Cancel any pending work *synchronously* too. EDAC i7core: Driver loaded. general protection fault: 0000 [#1] SMP CPU 12 Modules linked in: Supported: Yes Pid: 0, comm: kworker/0:1 Tainted: G IE 3.0.101-0-default #1 HP ProLiant DL380 G7 RIP: 0010:[] [] __queue_work+0x17/0x3f0 < ... regs ...> Process kworker/0:1 (pid: 0, threadinfo ffff88019def6000, task ffff88019def4600) Stack: ... Call Trace: call_timer_fn run_timer_softirq __do_softirq call_softirq do_softirq irq_exit smp_apic_timer_interrupt apic_timer_interrupt intel_idle cpuidle_idle_call cpu_idle Code: ... RIP __queue_work RSP <...> Signed-off-by: Borislav Petkov Signed-off-by: Greg Kroah-Hartman --- drivers/edac/edac_device.c | 11 ++++------- drivers/edac/edac_mc.c | 14 +++----------- drivers/edac/edac_pci.c | 9 ++++----- 3 files changed, 11 insertions(+), 23 deletions(-) diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c index 592af5f0cf39..53587377e672 100644 --- a/drivers/edac/edac_device.c +++ b/drivers/edac/edac_device.c @@ -435,16 +435,13 @@ void edac_device_workq_setup(struct edac_device_ctl_info *edac_dev, */ void edac_device_workq_teardown(struct edac_device_ctl_info *edac_dev) { - int status; - if (!edac_dev->edac_check) return; - status = cancel_delayed_work(&edac_dev->work); - if (status == 0) { - /* workq instance might be running, wait for it */ - flush_workqueue(edac_workqueue); - } + edac_dev->op_state = OP_OFFLINE; + + cancel_delayed_work_sync(&edac_dev->work); + flush_workqueue(edac_workqueue); } /* diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 77ecd6a4179a..1b2c2187b347 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -586,18 +586,10 @@ static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec, */ static void edac_mc_workq_teardown(struct mem_ctl_info *mci) { - int status; + mci->op_state = OP_OFFLINE; - if (mci->op_state != OP_RUNNING_POLL) - return; - - status = cancel_delayed_work(&mci->work); - if (status == 0) { - edac_dbg(0, "not canceled, flush the queue\n"); - - /* workq instance might be running, wait for it */ - flush_workqueue(edac_workqueue); - } + cancel_delayed_work_sync(&mci->work); + flush_workqueue(edac_workqueue); } /* diff --git a/drivers/edac/edac_pci.c b/drivers/edac/edac_pci.c index 2cf44b4db80c..b4b38603b804 100644 --- a/drivers/edac/edac_pci.c +++ b/drivers/edac/edac_pci.c @@ -274,13 +274,12 @@ static void edac_pci_workq_setup(struct edac_pci_ctl_info *pci, */ static void edac_pci_workq_teardown(struct edac_pci_ctl_info *pci) { - int status; - edac_dbg(0, "\n"); - status = cancel_delayed_work(&pci->work); - if (status == 0) - flush_workqueue(edac_workqueue); + pci->op_state = OP_OFFLINE; + + cancel_delayed_work_sync(&pci->work); + flush_workqueue(edac_workqueue); } /* From b49777c656daba9cb9788cebae91c304f6fe5782 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 1 Dec 2015 15:52:36 +0100 Subject: [PATCH 160/418] EDAC, mc_sysfs: Fix freeing bus' name commit 12e26969b32c79018165d52caff3762135614aa1 upstream. I get the splat below when modprobing/rmmoding EDAC drivers. It happens because bus->name is invalid after bus_unregister() has run. The Code: section below corresponds to: .loc 1 1108 0 movq 672(%rbx), %rax # mci_1(D)->bus, mci_1(D)->bus .loc 1 1109 0 popq %rbx # .loc 1 1108 0 movq (%rax), %rdi # _7->name, jmp kfree # and %rax has some funky stuff 2030203020312030 which looks a lot like something walked over it. Fix that by saving the name ptr before doing stuff to string it points to. general protection fault: 0000 [#1] SMP Modules linked in: ... CPU: 4 PID: 10318 Comm: modprobe Tainted: G I EN 3.12.51-11-default+ #48 Hardware name: HP ProLiant DL380 G7, BIOS P67 05/05/2011 task: ffff880311320280 ti: ffff88030da3e000 task.ti: ffff88030da3e000 RIP: 0010:[] [] edac_unregister_sysfs+0x22/0x30 [edac_core] RSP: 0018:ffff88030da3fe28 EFLAGS: 00010292 RAX: 2030203020312030 RBX: ffff880311b4e000 RCX: 000000000000095c RDX: 0000000000000001 RSI: ffff880327bb9600 RDI: 0000000000000286 RBP: ffff880311b4e750 R08: 0000000000000000 R09: ffffffff81296110 R10: 0000000000000400 R11: 0000000000000000 R12: ffff88030ba1ac68 R13: 0000000000000001 R14: 00000000011b02f0 R15: 0000000000000000 FS: 00007fc9bf8f5700(0000) GS:ffff8801a7c40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000403c90 CR3: 000000019ebdf000 CR4: 00000000000007e0 Stack: Call Trace: i7core_unregister_mci.isra.9 i7core_remove pci_device_remove __device_release_driver driver_detach bus_remove_driver pci_unregister_driver i7core_exit SyS_delete_module system_call_fastpath 0x7fc9bf426536 Code: 2e 0f 1f 84 00 00 00 00 00 66 66 66 66 90 53 48 89 fb e8 52 2a 1f e1 48 8b bb a0 02 00 00 e8 46 59 1f e1 48 8b 83 a0 02 00 00 5b <48> 8b 38 e9 26 9a fe e0 66 0f 1f 44 00 00 66 66 66 66 90 48 8b RIP [] edac_unregister_sysfs+0x22/0x30 [edac_core] RSP Signed-off-by: Borislav Petkov Cc: Mauro Carvalho Chehab Fixes: 7a623c039075 ("edac: rewrite the sysfs code to use struct device") Signed-off-by: Greg Kroah-Hartman --- drivers/edac/edac_mc_sysfs.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c index a75acea0f674..58aed67b7eba 100644 --- a/drivers/edac/edac_mc_sysfs.c +++ b/drivers/edac/edac_mc_sysfs.c @@ -880,21 +880,26 @@ static struct device_type mci_attr_type = { int edac_create_sysfs_mci_device(struct mem_ctl_info *mci, const struct attribute_group **groups) { + char *name; int i, err; /* * The memory controller needs its own bus, in order to avoid * namespace conflicts at /sys/bus/edac. */ - mci->bus->name = kasprintf(GFP_KERNEL, "mc%d", mci->mc_idx); - if (!mci->bus->name) + name = kasprintf(GFP_KERNEL, "mc%d", mci->mc_idx); + if (!name) return -ENOMEM; + mci->bus->name = name; + edac_dbg(0, "creating bus %s\n", mci->bus->name); err = bus_register(mci->bus); - if (err < 0) - goto fail_free_name; + if (err < 0) { + kfree(name); + return err; + } /* get the /sys/devices/system/edac subsys reference */ mci->dev.type = &mci_attr_type; @@ -961,8 +966,8 @@ fail_unregister_dimm: device_unregister(&mci->dev); fail_unregister_bus: bus_unregister(mci->bus); -fail_free_name: - kfree(mci->bus->name); + kfree(name); + return err; } @@ -993,10 +998,12 @@ void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci) void edac_unregister_sysfs(struct mem_ctl_info *mci) { + const char *name = mci->bus->name; + edac_dbg(1, "Unregistering device %s\n", dev_name(&mci->dev)); device_unregister(&mci->dev); bus_unregister(mci->bus); - kfree(mci->bus->name); + kfree(name); } static void mc_attr_release(struct device *dev) From a0b1c2d0f53c8368eef4018ed7ce15b6c015aa77 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Sun, 27 Dec 2015 02:13:27 +0300 Subject: [PATCH 161/418] sparc64: fix incorrect sign extension in sys_sparc64_personality commit 525fd5a94e1be0776fa652df5c687697db508c91 upstream. The value returned by sys_personality has type "long int". It is saved to a variable of type "int", which is not a problem yet because the type of task_struct->pesonality is "unsigned int". The problem is the sign extension from "int" to "long int" that happens on return from sys_sparc64_personality. For example, a userspace call personality((unsigned) -EINVAL) will result to any subsequent personality call, including absolutely harmless read-only personality(0xffffffff) call, failing with errno set to EINVAL. Signed-off-by: Dmitry V. Levin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- arch/sparc/kernel/sys_sparc_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index 30e7ddb27a3a..c690c8e16a96 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -413,7 +413,7 @@ out: SYSCALL_DEFINE1(sparc64_personality, unsigned long, personality) { - int ret; + long ret; if (personality(current->personality) == PER_LINUX32 && personality(personality) == PER_LINUX) From e3d4bcc39fcdd54d44afd45f00b9b840961430be Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Wed, 4 Nov 2015 13:24:09 +1100 Subject: [PATCH 162/418] cxl: use correct operator when writing pcie config space values commit 48f0f6b717e314a30be121b67e1d044f6d311d66 upstream. When writing a value to config space, cxl_pcie_write_config() calls cxl_pcie_config_info() to obtain a mask and shift value, shifts the new value accordingly, then uses the mask to combine the shifted value with the existing value at the address as part of a read-modify-write pattern. Currently, we use a logical OR operator rather than a bitwise OR operator, which means any use of this function results in an incorrect value being written. Replace the logical OR operator with a bitwise OR operator so the value is written correctly. Reported-by: Michael Ellerman Fixes: 6f7f0b3df6d4 ("cxl: Add AFU virtual PHB and kernel API") Signed-off-by: Andrew Donnellan Acked-by: Ian Munsie Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- drivers/misc/cxl/vphb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c index c241e15cacb1..cbd4331fb45c 100644 --- a/drivers/misc/cxl/vphb.c +++ b/drivers/misc/cxl/vphb.c @@ -203,7 +203,7 @@ static int cxl_pcie_write_config(struct pci_bus *bus, unsigned int devfn, mask <<= shift; val <<= shift; - v = (in_le32(ioaddr) & ~mask) || (val & mask); + v = (in_le32(ioaddr) & ~mask) | (val & mask); out_le32(ioaddr, v); return PCIBIOS_SUCCESSFUL; From 7ca59689c947bda700e07fdea4d9e4f582601c68 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Fri, 11 Dec 2015 15:38:40 +0100 Subject: [PATCH 163/418] clk: exynos: use irqsave version of spin_lock to avoid deadlock with irqs commit 6b4feaea251a97bf08c7d41eabdec07f63a11073 upstream. It is allowed to enable/disable clocks from interrupts, so common Exynos ARM clock management code for CPUfreq should use 'irqsave' version of spin_lock calls to avoid potential deadlock caused by spin_lock recursion. The same spin_lock is used by gate/mux clocks during enable/disable calls. This deadlock, can be reproduced by enabling CPUfreq (ondemand or userspace) and decoding video with s5p-mfc driver. Relevant stack trace: [ 5928.061534] BUG: spinlock recursion on CPU#0, bash/1252 [ 5928.061609] lock: 0xee80454c, .magic: dead4ead, .owner: bash/1252, .owner_cpu: 0 [ 5928.068586] CPU: 0 PID: 1252 Comm: bash Tainted: G W 4.4.0-rc4-00001-g447a7fd #678 [ 5928.077260] Hardware name: SAMSUNG EXYNOS (Flattened Device Tree) [ 5928.083359] [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [ 5928.091072] [] (show_stack) from [] (dump_stack+0x68/0xb8) [ 5928.098275] [] (dump_stack) from [] (do_raw_spin_lock+0x184/0x1ac) [ 5928.106177] [] (do_raw_spin_lock) from [] (_raw_spin_lock_irqsave+0x20/0x28) [ 5928.114943] [] (_raw_spin_lock_irqsave) from [] (clk_gate_endisable+0x24/0x98) [ 5928.123882] [] (clk_gate_endisable) from [] (clk_core_disable+0x60/0x84) [ 5928.132299] [] (clk_core_disable) from [] (clk_disable+0x24/0x30) [ 5928.140117] [] (clk_disable) from [] (s5p_mfc_handle_frame+0x254/0x860) [ 5928.148445] [] (s5p_mfc_handle_frame) from [] (s5p_mfc_irq+0x890/0xa24) [ 5928.156778] [] (s5p_mfc_irq) from [] (handle_irq_event_percpu+0x50/0x14c) [ 5928.165283] [] (handle_irq_event_percpu) from [] (handle_irq_event+0x38/0x5c) [ 5928.174143] [] (handle_irq_event) from [] (handle_fasteoi_irq+0xdc/0x1a4) [ 5928.182645] [] (handle_fasteoi_irq) from [] (generic_handle_irq+0x18/0x28) [ 5928.191236] [] (generic_handle_irq) from [] (__handle_domain_irq+0x6c/0xdc) [ 5928.199917] [] (__handle_domain_irq) from [] (gic_handle_irq+0x4c/0x98) [ 5928.208249] [] (gic_handle_irq) from [] (__irq_svc+0x54/0x90) [ 5928.215709] Exception stack(0xeddb5cb8 to 0xeddb5d00) [ 5928.220745] 5ca0: ee80454c faddfadc [ 5928.228906] 5cc0: 00000000 01000001 ee831ce0 f8114200 ee807c00 01130520 00000403 eddb5d84 [ 5928.237063] 5ce0: ee807c48 2faf0800 ee807c0c eddb5d08 c046b618 c046b634 20000053 ffffffff [ 5928.245225] [] (__irq_svc) from [] (exynos_cpuclk_notifier_cb+0x170/0x270) [ 5928.253823] [] (exynos_cpuclk_notifier_cb) from [] (notifier_call_chain+0x44/0x84) [ 5928.263106] [] (notifier_call_chain) from [] (__srcu_notifier_call_chain+0x6c/0x9c) [ 5928.272480] [] (__srcu_notifier_call_chain) from [] (srcu_notifier_call_chain+0x18/0x20) [ 5928.282288] [] (srcu_notifier_call_chain) from [] (__clk_notify+0x6c/0x74) [ 5928.290881] [] (__clk_notify) from [] (clk_propagate_rate_change+0xa0/0xac) [ 5928.299561] [] (clk_propagate_rate_change) from [] (clk_propagate_rate_change+0x90/0xac) [ 5928.309370] [] (clk_propagate_rate_change) from [] (clk_core_set_rate_nolock+0x64/0xa8) [ 5928.319091] [] (clk_core_set_rate_nolock) from [] (clk_set_rate+0x20/0x30) [ 5928.327686] [] (clk_set_rate) from [] (set_target+0xe8/0x23c) [ 5928.335152] [] (set_target) from [] (__cpufreq_driver_target+0x184/0x29c) [ 5928.343655] [] (__cpufreq_driver_target) from [] (cpufreq_set+0x44/0x64) [ 5928.352074] [] (cpufreq_set) from [] (store_scaling_setspeed+0x5c/0x74) [ 5928.360407] [] (store_scaling_setspeed) from [] (store+0x7c/0x98) [ 5928.368221] [] (store) from [] (sysfs_kf_write+0x44/0x48) [ 5928.375338] [] (sysfs_kf_write) from [] (kernfs_fop_write+0xb8/0x1bc) [ 5928.383496] [] (kernfs_fop_write) from [] (__vfs_write+0x2c/0xd4) [ 5928.391308] [] (__vfs_write) from [] (vfs_write+0xa0/0x144) [ 5928.398598] [] (vfs_write) from [] (SyS_write+0x44/0x84) [ 5928.405631] [] (SyS_write) from [] (ret_fast_syscall+0x0/0x3c) Signed-off-by: Marek Szyprowski Signed-off-by: Sylwester Nawrocki Signed-off-by: Greg Kroah-Hartman --- drivers/clk/samsung/clk-cpu.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/clk/samsung/clk-cpu.c b/drivers/clk/samsung/clk-cpu.c index 2fe37f708dc7..813003d6ce09 100644 --- a/drivers/clk/samsung/clk-cpu.c +++ b/drivers/clk/samsung/clk-cpu.c @@ -148,6 +148,7 @@ static int exynos_cpuclk_pre_rate_change(struct clk_notifier_data *ndata, unsigned long alt_prate = clk_get_rate(cpuclk->alt_parent); unsigned long alt_div = 0, alt_div_mask = DIV_MASK; unsigned long div0, div1 = 0, mux_reg; + unsigned long flags; /* find out the divider values to use for clock data */ while ((cfg_data->prate * 1000) != ndata->new_rate) { @@ -156,7 +157,7 @@ static int exynos_cpuclk_pre_rate_change(struct clk_notifier_data *ndata, cfg_data++; } - spin_lock(cpuclk->lock); + spin_lock_irqsave(cpuclk->lock, flags); /* * For the selected PLL clock frequency, get the pre-defined divider @@ -212,7 +213,7 @@ static int exynos_cpuclk_pre_rate_change(struct clk_notifier_data *ndata, DIV_MASK_ALL); } - spin_unlock(cpuclk->lock); + spin_unlock_irqrestore(cpuclk->lock, flags); return 0; } @@ -223,6 +224,7 @@ static int exynos_cpuclk_post_rate_change(struct clk_notifier_data *ndata, const struct exynos_cpuclk_cfg_data *cfg_data = cpuclk->cfg; unsigned long div = 0, div_mask = DIV_MASK; unsigned long mux_reg; + unsigned long flags; /* find out the divider values to use for clock data */ if (cpuclk->flags & CLK_CPU_NEEDS_DEBUG_ALT_DIV) { @@ -233,7 +235,7 @@ static int exynos_cpuclk_post_rate_change(struct clk_notifier_data *ndata, } } - spin_lock(cpuclk->lock); + spin_lock_irqsave(cpuclk->lock, flags); /* select mout_apll as the alternate parent */ mux_reg = readl(base + E4210_SRC_CPU); @@ -246,7 +248,7 @@ static int exynos_cpuclk_post_rate_change(struct clk_notifier_data *ndata, } exynos_set_safe_div(base, div, div_mask); - spin_unlock(cpuclk->lock); + spin_unlock_irqrestore(cpuclk->lock, flags); return 0; } From 40ab2d7d44aff753b6c43f016b6fdb166fe8e4ff Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Tue, 22 Dec 2015 17:08:06 +0800 Subject: [PATCH 164/418] regulator: axp20x: Fix GPIO LDO enable value for AXP22x commit 3cb99e2ea99a454c8837a55aac88753ef05fc1eb upstream. The enable/disable values for GPIO LDOs are reversed. It seems no one noticed as AXP22x support was introduced recently, and no one was using the GPIO LDOs, either because no designs actually use them or board support hasn't caught up. Fixes: 1b82b4e4f954 ("regulator: axp20x: Add support for AXP22X regulators") Signed-off-by: Chen-Yu Tsai Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/regulator/axp20x-regulator.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/regulator/axp20x-regulator.c b/drivers/regulator/axp20x-regulator.c index 35de22fdb7a0..f2e1a39ce0f3 100644 --- a/drivers/regulator/axp20x-regulator.c +++ b/drivers/regulator/axp20x-regulator.c @@ -27,8 +27,8 @@ #define AXP20X_IO_ENABLED 0x03 #define AXP20X_IO_DISABLED 0x07 -#define AXP22X_IO_ENABLED 0x04 -#define AXP22X_IO_DISABLED 0x03 +#define AXP22X_IO_ENABLED 0x03 +#define AXP22X_IO_DISABLED 0x04 #define AXP20X_WORKMODE_DCDC2_MASK BIT(2) #define AXP20X_WORKMODE_DCDC3_MASK BIT(1) From 7d661c4658ce4485d5737eb522303d6eefa42749 Mon Sep 17 00:00:00 2001 From: Henry Chen Date: Tue, 17 Nov 2015 16:36:49 +0800 Subject: [PATCH 165/418] regulator: mt6311: MT6311_REGULATOR needs to select REGMAP_I2C commit aab3c3f34cc2dd8230052770712606d65de6538f upstream. This patch fix the below build error: drivers/regulator/mt6311-regulator.c:111: undefined reference to `__devm_regmap_init_i2c' Signed-off-by: Henry Chen Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/regulator/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig index 8df0b0e62976..00676208080e 100644 --- a/drivers/regulator/Kconfig +++ b/drivers/regulator/Kconfig @@ -446,6 +446,7 @@ config REGULATOR_MC13892 config REGULATOR_MT6311 tristate "MediaTek MT6311 PMIC" depends on I2C + select REGMAP_I2C help Say y here to select this option to enable the power regulator of MediaTek MT6311 PMIC. From ac3981b170863371c0386f9578d6bf4b8f961cc0 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Mon, 28 Dec 2015 08:35:12 +0900 Subject: [PATCH 166/418] virtio_balloon: fix race by fill and leak commit f68b992bbb474641881932c61c92dcfa6f5b3689 upstream. During my compaction-related stuff, I encountered a bug with ballooning. With repeated inflating and deflating cycle, guest memory( ie, cat /proc/meminfo | grep MemTotal) is decreased and couldn't be recovered. The reason is balloon_lock doesn't cover release_pages_balloon so struct virtio_balloon fields could be overwritten by race of fill_balloon(e,g, vb->*pfns could be critical). This patch fixes it in my test. Signed-off-by: Minchan Kim Signed-off-by: Michael S. Tsirkin Signed-off-by: Greg Kroah-Hartman --- drivers/virtio/virtio_balloon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 7efc32945810..7d3e5d0e9aa4 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -209,8 +209,8 @@ static unsigned leak_balloon(struct virtio_balloon *vb, size_t num) */ if (vb->num_pfns != 0) tell_host(vb, vb->deflate_vq); - mutex_unlock(&vb->balloon_lock); release_pages_balloon(vb); + mutex_unlock(&vb->balloon_lock); return num_freed_pages; } From afb02993539468a15700481b54968edc10940b0d Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Mon, 28 Dec 2015 08:35:13 +0900 Subject: [PATCH 167/418] virtio_balloon: fix race between migration and ballooning commit 21ea9fb69e7c4b1b1559c3e410943d3ff248ffcb upstream. In balloon_page_dequeue, pages_lock should cover the loop (ie, list_for_each_entry_safe). Otherwise, the cursor page could be isolated by compaction and then list_del by isolation could poison the page->lru.{prev,next} so the loop finally could access wrong address like this. This patch fixes the bug. general protection fault: 0000 [#1] SMP Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 2 PID: 82 Comm: vballoon Not tainted 4.4.0-rc5-mm1-access_bit+ #1906 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 task: ffff8800a7ff0000 ti: ffff8800a7fec000 task.ti: ffff8800a7fec000 RIP: 0010:[] [] balloon_page_dequeue+0x54/0x130 RSP: 0018:ffff8800a7fefdc0 EFLAGS: 00010246 RAX: ffff88013fff9a70 RBX: ffffea000056fe00 RCX: 0000000000002b7d RDX: ffff88013fff9a70 RSI: ffffea000056fe00 RDI: ffff88013fff9a68 RBP: ffff8800a7fefde8 R08: ffffea000056fda0 R09: 0000000000000000 R10: ffff8800a7fefd90 R11: 0000000000000001 R12: dead0000000000e0 R13: ffffea000056fe20 R14: ffff880138809070 R15: ffff880138809060 FS: 0000000000000000(0000) GS:ffff88013fc40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00007f229c10e000 CR3: 00000000b8b53000 CR4: 00000000000006a0 Stack: 0000000000000100 ffff880138809088 ffff880138809000 ffff880138809060 0000000000000046 ffff8800a7fefe28 ffffffff812c86d3 ffff880138809020 ffff880138809000 fffffffffff91900 0000000000000100 ffff880138809060 Call Trace: [] leak_balloon+0x93/0x1a0 [] balloon+0x217/0x2a0 [] ? __schedule+0x31e/0x8b0 [] ? abort_exclusive_wait+0xb0/0xb0 [] ? update_balloon_stats+0xf0/0xf0 [] kthread+0xc9/0xe0 [] ? kthread_park+0x60/0x60 [] ret_from_fork+0x3f/0x70 [] ? kthread_park+0x60/0x60 Code: 8d 60 e0 0f 84 af 00 00 00 48 8b 43 20 a8 01 75 3b 48 89 d8 f0 0f ba 28 00 72 10 48 8b 03 f6 c4 08 75 2f 48 89 df e8 8c 83 f9 ff <49> 8b 44 24 20 4d 8d 6c 24 20 48 83 e8 20 4d 39 f5 74 7a 4c 89 RIP [] balloon_page_dequeue+0x54/0x130 RSP ---[ end trace 43cf28060d708d5f ]--- Kernel panic - not syncing: Fatal exception Dumping ftrace buffer: (ftrace buffer empty) Kernel Offset: disabled Signed-off-by: Minchan Kim Signed-off-by: Michael S. Tsirkin Acked-by: Rafael Aquini Signed-off-by: Greg Kroah-Hartman --- mm/balloon_compaction.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/balloon_compaction.c b/mm/balloon_compaction.c index d3116be5a00f..300117f1a08f 100644 --- a/mm/balloon_compaction.c +++ b/mm/balloon_compaction.c @@ -61,6 +61,7 @@ struct page *balloon_page_dequeue(struct balloon_dev_info *b_dev_info) bool dequeued_page; dequeued_page = false; + spin_lock_irqsave(&b_dev_info->pages_lock, flags); list_for_each_entry_safe(page, tmp, &b_dev_info->pages, lru) { /* * Block others from accessing the 'page' while we get around @@ -75,15 +76,14 @@ struct page *balloon_page_dequeue(struct balloon_dev_info *b_dev_info) continue; } #endif - spin_lock_irqsave(&b_dev_info->pages_lock, flags); balloon_page_delete(page); __count_vm_event(BALLOON_DEFLATE); - spin_unlock_irqrestore(&b_dev_info->pages_lock, flags); unlock_page(page); dequeued_page = true; break; } } + spin_unlock_irqrestore(&b_dev_info->pages_lock, flags); if (!dequeued_page) { /* From d56842d88978f54a3f7b8dfac3ae99ec3e732b95 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 14 Jan 2016 16:00:41 +0200 Subject: [PATCH 168/418] virtio_pci: fix use after free on release commit 2989be09a8a9d62a785137586ad941f916e08f83 upstream. KASan detected a use-after-free error in virtio-pci remove code. In virtio_pci_remove(), vp_dev is still used after being freed in unregister_virtio_device() (in virtio_pci_release_dev() more precisely). To fix, keep a reference until cleanup is done. Fixes: 63bd62a08ca4 ("virtio_pci: defer kfree until release callback") Reported-by: Jerome Marchand Cc: Sasha Levin Signed-off-by: Michael S. Tsirkin Tested-by: Jerome Marchand Signed-off-by: Greg Kroah-Hartman --- drivers/virtio/virtio_pci_common.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index 78f804af6c20..2046a68ad0ba 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -545,6 +545,7 @@ err_enable_device: static void virtio_pci_remove(struct pci_dev *pci_dev) { struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); + struct device *dev = get_device(&vp_dev->vdev.dev); unregister_virtio_device(&vp_dev->vdev); @@ -554,6 +555,7 @@ static void virtio_pci_remove(struct pci_dev *pci_dev) virtio_pci_modern_remove(vp_dev); pci_disable_device(pci_dev); + put_device(dev); } static struct pci_driver virtio_pci_driver = { From a4de54fb20e68c235ab7bebf521873b724816c93 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Fri, 8 Jan 2016 20:29:39 +0100 Subject: [PATCH 169/418] drm/vmwgfx: Fix an incorrect lock check commit fb89ac5102ae2875d685c847e6b5dbc141622d43 upstream. With CONFIG_SMP=n and CONFIG_DEBUG_SPINLOCK=y the vmwgfx kernel module would unconditionally throw a bug when checking for a held spinlock in the command buffer code. Fix this by using a lockdep check. Reported-and-tested-by: Tetsuo Handa Signed-off-by: Thomas Hellstrom Reviewed-by: Sinclair Yeh Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c index 6377e8151000..67cebb23c940 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c @@ -247,7 +247,7 @@ static void __vmw_cmdbuf_header_free(struct vmw_cmdbuf_header *header) { struct vmw_cmdbuf_man *man = header->man; - BUG_ON(!spin_is_locked(&man->lock)); + lockdep_assert_held_once(&man->lock); if (header->inline_space) { vmw_cmdbuf_header_inline_free(header); From 15ec2c4c10888f6403686be687e79165307e7e92 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Fri, 8 Jan 2016 20:29:40 +0100 Subject: [PATCH 170/418] drm/vmwgfx: Fix a width / pitch mismatch on framebuffer updates commit a50e2bf5a0f674d62b69f51f6935a30e82bd015c upstream. When the framebuffer is a vmwgfx dma buffer and a proxy surface is created, the vmw_kms_update_proxy() function requires that the proxy surface width and the framebuffer pitch are compatible, otherwise display corruption occurs as seen in gnome-shell/native with software 3D. Since the framebuffer pitch is determined by user-space, allocate a proxy surface the width of which is based on the framebuffer pitch rather than on the framebuffer width. Reported-by: Raphael Hertzog Tested-by: Mati Aharoni Signed-off-by: Thomas Hellstrom Reviewed-by: Brian Paul Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index 9b4bb9e74d73..7c2e118a77b0 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -763,21 +763,25 @@ static int vmw_create_dmabuf_proxy(struct drm_device *dev, uint32_t format; struct drm_vmw_size content_base_size; struct vmw_resource *res; + unsigned int bytes_pp; int ret; switch (mode_cmd->depth) { case 32: case 24: format = SVGA3D_X8R8G8B8; + bytes_pp = 4; break; case 16: case 15: format = SVGA3D_R5G6B5; + bytes_pp = 2; break; case 8: format = SVGA3D_P8; + bytes_pp = 1; break; default: @@ -785,7 +789,7 @@ static int vmw_create_dmabuf_proxy(struct drm_device *dev, return -EINVAL; } - content_base_size.width = mode_cmd->width; + content_base_size.width = mode_cmd->pitch / bytes_pp; content_base_size.height = mode_cmd->height; content_base_size.depth = 1; From 02b0518c0e5a9ba18f71fcc336bc7333b7a8893a Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 15 Oct 2014 15:00:47 -0400 Subject: [PATCH 171/418] drm/vmwgfx: respect 'nomodeset' commit 96c5d076f0a5e2023ecdb44d8261f87641ee71e0 upstream. Signed-off-by: Rob Clark Reviewed-by: Thomas Hellstrom . Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index c49812b80dd0..24fb348a44e1 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -25,6 +25,7 @@ * **************************************************************************/ #include +#include #include #include "vmwgfx_drv.h" @@ -1538,6 +1539,12 @@ static int vmw_probe(struct pci_dev *pdev, const struct pci_device_id *ent) static int __init vmwgfx_init(void) { int ret; + +#ifdef CONFIG_VGA_CONSOLE + if (vgacon_text_force()) + return -EINVAL; +#endif + ret = drm_pci_init(&driver, &vmw_pci_driver); if (ret) DRM_ERROR("Failed initializing DRM.\n"); From 8397905d2d56af92f2ba7e250bec5b6b0fdb9e57 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Mon, 23 Nov 2015 17:43:48 -0500 Subject: [PATCH 172/418] drm/amdgpu: Fix off-by-one errors in amdgpu_vm_bo_map MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 005ae95e6ec119c64e2d16eb65a94c49e1dcf9f0 upstream. eaddr is sometimes treated as the last address inside the address range, and sometimes as the first address outside the range. This was resulting in errors when a test filled up the entire address space. Make it consistent to always be the last address within the range. Signed-off-by: Felix.Kuehling Reviewed-by: Christian König Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index b53d273eb7a1..39adbb6470d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1010,13 +1010,13 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, return -EINVAL; /* make sure object fit at this offset */ - eaddr = saddr + size; + eaddr = saddr + size - 1; if ((saddr >= eaddr) || (offset + size > amdgpu_bo_size(bo_va->bo))) return -EINVAL; last_pfn = eaddr / AMDGPU_GPU_PAGE_SIZE; - if (last_pfn > adev->vm_manager.max_pfn) { - dev_err(adev->dev, "va above limit (0x%08X > 0x%08X)\n", + if (last_pfn >= adev->vm_manager.max_pfn) { + dev_err(adev->dev, "va above limit (0x%08X >= 0x%08X)\n", last_pfn, adev->vm_manager.max_pfn); return -EINVAL; } @@ -1025,7 +1025,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, eaddr /= AMDGPU_GPU_PAGE_SIZE; spin_lock(&vm->it_lock); - it = interval_tree_iter_first(&vm->va, saddr, eaddr - 1); + it = interval_tree_iter_first(&vm->va, saddr, eaddr); spin_unlock(&vm->it_lock); if (it) { struct amdgpu_bo_va_mapping *tmp; @@ -1046,7 +1046,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, INIT_LIST_HEAD(&mapping->list); mapping->it.start = saddr; - mapping->it.last = eaddr - 1; + mapping->it.last = eaddr; mapping->offset = offset; mapping->flags = flags; From d1ad20f15b1d545642474860cc17a7dd6b051d50 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 24 Nov 2015 14:30:56 -0500 Subject: [PATCH 173/418] drm/amdgpu: call hpd_irq_event on resume commit 54fb2a5cd0baf8e97d743de411e2f832d1afa68d upstream. Need to call this on resume if displays changes during suspend in order to properly be notified of changes. Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index d5b421330145..58cb6987b078 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1788,6 +1788,7 @@ int amdgpu_resume_kms(struct drm_device *dev, bool resume, bool fbcon) } drm_kms_helper_poll_enable(dev); + drm_helper_hpd_irq_event(dev); if (fbcon) { amdgpu_fbdev_set_suspend(adev, 0); From 3be127a9622d781ea3239f8846087256df988c79 Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Wed, 13 Jan 2016 12:55:18 +0800 Subject: [PATCH 174/418] drm/amdgpu: fix lost sync_to if scheduler is enabled. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 888c9e33e4c5a503285921046c621f7c73199d2f upstream. when scheduler is enabled, the semaphore isn't used at all. Signed-off-by: Chunming Zhou Reviewed-by: Christian König Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index dd005c336c97..181ce39ef5e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -293,7 +293,8 @@ int amdgpu_sync_rings(struct amdgpu_sync *sync, fence = to_amdgpu_fence(sync->sync_to[i]); /* check if we really need to sync */ - if (!amdgpu_fence_need_sync(fence, ring)) + if (!amdgpu_enable_scheduler && + !amdgpu_fence_need_sync(fence, ring)) continue; /* prevent GPU deadlocks */ @@ -303,7 +304,7 @@ int amdgpu_sync_rings(struct amdgpu_sync *sync, } if (amdgpu_enable_scheduler || !amdgpu_enable_semaphores) { - r = fence_wait(&fence->base, true); + r = fence_wait(sync->sync_to[i], true); if (r) return r; continue; From 17ab2f11145f6e0c9657d098f3dcefd22f9e7478 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 14 Jan 2016 13:48:24 -0500 Subject: [PATCH 175/418] drm/amdgpu: fix tonga smu resume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e160e4db833c7e8587ec3c88efaed0d84f1bcf42 upstream. Need to make sure smu buffers are pinned on resume. This matches what Fiji does. Reviewed-by: Junwei Zhang Reviewed-by: Christian König Reviewed-by: Ken Wang Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/tonga_dpm.c | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c b/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c index 204903897b4f..63d6cb3c1110 100644 --- a/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c @@ -122,25 +122,12 @@ static int tonga_dpm_hw_fini(void *handle) static int tonga_dpm_suspend(void *handle) { - return 0; + return tonga_dpm_hw_fini(handle); } static int tonga_dpm_resume(void *handle) { - int ret; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - mutex_lock(&adev->pm.mutex); - - ret = tonga_smu_start(adev); - if (ret) { - DRM_ERROR("SMU start failed\n"); - goto fail; - } - -fail: - mutex_unlock(&adev->pm.mutex); - return ret; + return tonga_dpm_hw_init(handle); } static int tonga_dpm_set_clockgating_state(void *handle, From 7c105b06bc6296ce215b4dc942204bdf82237323 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 19 Jan 2016 12:48:14 +0100 Subject: [PATCH 176/418] drm/amdgpu: fix amdgpu_bo_pin_restricted VRAM placing v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 78d0e182b6c1f5336f6e8cbb197f403276dabc7f upstream. We could pin BOs into invisible VRAM otherwise. v2: make logic more readable as suggested by Michel Signed-off-by: Christian König Reviewed-by: Alex Deucher (v1) Reviewed-by: Rex Zhu (v1) Reviewed-by: Michel Dänzer Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index c3ce103b6a33..a2a16acee34d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -399,7 +399,8 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, } if (fpfn > bo->placements[i].fpfn) bo->placements[i].fpfn = fpfn; - if (lpfn && lpfn < bo->placements[i].lpfn) + if (!bo->placements[i].lpfn || + (lpfn && lpfn < bo->placements[i].lpfn)) bo->placements[i].lpfn = lpfn; bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT; } From 7710d03fbb6768f0ddc106025210a31035134db5 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 28 Jan 2016 16:27:41 -0500 Subject: [PATCH 177/418] drm/amdgpu: no need to load MC firmware on fiji commit ad32152eb26043d165eed9406cb9e2f7011f6b10 upstream. Vbios does this for us on asic_init. Reviewed-by: Ken Wang >Qingqing.Wang@amd.com> Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index d39028440814..7e87b9024863 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -44,7 +44,6 @@ static void gmc_v8_0_set_irq_funcs(struct amdgpu_device *adev); MODULE_FIRMWARE("amdgpu/topaz_mc.bin"); MODULE_FIRMWARE("amdgpu/tonga_mc.bin"); -MODULE_FIRMWARE("amdgpu/fiji_mc.bin"); static const u32 golden_settings_tonga_a11[] = { @@ -236,8 +235,6 @@ static int gmc_v8_0_init_microcode(struct amdgpu_device *adev) chip_name = "tonga"; break; case CHIP_FIJI: - chip_name = "fiji"; - break; case CHIP_CARRIZO: case CHIP_STONEY: return 0; @@ -1003,7 +1000,8 @@ static int gmc_v8_0_hw_init(void *handle) gmc_v8_0_mc_program(adev); - if (!(adev->flags & AMD_IS_APU)) { + if ((adev->asic_type == CHIP_TOPAZ) || + (adev->asic_type == CHIP_TONGA)) { r = gmc_v8_0_mc_load_microcode(adev); if (r) { DRM_ERROR("Failed to load MC firmware!\n"); From 61d1fcfc8258c058af665e2abf6e42636cbd0274 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 2 Feb 2016 10:59:53 -0500 Subject: [PATCH 178/418] drm/amdgpu: move gmc7 support out of CIK dependency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e42d85261680edfc350a6c2a86b7fbb44a85014b upstream. It's used by iceland which is VI. Reviewed-by: Ken Wang Reviewed-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 04c270757030..ca066018ea34 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -22,7 +22,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o # add asic specific block -amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o gmc_v7_0.o cik_ih.o kv_smc.o kv_dpm.o \ +amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \ ci_smc.o ci_dpm.o dce_v8_0.o gfx_v7_0.o cik_sdma.o uvd_v4_2.o vce_v2_0.o \ amdgpu_amdkfd_gfx_v7.o @@ -31,6 +31,7 @@ amdgpu-y += \ # add GMC block amdgpu-y += \ + gmc_v7_0.o \ gmc_v8_0.o # add IH block From d60703ca942e8d044d61360bc9792fcab54b95d0 Mon Sep 17 00:00:00 2001 From: Ken Wang Date: Wed, 3 Feb 2016 19:16:54 +0800 Subject: [PATCH 179/418] drm/amdgpu: iceland use CI based MC IP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 429c45deae6e57f1bb91bfb05b671063fb0cef60 upstream. Reviewed-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Ken Wang Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 10 +++++++++- drivers/gpu/drm/amd/amdgpu/vi.c | 7 ++++--- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index ed8abb58a785..98d54bc69415 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -42,6 +42,7 @@ static void gmc_v7_0_set_irq_funcs(struct amdgpu_device *adev); MODULE_FIRMWARE("radeon/bonaire_mc.bin"); MODULE_FIRMWARE("radeon/hawaii_mc.bin"); +MODULE_FIRMWARE("amdgpu/topaz_mc.bin"); /** * gmc8_mc_wait_for_idle - wait for MC idle callback. @@ -132,13 +133,20 @@ static int gmc_v7_0_init_microcode(struct amdgpu_device *adev) case CHIP_HAWAII: chip_name = "hawaii"; break; + case CHIP_TOPAZ: + chip_name = "topaz"; + break; case CHIP_KAVERI: case CHIP_KABINI: return 0; default: BUG(); } - snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name); + if(adev->asic_type == CHIP_TOPAZ) + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mc.bin", chip_name); + else + snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name); + err = request_firmware(&adev->mc.fw, fw_name, adev->dev); if (err) goto out; diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 2adc1c855e85..7628eb44cce2 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -60,6 +60,7 @@ #include "vi.h" #include "vi_dpm.h" #include "gmc_v8_0.h" +#include "gmc_v7_0.h" #include "gfx_v8_0.h" #include "sdma_v2_4.h" #include "sdma_v3_0.h" @@ -1128,10 +1129,10 @@ static const struct amdgpu_ip_block_version tonga_ip_blocks[] = }, { .type = AMD_IP_BLOCK_TYPE_GMC, - .major = 8, - .minor = 0, + .major = 7, + .minor = 4, .rev = 0, - .funcs = &gmc_v8_0_ip_funcs, + .funcs = &gmc_v7_0_ip_funcs, }, { .type = AMD_IP_BLOCK_TYPE_IH, From 321dfc30923a07ac90052af222cb40694d00ab2f Mon Sep 17 00:00:00 2001 From: Ken Wang Date: Wed, 3 Feb 2016 19:17:53 +0800 Subject: [PATCH 180/418] drm/amdgpu: The VI specific EXE bit should only apply to GMC v8.0 above MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 8f3c162961fc2d92ec73a66496aab69eb2e19c36 upstream. Reviewed-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Ken Wang Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 8a1752ff3d8e..55cf05e1c81c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -808,7 +808,7 @@ uint32_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, flags |= AMDGPU_PTE_SNOOPED; } - if (adev->asic_type >= CHIP_TOPAZ) + if (adev->asic_type >= CHIP_TONGA) flags |= AMDGPU_PTE_EXECUTABLE; flags |= AMDGPU_PTE_READABLE; From 2b840f690ab4a782ec8947d7ab00a57fd5309015 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 2 Feb 2016 10:56:15 -0500 Subject: [PATCH 181/418] drm/amdgpu: pull topaz gmc bits into gmc_v7 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 72b459c8f716ef03a8a0c78078547ce64d8d29a2 upstream. Add the topaz golden settings into the gmc7 module. Reviewed-by: Ken Wang Reviewed-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 35 +++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 98d54bc69415..272110cc18c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -44,8 +44,37 @@ MODULE_FIRMWARE("radeon/bonaire_mc.bin"); MODULE_FIRMWARE("radeon/hawaii_mc.bin"); MODULE_FIRMWARE("amdgpu/topaz_mc.bin"); +static const u32 golden_settings_iceland_a11[] = +{ + mmVM_PRT_APERTURE0_LOW_ADDR, 0x0fffffff, 0x0fffffff, + mmVM_PRT_APERTURE1_LOW_ADDR, 0x0fffffff, 0x0fffffff, + mmVM_PRT_APERTURE2_LOW_ADDR, 0x0fffffff, 0x0fffffff, + mmVM_PRT_APERTURE3_LOW_ADDR, 0x0fffffff, 0x0fffffff +}; + +static const u32 iceland_mgcg_cgcg_init[] = +{ + mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104 +}; + +static void gmc_v7_0_init_golden_registers(struct amdgpu_device *adev) +{ + switch (adev->asic_type) { + case CHIP_TOPAZ: + amdgpu_program_register_sequence(adev, + iceland_mgcg_cgcg_init, + (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init)); + amdgpu_program_register_sequence(adev, + golden_settings_iceland_a11, + (const u32)ARRAY_SIZE(golden_settings_iceland_a11)); + break; + default: + break; + } +} + /** - * gmc8_mc_wait_for_idle - wait for MC idle callback. + * gmc7_mc_wait_for_idle - wait for MC idle callback. * * @adev: amdgpu_device pointer * @@ -142,7 +171,7 @@ static int gmc_v7_0_init_microcode(struct amdgpu_device *adev) default: BUG(); } - if(adev->asic_type == CHIP_TOPAZ) + if (adev->asic_type == CHIP_TOPAZ) snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mc.bin", chip_name); else snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name); @@ -988,6 +1017,8 @@ static int gmc_v7_0_hw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + gmc_v7_0_init_golden_registers(adev); + gmc_v7_0_mc_program(adev); if (!(adev->flags & AMD_IS_APU)) { From 73b424c77ce34fe8343c32feae15cc9f3dc82356 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 2 Feb 2016 10:57:30 -0500 Subject: [PATCH 182/418] drm/amdgpu: drop topaz support from gmc8 module MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 8878d8548ac7fae43cd6d82579f966eb8825e282 upstream. topaz is actually gmc7. Reviewed-by: Ken Wang Reviewed-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 28 +-------------------------- 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 7e87b9024863..ba4ad00ba8b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -42,7 +42,6 @@ static void gmc_v8_0_set_gart_funcs(struct amdgpu_device *adev); static void gmc_v8_0_set_irq_funcs(struct amdgpu_device *adev); -MODULE_FIRMWARE("amdgpu/topaz_mc.bin"); MODULE_FIRMWARE("amdgpu/tonga_mc.bin"); static const u32 golden_settings_tonga_a11[] = @@ -74,19 +73,6 @@ static const u32 fiji_mgcg_cgcg_init[] = mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104 }; -static const u32 golden_settings_iceland_a11[] = -{ - mmVM_PRT_APERTURE0_LOW_ADDR, 0x0fffffff, 0x0fffffff, - mmVM_PRT_APERTURE1_LOW_ADDR, 0x0fffffff, 0x0fffffff, - mmVM_PRT_APERTURE2_LOW_ADDR, 0x0fffffff, 0x0fffffff, - mmVM_PRT_APERTURE3_LOW_ADDR, 0x0fffffff, 0x0fffffff -}; - -static const u32 iceland_mgcg_cgcg_init[] = -{ - mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104 -}; - static const u32 cz_mgcg_cgcg_init[] = { mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104 @@ -101,14 +87,6 @@ static const u32 stoney_mgcg_cgcg_init[] = static void gmc_v8_0_init_golden_registers(struct amdgpu_device *adev) { switch (adev->asic_type) { - case CHIP_TOPAZ: - amdgpu_program_register_sequence(adev, - iceland_mgcg_cgcg_init, - (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init)); - amdgpu_program_register_sequence(adev, - golden_settings_iceland_a11, - (const u32)ARRAY_SIZE(golden_settings_iceland_a11)); - break; case CHIP_FIJI: amdgpu_program_register_sequence(adev, fiji_mgcg_cgcg_init, @@ -228,9 +206,6 @@ static int gmc_v8_0_init_microcode(struct amdgpu_device *adev) DRM_DEBUG("\n"); switch (adev->asic_type) { - case CHIP_TOPAZ: - chip_name = "topaz"; - break; case CHIP_TONGA: chip_name = "tonga"; break; @@ -1000,8 +975,7 @@ static int gmc_v8_0_hw_init(void *handle) gmc_v8_0_mc_program(adev); - if ((adev->asic_type == CHIP_TOPAZ) || - (adev->asic_type == CHIP_TONGA)) { + if (adev->asic_type == CHIP_TONGA) { r = gmc_v8_0_mc_load_microcode(adev); if (r) { DRM_ERROR("Failed to load MC firmware!\n"); From b8b1ad305f8de05b241a57707d5b3de3692dbdfa Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 2 Feb 2016 11:15:41 -0500 Subject: [PATCH 183/418] drm/amdgpu: don't load MEC2 on topaz MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 97dde76a30c2e67fa5fb9cb6a4072c0178c9df26 upstream. Not validated. Reviewed-by: Ken Wang Reviewed-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/iceland_smc.c | 12 +++--------- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index e1dcab98e249..4cb45f4602aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -90,7 +90,6 @@ MODULE_FIRMWARE("amdgpu/topaz_ce.bin"); MODULE_FIRMWARE("amdgpu/topaz_pfp.bin"); MODULE_FIRMWARE("amdgpu/topaz_me.bin"); MODULE_FIRMWARE("amdgpu/topaz_mec.bin"); -MODULE_FIRMWARE("amdgpu/topaz_mec2.bin"); MODULE_FIRMWARE("amdgpu/topaz_rlc.bin"); MODULE_FIRMWARE("amdgpu/fiji_ce.bin"); @@ -807,7 +806,8 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); - if (adev->asic_type != CHIP_STONEY) { + if ((adev->asic_type != CHIP_STONEY) && + (adev->asic_type != CHIP_TOPAZ)) { snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); if (!err) { diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_smc.c b/drivers/gpu/drm/amd/amdgpu/iceland_smc.c index 966d4b2ed9da..090486c18249 100644 --- a/drivers/gpu/drm/amd/amdgpu/iceland_smc.c +++ b/drivers/gpu/drm/amd/amdgpu/iceland_smc.c @@ -432,7 +432,7 @@ static uint32_t iceland_smu_get_mask_for_fw_type(uint32_t fw_type) case AMDGPU_UCODE_ID_CP_ME: return UCODE_ID_CP_ME_MASK; case AMDGPU_UCODE_ID_CP_MEC1: - return UCODE_ID_CP_MEC_MASK | UCODE_ID_CP_MEC_JT1_MASK | UCODE_ID_CP_MEC_JT2_MASK; + return UCODE_ID_CP_MEC_MASK | UCODE_ID_CP_MEC_JT1_MASK; case AMDGPU_UCODE_ID_CP_MEC2: return UCODE_ID_CP_MEC_MASK; case AMDGPU_UCODE_ID_RLC_G: @@ -522,12 +522,6 @@ static int iceland_smu_request_load_fw(struct amdgpu_device *adev) return -EINVAL; } - if (iceland_smu_populate_single_firmware_entry(adev, UCODE_ID_CP_MEC_JT2, - &toc->entry[toc->num_entries++])) { - DRM_ERROR("Failed to get firmware entry for MEC_JT2\n"); - return -EINVAL; - } - if (iceland_smu_populate_single_firmware_entry(adev, UCODE_ID_SDMA0, &toc->entry[toc->num_entries++])) { DRM_ERROR("Failed to get firmware entry for SDMA0\n"); @@ -550,8 +544,8 @@ static int iceland_smu_request_load_fw(struct amdgpu_device *adev) UCODE_ID_CP_ME_MASK | UCODE_ID_CP_PFP_MASK | UCODE_ID_CP_MEC_MASK | - UCODE_ID_CP_MEC_JT1_MASK | - UCODE_ID_CP_MEC_JT2_MASK; + UCODE_ID_CP_MEC_JT1_MASK; + if (iceland_send_msg_to_smc_with_parameter_without_waiting(adev, PPSMC_MSG_LoadUcodes, fw_to_load)) { DRM_ERROR("Fail to request SMU load ucode\n"); From 4474b85771139f2da8f8f4f443e6fad08081e99e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 2 Feb 2016 16:24:20 -0500 Subject: [PATCH 184/418] drm/amdgpu: remove exp hardware support from iceland MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit dba280b20bfd1c2bed8a07ce3f75a6da8ba7d247 upstream. It's working now. bug: https://bugs.freedesktop.org/show_bug.cgi?id=92270 Reviewed-by: Ken Wang Reviewed-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 0508c5cd103a..8d6668cedf6d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -250,11 +250,11 @@ static struct pci_device_id pciidlist[] = { {0x1002, 0x985F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_MULLINS|AMD_IS_MOBILITY|AMD_IS_APU}, #endif /* topaz */ - {0x1002, 0x6900, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ|AMD_EXP_HW_SUPPORT}, - {0x1002, 0x6901, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ|AMD_EXP_HW_SUPPORT}, - {0x1002, 0x6902, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ|AMD_EXP_HW_SUPPORT}, - {0x1002, 0x6903, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ|AMD_EXP_HW_SUPPORT}, - {0x1002, 0x6907, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ|AMD_EXP_HW_SUPPORT}, + {0x1002, 0x6900, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ}, + {0x1002, 0x6901, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ}, + {0x1002, 0x6902, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ}, + {0x1002, 0x6903, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ}, + {0x1002, 0x6907, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TOPAZ}, /* tonga */ {0x1002, 0x6920, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TONGA}, {0x1002, 0x6921, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TONGA}, From 94703402213a01ce0fb0aa4e646cdfc2610f17b5 Mon Sep 17 00:00:00 2001 From: Flora Cui Date: Thu, 4 Feb 2016 15:10:08 +0800 Subject: [PATCH 185/418] drm/amdgpu: fix s4 resume commit ca19852884c8937eed89560f924f5a34cfcc22af upstream. No need to re-init asic if it's already been initialized. Skip IB tests since kernel processes are frozen in thaw. Signed-off-by: Flora Cui Reviewed-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 58cb6987b078..c961fe093e12 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1744,15 +1744,20 @@ int amdgpu_resume_kms(struct drm_device *dev, bool resume, bool fbcon) } /* post card */ - amdgpu_atom_asic_init(adev->mode_info.atom_context); + if (!amdgpu_card_posted(adev)) + amdgpu_atom_asic_init(adev->mode_info.atom_context); r = amdgpu_resume(adev); + if (r) + DRM_ERROR("amdgpu_resume failed (%d).\n", r); amdgpu_fence_driver_resume(adev); - r = amdgpu_ib_ring_tests(adev); - if (r) - DRM_ERROR("ib ring test failed (%d).\n", r); + if (resume) { + r = amdgpu_ib_ring_tests(adev); + if (r) + DRM_ERROR("ib ring test failed (%d).\n", r); + } r = amdgpu_late_init(adev); if (r) From 978b8b828d357197f4f209c9b3a0337a1a7aa7fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= Date: Fri, 5 Feb 2016 10:49:50 -0500 Subject: [PATCH 186/418] drm/amdgpu: remove unnecessary forward declaration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit b19763d0d867eb863953500a5c87f2fd663863b8 upstream. Signed-off-by: Nicolai Hähnle Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 048cfe073dae..692723066af1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -604,8 +604,6 @@ struct amdgpu_sa_manager { uint32_t align; }; -struct amdgpu_sa_bo; - /* sub-allocation buffer */ struct amdgpu_sa_bo { struct list_head olist; From 7a42e7cd29f4c9d59fab1a8e306e21d77f57a0a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= Date: Fri, 5 Feb 2016 10:59:43 -0500 Subject: [PATCH 187/418] drm/amdgpu: hold reference to fences in amdgpu_sa_bo_new (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit a8d81b36267366603771431747438d18f32ae2d5 upstream. An arbitrary amount of time can pass between spin_unlock and fence_wait_any_timeout, so we need to ensure that nobody frees the fences from under us. A stress test (rapidly starting and killing hundreds of glxgears instances) ran into a deadlock in fence_wait_any_timeout after about an hour, and this race condition appears to be a plausible cause. v2: agd: rebase on upstream Signed-off-by: Nicolai Hähnle Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c index 8b88edb0434b..ca72a2e487b9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c @@ -354,12 +354,15 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager, for (i = 0, count = 0; i < AMDGPU_MAX_RINGS; ++i) if (fences[i]) - fences[count++] = fences[i]; + fences[count++] = fence_get(fences[i]); if (count) { spin_unlock(&sa_manager->wq.lock); t = fence_wait_any_timeout(fences, count, false, MAX_SCHEDULE_TIMEOUT); + for (i = 0; i < count; ++i) + fence_put(fences[i]); + r = (t > 0) ? 0 : t; spin_lock(&sa_manager->wq.lock); } else { From 4d6bc4fa3202a28dab7a4c03c22a569b87f98f84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 8 Feb 2016 10:57:22 +0100 Subject: [PATCH 188/418] drm/amdgpu: fix issue with overlapping userptrs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit cc1de6e800c253172334f8774c419dc64401cd2e upstream. Otherwise we could try to evict overlapping userptr BOs in get_user_pages(), leading to a possible circular locking dependency. Signed-off-by: Christian König Acked-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 19 +++++++++++++++++++ 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 692723066af1..bb1099c549df 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -2312,6 +2312,8 @@ bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo); int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, uint32_t flags); bool amdgpu_ttm_tt_has_userptr(struct ttm_tt *ttm); +bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, + unsigned long end); bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm); uint32_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, struct ttm_mem_reg *mem); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index b1969f2b2038..d4e2780c0796 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -142,7 +142,8 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn, list_for_each_entry(bo, &node->bos, mn_list) { - if (!bo->tbo.ttm || bo->tbo.ttm->state != tt_bound) + if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, start, + end)) continue; r = amdgpu_bo_reserve(bo, true); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 55cf05e1c81c..6442a06d6fdc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -783,6 +783,25 @@ bool amdgpu_ttm_tt_has_userptr(struct ttm_tt *ttm) return !!gtt->userptr; } +bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, + unsigned long end) +{ + struct amdgpu_ttm_tt *gtt = (void *)ttm; + unsigned long size; + + if (gtt == NULL) + return false; + + if (gtt->ttm.ttm.state != tt_bound || !gtt->userptr) + return false; + + size = (unsigned long)gtt->ttm.ttm.num_pages * PAGE_SIZE; + if (gtt->userptr > end || gtt->userptr + size <= start) + return false; + + return true; +} + bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm) { struct amdgpu_ttm_tt *gtt = (void *)ttm; From 8fb49d08fa1bb7a38545d0e7e7eeb2f8feae31f6 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Mon, 15 Feb 2016 19:41:45 +0100 Subject: [PATCH 189/418] drm/amdgpu: use post-decrement in error handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 09ccbb74b6718ad4d1290de3f5669212c0ac7d4b upstream. We need to use post-decrement to get the pci_map_page undone also for i==0, and to avoid some very unpleasant behaviour if pci_map_page failed already at i==0. Reviewed-by: Christian König Signed-off-by: Rasmus Villemoes Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 6442a06d6fdc..1cbb16e15307 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -712,7 +712,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm) 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); if (pci_dma_mapping_error(adev->pdev, gtt->ttm.dma_address[i])) { - while (--i) { + while (i--) { pci_unmap_page(adev->pdev, gtt->ttm.dma_address[i], PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); gtt->ttm.dma_address[i] = 0; From ae4fa0cb5ff9ed89f83cc25a85aff69e0103e05d Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Fri, 19 Feb 2016 02:06:39 +0100 Subject: [PATCH 190/418] drm/amdgpu: Don't hang in amdgpu_flip_work_func on disabled crtc. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e1d09dc0ccc6c91e3916476f636edb76da1f65bb upstream. This fixes a regression introduced in Linux 4.4. This is a port of the same fix for radeon-kms in the patch "drm/radeon: Don't hang in radeon_flip_work_func on disabled crtc. (v2)" Limit the amount of time amdgpu_flip_work_func can delay programming a page flip, by both limiting the maximum amount of time per wait cycle and the maximum number of wait cycles. Continue the flip if the limit is exceeded, even if that may result in a visual or timing glitch. This is to prevent a hang of page flips, as reported in fdo bug #93746: Disconnecting a DisplayPort display in parallel to a kms pageflip getting queued can cause the following hang of page flips and thereby an unusable desktop: 1. kms pageflip ioctl() queues pageflip -> queues execution of amdgpu_flip_work_func. 2. Hotunplug of display causes the driver to DPMS OFF the unplugged display. Display engine shuts down, scanout no longer moves, but stays at its resting position at start line of vblank. 3. amdgpu_flip_work_func executes while crtc is off, and due to the non-moving scanout position, the new flip delay code introduced into Linux 4.4 by commit 8e36f9d33c13 ("drm/amdgpu: Fixup hw vblank counter/ts..") enters an infinite wait loop. 4. After reconnecting the display, the pageflip continues to hang in 3. and the display doesn't update its view of the desktop. This patch fixes the Linux 4.4 regression from fdo bug #93746 Reported-by: Bernd Steinhauser Signed-off-by: Mario Kleiner Signed-off-by: Greg Kroah-Hartman Cc: Michel Dänzer Cc: Alex Deucher Reviewed-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 5580d3420c3a..0c713a908304 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -72,8 +72,8 @@ static void amdgpu_flip_work_func(struct work_struct *__work) struct drm_crtc *crtc = &amdgpuCrtc->base; unsigned long flags; - unsigned i; - int vpos, hpos, stat, min_udelay; + unsigned i, repcnt = 4; + int vpos, hpos, stat, min_udelay = 0; struct drm_vblank_crtc *vblank = &crtc->dev->vblank[work->crtc_id]; amdgpu_flip_wait_fence(adev, &work->excl); @@ -96,7 +96,7 @@ static void amdgpu_flip_work_func(struct work_struct *__work) * In practice this won't execute very often unless on very fast * machines because the time window for this to happen is very small. */ - for (;;) { + while (amdgpuCrtc->enabled && repcnt--) { /* GET_DISTANCE_TO_VBLANKSTART returns distance to real vblank * start in hpos, and to the "fudged earlier" vblank start in * vpos. @@ -114,10 +114,22 @@ static void amdgpu_flip_work_func(struct work_struct *__work) /* Sleep at least until estimated real start of hw vblank */ spin_unlock_irqrestore(&crtc->dev->event_lock, flags); min_udelay = (-hpos + 1) * max(vblank->linedur_ns / 1000, 5); + if (min_udelay > vblank->framedur_ns / 2000) { + /* Don't wait ridiculously long - something is wrong */ + repcnt = 0; + break; + } usleep_range(min_udelay, 2 * min_udelay); spin_lock_irqsave(&crtc->dev->event_lock, flags); }; + if (!repcnt) + DRM_DEBUG_DRIVER("Delay problem on crtc %d: min_udelay %d, " + "framedur %d, linedur %d, stat %d, vpos %d, " + "hpos %d\n", work->crtc_id, min_udelay, + vblank->framedur_ns / 1000, + vblank->linedur_ns / 1000, stat, vpos, hpos); + /* do the flip (mmio) */ adev->mode_info.funcs->page_flip(adev, work->crtc_id, work->base); /* set the flip status */ From 32c021915d41a56f58c42b1075238b75894cd29a Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 19 Feb 2016 17:55:31 -0500 Subject: [PATCH 191/418] drm/amdgpu/pm: adjust display configuration after powerstate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 8e7cedc6f7fe762ffe6e348502be34b11fa79298 upstream. set_power_state defaults to no displays, so we need to update the display configuration after setting up the powerstate on the first call. In most cases this is not an issue since ends up getting called multiple times at any given modeset and the proper order is achieved in the display changed handling at the top of the function. Reviewed-by: Christian König Acked-by: Jordan Lazare Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 22a8c7d3a3ab..03fe25142b78 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -595,8 +595,6 @@ force: /* update display watermarks based on new power state */ amdgpu_display_bandwidth_update(adev); - /* update displays */ - amdgpu_dpm_display_configuration_changed(adev); adev->pm.dpm.current_active_crtcs = adev->pm.dpm.new_active_crtcs; adev->pm.dpm.current_active_crtc_count = adev->pm.dpm.new_active_crtc_count; @@ -616,6 +614,9 @@ force: amdgpu_dpm_post_set_power_state(adev); + /* update displays */ + amdgpu_dpm_display_configuration_changed(adev); + if (adev->pm.funcs->force_performance_level) { if (adev->pm.dpm.thermal_active) { enum amdgpu_dpm_forced_level level = adev->pm.dpm.forced_level; From 61392a0d006b9a3feae3b3afd36cbe8daca0bce7 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Fri, 8 Jan 2016 08:56:51 +1000 Subject: [PATCH 192/418] drm/nouveau/kms: take mode_config mutex in connector hotplug path commit 0a882cadbc63fd2da3994af7115b4ada2fcbd638 upstream. fdo#93634 Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nouveau_connector.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c index 2e7cbe933533..2a5ed7460354 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.c +++ b/drivers/gpu/drm/nouveau/nouveau_connector.c @@ -969,10 +969,13 @@ nouveau_connector_hotplug(struct nvif_notify *notify) NV_DEBUG(drm, "%splugged %s\n", plugged ? "" : "un", name); + mutex_lock(&drm->dev->mode_config.mutex); if (plugged) drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON); else drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); + mutex_unlock(&drm->dev->mode_config.mutex); + drm_helper_hpd_irq_event(connector->dev); } From a6976c7be5b193b4efd95fb16b2d8599dd98364e Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Fri, 12 Feb 2016 20:30:32 +0100 Subject: [PATCH 193/418] drm/nouveau/display: Enable vblank irqs after display engine is on again. commit ff683df7bf34f90766a50c7e7454e219aef2710e upstream. In the display resume path, move the calls to drm_vblank_on() after the point when the display engine is running again. Since changes were made to drm_update_vblank_count() in Linux 4.4+ to emulate hw vblank counters via vblank timestamping, the function drm_vblank_on() now needs working high precision vblank timestamping and therefore working scanout position queries at time of call. These don't work before the display engine gets restarted, causing miscalculation of vblank counter increments and thereby large forward jumps in vblank count at display resume. These jumps can cause client hangs on resume, or desktop hangs in the case of composited desktops. Fix this Linux 4.4 regression by reordering calls accordingly. Signed-off-by: Mario Kleiner Cc: Ben Skeggs Cc: ville.syrjala@linux.intel.com Cc: daniel.vetter@ffwll.ch Cc: dri-devel@lists.freedesktop.org Reviewed-by: Daniel Vetter Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nouveau_display.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c index 64c8d932d5f1..58a3f7cf2fb3 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.c +++ b/drivers/gpu/drm/nouveau/nouveau_display.c @@ -634,10 +634,6 @@ nouveau_display_resume(struct drm_device *dev, bool runtime) nv_crtc->lut.depth = 0; } - /* Make sure that drm and hw vblank irqs get resumed if needed. */ - for (head = 0; head < dev->mode_config.num_crtc; head++) - drm_vblank_on(dev, head); - /* This should ensure we don't hit a locking problem when someone * wakes us up via a connector. We should never go into suspend * while the display is on anyways. @@ -647,6 +643,10 @@ nouveau_display_resume(struct drm_device *dev, bool runtime) drm_helper_resume_force_mode(dev); + /* Make sure that drm and hw vblank irqs get resumed if needed. */ + for (head = 0; head < dev->mode_config.num_crtc; head++) + drm_vblank_on(dev, head); + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc); From 2168fe3d7bfe2738d9095e1ed8eaefea2011686a Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 18 Feb 2016 08:14:19 +1000 Subject: [PATCH 194/418] drm/nouveau/disp/dp: ensure sink is powered up before attempting link training commit 95664e66fad964c3dd7945d6edfb1d0931844664 upstream. This can happen under some annoying circumstances, and is a quick fix until more substantial changes can be made. Fixed eDP mode changes on (at least) the Lenovo P50. Signed-off-by: Ben Skeggs Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nvkm/engine/disp/dport.c | 10 ++++++++++ drivers/gpu/drm/nouveau/nvkm/engine/disp/dport.h | 6 ++++++ 2 files changed, 16 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dport.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dport.c index 74e2f7c6c07e..9688970eca47 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dport.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dport.c @@ -328,6 +328,7 @@ nvkm_dp_train(struct work_struct *w) .outp = outp, }, *dp = &_dp; u32 datarate = 0; + u8 pwr; int ret; if (!outp->base.info.location && disp->func->sor.magic) @@ -355,6 +356,15 @@ nvkm_dp_train(struct work_struct *w) /* disable link interrupt handling during link training */ nvkm_notify_put(&outp->irq); + /* ensure sink is not in a low-power state */ + if (!nvkm_rdaux(outp->aux, DPCD_SC00, &pwr, 1)) { + if ((pwr & DPCD_SC00_SET_POWER) != DPCD_SC00_SET_POWER_D0) { + pwr &= ~DPCD_SC00_SET_POWER; + pwr |= DPCD_SC00_SET_POWER_D0; + nvkm_wraux(outp->aux, DPCD_SC00, &pwr, 1); + } + } + /* enable down-spreading and execute pre-train script from vbios */ dp_link_train_init(dp, outp->dpcd[3] & 0x01); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dport.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dport.h index 9596290329c7..6e10c5e0ef11 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dport.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dport.h @@ -71,5 +71,11 @@ #define DPCD_LS0C_LANE1_POST_CURSOR2 0x0c #define DPCD_LS0C_LANE0_POST_CURSOR2 0x03 +/* DPCD Sink Control */ +#define DPCD_SC00 0x00600 +#define DPCD_SC00_SET_POWER 0x03 +#define DPCD_SC00_SET_POWER_D0 0x01 +#define DPCD_SC00_SET_POWER_D3 0x03 + void nvkm_dp_train(struct work_struct *); #endif From 4323b4dbfc7c02f4dda376088c08e6314ff8e5b1 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Wed, 24 Feb 2016 18:34:43 +0100 Subject: [PATCH 195/418] drm/nouveau: platform: Fix deferred probe commit 870571a5698b2e9d0f4d2e5c6245967b582aab45 upstream. The error cleanup paths aren't quite correct and will crash upon deferred probe. Reviewed-by: Ben Skeggs Reviewed-by: Alexandre Courbot Signed-off-by: Thierry Reding Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/nouveau/nouveau_platform.c | 2 +- .../drm/nouveau/nvkm/engine/device/tegra.c | 40 ++++++++++++++----- 2 files changed, 30 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_platform.c b/drivers/gpu/drm/nouveau/nouveau_platform.c index 60e32c4e4e49..35ecc0d0458f 100644 --- a/drivers/gpu/drm/nouveau/nouveau_platform.c +++ b/drivers/gpu/drm/nouveau/nouveau_platform.c @@ -24,7 +24,7 @@ static int nouveau_platform_probe(struct platform_device *pdev) { const struct nvkm_device_tegra_func *func; - struct nvkm_device *device; + struct nvkm_device *device = NULL; struct drm_device *drm; int ret; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c index 7f8a42721eb2..e7e581d6a8ff 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c @@ -252,32 +252,40 @@ nvkm_device_tegra_new(const struct nvkm_device_tegra_func *func, if (!(tdev = kzalloc(sizeof(*tdev), GFP_KERNEL))) return -ENOMEM; - *pdevice = &tdev->device; + tdev->func = func; tdev->pdev = pdev; tdev->irq = -1; tdev->vdd = devm_regulator_get(&pdev->dev, "vdd"); - if (IS_ERR(tdev->vdd)) - return PTR_ERR(tdev->vdd); + if (IS_ERR(tdev->vdd)) { + ret = PTR_ERR(tdev->vdd); + goto free; + } tdev->rst = devm_reset_control_get(&pdev->dev, "gpu"); - if (IS_ERR(tdev->rst)) - return PTR_ERR(tdev->rst); + if (IS_ERR(tdev->rst)) { + ret = PTR_ERR(tdev->rst); + goto free; + } tdev->clk = devm_clk_get(&pdev->dev, "gpu"); - if (IS_ERR(tdev->clk)) - return PTR_ERR(tdev->clk); + if (IS_ERR(tdev->clk)) { + ret = PTR_ERR(tdev->clk); + goto free; + } tdev->clk_pwr = devm_clk_get(&pdev->dev, "pwr"); - if (IS_ERR(tdev->clk_pwr)) - return PTR_ERR(tdev->clk_pwr); + if (IS_ERR(tdev->clk_pwr)) { + ret = PTR_ERR(tdev->clk_pwr); + goto free; + } nvkm_device_tegra_probe_iommu(tdev); ret = nvkm_device_tegra_power_up(tdev); if (ret) - return ret; + goto remove; tdev->gpu_speedo = tegra_sku_info.gpu_speedo_value; ret = nvkm_device_ctor(&nvkm_device_tegra_func, NULL, &pdev->dev, @@ -285,9 +293,19 @@ nvkm_device_tegra_new(const struct nvkm_device_tegra_func *func, cfg, dbg, detect, mmio, subdev_mask, &tdev->device); if (ret) - return ret; + goto powerdown; + + *pdevice = &tdev->device; return 0; + +powerdown: + nvkm_device_tegra_power_down(tdev); +remove: + nvkm_device_tegra_remove_iommu(tdev); +free: + kfree(tdev); + return ret; } #else int From c9e8a98ebe2e6d2633e79f76e353fef1db1f1bcd Mon Sep 17 00:00:00 2001 From: Mykola Lysenko Date: Fri, 18 Dec 2015 17:14:42 -0500 Subject: [PATCH 196/418] drm/dp/mst: process broadcast messages correctly commit bd9343208704fcc70a5b919f228a7d26ae472727 upstream. In case broadcast message received in UP request, RAD cannot be used to identify message originator. Message should be parsed, originator should be found by GUID from parsed message. Also reply with broadcast in case broadcast message received (for now it is always broadcast) Acked-by: Dave Airlie Signed-off-by: Mykola Lysenko Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_dp_mst_topology.c | 95 +++++++++++++++++++++++---- 1 file changed, 84 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 809959d56d78..3baa95c1b14b 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -1210,6 +1210,50 @@ out: return mstb; } +static struct drm_dp_mst_branch *get_mst_branch_device_by_guid_helper( + struct drm_dp_mst_branch *mstb, + uint8_t *guid) +{ + struct drm_dp_mst_branch *found_mstb; + struct drm_dp_mst_port *port; + + list_for_each_entry(port, &mstb->ports, next) { + if (!port->mstb) + continue; + + if (port->guid_valid && memcmp(port->guid, guid, 16) == 0) + return port->mstb; + + found_mstb = get_mst_branch_device_by_guid_helper(port->mstb, guid); + + if (found_mstb) + return found_mstb; + } + + return NULL; +} + +static struct drm_dp_mst_branch *drm_dp_get_mst_branch_device_by_guid( + struct drm_dp_mst_topology_mgr *mgr, + uint8_t *guid) +{ + struct drm_dp_mst_branch *mstb; + + /* find the port by iterating down */ + mutex_lock(&mgr->lock); + + if (mgr->guid_valid && memcmp(mgr->guid, guid, 16) == 0) + mstb = mgr->mst_primary; + else + mstb = get_mst_branch_device_by_guid_helper(mgr->mst_primary, guid); + + if (mstb) + kref_get(&mstb->kref); + + mutex_unlock(&mgr->lock); + return mstb; +} + static void drm_dp_check_and_send_link_address(struct drm_dp_mst_topology_mgr *mgr, struct drm_dp_mst_branch *mstb) { @@ -1320,6 +1364,7 @@ static int set_hdr_from_dst_qlock(struct drm_dp_sideband_msg_hdr *hdr, struct drm_dp_sideband_msg_tx *txmsg) { struct drm_dp_mst_branch *mstb = txmsg->dst; + u8 req_type; /* both msg slots are full */ if (txmsg->seqno == -1) { @@ -1336,7 +1381,13 @@ static int set_hdr_from_dst_qlock(struct drm_dp_sideband_msg_hdr *hdr, txmsg->seqno = 1; mstb->tx_slots[txmsg->seqno] = txmsg; } - hdr->broadcast = 0; + + req_type = txmsg->msg[0] & 0x7f; + if (req_type == DP_CONNECTION_STATUS_NOTIFY || + req_type == DP_RESOURCE_STATUS_NOTIFY) + hdr->broadcast = 1; + else + hdr->broadcast = 0; hdr->path_msg = txmsg->path_msg; hdr->lct = mstb->lct; hdr->lcr = mstb->lct - 1; @@ -2145,28 +2196,50 @@ static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr) if (mgr->up_req_recv.have_eomt) { struct drm_dp_sideband_msg_req_body msg; - struct drm_dp_mst_branch *mstb; + struct drm_dp_mst_branch *mstb = NULL; bool seqno; - mstb = drm_dp_get_mst_branch_device(mgr, - mgr->up_req_recv.initial_hdr.lct, - mgr->up_req_recv.initial_hdr.rad); - if (!mstb) { - DRM_DEBUG_KMS("Got MST reply from unknown device %d\n", mgr->up_req_recv.initial_hdr.lct); - memset(&mgr->up_req_recv, 0, sizeof(struct drm_dp_sideband_msg_rx)); - return 0; + + if (!mgr->up_req_recv.initial_hdr.broadcast) { + mstb = drm_dp_get_mst_branch_device(mgr, + mgr->up_req_recv.initial_hdr.lct, + mgr->up_req_recv.initial_hdr.rad); + if (!mstb) { + DRM_DEBUG_KMS("Got MST reply from unknown device %d\n", mgr->up_req_recv.initial_hdr.lct); + memset(&mgr->up_req_recv, 0, sizeof(struct drm_dp_sideband_msg_rx)); + return 0; + } } seqno = mgr->up_req_recv.initial_hdr.seqno; drm_dp_sideband_parse_req(&mgr->up_req_recv, &msg); if (msg.req_type == DP_CONNECTION_STATUS_NOTIFY) { - drm_dp_send_up_ack_reply(mgr, mstb, msg.req_type, seqno, false); + drm_dp_send_up_ack_reply(mgr, mgr->mst_primary, msg.req_type, seqno, false); + + if (!mstb) + mstb = drm_dp_get_mst_branch_device_by_guid(mgr, msg.u.conn_stat.guid); + + if (!mstb) { + DRM_DEBUG_KMS("Got MST reply from unknown device %d\n", mgr->up_req_recv.initial_hdr.lct); + memset(&mgr->up_req_recv, 0, sizeof(struct drm_dp_sideband_msg_rx)); + return 0; + } + drm_dp_update_port(mstb, &msg.u.conn_stat); DRM_DEBUG_KMS("Got CSN: pn: %d ldps:%d ddps: %d mcs: %d ip: %d pdt: %d\n", msg.u.conn_stat.port_number, msg.u.conn_stat.legacy_device_plug_status, msg.u.conn_stat.displayport_device_plug_status, msg.u.conn_stat.message_capability_status, msg.u.conn_stat.input_port, msg.u.conn_stat.peer_device_type); (*mgr->cbs->hotplug)(mgr); } else if (msg.req_type == DP_RESOURCE_STATUS_NOTIFY) { - drm_dp_send_up_ack_reply(mgr, mstb, msg.req_type, seqno, false); + drm_dp_send_up_ack_reply(mgr, mgr->mst_primary, msg.req_type, seqno, false); + if (!mstb) + mstb = drm_dp_get_mst_branch_device_by_guid(mgr, msg.u.resource_stat.guid); + + if (!mstb) { + DRM_DEBUG_KMS("Got MST reply from unknown device %d\n", mgr->up_req_recv.initial_hdr.lct); + memset(&mgr->up_req_recv, 0, sizeof(struct drm_dp_sideband_msg_rx)); + return 0; + } + DRM_DEBUG_KMS("Got RSN: pn: %d avail_pbn %d\n", msg.u.resource_stat.port_number, msg.u.resource_stat.available_pbn); } From 1596315171d84391fcf90d5942231cefaf691cee Mon Sep 17 00:00:00 2001 From: Mykola Lysenko Date: Fri, 18 Dec 2015 17:14:43 -0500 Subject: [PATCH 197/418] drm/dp/mst: always send reply for UP request commit 1f16ee7fa13649f4e55aa48ad31c3eb0722a62d3 upstream. We should always send reply for UP request in order to make downstream device clean-up resources appropriately. Issue was that reply for UP request was sent only once. Acked-by: Dave Airlie Signed-off-by: Mykola Lysenko Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_dp_mst_topology.c | 30 ++++++++++----------------- include/drm/drm_dp_mst_helper.h | 2 -- 2 files changed, 11 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 3baa95c1b14b..bda9be9a3087 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -1489,26 +1489,18 @@ static void process_single_down_tx_qlock(struct drm_dp_mst_topology_mgr *mgr) } /* called holding qlock */ -static void process_single_up_tx_qlock(struct drm_dp_mst_topology_mgr *mgr) +static void process_single_up_tx_qlock(struct drm_dp_mst_topology_mgr *mgr, + struct drm_dp_sideband_msg_tx *txmsg) { - struct drm_dp_sideband_msg_tx *txmsg; int ret; /* construct a chunk from the first msg in the tx_msg queue */ - if (list_empty(&mgr->tx_msg_upq)) { - mgr->tx_up_in_progress = false; - return; - } - - txmsg = list_first_entry(&mgr->tx_msg_upq, struct drm_dp_sideband_msg_tx, next); ret = process_single_tx_qlock(mgr, txmsg, true); - if (ret == 1) { - /* up txmsgs aren't put in slots - so free after we send it */ - list_del(&txmsg->next); - kfree(txmsg); - } else if (ret) + + if (ret != 1) DRM_DEBUG_KMS("failed to send msg in q %d\n", ret); - mgr->tx_up_in_progress = true; + + txmsg->dst->tx_slots[txmsg->seqno] = NULL; } static void drm_dp_queue_down_tx(struct drm_dp_mst_topology_mgr *mgr, @@ -1895,11 +1887,12 @@ static int drm_dp_send_up_ack_reply(struct drm_dp_mst_topology_mgr *mgr, drm_dp_encode_up_ack_reply(txmsg, req_type); mutex_lock(&mgr->qlock); - list_add_tail(&txmsg->next, &mgr->tx_msg_upq); - if (!mgr->tx_up_in_progress) { - process_single_up_tx_qlock(mgr); - } + + process_single_up_tx_qlock(mgr, txmsg); + mutex_unlock(&mgr->qlock); + + kfree(txmsg); return 0; } @@ -2809,7 +2802,6 @@ int drm_dp_mst_topology_mgr_init(struct drm_dp_mst_topology_mgr *mgr, mutex_init(&mgr->qlock); mutex_init(&mgr->payload_lock); mutex_init(&mgr->destroy_connector_lock); - INIT_LIST_HEAD(&mgr->tx_msg_upq); INIT_LIST_HEAD(&mgr->tx_msg_downq); INIT_LIST_HEAD(&mgr->destroy_connector_list); INIT_WORK(&mgr->work, drm_dp_mst_link_probe_work); diff --git a/include/drm/drm_dp_mst_helper.h b/include/drm/drm_dp_mst_helper.h index 5340099741ae..006062a27639 100644 --- a/include/drm/drm_dp_mst_helper.h +++ b/include/drm/drm_dp_mst_helper.h @@ -450,9 +450,7 @@ struct drm_dp_mst_topology_mgr { the mstb tx_slots and txmsg->state once they are queued */ struct mutex qlock; struct list_head tx_msg_downq; - struct list_head tx_msg_upq; bool tx_down_in_progress; - bool tx_up_in_progress; /* payload info + lock for it */ struct mutex payload_lock; From 7b713e9ed440bed2d255aa5d247e40c7a8a3fe1b Mon Sep 17 00:00:00 2001 From: Mykola Lysenko Date: Fri, 25 Dec 2015 16:14:47 +0800 Subject: [PATCH 198/418] drm/dp/mst: fix in MSTB RAD initialization commit 75af4c8c4c0f60d7ad135419805798f144e9baf9 upstream. This fix is needed to support more then two branch displays, so RAD address consist at least of 2 elements Acked-by: Dave Airlie Signed-off-by: Mykola Lysenko Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_dp_mst_topology.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index bda9be9a3087..f998b9eeb82c 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -973,17 +973,17 @@ static struct drm_dp_mst_port *drm_dp_get_port(struct drm_dp_mst_branch *mstb, u static u8 drm_dp_calculate_rad(struct drm_dp_mst_port *port, u8 *rad) { - int lct = port->parent->lct; + int parent_lct = port->parent->lct; int shift = 4; - int idx = lct / 2; - if (lct > 1) { - memcpy(rad, port->parent->rad, idx); - shift = (lct % 2) ? 4 : 0; + int idx = (parent_lct - 1) / 2; + if (parent_lct > 1) { + memcpy(rad, port->parent->rad, idx + 1); + shift = (parent_lct % 2) ? 4 : 0; } else rad[0] = 0; rad[idx] |= port->port_num << shift; - return lct + 1; + return parent_lct + 1; } /* From 2e614c50122b2c2470c6c0c79bcdad43bae8021b Mon Sep 17 00:00:00 2001 From: Mykola Lysenko Date: Fri, 25 Dec 2015 16:14:48 +0800 Subject: [PATCH 199/418] drm/dp/mst: fix in RAD element access commit 7a11a334aa6af4c65c6a0d81b60c97fc18673532 upstream. This is needed to receive correct port number from RAD, so MSTB could be found Acked-by: Dave Airlie Signed-off-by: Mykola Lysenko Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_dp_mst_topology.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index f998b9eeb82c..b97f057d69b3 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -1039,7 +1039,7 @@ static void build_mst_prop_path(const struct drm_dp_mst_branch *mstb, snprintf(proppath, proppath_size, "mst:%d", mstb->mgr->conn_base_id); for (i = 0; i < (mstb->lct - 1); i++) { int shift = (i % 2) ? 0 : 4; - int port_num = mstb->rad[i / 2] >> shift; + int port_num = (mstb->rad[i / 2] >> shift) & 0xf; snprintf(temp, sizeof(temp), "-%d", port_num); strlcat(proppath, temp, proppath_size); } @@ -1190,7 +1190,7 @@ static struct drm_dp_mst_branch *drm_dp_get_mst_branch_device(struct drm_dp_mst_ for (i = 0; i < lct - 1; i++) { int shift = (i % 2) ? 0 : 4; - int port_num = rad[i / 2] >> shift; + int port_num = (rad[i / 2] >> shift) & 0xf; list_for_each_entry(port, &mstb->ports, next) { if (port->port_num == port_num) { From b870070037823bea90709ea6b4b8a2c8ffcdc5f5 Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Fri, 22 Jan 2016 17:07:25 -0500 Subject: [PATCH 200/418] drm: Add drm_fixp_from_fraction and drm_fixp2int_ceil commit 64566b5e767f9bc3161055ca1b443a51afb52aad upstream. drm_fixp_from_fraction allows us to create a fixed point directly from a fraction, rather than creating fixed point values and dividing later. This avoids overflow of our 64 bit value for large numbers. drm_fixp2int_ceil allows us to return the ceiling of our fixed point value. [airlied: squash Jordan's fix] 32-bit-build-fix: Jordan Lazare Signed-off-by: Harry Wentland Reviewed-by: Alex Deucher Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- include/drm/drm_fixed.h | 53 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 51 insertions(+), 2 deletions(-) diff --git a/include/drm/drm_fixed.h b/include/drm/drm_fixed.h index d639049a613d..553210c02ee0 100644 --- a/include/drm/drm_fixed.h +++ b/include/drm/drm_fixed.h @@ -73,18 +73,28 @@ static inline u32 dfixed_div(fixed20_12 A, fixed20_12 B) #define DRM_FIXED_ONE (1ULL << DRM_FIXED_POINT) #define DRM_FIXED_DECIMAL_MASK (DRM_FIXED_ONE - 1) #define DRM_FIXED_DIGITS_MASK (~DRM_FIXED_DECIMAL_MASK) +#define DRM_FIXED_EPSILON 1LL +#define DRM_FIXED_ALMOST_ONE (DRM_FIXED_ONE - DRM_FIXED_EPSILON) static inline s64 drm_int2fixp(int a) { return ((s64)a) << DRM_FIXED_POINT; } -static inline int drm_fixp2int(int64_t a) +static inline int drm_fixp2int(s64 a) { return ((s64)a) >> DRM_FIXED_POINT; } -static inline unsigned drm_fixp_msbset(int64_t a) +static inline int drm_fixp2int_ceil(s64 a) +{ + if (a > 0) + return drm_fixp2int(a + DRM_FIXED_ALMOST_ONE); + else + return drm_fixp2int(a - DRM_FIXED_ALMOST_ONE); +} + +static inline unsigned drm_fixp_msbset(s64 a) { unsigned shift, sign = (a >> 63) & 1; @@ -136,6 +146,45 @@ static inline s64 drm_fixp_div(s64 a, s64 b) return result; } +static inline s64 drm_fixp_from_fraction(s64 a, s64 b) +{ + s64 res; + bool a_neg = a < 0; + bool b_neg = b < 0; + u64 a_abs = a_neg ? -a : a; + u64 b_abs = b_neg ? -b : b; + u64 rem; + + /* determine integer part */ + u64 res_abs = div64_u64_rem(a_abs, b_abs, &rem); + + /* determine fractional part */ + { + u32 i = DRM_FIXED_POINT; + + do { + rem <<= 1; + res_abs <<= 1; + if (rem >= b_abs) { + res_abs |= 1; + rem -= b_abs; + } + } while (--i != 0); + } + + /* round up LSB */ + { + u64 summand = (rem << 1) >= b_abs; + + res_abs += summand; + } + + res = (s64) res_abs; + if (a_neg ^ b_neg) + res = -res; + return res; +} + static inline s64 drm_fixp_exp(s64 x) { s64 tolerance = div64_s64(DRM_FIXED_ONE, 1000000); From 6cd43e68d937462d063fae86be892ec10125e545 Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Fri, 22 Jan 2016 17:07:26 -0500 Subject: [PATCH 201/418] drm/dp/mst: Calculate MST PBN with 31.32 fixed point commit a9ebb3e46c7ef6112c0da466ef0954673ad36832 upstream. Our PBN value overflows the 20 bits integer part of the 20.12 fixed point. We need to use 31.32 fixed point to avoid this. This happens with display clocks larger than 293122 (at 24 bpp), which we see with the Sharp (and similar) 4k tiled displays. Signed-off-by: Harry Wentland Reviewed-by: Alex Deucher Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_dp_mst_topology.c | 59 ++++++++++++++++----------- 1 file changed, 35 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index b97f057d69b3..e77e96aeeb20 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -2571,32 +2571,31 @@ EXPORT_SYMBOL(drm_dp_check_act_status); */ int drm_dp_calc_pbn_mode(int clock, int bpp) { - fixed20_12 pix_bw; - fixed20_12 fbpp; - fixed20_12 result; - fixed20_12 margin, tmp; - u32 res; + u64 kbps; + s64 peak_kbps; + u32 numerator; + u32 denominator; - pix_bw.full = dfixed_const(clock); - fbpp.full = dfixed_const(bpp); - tmp.full = dfixed_const(8); - fbpp.full = dfixed_div(fbpp, tmp); + kbps = clock * bpp; - result.full = dfixed_mul(pix_bw, fbpp); - margin.full = dfixed_const(54); - tmp.full = dfixed_const(64); - margin.full = dfixed_div(margin, tmp); - result.full = dfixed_div(result, margin); + /* + * margin 5300ppm + 300ppm ~ 0.6% as per spec, factor is 1.006 + * The unit of 54/64Mbytes/sec is an arbitrary unit chosen based on + * common multiplier to render an integer PBN for all link rate/lane + * counts combinations + * calculate + * peak_kbps *= (1006/1000) + * peak_kbps *= (64/54) + * peak_kbps *= 8 convert to bytes + */ - margin.full = dfixed_const(1006); - tmp.full = dfixed_const(1000); - margin.full = dfixed_div(margin, tmp); - result.full = dfixed_mul(result, margin); + numerator = 64 * 1006; + denominator = 54 * 8 * 1000 * 1000; - result.full = dfixed_div(result, tmp); - result.full = dfixed_ceil(result); - res = dfixed_trunc(result); - return res; + kbps *= numerator; + peak_kbps = drm_fixp_from_fraction(kbps, denominator); + + return drm_fixp2int_ceil(peak_kbps); } EXPORT_SYMBOL(drm_dp_calc_pbn_mode); @@ -2604,11 +2603,23 @@ static int test_calc_pbn_mode(void) { int ret; ret = drm_dp_calc_pbn_mode(154000, 30); - if (ret != 689) + if (ret != 689) { + DRM_ERROR("PBN calculation test failed - clock %d, bpp %d, expected PBN %d, actual PBN %d.\n", + 154000, 30, 689, ret); return -EINVAL; + } ret = drm_dp_calc_pbn_mode(234000, 30); - if (ret != 1047) + if (ret != 1047) { + DRM_ERROR("PBN calculation test failed - clock %d, bpp %d, expected PBN %d, actual PBN %d.\n", + 234000, 30, 1047, ret); return -EINVAL; + } + ret = drm_dp_calc_pbn_mode(297000, 24); + if (ret != 1063) { + DRM_ERROR("PBN calculation test failed - clock %d, bpp %d, expected PBN %d, actual PBN %d.\n", + 297000, 24, 1063, ret); + return -EINVAL; + } return 0; } From ad9421d86ecd22e8bd9eddbca2ac93980a7b836e Mon Sep 17 00:00:00 2001 From: Hersen Wu Date: Fri, 22 Jan 2016 17:07:28 -0500 Subject: [PATCH 202/418] drm/dp/mst: move GUID storage from mgr, port to only mst branch commit 5e93b8208d3c419b515fb75e2601931c027e12ab upstream. Previous implementation does not handle case below: boot up one MST branch to DP connector of ASIC. After boot up, hot plug 2nd MST branch to DP output of 1st MST, GUID is not created for 2nd MST branch. When downstream port of 2nd MST branch send upstream request, it fails because 2nd MST branch GUID is not available. New Implementation: only create GUID for MST branch and save it within Branch. Signed-off-by: Hersen Wu Reviewed-by: Harry Wentland Acked-by: Alex Deucher Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_dp_mst_topology.c | 64 ++++++++++++--------------- include/drm/drm_dp_mst_helper.h | 25 +++++------ 2 files changed, 38 insertions(+), 51 deletions(-) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index e77e96aeeb20..c79e44a712e7 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -1013,18 +1013,27 @@ static bool drm_dp_port_setup_pdt(struct drm_dp_mst_port *port) return send_link; } -static void drm_dp_check_port_guid(struct drm_dp_mst_branch *mstb, - struct drm_dp_mst_port *port) +static void drm_dp_check_mstb_guid(struct drm_dp_mst_branch *mstb, u8 *guid) { int ret; - if (port->dpcd_rev >= 0x12) { - port->guid_valid = drm_dp_validate_guid(mstb->mgr, port->guid); - if (!port->guid_valid) { - ret = drm_dp_send_dpcd_write(mstb->mgr, - port, - DP_GUID, - 16, port->guid); - port->guid_valid = true; + + memcpy(mstb->guid, guid, 16); + + if (!drm_dp_validate_guid(mstb->mgr, mstb->guid)) { + if (mstb->port_parent) { + ret = drm_dp_send_dpcd_write( + mstb->mgr, + mstb->port_parent, + DP_GUID, + 16, + mstb->guid); + } else { + + ret = drm_dp_dpcd_write( + mstb->mgr->aux, + DP_GUID, + mstb->guid, + 16); } } } @@ -1081,7 +1090,6 @@ static void drm_dp_add_port(struct drm_dp_mst_branch *mstb, port->dpcd_rev = port_msg->dpcd_revision; port->num_sdp_streams = port_msg->num_sdp_streams; port->num_sdp_stream_sinks = port_msg->num_sdp_stream_sinks; - memcpy(port->guid, port_msg->peer_guid, 16); /* manage mstb port lists with mgr lock - take a reference for this list */ @@ -1094,11 +1102,9 @@ static void drm_dp_add_port(struct drm_dp_mst_branch *mstb, if (old_ddps != port->ddps) { if (port->ddps) { - drm_dp_check_port_guid(mstb, port); if (!port->input) drm_dp_send_enum_path_resources(mstb->mgr, mstb, port); } else { - port->guid_valid = false; port->available_pbn = 0; } } @@ -1157,10 +1163,8 @@ static void drm_dp_update_port(struct drm_dp_mst_branch *mstb, if (old_ddps != port->ddps) { if (port->ddps) { - drm_dp_check_port_guid(mstb, port); dowork = true; } else { - port->guid_valid = false; port->available_pbn = 0; } } @@ -1217,13 +1221,14 @@ static struct drm_dp_mst_branch *get_mst_branch_device_by_guid_helper( struct drm_dp_mst_branch *found_mstb; struct drm_dp_mst_port *port; + if (memcmp(mstb->guid, guid, 16) == 0) + return mstb; + + list_for_each_entry(port, &mstb->ports, next) { if (!port->mstb) continue; - if (port->guid_valid && memcmp(port->guid, guid, 16) == 0) - return port->mstb; - found_mstb = get_mst_branch_device_by_guid_helper(port->mstb, guid); if (found_mstb) @@ -1242,10 +1247,7 @@ static struct drm_dp_mst_branch *drm_dp_get_mst_branch_device_by_guid( /* find the port by iterating down */ mutex_lock(&mgr->lock); - if (mgr->guid_valid && memcmp(mgr->guid, guid, 16) == 0) - mstb = mgr->mst_primary; - else - mstb = get_mst_branch_device_by_guid_helper(mgr->mst_primary, guid); + mstb = get_mst_branch_device_by_guid_helper(mgr->mst_primary, guid); if (mstb) kref_get(&mstb->kref); @@ -1550,6 +1552,9 @@ static void drm_dp_send_link_address(struct drm_dp_mst_topology_mgr *mgr, txmsg->reply.u.link_addr.ports[i].num_sdp_streams, txmsg->reply.u.link_addr.ports[i].num_sdp_stream_sinks); } + + drm_dp_check_mstb_guid(mstb, txmsg->reply.u.link_addr.guid); + for (i = 0; i < txmsg->reply.u.link_addr.nports; i++) { drm_dp_add_port(mstb, mgr->dev, &txmsg->reply.u.link_addr.ports[i]); } @@ -1984,20 +1989,6 @@ int drm_dp_mst_topology_mgr_set_mst(struct drm_dp_mst_topology_mgr *mgr, bool ms goto out_unlock; } - - /* sort out guid */ - ret = drm_dp_dpcd_read(mgr->aux, DP_GUID, mgr->guid, 16); - if (ret != 16) { - DRM_DEBUG_KMS("failed to read DP GUID %d\n", ret); - goto out_unlock; - } - - mgr->guid_valid = drm_dp_validate_guid(mgr, mgr->guid); - if (!mgr->guid_valid) { - ret = drm_dp_dpcd_write(mgr->aux, DP_GUID, mgr->guid, 16); - mgr->guid_valid = true; - } - queue_work(system_long_wq, &mgr->work); ret = 0; @@ -2219,6 +2210,7 @@ static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr) } drm_dp_update_port(mstb, &msg.u.conn_stat); + DRM_DEBUG_KMS("Got CSN: pn: %d ldps:%d ddps: %d mcs: %d ip: %d pdt: %d\n", msg.u.conn_stat.port_number, msg.u.conn_stat.legacy_device_plug_status, msg.u.conn_stat.displayport_device_plug_status, msg.u.conn_stat.message_capability_status, msg.u.conn_stat.input_port, msg.u.conn_stat.peer_device_type); (*mgr->cbs->hotplug)(mgr); diff --git a/include/drm/drm_dp_mst_helper.h b/include/drm/drm_dp_mst_helper.h index 006062a27639..f356f9716474 100644 --- a/include/drm/drm_dp_mst_helper.h +++ b/include/drm/drm_dp_mst_helper.h @@ -44,8 +44,6 @@ struct drm_dp_vcpi { /** * struct drm_dp_mst_port - MST port * @kref: reference count for this port. - * @guid_valid: for DP 1.2 devices if we have validated the GUID. - * @guid: guid for DP 1.2 device on this port. * @port_num: port number * @input: if this port is an input port. * @mcs: message capability status - DP 1.2 spec. @@ -70,10 +68,6 @@ struct drm_dp_vcpi { struct drm_dp_mst_port { struct kref kref; - /* if dpcd 1.2 device is on this port - its GUID info */ - bool guid_valid; - u8 guid[16]; - u8 port_num; bool input; bool mcs; @@ -109,10 +103,12 @@ struct drm_dp_mst_port { * @tx_slots: transmission slots for this device. * @last_seqno: last sequence number used to talk to this. * @link_address_sent: if a link address message has been sent to this device yet. + * @guid: guid for DP 1.2 branch device. port under this branch can be + * identified by port #. * * This structure represents an MST branch device, there is one - * primary branch device at the root, along with any others connected - * to downstream ports + * primary branch device at the root, along with any other branches connected + * to downstream port of parent branches. */ struct drm_dp_mst_branch { struct kref kref; @@ -131,6 +127,9 @@ struct drm_dp_mst_branch { struct drm_dp_sideband_msg_tx *tx_slots[2]; int last_seqno; bool link_address_sent; + + /* global unique identifier to identify branch devices */ + u8 guid[16]; }; @@ -405,11 +404,9 @@ struct drm_dp_payload { * @conn_base_id: DRM connector ID this mgr is connected to. * @down_rep_recv: msg receiver state for down replies. * @up_req_recv: msg receiver state for up requests. - * @lock: protects mst state, primary, guid, dpcd. + * @lock: protects mst state, primary, dpcd. * @mst_state: if this manager is enabled for an MST capable port. * @mst_primary: pointer to the primary branch device. - * @guid_valid: GUID valid for the primary branch device. - * @guid: GUID for primary port. * @dpcd: cache of DPCD for primary port. * @pbn_div: PBN to slots divisor. * @@ -431,13 +428,11 @@ struct drm_dp_mst_topology_mgr { struct drm_dp_sideband_msg_rx up_req_recv; /* pointer to info about the initial MST device */ - struct mutex lock; /* protects mst_state + primary + guid + dpcd */ + struct mutex lock; /* protects mst_state + primary + dpcd */ bool mst_state; struct drm_dp_mst_branch *mst_primary; - /* primary MST device GUID */ - bool guid_valid; - u8 guid[16]; + u8 dpcd[DP_RECEIVER_CAP_SIZE]; u8 sink_count; int pbn_div; From a252d13f411ff3dc7697da09a991f1a4a7065d23 Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Fri, 22 Jan 2016 17:07:29 -0500 Subject: [PATCH 203/418] drm/dp/mst: Reverse order of MST enable and clearing VC payload table. commit c175cd16df272119534058f28cbd5eeac6ff2d24 upstream. On DELL U3014 if you clear the table before enabling MST it sometimes hangs the receiver. Signed-off-by: Andrey Grodzovsky Reviewed-by: Harry Wentland Acked-by: Alex Deucher Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_dp_mst_topology.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index c79e44a712e7..dd0e15a424c7 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -1976,6 +1976,12 @@ int drm_dp_mst_topology_mgr_set_mst(struct drm_dp_mst_topology_mgr *mgr, bool ms mgr->mst_primary = mstb; kref_get(&mgr->mst_primary->kref); + ret = drm_dp_dpcd_writeb(mgr->aux, DP_MSTM_CTRL, + DP_MST_EN | DP_UP_REQ_EN | DP_UPSTREAM_IS_SRC); + if (ret < 0) { + goto out_unlock; + } + { struct drm_dp_payload reset_pay; reset_pay.start_slot = 0; @@ -1983,12 +1989,6 @@ int drm_dp_mst_topology_mgr_set_mst(struct drm_dp_mst_topology_mgr *mgr, bool ms drm_dp_dpcd_write_payload(mgr, 0, &reset_pay); } - ret = drm_dp_dpcd_writeb(mgr->aux, DP_MSTM_CTRL, - DP_MST_EN | DP_UP_REQ_EN | DP_UPSTREAM_IS_SRC); - if (ret < 0) { - goto out_unlock; - } - queue_work(system_long_wq, &mgr->work); ret = 0; From 381e33c23c0acc1eb64fb1fe3a245b72da9484ea Mon Sep 17 00:00:00 2001 From: Mykola Lysenko Date: Wed, 27 Jan 2016 09:39:36 -0500 Subject: [PATCH 204/418] drm/dp/mst: deallocate payload on port destruction commit 91a25e463130c8e19bdb42f2d827836c7937992e upstream. This is needed to properly deallocate port payload after downstream branch get unplugged. In order to do this unplugged MST topology should be preserved, to find first alive port on path to unplugged MST topology, and send payload deallocation request to branch device of found port. For this mstb and port kref's are used in reversed order to track when port and branch memory could be freed. Added additional functions to find appropriate mstb as described above. Signed-off-by: Mykola Lysenko Reviewed-by: Harry Wentland Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_dp_mst_topology.c | 91 ++++++++++++++++++++++++--- 1 file changed, 83 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index dd0e15a424c7..bbda11f24844 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -798,12 +798,33 @@ static struct drm_dp_mst_branch *drm_dp_add_mst_branch_device(u8 lct, u8 *rad) return mstb; } +static void drm_dp_free_mst_port(struct kref *kref); + +static void drm_dp_free_mst_branch_device(struct kref *kref) +{ + struct drm_dp_mst_branch *mstb = container_of(kref, struct drm_dp_mst_branch, kref); + if (mstb->port_parent) { + if (list_empty(&mstb->port_parent->next)) + kref_put(&mstb->port_parent->kref, drm_dp_free_mst_port); + } + kfree(mstb); +} + static void drm_dp_destroy_mst_branch_device(struct kref *kref) { struct drm_dp_mst_branch *mstb = container_of(kref, struct drm_dp_mst_branch, kref); struct drm_dp_mst_port *port, *tmp; bool wake_tx = false; + /* + * init kref again to be used by ports to remove mst branch when it is + * not needed anymore + */ + kref_init(kref); + + if (mstb->port_parent && list_empty(&mstb->port_parent->next)) + kref_get(&mstb->port_parent->kref); + /* * destroy all ports - don't need lock * as there are no more references to the mst branch @@ -830,7 +851,8 @@ static void drm_dp_destroy_mst_branch_device(struct kref *kref) if (wake_tx) wake_up(&mstb->mgr->tx_waitq); - kfree(mstb); + + kref_put(kref, drm_dp_free_mst_branch_device); } static void drm_dp_put_mst_branch_device(struct drm_dp_mst_branch *mstb) @@ -878,6 +900,7 @@ static void drm_dp_destroy_port(struct kref *kref) * from an EDID retrieval */ mutex_lock(&mgr->destroy_connector_lock); + kref_get(&port->parent->kref); list_add(&port->next, &mgr->destroy_connector_list); mutex_unlock(&mgr->destroy_connector_lock); schedule_work(&mgr->destroy_connector_work); @@ -1602,6 +1625,37 @@ static int drm_dp_send_enum_path_resources(struct drm_dp_mst_topology_mgr *mgr, return 0; } +static struct drm_dp_mst_port *drm_dp_get_last_connected_port_to_mstb(struct drm_dp_mst_branch *mstb) +{ + if (!mstb->port_parent) + return NULL; + + if (mstb->port_parent->mstb != mstb) + return mstb->port_parent; + + return drm_dp_get_last_connected_port_to_mstb(mstb->port_parent->parent); +} + +static struct drm_dp_mst_branch *drm_dp_get_last_connected_port_and_mstb(struct drm_dp_mst_topology_mgr *mgr, + struct drm_dp_mst_branch *mstb, + int *port_num) +{ + struct drm_dp_mst_branch *rmstb = NULL; + struct drm_dp_mst_port *found_port; + mutex_lock(&mgr->lock); + if (mgr->mst_primary) { + found_port = drm_dp_get_last_connected_port_to_mstb(mstb); + + if (found_port) { + rmstb = found_port->parent; + kref_get(&rmstb->kref); + *port_num = found_port->port_num; + } + } + mutex_unlock(&mgr->lock); + return rmstb; +} + static int drm_dp_payload_send_msg(struct drm_dp_mst_topology_mgr *mgr, struct drm_dp_mst_port *port, int id, @@ -1609,11 +1663,16 @@ static int drm_dp_payload_send_msg(struct drm_dp_mst_topology_mgr *mgr, { struct drm_dp_sideband_msg_tx *txmsg; struct drm_dp_mst_branch *mstb; - int len, ret; + int len, ret, port_num; + port_num = port->port_num; mstb = drm_dp_get_validated_mstb_ref(mgr, port->parent); - if (!mstb) - return -EINVAL; + if (!mstb) { + mstb = drm_dp_get_last_connected_port_and_mstb(mgr, port->parent, &port_num); + + if (!mstb) + return -EINVAL; + } txmsg = kzalloc(sizeof(*txmsg), GFP_KERNEL); if (!txmsg) { @@ -1622,7 +1681,7 @@ static int drm_dp_payload_send_msg(struct drm_dp_mst_topology_mgr *mgr, } txmsg->dst = mstb; - len = build_allocate_payload(txmsg, port->port_num, + len = build_allocate_payload(txmsg, port_num, id, pbn); @@ -2752,6 +2811,13 @@ static void drm_dp_tx_work(struct work_struct *work) mutex_unlock(&mgr->qlock); } +static void drm_dp_free_mst_port(struct kref *kref) +{ + struct drm_dp_mst_port *port = container_of(kref, struct drm_dp_mst_port, kref); + kref_put(&port->parent->kref, drm_dp_free_mst_branch_device); + kfree(port); +} + static void drm_dp_destroy_connector_work(struct work_struct *work) { struct drm_dp_mst_topology_mgr *mgr = container_of(work, struct drm_dp_mst_topology_mgr, destroy_connector_work); @@ -2772,13 +2838,22 @@ static void drm_dp_destroy_connector_work(struct work_struct *work) list_del(&port->next); mutex_unlock(&mgr->destroy_connector_lock); + kref_init(&port->kref); + INIT_LIST_HEAD(&port->next); + mgr->cbs->destroy_connector(mgr, port->connector); drm_dp_port_teardown_pdt(port, port->pdt); - if (!port->input && port->vcpi.vcpi > 0) - drm_dp_mst_put_payload_id(mgr, port->vcpi.vcpi); - kfree(port); + if (!port->input && port->vcpi.vcpi > 0) { + if (mgr->mst_state) { + drm_dp_mst_reset_vcpi_slots(mgr, port); + drm_dp_update_payload_part1(mgr); + drm_dp_mst_put_payload_id(mgr, port->vcpi.vcpi); + } + } + + kref_put(&port->kref, drm_dp_free_mst_port); send_hotplug = true; } if (send_hotplug) From 7d124d8895ec1345b565f7872e0dbe33b51b1c0f Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Mon, 23 Nov 2015 17:39:11 -0500 Subject: [PATCH 205/418] drm/radeon: Fix off-by-one errors in radeon_vm_bo_set_addr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 42ef344c0994cc453477afdc7a8eadc578ed0257 upstream. eoffset is sometimes treated as the last address inside the address range, and sometimes as the first address outside the range. This was resulting in errors when a test filled up the entire address space. Make it consistent to always be the last address within the range. Also fixed related errors when checking the VA limit and in radeon_vm_fence_pts. Signed-off-by: Felix.Kuehling Reviewed-by: Christian König Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_vm.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index 48d97c040f49..3979632b9225 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -455,15 +455,15 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev, if (soffset) { /* make sure object fit at this offset */ - eoffset = soffset + size; + eoffset = soffset + size - 1; if (soffset >= eoffset) { r = -EINVAL; goto error_unreserve; } last_pfn = eoffset / RADEON_GPU_PAGE_SIZE; - if (last_pfn > rdev->vm_manager.max_pfn) { - dev_err(rdev->dev, "va above limit (0x%08X > 0x%08X)\n", + if (last_pfn >= rdev->vm_manager.max_pfn) { + dev_err(rdev->dev, "va above limit (0x%08X >= 0x%08X)\n", last_pfn, rdev->vm_manager.max_pfn); r = -EINVAL; goto error_unreserve; @@ -478,7 +478,7 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev, eoffset /= RADEON_GPU_PAGE_SIZE; if (soffset || eoffset) { struct interval_tree_node *it; - it = interval_tree_iter_first(&vm->va, soffset, eoffset - 1); + it = interval_tree_iter_first(&vm->va, soffset, eoffset); if (it && it != &bo_va->it) { struct radeon_bo_va *tmp; tmp = container_of(it, struct radeon_bo_va, it); @@ -518,7 +518,7 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev, if (soffset || eoffset) { spin_lock(&vm->status_lock); bo_va->it.start = soffset; - bo_va->it.last = eoffset - 1; + bo_va->it.last = eoffset; list_add(&bo_va->vm_status, &vm->cleared); spin_unlock(&vm->status_lock); interval_tree_insert(&bo_va->it, &vm->va); @@ -888,7 +888,7 @@ static void radeon_vm_fence_pts(struct radeon_vm *vm, unsigned i; start >>= radeon_vm_block_size; - end >>= radeon_vm_block_size; + end = (end - 1) >> radeon_vm_block_size; for (i = start; i <= end; ++i) radeon_bo_fence(vm->page_tables[i].bo, fence, true); From b36e52c44ce6728824546d8b5f05b844cede96f1 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 24 Nov 2015 14:32:44 -0500 Subject: [PATCH 206/418] drm/radeon: call hpd_irq_event on resume commit dbb17a21c131eca94eb31136eee9a7fe5aff00d9 upstream. Need to call this on resume if displays changes during suspend in order to properly be notified of changes. Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_device.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index c566993a2ec3..d690df545b4d 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1744,6 +1744,7 @@ int radeon_resume_kms(struct drm_device *dev, bool resume, bool fbcon) } drm_kms_helper_poll_enable(dev); + drm_helper_hpd_irq_event(dev); /* set the power state here in case we are a PX system or headless */ if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) From 6f4e77b9e3fc1a2c6a2908e107c90553fe265c19 Mon Sep 17 00:00:00 2001 From: Slava Grigorev Date: Thu, 17 Dec 2015 11:09:58 -0500 Subject: [PATCH 207/418] drm/radeon: Fix "slow" audio over DP on DCE8+ commit ac4a9350abddc51ccb897abf0d9f3fd592b97e0b upstream. DP audio is derived from the dfs clock. Signed-off-by: Slava Grigorev Reviewed-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/dce6_afmt.c | 16 ++++++++++++++++ drivers/gpu/drm/radeon/radeon.h | 1 + drivers/gpu/drm/radeon/radeon_atombios.c | 7 +++++++ drivers/gpu/drm/radeon/sid.h | 5 +++++ 4 files changed, 29 insertions(+) diff --git a/drivers/gpu/drm/radeon/dce6_afmt.c b/drivers/gpu/drm/radeon/dce6_afmt.c index 752072771388..6bfc46369db1 100644 --- a/drivers/gpu/drm/radeon/dce6_afmt.c +++ b/drivers/gpu/drm/radeon/dce6_afmt.c @@ -301,6 +301,22 @@ void dce6_dp_audio_set_dto(struct radeon_device *rdev, * is the numerator, DCCG_AUDIO_DTOx_MODULE is the denominator */ if (ASIC_IS_DCE8(rdev)) { + unsigned int div = (RREG32(DENTIST_DISPCLK_CNTL) & + DENTIST_DPREFCLK_WDIVIDER_MASK) >> + DENTIST_DPREFCLK_WDIVIDER_SHIFT; + + if (div < 128 && div >= 96) + div -= 64; + else if (div >= 64) + div = div / 2 - 16; + else if (div >= 8) + div /= 4; + else + div = 0; + + if (div) + clock = rdev->clock.gpupll_outputfreq * 10 / div; + WREG32(DCE8_DCCG_AUDIO_DTO1_PHASE, 24000); WREG32(DCE8_DCCG_AUDIO_DTO1_MODULE, clock); } else { diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 87db64983ea8..285cacbec57f 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -268,6 +268,7 @@ struct radeon_clock { uint32_t current_dispclk; uint32_t dp_extclk; uint32_t max_pixel_clock; + uint32_t gpupll_outputfreq; }; /* diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c index 8f285244c839..13e5513b73af 100644 --- a/drivers/gpu/drm/radeon/radeon_atombios.c +++ b/drivers/gpu/drm/radeon/radeon_atombios.c @@ -1263,6 +1263,13 @@ bool radeon_atom_get_clock_info(struct drm_device *dev) rdev->mode_info.firmware_flags = le16_to_cpu(firmware_info->info.usFirmwareCapability.susAccess); + if (ASIC_IS_DCE8(rdev)) { + rdev->clock.gpupll_outputfreq = + le32_to_cpu(firmware_info->info_22.ulGPUPLL_OutputFreq); + if (rdev->clock.gpupll_outputfreq == 0) + rdev->clock.gpupll_outputfreq = 360000; /* 3.6 GHz */ + } + return true; } diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h index 4c4a7218a3bd..d1a7b58dd291 100644 --- a/drivers/gpu/drm/radeon/sid.h +++ b/drivers/gpu/drm/radeon/sid.h @@ -915,6 +915,11 @@ #define DCCG_AUDIO_DTO1_PHASE 0x05c0 #define DCCG_AUDIO_DTO1_MODULE 0x05c4 +#define DENTIST_DISPCLK_CNTL 0x0490 +# define DENTIST_DPREFCLK_WDIVIDER(x) (((x) & 0x7f) << 24) +# define DENTIST_DPREFCLK_WDIVIDER_MASK (0x7f << 24) +# define DENTIST_DPREFCLK_WDIVIDER_SHIFT 24 + #define AFMT_AUDIO_SRC_CONTROL 0x713c #define AFMT_AUDIO_SRC_SELECT(x) (((x) & 7) << 0) /* AFMT_AUDIO_SRC_SELECT From 557761e927e79011ce59f07f91e7a0a58788ea62 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 17 Dec 2015 12:52:17 -0500 Subject: [PATCH 208/418] drm/radeon: clean up fujitsu quirks commit 0eb1c3d4084eeb6fb3a703f88d6ce1521f8fcdd1 upstream. Combine the two quirks. bug: https://bugzilla.kernel.org/show_bug.cgi?id=109481 Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_atombios.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c index 13e5513b73af..08fc1b5effa8 100644 --- a/drivers/gpu/drm/radeon/radeon_atombios.c +++ b/drivers/gpu/drm/radeon/radeon_atombios.c @@ -437,7 +437,9 @@ static bool radeon_atom_apply_quirks(struct drm_device *dev, } /* Fujitsu D3003-S2 board lists DVI-I as DVI-D and VGA */ - if (((dev->pdev->device == 0x9802) || (dev->pdev->device == 0x9806)) && + if (((dev->pdev->device == 0x9802) || + (dev->pdev->device == 0x9805) || + (dev->pdev->device == 0x9806)) && (dev->pdev->subsystem_vendor == 0x1734) && (dev->pdev->subsystem_device == 0x11bd)) { if (*connector_type == DRM_MODE_CONNECTOR_VGA) { @@ -448,14 +450,6 @@ static bool radeon_atom_apply_quirks(struct drm_device *dev, } } - /* Fujitsu D3003-S2 board lists DVI-I as DVI-I and VGA */ - if ((dev->pdev->device == 0x9805) && - (dev->pdev->subsystem_vendor == 0x1734) && - (dev->pdev->subsystem_device == 0x11bd)) { - if (*connector_type == DRM_MODE_CONNECTOR_VGA) - return false; - } - return true; } From 46414c46e79de89588afbbd58f4df16ee4857ed8 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 22 Jan 2016 00:13:15 -0500 Subject: [PATCH 209/418] drm/radeon: properly byte swap vce firmware setup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit cc78eb22885bba64445cde438ba098de0104920f upstream. Firmware is LE. Need to properly byteswap some of the fields so they are interpreted correctly by the driver on BE systems. Reviewed-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/vce_v1_0.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/radeon/vce_v1_0.c b/drivers/gpu/drm/radeon/vce_v1_0.c index 07a0d378e122..a01efe39a820 100644 --- a/drivers/gpu/drm/radeon/vce_v1_0.c +++ b/drivers/gpu/drm/radeon/vce_v1_0.c @@ -178,12 +178,12 @@ int vce_v1_0_load_fw(struct radeon_device *rdev, uint32_t *data) return -EINVAL; } - for (i = 0; i < sign->num; ++i) { - if (sign->val[i].chip_id == chip_id) + for (i = 0; i < le32_to_cpu(sign->num); ++i) { + if (le32_to_cpu(sign->val[i].chip_id) == chip_id) break; } - if (i == sign->num) + if (i == le32_to_cpu(sign->num)) return -EINVAL; data += (256 - 64) / 4; @@ -191,18 +191,18 @@ int vce_v1_0_load_fw(struct radeon_device *rdev, uint32_t *data) data[1] = sign->val[i].nonce[1]; data[2] = sign->val[i].nonce[2]; data[3] = sign->val[i].nonce[3]; - data[4] = sign->len + 64; + data[4] = cpu_to_le32(le32_to_cpu(sign->len) + 64); memset(&data[5], 0, 44); memcpy(&data[16], &sign[1], rdev->vce_fw->size - sizeof(*sign)); - data += data[4] / 4; + data += le32_to_cpu(data[4]) / 4; data[0] = sign->val[i].sigval[0]; data[1] = sign->val[i].sigval[1]; data[2] = sign->val[i].sigval[2]; data[3] = sign->val[i].sigval[3]; - rdev->vce.keyselect = sign->val[i].keyselect; + rdev->vce.keyselect = le32_to_cpu(sign->val[i].keyselect); return 0; } From 3bbb4a052d9a891e529c2a54f1782b03b3441aa2 Mon Sep 17 00:00:00 2001 From: Slava Grigorev Date: Tue, 26 Jan 2016 16:45:10 -0500 Subject: [PATCH 210/418] drm/radeon: cleaned up VCO output settings for DP audio MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit c9a392eac18409f51a071520cf508c0b4ad990e2 upstream. This is preparation for the fixes in the following patches. Signed-off-by: Slava Grigorev Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/dce6_afmt.c | 2 +- drivers/gpu/drm/radeon/radeon.h | 2 +- drivers/gpu/drm/radeon/radeon_atombios.c | 12 +++++++----- drivers/gpu/drm/radeon/radeon_audio.c | 8 +------- 4 files changed, 10 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/radeon/dce6_afmt.c b/drivers/gpu/drm/radeon/dce6_afmt.c index 6bfc46369db1..ea4e3fc2744f 100644 --- a/drivers/gpu/drm/radeon/dce6_afmt.c +++ b/drivers/gpu/drm/radeon/dce6_afmt.c @@ -315,7 +315,7 @@ void dce6_dp_audio_set_dto(struct radeon_device *rdev, div = 0; if (div) - clock = rdev->clock.gpupll_outputfreq * 10 / div; + clock /= div; WREG32(DCE8_DCCG_AUDIO_DTO1_PHASE, 24000); WREG32(DCE8_DCCG_AUDIO_DTO1_MODULE, clock); diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 285cacbec57f..5580568088bb 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -268,7 +268,7 @@ struct radeon_clock { uint32_t current_dispclk; uint32_t dp_extclk; uint32_t max_pixel_clock; - uint32_t gpupll_outputfreq; + uint32_t vco_freq; }; /* diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c index 08fc1b5effa8..9a9363a7e5b9 100644 --- a/drivers/gpu/drm/radeon/radeon_atombios.c +++ b/drivers/gpu/drm/radeon/radeon_atombios.c @@ -1257,12 +1257,14 @@ bool radeon_atom_get_clock_info(struct drm_device *dev) rdev->mode_info.firmware_flags = le16_to_cpu(firmware_info->info.usFirmwareCapability.susAccess); - if (ASIC_IS_DCE8(rdev)) { - rdev->clock.gpupll_outputfreq = + if (ASIC_IS_DCE8(rdev)) + rdev->clock.vco_freq = le32_to_cpu(firmware_info->info_22.ulGPUPLL_OutputFreq); - if (rdev->clock.gpupll_outputfreq == 0) - rdev->clock.gpupll_outputfreq = 360000; /* 3.6 GHz */ - } + else + rdev->clock.vco_freq = rdev->clock.current_dispclk; + + if (rdev->clock.vco_freq == 0) + rdev->clock.vco_freq = 360000; /* 3.6 GHz */ return true; } diff --git a/drivers/gpu/drm/radeon/radeon_audio.c b/drivers/gpu/drm/radeon/radeon_audio.c index 2c02e99b5f95..85e1c234f020 100644 --- a/drivers/gpu/drm/radeon/radeon_audio.c +++ b/drivers/gpu/drm/radeon/radeon_audio.c @@ -739,9 +739,6 @@ static void radeon_audio_dp_mode_set(struct drm_encoder *encoder, struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; struct drm_connector *connector = radeon_get_connector_for_encoder(encoder); - struct radeon_connector *radeon_connector = to_radeon_connector(connector); - struct radeon_connector_atom_dig *dig_connector = - radeon_connector->con_priv; if (!dig || !dig->afmt) return; @@ -753,10 +750,7 @@ static void radeon_audio_dp_mode_set(struct drm_encoder *encoder, radeon_audio_write_speaker_allocation(encoder); radeon_audio_write_sad_regs(encoder); radeon_audio_write_latency_fields(encoder, mode); - if (rdev->clock.dp_extclk || ASIC_IS_DCE5(rdev)) - radeon_audio_set_dto(encoder, rdev->clock.default_dispclk * 10); - else - radeon_audio_set_dto(encoder, dig_connector->dp_clock); + radeon_audio_set_dto(encoder, rdev->clock.vco_freq * 10); radeon_audio_set_audio_packet(encoder); radeon_audio_select_pin(encoder); From c38554e8df027f43558b59f6f1205c63b1fdff4a Mon Sep 17 00:00:00 2001 From: Slava Grigorev Date: Tue, 26 Jan 2016 16:56:25 -0500 Subject: [PATCH 211/418] drm/radeon: Add a common function for DFS handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit a64c9dab1c4d05c87ec8a1cb9b48915816462143 upstream. Move encoding of DFS (digital frequency synthesizer) divider into a separate function and improve calculation precision. Signed-off-by: Slava Grigorev Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/dce6_afmt.c | 12 ++---------- drivers/gpu/drm/radeon/radeon_audio.c | 12 ++++++++++++ drivers/gpu/drm/radeon/radeon_audio.h | 1 + 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/radeon/dce6_afmt.c b/drivers/gpu/drm/radeon/dce6_afmt.c index ea4e3fc2744f..367a916f364e 100644 --- a/drivers/gpu/drm/radeon/dce6_afmt.c +++ b/drivers/gpu/drm/radeon/dce6_afmt.c @@ -304,18 +304,10 @@ void dce6_dp_audio_set_dto(struct radeon_device *rdev, unsigned int div = (RREG32(DENTIST_DISPCLK_CNTL) & DENTIST_DPREFCLK_WDIVIDER_MASK) >> DENTIST_DPREFCLK_WDIVIDER_SHIFT; - - if (div < 128 && div >= 96) - div -= 64; - else if (div >= 64) - div = div / 2 - 16; - else if (div >= 8) - div /= 4; - else - div = 0; + div = radeon_audio_decode_dfs_div(div); if (div) - clock /= div; + clock = clock * 100 / div; WREG32(DCE8_DCCG_AUDIO_DTO1_PHASE, 24000); WREG32(DCE8_DCCG_AUDIO_DTO1_MODULE, clock); diff --git a/drivers/gpu/drm/radeon/radeon_audio.c b/drivers/gpu/drm/radeon/radeon_audio.c index 85e1c234f020..b214663b370d 100644 --- a/drivers/gpu/drm/radeon/radeon_audio.c +++ b/drivers/gpu/drm/radeon/radeon_audio.c @@ -775,3 +775,15 @@ void radeon_audio_dpms(struct drm_encoder *encoder, int mode) if (radeon_encoder->audio && radeon_encoder->audio->dpms) radeon_encoder->audio->dpms(encoder, mode == DRM_MODE_DPMS_ON); } + +unsigned int radeon_audio_decode_dfs_div(unsigned int div) +{ + if (div >= 8 && div < 64) + return (div - 8) * 25 + 200; + else if (div >= 64 && div < 96) + return (div - 64) * 50 + 1600; + else if (div >= 96 && div < 128) + return (div - 96) * 100 + 3200; + else + return 0; +} diff --git a/drivers/gpu/drm/radeon/radeon_audio.h b/drivers/gpu/drm/radeon/radeon_audio.h index 059cc3012062..5c70cceaa4a6 100644 --- a/drivers/gpu/drm/radeon/radeon_audio.h +++ b/drivers/gpu/drm/radeon/radeon_audio.h @@ -79,5 +79,6 @@ void radeon_audio_fini(struct radeon_device *rdev); void radeon_audio_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode); void radeon_audio_dpms(struct drm_encoder *encoder, int mode); +unsigned int radeon_audio_decode_dfs_div(unsigned int div); #endif From ac6f949bbbd922f5d16fea936522bef7663971ea Mon Sep 17 00:00:00 2001 From: Slava Grigorev Date: Tue, 26 Jan 2016 17:35:57 -0500 Subject: [PATCH 212/418] drm/radeon: fix DP audio support for APU with DCE4.1 display engine MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit fe6fc1f132b4300c1f6defd43a5d673eb60a820d upstream. Properly setup the DFS divider for DP audio for DCE4.1. Signed-off-by: Slava Grigorev Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/evergreen_hdmi.c | 10 +++++++ drivers/gpu/drm/radeon/evergreend.h | 5 ++++ drivers/gpu/drm/radeon/radeon_atombios.c | 37 +++++++++++++++++++----- 3 files changed, 44 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/radeon/evergreen_hdmi.c b/drivers/gpu/drm/radeon/evergreen_hdmi.c index 9953356fe263..3cf04a2f44bb 100644 --- a/drivers/gpu/drm/radeon/evergreen_hdmi.c +++ b/drivers/gpu/drm/radeon/evergreen_hdmi.c @@ -289,6 +289,16 @@ void dce4_dp_audio_set_dto(struct radeon_device *rdev, * number (coefficient of two integer numbers. DCCG_AUDIO_DTOx_PHASE * is the numerator, DCCG_AUDIO_DTOx_MODULE is the denominator */ + if (ASIC_IS_DCE41(rdev)) { + unsigned int div = (RREG32(DCE41_DENTIST_DISPCLK_CNTL) & + DENTIST_DPREFCLK_WDIVIDER_MASK) >> + DENTIST_DPREFCLK_WDIVIDER_SHIFT; + div = radeon_audio_decode_dfs_div(div); + + if (div) + clock = 100 * clock / div; + } + WREG32(DCCG_AUDIO_DTO1_PHASE, 24000); WREG32(DCCG_AUDIO_DTO1_MODULE, clock); } diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h index 4aa5f755572b..13b6029d65cc 100644 --- a/drivers/gpu/drm/radeon/evergreend.h +++ b/drivers/gpu/drm/radeon/evergreend.h @@ -511,6 +511,11 @@ #define DCCG_AUDIO_DTO1_CNTL 0x05cc # define DCCG_AUDIO_DTO1_USE_512FBR_DTO (1 << 3) +#define DCE41_DENTIST_DISPCLK_CNTL 0x049c +# define DENTIST_DPREFCLK_WDIVIDER(x) (((x) & 0x7f) << 24) +# define DENTIST_DPREFCLK_WDIVIDER_MASK (0x7f << 24) +# define DENTIST_DPREFCLK_WDIVIDER_SHIFT 24 + /* DCE 4.0 AFMT */ #define HDMI_CONTROL 0x7030 # define HDMI_KEEPOUT_MODE (1 << 0) diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c index 9a9363a7e5b9..de9a2ffcf5f7 100644 --- a/drivers/gpu/drm/radeon/radeon_atombios.c +++ b/drivers/gpu/drm/radeon/radeon_atombios.c @@ -1106,6 +1106,31 @@ union firmware_info { ATOM_FIRMWARE_INFO_V2_2 info_22; }; +union igp_info { + struct _ATOM_INTEGRATED_SYSTEM_INFO info; + struct _ATOM_INTEGRATED_SYSTEM_INFO_V2 info_2; + struct _ATOM_INTEGRATED_SYSTEM_INFO_V6 info_6; + struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_7 info_7; + struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_8 info_8; +}; + +static void radeon_atombios_get_dentist_vco_freq(struct radeon_device *rdev) +{ + struct radeon_mode_info *mode_info = &rdev->mode_info; + int index = GetIndexIntoMasterTable(DATA, IntegratedSystemInfo); + union igp_info *igp_info; + u8 frev, crev; + u16 data_offset; + + if (atom_parse_data_header(mode_info->atom_context, index, NULL, + &frev, &crev, &data_offset)) { + igp_info = (union igp_info *)(mode_info->atom_context->bios + + data_offset); + rdev->clock.vco_freq = + le32_to_cpu(igp_info->info_6.ulDentistVCOFreq); + } +} + bool radeon_atom_get_clock_info(struct drm_device *dev) { struct radeon_device *rdev = dev->dev_private; @@ -1260,6 +1285,10 @@ bool radeon_atom_get_clock_info(struct drm_device *dev) if (ASIC_IS_DCE8(rdev)) rdev->clock.vco_freq = le32_to_cpu(firmware_info->info_22.ulGPUPLL_OutputFreq); + else if (ASIC_IS_DCE5(rdev)) + rdev->clock.vco_freq = rdev->clock.current_dispclk; + else if (ASIC_IS_DCE41(rdev)) + radeon_atombios_get_dentist_vco_freq(rdev); else rdev->clock.vco_freq = rdev->clock.current_dispclk; @@ -1272,14 +1301,6 @@ bool radeon_atom_get_clock_info(struct drm_device *dev) return false; } -union igp_info { - struct _ATOM_INTEGRATED_SYSTEM_INFO info; - struct _ATOM_INTEGRATED_SYSTEM_INFO_V2 info_2; - struct _ATOM_INTEGRATED_SYSTEM_INFO_V6 info_6; - struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_7 info_7; - struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_8 info_8; -}; - bool radeon_atombios_sideport_present(struct radeon_device *rdev) { struct radeon_mode_info *mode_info = &rdev->mode_info; From f4eb8334b571b1633b5df06cd56f2e665e8e19da Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sat, 30 Jan 2016 07:59:32 +0200 Subject: [PATCH 213/418] drm: add helper to check for wc memory support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 4b0e4e4af6c6dc8354dcb72182d52c1bc55f12fc upstream. Reviewed-by: Christian König Reviewed-by: Michel Dänzer Signed-off-by: Dave Airlie Signed-off-by: Oded Gabbay Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- include/drm/drm_cache.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/drm/drm_cache.h b/include/drm/drm_cache.h index 7bfb063029d8..461a0558bca4 100644 --- a/include/drm/drm_cache.h +++ b/include/drm/drm_cache.h @@ -35,4 +35,13 @@ void drm_clflush_pages(struct page *pages[], unsigned long num_pages); +static inline bool drm_arch_can_wc_memory(void) +{ +#if defined(CONFIG_PPC) && !defined(CONFIG_NOT_COHERENT_CACHE) + return false; +#else + return true; +#endif +} + #endif From 9bf88217c3e18f28482921ad7902ffa178163126 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sat, 30 Jan 2016 07:59:33 +0200 Subject: [PATCH 214/418] drm/radeon: mask out WC from BO on unsupported arches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit c5244987394648913ae1a03879c58058a2fc2cee upstream. Reviewed-by: Christian König Reviewed-by: Michel Dänzer Signed-off-by: Oded Gabbay Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_object.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 84d45633d28c..fb6ad143873f 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "radeon.h" #include "radeon_trace.h" @@ -245,6 +246,12 @@ int radeon_bo_create(struct radeon_device *rdev, DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for " "better performance thanks to write-combining\n"); bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC); +#else + /* For architectures that don't support WC memory, + * mask out the WC flag from the BO + */ + if (!drm_arch_can_wc_memory()) + bo->flags &= ~RADEON_GEM_GTT_WC; #endif radeon_ttm_placement_from_domain(bo, domain); From bd4419f7ee1978a1dba107a0f112b797b024ce1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= Date: Fri, 5 Feb 2016 14:35:53 -0500 Subject: [PATCH 215/418] drm/radeon: hold reference to fences in radeon_sa_bo_new MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f6ff4f67cdf8455d0a4226eeeaf5af17c37d05eb upstream. An arbitrary amount of time can pass between spin_unlock and radeon_fence_wait_any, so we need to ensure that nobody frees the fences from under us. Based on the analogous fix for amdgpu. Signed-off-by: Nicolai Hähnle Reviewed-by: Christian König Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_sa.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_sa.c b/drivers/gpu/drm/radeon/radeon_sa.c index c507896aca45..197b157b73d0 100644 --- a/drivers/gpu/drm/radeon/radeon_sa.c +++ b/drivers/gpu/drm/radeon/radeon_sa.c @@ -349,8 +349,13 @@ int radeon_sa_bo_new(struct radeon_device *rdev, /* see if we can skip over some allocations */ } while (radeon_sa_bo_next_hole(sa_manager, fences, tries)); + for (i = 0; i < RADEON_NUM_RINGS; ++i) + radeon_fence_ref(fences[i]); + spin_unlock(&sa_manager->wq.lock); r = radeon_fence_wait_any(rdev, fences, false); + for (i = 0; i < RADEON_NUM_RINGS; ++i) + radeon_fence_unref(&fences[i]); spin_lock(&sa_manager->wq.lock); /* if we have nothing to wait for block */ if (r == -ENOENT) { From de2e68f48bc441cb59b7190b2f53793359ce220c Mon Sep 17 00:00:00 2001 From: Insu Yun Date: Mon, 1 Feb 2016 11:08:29 -0500 Subject: [PATCH 216/418] drm: fix missing reference counting decrease commit dabe19540af9e563d526113bb102e1b9b9fa73f9 upstream. In drm_dp_mst_allocate_vcpi, it returns true in two paths, but in one path, there is no reference couting decrease. Signed-off-by: Insu Yun Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_dp_mst_topology.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index bbda11f24844..39d7e2e15c11 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -2463,6 +2463,7 @@ bool drm_dp_mst_allocate_vcpi(struct drm_dp_mst_topology_mgr *mgr, struct drm_dp DRM_DEBUG_KMS("payload: vcpi %d already allocated for pbn %d - requested pbn %d\n", port->vcpi.vcpi, port->vcpi.pbn, pbn); if (pbn == port->vcpi.pbn) { *slots = port->vcpi.num_slots; + drm_dp_put_port(port); return true; } } From 383652ce4ab5504e57fb2ec82cb6eb799cf68db1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 27 Nov 2015 13:28:55 +0000 Subject: [PATCH 217/418] drm/i915: Restore inhibiting the load of the default context commit 06ef83a705a98da63797a5a570220b6ca36febd4 upstream. Following a GPU reset, we may leave the context in a poorly defined state, and reloading from that context will leave the GPU flummoxed. For secondary contexts, this will lead to that context being banned - but currently it is also causing the default context to become banned, leading to turmoil in the shared state. This is a regression from commit 6702cf16e0ba8b0129f5aa1b6609d4e9c70bc13b [v4.1] Author: Ben Widawsky Date: Mon Mar 16 16:00:58 2015 +0000 drm/i915: Initialize all contexts which quietly introduced the removal of the MI_RESTORE_INHIBIT on the default context. v2: Mark the global default context as uninitialized on GPU reset so that the context-local workarounds are reloaded upon re-enabling. Signed-off-by: Chris Wilson Cc: Michel Thierry Cc: Mika Kuoppala Cc: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1448630935-27377-1-git-send-email-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala [danvet: This seems to fix a gpu hand on after the first resume, resulting in any future suspend operation failing with -EIO because the gpu seems to be in a funky state. Somehow this patch fixes that.] Signed-off-by: Daniel Vetter (cherry picked from commit 42f1cae8c079bcceb3cff079fddc3ff8852c788f) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_gem_context.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 02ceb7a4b481..0433d25f9d23 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -340,6 +340,10 @@ void i915_gem_context_reset(struct drm_device *dev) i915_gem_context_unreference(lctx); ring->last_context = NULL; } + + /* Force the GPU state to be reinitialised on enabling */ + if (ring->default_context) + ring->default_context->legacy_hw_ctx.initialized = false; } } @@ -708,7 +712,7 @@ static int do_switch(struct drm_i915_gem_request *req) if (ret) goto unpin_out; - if (!to->legacy_hw_ctx.initialized) { + if (!to->legacy_hw_ctx.initialized || i915_gem_context_is_default(to)) { hw_flags |= MI_RESTORE_INHIBIT; /* NB: If we inhibit the restore, the context is not allowed to * die because future work may end up depending on valid address From 82ed1aba70ce938097629c41f316826e0a715865 Mon Sep 17 00:00:00 2001 From: Lyude Date: Thu, 7 Jan 2016 10:43:28 -0500 Subject: [PATCH 218/418] drm/i915: intel_hpd_init(): Fix suspend/resume reprobing commit 2dc2f761dea65069485110d24eaa5b0d5d808b07 upstream. This fixes reprobing of display connectors on resume. After some talking with danvet on IRC, I learned that calling drm_helper_hpd_irq_event() does actually trigger a full reprobe of each connector's status. It turns out this is the actual reason reprobing on resume hasn't been working (this was observed on a T440s): - We call hpd_init() - We check each connector for a couple of things before marking connector->polled with DRM_CONNECTOR_POLL_HPD, one of which is an active encoder. Of course, a disconnected port won't have an active encoder, so we don't add the flag to any of the connectors. - We call drm_helper_hpd_irq_event() - drm_helper_irq_event() checks each connector for the DRM_CONNECTOR_POLL_HPD flag. The only one that has it is eDP-1, so we skip reprobing each connector except that one. In addition, we also now avoid setting connector->polled to DRM_CONNECTOR_POLL_HPD for MST connectors, since their reprobing is handled by the mst helpers. This is probably what was originally intended to happen here. Changes since V1: * Use the explanation of the issue as the commit message instead * Change the title of the commit, since this does more then just stop a check for an encoder now * Add "Fixes" line for the patch that introduced this regression * Don't enable DRM_CONNECTOR_POLL_HPD for mst connectors Changes since V2: * Put patch changelog above Signed-off-by * Follow Daniel Vetter's suggestion for making the code here a bit more legible Fixes: 0e32b39ceed6 ("drm/i915: add DP 1.2 MST support (v0.7)") Signed-off-by: Lyude Link: http://patchwork.freedesktop.org/patch/msgid/1452181408-14777-1-git-send-email-cpaul@redhat.com Signed-off-by: Daniel Vetter (cherry picked from commit 07c519134417d92c2e1a536e2b66d4ffff4b3be0) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_hotplug.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_hotplug.c b/drivers/gpu/drm/i915/intel_hotplug.c index b17785719598..d7a6437d9da2 100644 --- a/drivers/gpu/drm/i915/intel_hotplug.c +++ b/drivers/gpu/drm/i915/intel_hotplug.c @@ -468,9 +468,14 @@ void intel_hpd_init(struct drm_i915_private *dev_priv) list_for_each_entry(connector, &mode_config->connector_list, head) { struct intel_connector *intel_connector = to_intel_connector(connector); connector->polled = intel_connector->polled; - if (connector->encoder && !connector->polled && I915_HAS_HOTPLUG(dev) && intel_connector->encoder->hpd_pin > HPD_NONE) - connector->polled = DRM_CONNECTOR_POLL_HPD; + + /* MST has a dynamic intel_connector->encoder and it's reprobing + * is all handled by the MST helpers. */ if (intel_connector->mst_port) + continue; + + if (!connector->polled && I915_HAS_HOTPLUG(dev) && + intel_connector->encoder->hpd_pin > HPD_NONE) connector->polled = DRM_CONNECTOR_POLL_HPD; } From 741598a5bce0a7bb07006cfbcd8044a3c7b91c31 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 13 Jan 2016 11:55:28 +0100 Subject: [PATCH 219/418] drm/i915: Init power domains early in driver load MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f5949141a21ee16edf1beaf95cbae7e419171ab5 upstream. Since commit ac9b8236551d1177fd07b56aef9b565d1864420d Author: Ville Syrjälä Date: Fri Nov 27 18:55:26 2015 +0200 drm/i915: Introduce a gmbus power domain gmbus also needs the power domain infrastructure right from the start, since as soon as we register the i2c controllers someone can use them. v2: Adjust cleanup paths too (Chris). v3: Rebase onto -nightly (totally bogus tree I had lying around) and also move dpio init head (Ville). v4: Ville instead suggested to move gmbus setup later in the sequence, since it's only needed by the modeset code. v5: Move even close to the actual user, right next to the comment that states where we really need gmbus (and interrupts!). Cc: Ville Syrjälä Cc: Patrik Jakobsson Cc: Imre Deak Cc: Jani Nikula Cc: Meelis Roos Cc: Chris Wilson Fixes: ac9b8236551d ("drm/i915: Introduce a gmbus power domain") References: http://www.spinics.net/lists/intel-gfx/msg83075.html Signed-off-by: Daniel Vetter Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1452682528-19437-1-git-send-email-daniel.vetter@ffwll.ch Signed-off-by: Daniel Vetter Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_dma.c | 6 +++--- drivers/gpu/drm/i915/intel_display.c | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index b4741d121a74..61fcb3b22297 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -402,6 +402,8 @@ static int i915_load_modeset_init(struct drm_device *dev) if (ret) goto cleanup_gem_stolen; + intel_setup_gmbus(dev); + /* Important: The output setup functions called by modeset_init need * working irqs for e.g. gmbus and dp aux transfers. */ intel_modeset_init(dev); @@ -451,6 +453,7 @@ cleanup_gem: cleanup_irq: intel_guc_ucode_fini(dev); drm_irq_uninstall(dev); + intel_teardown_gmbus(dev); cleanup_gem_stolen: i915_gem_cleanup_stolen(dev); cleanup_vga_switcheroo: @@ -1028,7 +1031,6 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) /* Try to make sure MCHBAR is enabled before poking at it */ intel_setup_mchbar(dev); - intel_setup_gmbus(dev); intel_opregion_setup(dev); i915_gem_load(dev); @@ -1099,7 +1101,6 @@ out_gem_unload: if (dev->pdev->msi_enabled) pci_disable_msi(dev->pdev); - intel_teardown_gmbus(dev); intel_teardown_mchbar(dev); pm_qos_remove_request(&dev_priv->pm_qos); destroy_workqueue(dev_priv->gpu_error.hangcheck_wq); @@ -1198,7 +1199,6 @@ int i915_driver_unload(struct drm_device *dev) intel_csr_ucode_fini(dev); - intel_teardown_gmbus(dev); intel_teardown_mchbar(dev); destroy_workqueue(dev_priv->hotplug.dp_wq); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 32cf97346978..53d77b8be76a 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -15565,6 +15565,8 @@ void intel_modeset_cleanup(struct drm_device *dev) mutex_lock(&dev->struct_mutex); intel_cleanup_gt_powersave(dev); mutex_unlock(&dev->struct_mutex); + + intel_teardown_gmbus(dev); } /* From f39741673c3d54bc84ec30b4e1d21daa75bbcb02 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 13 Jan 2016 18:59:39 -0800 Subject: [PATCH 220/418] drm/i915: Make sure DC writes are coherent on flush. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 935a0ff0e1ea62a116848c0a187b13838f7b9cee upstream. We need to set the DC FLUSH PIPE_CONTROL bit on Gen7+ to guarantee that writes performed via the HDC are visible in memory. Fixes an intermittent failure in a Piglit test that writes to a BO from a shader using GL atomic counters (implemented as HDC untyped atomics) and then expects the memory to read back the same value after mapping it on the CPU. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91298 Tested-by: Mark Janes Cc: Rodrigo Vivi Signed-off-by: Francisco Jerez Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1452740379-3194-1-git-send-email-currojerez@riseup.net (cherry picked from commit 965fd602a6436f689f4f2fe40a6789582778ccd5) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_lrc.c | 1 + drivers/gpu/drm/i915/intel_ringbuffer.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 88e12bdf79e2..d69547a65dbb 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1706,6 +1706,7 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request, if (flush_domains) { flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; + flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; flags |= PIPE_CONTROL_FLUSH_ENABLE; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 9461a238f5d5..f6b2a814e629 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -347,6 +347,7 @@ gen7_render_ring_flush(struct drm_i915_gem_request *req, if (flush_domains) { flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; + flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; flags |= PIPE_CONTROL_FLUSH_ENABLE; } if (invalidate_domains) { @@ -419,6 +420,7 @@ gen8_render_ring_flush(struct drm_i915_gem_request *req, if (flush_domains) { flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; + flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; flags |= PIPE_CONTROL_FLUSH_ENABLE; } if (invalidate_domains) { From 6de4682d2845d6aa8883ae62e3ed1b2f915df4e8 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 13 Jan 2016 16:35:20 +0200 Subject: [PATCH 221/418] drm/i915/dp: fall back to 18 bpp when sink capability is unknown MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5efd407674068dede403551bea3b0b134c32513a upstream. Per DP spec, the source device should fall back to 18 bpp, VESA range RGB when the sink capability is unknown. Fix the color depth clamping. 18 bpp color depth should ensure full color range in automatic mode. The clamping has been HDMI specific since its introduction in commit 996a2239f93b03c5972923f04b097f65565c5bed Author: Daniel Vetter Date: Fri Apr 19 11:24:34 2013 +0200 drm/i915: Disable high-bpc on pre-1.4 EDID screens Reported-and-tested-by: Dihan Wickremasuriya Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=105331 Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1452695720-7076-1-git-send-email-jani.nikula@intel.com (cherry picked from commit 013dd9e038723bbd2aa67be51847384b75be8253) Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_display.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 53d77b8be76a..a0ddaf1dfe14 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11930,11 +11930,21 @@ connected_sink_compute_bpp(struct intel_connector *connector, pipe_config->pipe_bpp = connector->base.display_info.bpc*3; } - /* Clamp bpp to 8 on screens without EDID 1.4 */ - if (connector->base.display_info.bpc == 0 && bpp > 24) { - DRM_DEBUG_KMS("clamping display bpp (was %d) to default limit of 24\n", - bpp); - pipe_config->pipe_bpp = 24; + /* Clamp bpp to default limit on screens without EDID 1.4 */ + if (connector->base.display_info.bpc == 0) { + int type = connector->base.connector_type; + int clamp_bpp = 24; + + /* Fall back to 18 bpp when DP sink capability is unknown. */ + if (type == DRM_MODE_CONNECTOR_DisplayPort || + type == DRM_MODE_CONNECTOR_eDP) + clamp_bpp = 18; + + if (bpp > clamp_bpp) { + DRM_DEBUG_KMS("clamping display bpp (was %d) to default limit of %d\n", + bpp, clamp_bpp); + pipe_config->pipe_bpp = clamp_bpp; + } } } From feba77fdca12991dc953c133fda6524b086df87c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 15 Jan 2016 20:46:53 +0200 Subject: [PATCH 222/418] drm/i915: Don't reject primary plane windowing with color keying enabled on SKL+ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 6f94b6dd006909a5ef6435cc0af557e945240f48 upstream. On SKL+ plane scaling is mutually exclusive with color keying. The code check for this, but during some refactoring the code got changed to also reject primary plane windowing when color keying is used. There is no such restriction in the hardware, so restore the original logic. Cc: Maarten Lankhorst Fixes: 061e4b8d650a ("drm/i915: clean up atomic plane check functions, v2.") Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1452883613-28549-1-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Matt Roper Reviewed-by: Maarten Lankhorst (cherry picked from commit 693bdc28a733dba68b86af295e7509812fec35d9) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_display.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index a0ddaf1dfe14..f859a5b87ed4 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -13547,11 +13547,12 @@ intel_check_primary_plane(struct drm_plane *plane, int max_scale = DRM_PLANE_HELPER_NO_SCALING; bool can_position = false; - /* use scaler when colorkey is not required */ - if (INTEL_INFO(plane->dev)->gen >= 9 && - state->ckey.flags == I915_SET_COLORKEY_NONE) { - min_scale = 1; - max_scale = skl_max_scale(to_intel_crtc(crtc), crtc_state); + if (INTEL_INFO(plane->dev)->gen >= 9) { + /* use scaler when colorkey is not required */ + if (state->ckey.flags == I915_SET_COLORKEY_NONE) { + min_scale = 1; + max_scale = skl_max_scale(to_intel_crtc(crtc), crtc_state); + } can_position = true; } From 2f1e7d5f8f6b3f21db1423e15167e4350bd959d7 Mon Sep 17 00:00:00 2001 From: Lyude Date: Tue, 2 Feb 2016 10:49:43 -0500 Subject: [PATCH 223/418] drm/i915/skl: Don't skip mst encoders in skl_ddi_pll_select() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3d849b02336be103d312c1574d6f7314d5c0bc9f upstream. We don't actually check for INTEL_OUTPUT_DP_MST at all in here, as a result we skip assigning a DPLL to any DP MST ports, which makes link training fail: [ 1442.933896] [drm:intel_power_well_enable] enabling DDI D power well [ 1442.933905] [drm:skl_set_power_well] Enabling DDI D power well [ 1442.933957] [drm:intel_mst_pre_enable_dp] 0 [ 1442.935474] [drm:intel_dp_set_signal_levels] Using signal levels 00000000 [ 1442.935477] [drm:intel_dp_set_signal_levels] Using vswing level 0 [ 1442.935480] [drm:intel_dp_set_signal_levels] Using pre-emphasis level 0 [ 1442.936190] [drm:intel_dp_set_signal_levels] Using signal levels 05000000 [ 1442.936193] [drm:intel_dp_set_signal_levels] Using vswing level 1 [ 1442.936195] [drm:intel_dp_set_signal_levels] Using pre-emphasis level 1 [ 1442.936858] [drm:intel_dp_set_signal_levels] Using signal levels 08000000 [ 1442.936862] [drm:intel_dp_set_signal_levels] Using vswing level 2 … [ 1442.998253] [drm:intel_dp_link_training_clock_recovery [i915]] *ERROR* too many full retries, give up [ 1442.998512] [drm:intel_dp_start_link_train [i915]] *ERROR* failed to train DP, aborting After which the pipe state goes completely out of sync: [ 70.075596] [drm:check_crtc_state] [CRTC:25] [ 70.075696] [drm:intel_pipe_config_compare [i915]] *ERROR* mismatch in ddi_pll_sel (expected 0x00000000, found 0x00000001) [ 70.075747] [drm:intel_pipe_config_compare [i915]] *ERROR* mismatch in shared_dpll (expected -1, found 0) [ 70.075798] [drm:intel_pipe_config_compare [i915]] *ERROR* mismatch in dpll_hw_state.ctrl1 (expected 0x00000000, found 0x00000021) [ 70.075840] [drm:intel_pipe_config_compare [i915]] *ERROR* mismatch in dpll_hw_state.cfgcr1 (expected 0x00000000, found 0x80400173) [ 70.075884] [drm:intel_pipe_config_compare [i915]] *ERROR* mismatch in dpll_hw_state.cfgcr2 (expected 0x00000000, found 0x000003a5) [ 70.075954] [drm:intel_pipe_config_compare [i915]] *ERROR* mismatch in base.adjusted_mode.crtc_clock (expected 262750, found 72256) [ 70.075999] [drm:intel_pipe_config_compare [i915]] *ERROR* mismatch in port_clock (expected 540000, found 148500) And if you're especially lucky, it keeps going downhill: [ 83.309256] Kernel panic - not syncing: Timeout: Not all CPUs entered broadcast exception handler [ 83.309265] [ 83.309265] ================================= [ 83.309266] [ INFO: inconsistent lock state ] [ 83.309267] 4.5.0-rc1Lyude-Test #265 Not tainted [ 83.309267] --------------------------------- [ 83.309268] inconsistent {IN-HARDIRQ-W} -> {HARDIRQ-ON-W} usage. [ 83.309270] Xorg/1194 [HC0[1]:SC0[0]:HE1:SE1] takes: [ 83.309293] (&(&dev_priv->uncore.lock)->rlock){?.-...}, at: [] gen9_write32+0x63/0x400 [i915] [ 83.309293] {IN-HARDIRQ-W} state was registered at: [ 83.309297] [] __lock_acquire+0x9c4/0x1d00 [ 83.309299] [] lock_acquire+0xce/0x1c0 [ 83.309302] [] _raw_spin_lock_irqsave+0x56/0x90 [ 83.309321] [] gen9_read32+0x52/0x3d0 [i915] [ 83.309332] [] gen8_irq_handler+0x27a/0x6a0 [i915] [ 83.309337] [] handle_irq_event_percpu+0x41/0x300 [ 83.309339] [] handle_irq_event+0x39/0x60 [ 83.309341] [] handle_edge_irq+0x74/0x130 [ 83.309344] [] handle_irq+0x73/0x120 [ 83.309346] [] do_IRQ+0x61/0x120 [ 83.309348] [] ret_from_intr+0x0/0x20 [ 83.309351] [] cpuidle_enter_state+0x105/0x330 [ 83.309353] [] cpuidle_enter+0x17/0x20 [ 83.309356] [] call_cpuidle+0x2a/0x50 [ 83.309358] [] cpu_startup_entry+0x26d/0x3a0 [ 83.309360] [] rest_init+0x13a/0x140 [ 83.309363] [] start_kernel+0x475/0x482 [ 83.309365] [] x86_64_start_reservations+0x2a/0x2c [ 83.309367] [] x86_64_start_kernel+0x13b/0x14a Fixes: 82d354370189 ("drm/i915/skl: Implementation of SKL DPLL programming") Signed-off-by: Lyude Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1454428183-994-1-git-send-email-cpaul@redhat.com (cherry picked from commit 78385cb398748debb7ea2e36d6d2001830c172bc) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_ddi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index a6752a61d99f..7e6158b889da 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1582,7 +1582,8 @@ skl_ddi_pll_select(struct intel_crtc *intel_crtc, DPLL_CFGCR2_KDIV(wrpll_params.kdiv) | DPLL_CFGCR2_PDIV(wrpll_params.pdiv) | wrpll_params.central_freq; - } else if (intel_encoder->type == INTEL_OUTPUT_DISPLAYPORT) { + } else if (intel_encoder->type == INTEL_OUTPUT_DISPLAYPORT || + intel_encoder->type == INTEL_OUTPUT_DP_MST) { switch (crtc_state->port_clock / 2) { case 81000: ctrl1 |= DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_810, 0); From 7a35478f900a01650f00e39423722956d04daedd Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 4 Feb 2016 12:50:49 +0200 Subject: [PATCH 224/418] drm/i915/dsi: defend gpio table against out of bounds access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 4db3a2448ec8902310acb78de39b6227a9a56ac8 upstream. Do not blindly trust the VBT data used for indexing. Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/cc32d40c2b47f2d2151811855ac2c3dabab1d57d.1454582914.git.jani.nikula@intel.com (cherry picked from commit 5d2d0a12d3d08bf50434f0b5947bb73bac04b941) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_dsi_panel_vbt.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c index a5e99ac305da..349775e88e5b 100644 --- a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c +++ b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c @@ -209,6 +209,11 @@ static const u8 *mipi_exec_gpio(struct intel_dsi *intel_dsi, const u8 *data) /* pull up/down */ action = *data++; + if (gpio >= ARRAY_SIZE(gtable)) { + DRM_DEBUG_KMS("unknown gpio %u\n", gpio); + goto out; + } + function = gtable[gpio].function_reg; pad = gtable[gpio].pad_reg; @@ -226,6 +231,7 @@ static const u8 *mipi_exec_gpio(struct intel_dsi *intel_dsi, const u8 *data) vlv_gpio_nc_write(dev_priv, pad, val); mutex_unlock(&dev_priv->sb_lock); +out: return data; } From 222d0fa4e86370e58e6d4f2e2cf02d76d4003992 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 4 Feb 2016 12:50:50 +0200 Subject: [PATCH 225/418] drm/i915/dsi: don't pass arbitrary data to sideband MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 26f6f2d301c1fb46acb1138ee155125815239b0d upstream. Since sequence block v2 the second byte contains flags other than just pull up/down. Don't pass arbitrary data to the sideband interface. The rest may or may not work for sequence block v2, but there should be no harm done. Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/ebe3c2eee623afc4b3a134533b01f8d591d13f32.1454582914.git.jani.nikula@intel.com (cherry picked from commit 4e1c63e3761b84ec7d87c75b58bbc8bcf18e98ee) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_dsi_panel_vbt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c index 349775e88e5b..a8912aecc31f 100644 --- a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c +++ b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c @@ -207,7 +207,7 @@ static const u8 *mipi_exec_gpio(struct intel_dsi *intel_dsi, const u8 *data) gpio = *data++; /* pull up/down */ - action = *data++; + action = *data++ & 1; if (gpio >= ARRAY_SIZE(gtable)) { DRM_DEBUG_KMS("unknown gpio %u\n", gpio); From 0f44b766c9b773b43e073259933f78d505badcc1 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Tue, 9 Feb 2016 21:11:13 +0100 Subject: [PATCH 226/418] drm/i915: fix error path in intel_setup_gmbus() commit ed3f9fd1e865975ceefdb2a43b453e090b1fd787 upstream. This fails to undo the setup for pin==0; moreover, something interesting happens if the setup failed already at pin==0. Signed-off-by: Rasmus Villemoes Fixes: f899fc64cda8 ("drm/i915: use GMBUS to manage i2c links") Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1455048677-19882-3-git-send-email-linux@rasmusvillemoes.dk (cherry picked from commit 2417c8c03f508841b85bf61acc91836b7b0e2560) Signed-off-by: Jani Nikula Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/intel_i2c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c index 8324654037b6..f3bee54c414f 100644 --- a/drivers/gpu/drm/i915/intel_i2c.c +++ b/drivers/gpu/drm/i915/intel_i2c.c @@ -675,7 +675,7 @@ int intel_setup_gmbus(struct drm_device *dev) return 0; err: - while (--pin) { + while (pin--) { if (!intel_gmbus_is_valid_pin(dev_priv, pin)) continue; From b78bd121b4978a8378c8300e0fb2e12c88a05a85 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Tue, 16 Feb 2016 14:25:00 +0100 Subject: [PATCH 227/418] drm/qxl: use kmalloc_array to alloc reloc_info in qxl_process_single_command commit 34855706c30d52b0a744da44348b5d1cc39fbe51 upstream. This avoids integer overflows on 32bit machines when calculating reloc_info size, as reported by Alan Cox. Cc: gnomes@lxorguk.ukuu.org.uk Signed-off-by: Gerd Hoffmann Reviewed-by: Daniel Vetter Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/qxl/qxl_ioctl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/qxl/qxl_ioctl.c b/drivers/gpu/drm/qxl/qxl_ioctl.c index 2ae8577497ca..7c2e78201ead 100644 --- a/drivers/gpu/drm/qxl/qxl_ioctl.c +++ b/drivers/gpu/drm/qxl/qxl_ioctl.c @@ -168,7 +168,8 @@ static int qxl_process_single_command(struct qxl_device *qdev, cmd->command_size)) return -EFAULT; - reloc_info = kmalloc(sizeof(struct qxl_reloc_info) * cmd->relocs_num, GFP_KERNEL); + reloc_info = kmalloc_array(cmd->relocs_num, + sizeof(struct qxl_reloc_info), GFP_KERNEL); if (!reloc_info) return -ENOMEM; From 96e87f46f186b51e246fece96aa18ae39d535a98 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Mon, 15 Feb 2016 19:41:47 +0100 Subject: [PATCH 228/418] drm/radeon: use post-decrement in error handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit bc3f5d8c4ca01555820617eb3b6c0857e4df710d upstream. We need to use post-decrement to get the pci_map_page undone also for i==0, and to avoid some very unpleasant behaviour if pci_map_page failed already at i==0. Reviewed-by: Christian König Signed-off-by: Rasmus Villemoes Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_ttm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index e34307459e50..e06ac546a90f 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -758,7 +758,7 @@ static int radeon_ttm_tt_populate(struct ttm_tt *ttm) 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); if (pci_dma_mapping_error(rdev->pdev, gtt->ttm.dma_address[i])) { - while (--i) { + while (i--) { pci_unmap_page(rdev->pdev, gtt->ttm.dma_address[i], PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); gtt->ttm.dma_address[i] = 0; From 57c0829490bffc6a8488dbd4da3903e902b3ded3 Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Fri, 12 Feb 2016 20:30:27 +0100 Subject: [PATCH 229/418] drm: No-Op redundant calls to drm_vblank_off() (v2) commit e8235891b33799d597ff4ab5e45afe173a65da30 upstream. Otherwise if a kms driver calls into drm_vblank_off() more than once before calling drm_vblank_on() again, the redundant calls to vblank_disable_and_save() will call drm_update_vblank_count() while hw vblank counters and vblank timestamping are in a undefined state during modesets, dpms off etc. At least with the legacy drm helpers it is not unusual to get multiple calls to drm_vblank_off and drm_vblank_on, e.g., half a dozen calls to drm_vblank_off and two calls to drm_vblank_on were observed on radeon-kms during dpms-off -> dpms-on transition. We don't no-op calls from atomic modesetting drivers, as they should do a proper job of tracking hw state. Fixes large jumps of the software maintained vblank counter due to the hardware vblank counter resetting to zero during dpms off or modeset, e.g., if radeon-kms is modified to use drm_vblank_off/on instead of drm_vblank_pre/post_modeset(). This fixes a regression caused by the changes made to drm_update_vblank_count() in Linux 4.4. v2: Don't no-op on atomic modesetting drivers, per suggestion of Daniel Vetter. Signed-off-by: Mario Kleiner Reviewed-by: Daniel Vetter Cc: michel@daenzer.net Cc: vbabka@suse.cz Cc: ville.syrjala@linux.intel.com Cc: alexander.deucher@amd.com Cc: christian.koenig@amd.com Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_irq.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index 607f493ae801..1a18033c30db 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -1313,7 +1313,13 @@ void drm_vblank_off(struct drm_device *dev, unsigned int pipe) spin_lock_irqsave(&dev->event_lock, irqflags); spin_lock(&dev->vbl_lock); - vblank_disable_and_save(dev, pipe); + DRM_DEBUG_VBL("crtc %d, vblank enabled %d, inmodeset %d\n", + pipe, vblank->enabled, vblank->inmodeset); + + /* Avoid redundant vblank disables without previous drm_vblank_on(). */ + if (drm_core_check_feature(dev, DRIVER_ATOMIC) || !vblank->inmodeset) + vblank_disable_and_save(dev, pipe); + wake_up(&vblank->queue); /* @@ -1415,6 +1421,9 @@ void drm_vblank_on(struct drm_device *dev, unsigned int pipe) return; spin_lock_irqsave(&dev->vbl_lock, irqflags); + DRM_DEBUG_VBL("crtc %d, vblank enabled %d, inmodeset %d\n", + pipe, vblank->enabled, vblank->inmodeset); + /* Drop our private "prevent drm_vblank_get" refcount */ if (vblank->inmodeset) { atomic_dec(&vblank->refcount); From 6f84d997be26c01ccb6965d802c919987ce85161 Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Fri, 12 Feb 2016 20:30:28 +0100 Subject: [PATCH 230/418] drm: Prevent vblank counter bumps > 1 with active vblank clients. (v2) commit 99b8e71597fadd6b2ac85e6e10f221f79dd9c1c1 upstream. This fixes a regression introduced by the new drm_update_vblank_count() implementation in Linux 4.4: Restrict the bump of the software vblank counter in drm_update_vblank_count() to a safe maximum value of +1 whenever there is the possibility that concurrent readers of vblank timestamps could be active at the moment, as the current implementation of the timestamp caching and updating is not safe against concurrent readers for calls to store_vblank() with a bump of anything but +1. A bump != 1 would very likely return corrupted timestamps to userspace, because the same slot in the cache could be concurrently written by store_vblank() and read by one of those readers in a non-atomic fashion and without the read-retry logic detecting this collision. Concurrent readers can exist while drm_update_vblank_count() is called from the drm_vblank_off() or drm_vblank_on() functions or other non-vblank- irq callers. However, all those calls are happening with the vbl_lock locked thereby preventing a drm_vblank_get(), so the vblank refcount can't increase while drm_update_vblank_count() is executing. Therefore a zero vblank refcount during execution of that function signals that is safe for arbitrary counter bumps if called from outside vblank irq, whereas a non-zero count is not safe. Whenever the function is called from vblank irq, we have to assume concurrent readers could show up any time during its execution, even if the refcount is currently zero, as vblank irqs are usually only enabled due to the presence of readers, and because when it is called from vblank irq it can't hold the vbl_lock to protect it from sudden bumps in vblank refcount. Therefore also restrict bumps to +1 when the function is called from vblank irq. Such bumps of more than +1 can happen at other times than reenabling vblank irqs, e.g., when regular vblank interrupts get delayed by more than 1 frame due to long held locks, long irq off periods, realtime preemption on RT kernels, or system management interrupts. A better solution would be to rewrite the timestamp caching to use full seqlocks to allow concurrent writes and reads for arbitrary vblank counter increments. v2: Add code comment that this is essentially a hack and should be replaced by a full seqlock implementation for caching of timestamps. Signed-off-by: Mario Kleiner Reviewed-by: Daniel Vetter Signed-off-by: Greg Kroah-Hartman Cc: michel@daenzer.net Cc: vbabka@suse.cz Cc: ville.syrjala@linux.intel.com Cc: daniel.vetter@ffwll.ch Cc: dri-devel@lists.freedesktop.org Cc: alexander.deucher@amd.com Cc: christian.koenig@amd.com Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_irq.c | 43 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index 1a18033c30db..92ad62f9a2ee 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -221,6 +221,49 @@ static void drm_update_vblank_count(struct drm_device *dev, unsigned int pipe, diff = (flags & DRM_CALLED_FROM_VBLIRQ) != 0; } + /* + * FIMXE: Need to replace this hack with proper seqlocks. + * + * Restrict the bump of the software vblank counter to a safe maximum + * value of +1 whenever there is the possibility that concurrent readers + * of vblank timestamps could be active at the moment, as the current + * implementation of the timestamp caching and updating is not safe + * against concurrent readers for calls to store_vblank() with a bump + * of anything but +1. A bump != 1 would very likely return corrupted + * timestamps to userspace, because the same slot in the cache could + * be concurrently written by store_vblank() and read by one of those + * readers without the read-retry logic detecting the collision. + * + * Concurrent readers can exist when we are called from the + * drm_vblank_off() or drm_vblank_on() functions and other non-vblank- + * irq callers. However, all those calls to us are happening with the + * vbl_lock locked to prevent drm_vblank_get(), so the vblank refcount + * can't increase while we are executing. Therefore a zero refcount at + * this point is safe for arbitrary counter bumps if we are called + * outside vblank irq, a non-zero count is not 100% safe. Unfortunately + * we must also accept a refcount of 1, as whenever we are called from + * drm_vblank_get() -> drm_vblank_enable() the refcount will be 1 and + * we must let that one pass through in order to not lose vblank counts + * during vblank irq off - which would completely defeat the whole + * point of this routine. + * + * Whenever we are called from vblank irq, we have to assume concurrent + * readers exist or can show up any time during our execution, even if + * the refcount is currently zero, as vblank irqs are usually only + * enabled due to the presence of readers, and because when we are called + * from vblank irq we can't hold the vbl_lock to protect us from sudden + * bumps in vblank refcount. Therefore also restrict bumps to +1 when + * called from vblank irq. + */ + if ((diff > 1) && (atomic_read(&vblank->refcount) > 1 || + (flags & DRM_CALLED_FROM_VBLIRQ))) { + DRM_DEBUG_VBL("clamping vblank bump to 1 on crtc %u: diffr=%u " + "refcount %u, vblirq %u\n", pipe, diff, + atomic_read(&vblank->refcount), + (flags & DRM_CALLED_FROM_VBLIRQ) != 0); + diff = 1; + } + DRM_DEBUG_VBL("updating vblank count on crtc %u:" " current=%u, diff=%u, hw=%u hw_last=%u\n", pipe, vblank->count, diff, cur_vblank, vblank->last); From 439c9942ce4f0ced30e24f4864d8b2209215521b Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Fri, 12 Feb 2016 20:30:29 +0100 Subject: [PATCH 231/418] drm: Fix drm_vblank_pre/post_modeset regression from Linux 4.4 commit c61934ed9a0e3911a9935df26858726a7ec35ec0 upstream. Changes to drm_update_vblank_count() in Linux 4.4 broke the behaviour of the pre/post modeset functions as the new update code doesn't deal with hw vblank counter resets inbetween calls to drm_vblank_pre_modeset an drm_vblank_post_modeset, as it should. This causes mistreatment of such hw counter resets as counter wraparound, and thereby large forward jumps of the software vblank counter which in turn cause vblank event dispatching and vblank waits to fail/hang --> userspace clients hang. This symptom was reported on radeon-kms to cause a infinite hang of KDE Plasma 5 shell's login procedure, preventing users from logging in. Fix this by detecting when drm_update_vblank_count() is called inside a pre->post modeset interval. If so, clamp valid vblank increments to the safe values 0 and 1, pretty much restoring the update behavior of the old update code of Linux 4.3 and earlier. Also reset the last recorded hw vblank count at call to drm_vblank_post_modeset() to be safe against hw that after modesetting, dpms on etc. only fires its first vblank irq after drm_vblank_post_modeset() was already called. Reported-by: Vlastimil Babka Signed-off-by: Mario Kleiner Reviewed-by: Daniel Vetter Tested-by: Vlastimil Babka Cc: michel@daenzer.net Cc: vbabka@suse.cz Cc: ville.syrjala@linux.intel.com Cc: daniel.vetter@ffwll.ch Cc: dri-devel@lists.freedesktop.org Cc: alexander.deucher@amd.com Cc: christian.koenig@amd.com Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_irq.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index 92ad62f9a2ee..055b0fa2b4cf 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -221,6 +221,21 @@ static void drm_update_vblank_count(struct drm_device *dev, unsigned int pipe, diff = (flags & DRM_CALLED_FROM_VBLIRQ) != 0; } + /* + * Within a drm_vblank_pre_modeset - drm_vblank_post_modeset + * interval? If so then vblank irqs keep running and it will likely + * happen that the hardware vblank counter is not trustworthy as it + * might reset at some point in that interval and vblank timestamps + * are not trustworthy either in that interval. Iow. this can result + * in a bogus diff >> 1 which must be avoided as it would cause + * random large forward jumps of the software vblank counter. + */ + if (diff > 1 && (vblank->inmodeset & 0x2)) { + DRM_DEBUG_VBL("clamping vblank bump to 1 on crtc %u: diffr=%u" + " due to pre-modeset.\n", pipe, diff); + diff = 1; + } + /* * FIMXE: Need to replace this hack with proper seqlocks. * @@ -1575,6 +1590,7 @@ void drm_vblank_post_modeset(struct drm_device *dev, unsigned int pipe) if (vblank->inmodeset) { spin_lock_irqsave(&dev->vbl_lock, irqflags); dev->vblank_disable_allowed = true; + drm_reset_vblank_timestamp(dev, pipe); spin_unlock_irqrestore(&dev->vbl_lock, irqflags); if (vblank->inmodeset & 0x2) From dcf9ef2709b728f6c591d51c5bbf745df319a139 Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Fri, 12 Feb 2016 20:30:30 +0100 Subject: [PATCH 232/418] drm: Fix treatment of drm_vblank_offdelay in drm_vblank_on() (v2) commit bb74fc1bf3072bd3ab4ed5f43afd287a63baf2d7 upstream. drm_vblank_offdelay can have three different types of values: < 0 is to be always treated the same as dev->vblank_disable_immediate = 0 is to be treated as "never disable vblanks" > 0 is to be treated as disable immediate if kms driver wants it that way via dev->vblank_disable_immediate. Otherwise it is a disable timeout in msecs. This got broken in Linux 3.18+ for the implementation of drm_vblank_on. If the user specified a value of zero which should always reenable vblank irqs in this function, a kms driver could override the users choice by setting vblank_disable_immediate to true. This patch fixes the regression and keeps the user in control. v2: Only reenable vblank if there are clients left or the user requested to "never disable vblanks" via offdelay 0. Enabling vblanks even in the "delayed disable" case (offdelay > 0) was specifically added by Ville in commit cd19e52aee922 ("drm: Kick start vblank interrupts at drm_vblank_on()"), but after discussion it turns out that this was done by accident. Citing Ville: "I think it just ended up as a mess due to changing some of the semantics of offdelay<0 vs. offdelay==0 vs. disable_immediate during the review of the series. So yeah, given how drm_vblank_put() works now, I'd just make this check for offdelay==0." Signed-off-by: Mario Kleiner Reviewed-by: Daniel Vetter Cc: michel@daenzer.net Cc: vbabka@suse.cz Cc: ville.syrjala@linux.intel.com Cc: daniel.vetter@ffwll.ch Cc: dri-devel@lists.freedesktop.org Cc: alexander.deucher@amd.com Cc: christian.koenig@amd.com Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_irq.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index 055b0fa2b4cf..8090989185b2 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -1494,8 +1494,7 @@ void drm_vblank_on(struct drm_device *dev, unsigned int pipe) * re-enable interrupts if there are users left, or the * user wishes vblank interrupts to be enabled all the time. */ - if (atomic_read(&vblank->refcount) != 0 || - (!dev->vblank_disable_immediate && drm_vblank_offdelay == 0)) + if (atomic_read(&vblank->refcount) != 0 || drm_vblank_offdelay == 0) WARN_ON(drm_vblank_enable(dev, pipe)); spin_unlock_irqrestore(&dev->vbl_lock, irqflags); } From a72eb45c3342c56bebe9c81c931f0431b71e7a32 Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Fri, 19 Feb 2016 02:06:38 +0100 Subject: [PATCH 233/418] drm/radeon: Don't hang in radeon_flip_work_func on disabled crtc. (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 2b8341b3f917c108b47f6a8a771a40d226c57883 upstream. This fixes a regression introduced in Linux 4.4. Limit the amount of time radeon_flip_work_func can delay programming a page flip, by both limiting the maximum amount of time per wait cycle and the maximum number of wait cycles. Continue the flip if the limit is exceeded, even if that may result in a visual or timing glitch. This is to prevent a hang of page flips, as reported in fdo bug #93746: Disconnecting a DisplayPort display in parallel to a kms pageflip getting queued can cause the following hang of page flips and thereby an unusable desktop: 1. kms pageflip ioctl() queues pageflip -> queues execution of radeon_flip_work_func. 2. Hotunplug of display causes the driver to DPMS OFF the unplugged display. Display engine shuts down, scanout no longer moves, but stays at its resting position at start line of vblank. 3. radeon_flip_work_func executes while crtc is off, and due to the non-moving scanout position, the new flip delay code introduced into Linux 4.4 by commit 5b5561b3660d ("drm/radeon: Fixup hw vblank counter/ts..") enters an infinite wait loop. 4. After reconnecting the display, the pageflip continues to hang in 3. and the display doesn't update its view of the desktop. This patch fixes the Linux 4.4 regression from fdo bug #93746 v2: Skip wait immediately if !radeon_crtc->enabled, as suggested by Michel. Reported-by: Bernd Steinhauser Signed-off-by: Mario Kleiner Tested-by: Bernd Steinhauser Cc: Michel Dänzer Cc: Alex Deucher Reviewed-by: Michel Dänzer Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_display.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 1eca0acac016..13767d21835f 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -403,7 +403,8 @@ static void radeon_flip_work_func(struct work_struct *__work) struct drm_crtc *crtc = &radeon_crtc->base; unsigned long flags; int r; - int vpos, hpos, stat, min_udelay; + int vpos, hpos, stat, min_udelay = 0; + unsigned repcnt = 4; struct drm_vblank_crtc *vblank = &crtc->dev->vblank[work->crtc_id]; down_read(&rdev->exclusive_lock); @@ -454,7 +455,7 @@ static void radeon_flip_work_func(struct work_struct *__work) * In practice this won't execute very often unless on very fast * machines because the time window for this to happen is very small. */ - for (;;) { + while (radeon_crtc->enabled && repcnt--) { /* GET_DISTANCE_TO_VBLANKSTART returns distance to real vblank * start in hpos, and to the "fudged earlier" vblank start in * vpos. @@ -472,10 +473,22 @@ static void radeon_flip_work_func(struct work_struct *__work) /* Sleep at least until estimated real start of hw vblank */ spin_unlock_irqrestore(&crtc->dev->event_lock, flags); min_udelay = (-hpos + 1) * max(vblank->linedur_ns / 1000, 5); + if (min_udelay > vblank->framedur_ns / 2000) { + /* Don't wait ridiculously long - something is wrong */ + repcnt = 0; + break; + } usleep_range(min_udelay, 2 * min_udelay); spin_lock_irqsave(&crtc->dev->event_lock, flags); }; + if (!repcnt) + DRM_DEBUG_DRIVER("Delay problem on crtc %d: min_udelay %d, " + "framedur %d, linedur %d, stat %d, vpos %d, " + "hpos %d\n", work->crtc_id, min_udelay, + vblank->framedur_ns / 1000, + vblank->linedur_ns / 1000, stat, vpos, hpos); + /* do the flip (mmio) */ radeon_page_flip(rdev, radeon_crtc->crtc_id, work->base); From a83b349814dee660caff0a40a22ac2f166c94a8b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 19 Feb 2016 18:05:10 -0500 Subject: [PATCH 234/418] drm/radeon/pm: adjust display configuration after powerstate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 39d4275058baf53e89203407bf3841ff2c74fa32 upstream. set_power_state defaults to no displays, so we need to update the display configuration after setting up the powerstate on the first call. In most cases this is not an issue since ends up getting called multiple times at any given modeset and the proper order is achieved in the display changed handling at the top of the function. Reviewed-by: Christian König Acked-by: Jordan Lazare Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_pm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index 59abebd6b5dc..2081a60d08fb 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -1075,8 +1075,6 @@ force: /* update display watermarks based on new power state */ radeon_bandwidth_update(rdev); - /* update displays */ - radeon_dpm_display_configuration_changed(rdev); rdev->pm.dpm.current_active_crtcs = rdev->pm.dpm.new_active_crtcs; rdev->pm.dpm.current_active_crtc_count = rdev->pm.dpm.new_active_crtc_count; @@ -1097,6 +1095,9 @@ force: radeon_dpm_post_set_power_state(rdev); + /* update displays */ + radeon_dpm_display_configuration_changed(rdev); + if (rdev->asic->dpm.force_performance_level) { if (rdev->pm.dpm.thermal_active) { enum radeon_dpm_forced_level level = rdev->pm.dpm.forced_level; From bcb1875a069043c70af27dc9f0f5e075a09d76b1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 22 Jan 2016 18:08:52 -0500 Subject: [PATCH 235/418] make sure that freeing shmem fast symlinks is RCU-delayed commit 3ed47db34f480df7caf44436e3e63e555351ae9a upstream. Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- include/linux/shmem_fs.h | 5 +---- mm/shmem.c | 9 ++++----- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 50777b5b1e4c..92d112aeec68 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -15,10 +15,7 @@ struct shmem_inode_info { unsigned int seals; /* shmem seals */ unsigned long flags; unsigned long alloced; /* data pages alloced to file */ - union { - unsigned long swapped; /* subtotal assigned to swap */ - char *symlink; /* unswappable short symlink */ - }; + unsigned long swapped; /* subtotal assigned to swap */ struct shared_policy policy; /* NUMA memory alloc policy */ struct list_head swaplist; /* chain of maybes on swap */ struct simple_xattrs xattrs; /* list of xattrs */ diff --git a/mm/shmem.c b/mm/shmem.c index 2afcdbbdb685..ea5a70cfc1d8 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -620,8 +620,7 @@ static void shmem_evict_inode(struct inode *inode) list_del_init(&info->swaplist); mutex_unlock(&shmem_swaplist_mutex); } - } else - kfree(info->symlink); + } simple_xattrs_free(&info->xattrs); WARN_ON(inode->i_blocks); @@ -2462,13 +2461,12 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s info = SHMEM_I(inode); inode->i_size = len-1; if (len <= SHORT_SYMLINK_LEN) { - info->symlink = kmemdup(symname, len, GFP_KERNEL); - if (!info->symlink) { + inode->i_link = kmemdup(symname, len, GFP_KERNEL); + if (!inode->i_link) { iput(inode); return -ENOMEM; } inode->i_op = &shmem_short_symlink_operations; - inode->i_link = info->symlink; } else { error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL); if (error) { @@ -3083,6 +3081,7 @@ static struct inode *shmem_alloc_inode(struct super_block *sb) static void shmem_destroy_callback(struct rcu_head *head) { struct inode *inode = container_of(head, struct inode, i_rcu); + kfree(inode->i_link); kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode)); } From e3fb82079c460f979ff99b3e033442e9a948a37f Mon Sep 17 00:00:00 2001 From: Azael Avalos Date: Sun, 15 Nov 2015 20:32:47 -0700 Subject: [PATCH 236/418] toshiba_acpi: Fix blank screen at boot if transflective backlight is supported commit bae5336f0aaedffa115dab9cb3d8a4e4aed3a26a upstream. If transflective backlight is supported and the brightness is zero (lowest brightness level), the set_lcd_brightness function will activate the transflective backlight, making the LCD appear to be turned off. This patch fixes the issue by incrementing the brightness level, and by doing so, avoiding the activation of the tranflective backlight. Reported-and-tested-by: Fabian Koester Signed-off-by: Azael Avalos Signed-off-by: Darren Hart Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/toshiba_acpi.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c index c01302989ee4..b0f62141ea4d 100644 --- a/drivers/platform/x86/toshiba_acpi.c +++ b/drivers/platform/x86/toshiba_acpi.c @@ -2484,6 +2484,14 @@ static int toshiba_acpi_setup_backlight(struct toshiba_acpi_dev *dev) brightness = __get_lcd_brightness(dev); if (brightness < 0) return 0; + /* + * If transflective backlight is supported and the brightness is zero + * (lowest brightness level), the set_lcd_brightness function will + * activate the transflective backlight, making the LCD appear to be + * turned off, simply increment the brightness level to avoid that. + */ + if (dev->tr_backlight_supported && brightness == 0) + brightness++; ret = set_lcd_brightness(dev, brightness); if (ret) { pr_debug("Backlight method is read-only, disabling backlight support\n"); From 8d065375e5a40ef7722d2d0ae59d4034938b8e0a Mon Sep 17 00:00:00 2001 From: Josh Boyer Date: Wed, 9 Dec 2015 21:12:52 -0500 Subject: [PATCH 237/418] ideapad-laptop: Add Lenovo ideapad Y700-17ISK to no_hw_rfkill dmi list commit edde316acb5f07c04abf09a92f59db5d2efd14e2 upstream. One of the newest ideapad models also lacks a physical hw rfkill switch, and trying to read the hw rfkill switch through the ideapad module causes it to always reported blocking breaking wifi. Fix it by adding this model to the DMI list. BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1286293 Signed-off-by: Josh Boyer Signed-off-by: Darren Hart Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/ideapad-laptop.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index a313dfc0245f..d28db0e793df 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -864,6 +864,13 @@ static const struct dmi_system_id no_hw_rfkill_list[] = { DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo G50-30"), }, }, + { + .ident = "Lenovo ideapad Y700-17ISK", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo ideapad Y700-17ISK"), + }, + }, { .ident = "Lenovo Yoga 2 11 / 13 / Pro", .matches = { From 906bf4dd14c187ee258961a8f0723e4820bfd084 Mon Sep 17 00:00:00 2001 From: Josh Boyer Date: Sun, 24 Jan 2016 10:46:42 -0500 Subject: [PATCH 238/418] ideapad-laptop: Add Lenovo Yoga 700 to no_hw_rfkill dmi list commit 6b31de3e698582fe0b8f7f4bab15831b73204800 upstream. Like the Yoga 900 models the Lenovo Yoga 700 does not have a hw rfkill switch, and trying to read the hw rfkill switch through the ideapad module causes it to always reported blocking breaking wifi. This commit adds the Lenovo Yoga 700 to the no_hw_rfkill dmi list, fixing the wifi breakage. BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1295272 Tested-by: Signed-off-by: Josh Boyer Signed-off-by: Darren Hart Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/ideapad-laptop.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index d28db0e793df..d78ee151c9e4 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -899,6 +899,13 @@ static const struct dmi_system_id no_hw_rfkill_list[] = { DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo YOGA 3"), }, }, + { + .ident = "Lenovo Yoga 700", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo YOGA 700"), + }, + }, { .ident = "Lenovo Yoga 900", .matches = { From 7712c014b16f64d344239b190ea9c126123cb14f Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Sun, 10 Jan 2016 20:14:11 -0500 Subject: [PATCH 239/418] uapi: update install list after nvme.h rename commit a9cf8284b45110a4d98aea180a89c857e53bf850 upstream. Commit 9d99a8dda154 ("nvme: move hardware structures out of the uapi version of nvme.h") renamed nvme.h to nvme_ioctl.h, but the uapi list still refers to nvme.h. People trying to install the headers hit a failure as the header no longer exists. Signed-off-by: Mike Frysinger Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/Kbuild | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index c2e5d6cb34e3..ebd10e624598 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -307,7 +307,7 @@ header-y += nfs_mount.h header-y += nl80211.h header-y += n_r3964.h header-y += nubus.h -header-y += nvme.h +header-y += nvme_ioctl.h header-y += nvram.h header-y += omap3isp.h header-y += omapfb.h From 96ee50f3b366a0b547dfac776a43af15ccc322e0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 13 Jan 2016 23:24:02 +0100 Subject: [PATCH 240/418] lib: sw842: select crc32 commit 5b57167749274961baf15ed1f05a4996b3ab0487 upstream. The sw842 library code was merged in linux-4.1 and causes a very rare randconfig failure when CONFIG_CRC32 is not set: lib/built-in.o: In function `sw842_compress': oid_registry.c:(.text+0x12ddc): undefined reference to `crc32_be' lib/built-in.o: In function `sw842_decompress': oid_registry.c:(.text+0x137e4): undefined reference to `crc32_be' This adds an explict 'select CRC32' statement, similar to what the other users of the crc32 code have. In practice, CRC32 is always enabled anyway because over 100 other symbols select it. Signed-off-by: Arnd Bergmann Fixes: 2da572c959dd ("lib: add software 842 compression/decompression") Acked-by: Dan Streetman Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- lib/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/Kconfig b/lib/Kconfig index f0df318104e7..1a48744253d7 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -210,9 +210,11 @@ config RANDOM32_SELFTEST # compression support is select'ed if needed # config 842_COMPRESS + select CRC32 tristate config 842_DECOMPRESS + select CRC32 tristate config ZLIB_INFLATE From e85dc751231a3b8853e9983363521bdb17f31271 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 11 Jan 2016 14:46:17 +0100 Subject: [PATCH 241/418] ACPI / video: Add disable_backlight_sysfs_if quirk for the Toshiba Portege R700 commit de588b8ff057d4de0751f337b930f90ca522bab2 upstream. The Toshiba Portege R700 needs disable_backlight_sysfs_if=1, just like the Toshiba Portege R830. Add a quirk for this. Link: https://bugzilla.kernel.org/show_bug.cgi?id=21012 Tested-by: Emma Reisz Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/acpi_video.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c index 3405f7a41e25..2c7e281beb57 100644 --- a/drivers/acpi/acpi_video.c +++ b/drivers/acpi/acpi_video.c @@ -464,6 +464,15 @@ static struct dmi_system_id video_dmi_table[] = { * control on these systems, but do not register a backlight sysfs * as brightness control does not work. */ + { + /* https://bugzilla.kernel.org/show_bug.cgi?id=21012 */ + .callback = video_disable_backlight_sysfs_if, + .ident = "Toshiba Portege R700", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"), + DMI_MATCH(DMI_PRODUCT_NAME, "PORTEGE R700"), + }, + }, { /* https://bugs.freedesktop.org/show_bug.cgi?id=82634 */ .callback = video_disable_backlight_sysfs_if, From 8b819bf125bb410e265275f9660d98986a9fa802 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 14 Jan 2016 14:24:39 +0100 Subject: [PATCH 242/418] ACPI / video: Add disable_backlight_sysfs_if quirk for the Toshiba Satellite R830 commit b21f2e81bd3fd8ed260590e72901254bca2193cd upstream. The Toshiba Satellite R830 needs disable_backlight_sysfs_if=1, just like the Toshiba Portege R830. Add a quirk for this. Link: https://bugzilla.kernel.org/show_bug.cgi?id=21012 Tested-by: To Do Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/acpi_video.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c index 2c7e281beb57..5fdac394207a 100644 --- a/drivers/acpi/acpi_video.c +++ b/drivers/acpi/acpi_video.c @@ -482,6 +482,15 @@ static struct dmi_system_id video_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "PORTEGE R830"), }, }, + { + /* https://bugzilla.kernel.org/show_bug.cgi?id=21012 */ + .callback = video_disable_backlight_sysfs_if, + .ident = "Toshiba Satellite R830", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"), + DMI_MATCH(DMI_PRODUCT_NAME, "SATELLITE R830"), + }, + }, /* * Some machine's _DOD IDs don't have bit 31(Device ID Scheme) set * but the IDs actually follow the Device ID Scheme. From eac1122b727753dd6ea32a852d926f228ace6f66 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 22 Jan 2016 11:41:05 +0100 Subject: [PATCH 243/418] ACPI: Revert "ACPI / video: Add Dell Inspiron 5737 to the blacklist" commit b186b4dcb79b1914c3dadb27ac72dafaa4267998 upstream. The quirk to get "acpi_backlight=vendor" behavior by default on the Dell Inspiron 5737 was added before we started doing "acpi_backlight=native" by default on Win8 ready machines. Since we now avoid using acpi-video as backlight driver on these machines by default (using the native driver instead) we no longer need this quirk. Moreover the vendor driver does not work after a suspend/resume where as the native driver does. This reverts commit 08a56226d847 (ACPI / video: Add Dell Inspiron 5737 to the blacklist). Link: https://bugzilla.kernel.org/show_bug.cgi?id=111061 Reported-and-tested-by: erusan@gmail.com Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/video_detect.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index daaf1c4e1e0f..80e55cb0827b 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -135,14 +135,6 @@ static const struct dmi_system_id video_detect_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "UL30A"), }, }, - { - .callback = video_detect_force_vendor, - .ident = "Dell Inspiron 5737", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), - DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 5737"), - }, - }, /* * These models have a working acpi_video backlight control, and using From 5066e4475fe82ba77afd521bf373c7ee8faac0c8 Mon Sep 17 00:00:00 2001 From: Insu Yun Date: Sat, 23 Jan 2016 15:44:19 -0500 Subject: [PATCH 244/418] ACPI / PCI / hotplug: unlock in error path in acpiphp_enable_slot() commit 2c3033a0664dfae91e1dee7fabac10f24354b958 upstream. In acpiphp_enable_slot(), there is a missing unlock path when error occurred. It needs to be unlocked before returning an error. Signed-off-by: Insu Yun Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/pci/hotplug/acpiphp_glue.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index ff538568a617..0b3e0bfa7be5 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -953,8 +953,10 @@ int acpiphp_enable_slot(struct acpiphp_slot *slot) { pci_lock_rescan_remove(); - if (slot->flags & SLOT_IS_GOING_AWAY) + if (slot->flags & SLOT_IS_GOING_AWAY) { + pci_unlock_rescan_remove(); return -ENODEV; + } /* configure all functions */ if (!(slot->flags & SLOT_ENABLED)) From ef1fb7d12ac8863557390ead966566ed3477b023 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 4 Feb 2016 16:51:00 -0800 Subject: [PATCH 245/418] nfit: fix multi-interface dimm handling, acpi6.1 compatibility commit 6697b2cf69d4363266ca47eaebc49ef13dabc1c9 upstream. ACPI 6.1 clarified that multi-interface dimms require multiple control region entries (DCRs) per dimm. Previously we were assuming that a control region is only present when block-data-windows are present. This implementation was done with an eye to be compatibility with the looser ACPI 6.0 interpretation of this table. 1/ When coalescing the memory device (MEMDEV) tables for a single dimm, coalesce on device_handle rather than control region index. 2/ Whenever we disocver a control region with non-zero block windows re-scan for block-data-window (BDW) entries. We may need to revisit this if a DIMM ever implements a format interface outside of blk or pmem, but that is not on the foreseeable horizon. Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/nfit.c | 71 ++++++++++++++++++++++----------------------- 1 file changed, 35 insertions(+), 36 deletions(-) diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c index aa45d4802707..11d8209e6e5d 100644 --- a/drivers/acpi/nfit.c +++ b/drivers/acpi/nfit.c @@ -468,37 +468,16 @@ static void nfit_mem_find_spa_bdw(struct acpi_nfit_desc *acpi_desc, nfit_mem->bdw = NULL; } -static int nfit_mem_add(struct acpi_nfit_desc *acpi_desc, +static void nfit_mem_init_bdw(struct acpi_nfit_desc *acpi_desc, struct nfit_mem *nfit_mem, struct acpi_nfit_system_address *spa) { u16 dcr = __to_nfit_memdev(nfit_mem)->region_index; struct nfit_memdev *nfit_memdev; struct nfit_flush *nfit_flush; - struct nfit_dcr *nfit_dcr; struct nfit_bdw *nfit_bdw; struct nfit_idt *nfit_idt; u16 idt_idx, range_index; - list_for_each_entry(nfit_dcr, &acpi_desc->dcrs, list) { - if (nfit_dcr->dcr->region_index != dcr) - continue; - nfit_mem->dcr = nfit_dcr->dcr; - break; - } - - if (!nfit_mem->dcr) { - dev_dbg(acpi_desc->dev, "SPA %d missing:%s%s\n", - spa->range_index, __to_nfit_memdev(nfit_mem) - ? "" : " MEMDEV", nfit_mem->dcr ? "" : " DCR"); - return -ENODEV; - } - - /* - * We've found enough to create an nvdimm, optionally - * find an associated BDW - */ - list_add(&nfit_mem->list, &acpi_desc->dimms); - list_for_each_entry(nfit_bdw, &acpi_desc->bdws, list) { if (nfit_bdw->bdw->region_index != dcr) continue; @@ -507,12 +486,12 @@ static int nfit_mem_add(struct acpi_nfit_desc *acpi_desc, } if (!nfit_mem->bdw) - return 0; + return; nfit_mem_find_spa_bdw(acpi_desc, nfit_mem); if (!nfit_mem->spa_bdw) - return 0; + return; range_index = nfit_mem->spa_bdw->range_index; list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) { @@ -537,8 +516,6 @@ static int nfit_mem_add(struct acpi_nfit_desc *acpi_desc, } break; } - - return 0; } static int nfit_mem_dcr_init(struct acpi_nfit_desc *acpi_desc, @@ -547,7 +524,6 @@ static int nfit_mem_dcr_init(struct acpi_nfit_desc *acpi_desc, struct nfit_mem *nfit_mem, *found; struct nfit_memdev *nfit_memdev; int type = nfit_spa_type(spa); - u16 dcr; switch (type) { case NFIT_SPA_DCR: @@ -558,14 +534,18 @@ static int nfit_mem_dcr_init(struct acpi_nfit_desc *acpi_desc, } list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) { - int rc; + struct nfit_dcr *nfit_dcr; + u32 device_handle; + u16 dcr; if (nfit_memdev->memdev->range_index != spa->range_index) continue; found = NULL; dcr = nfit_memdev->memdev->region_index; + device_handle = nfit_memdev->memdev->device_handle; list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) - if (__to_nfit_memdev(nfit_mem)->region_index == dcr) { + if (__to_nfit_memdev(nfit_mem)->device_handle + == device_handle) { found = nfit_mem; break; } @@ -578,6 +558,31 @@ static int nfit_mem_dcr_init(struct acpi_nfit_desc *acpi_desc, if (!nfit_mem) return -ENOMEM; INIT_LIST_HEAD(&nfit_mem->list); + list_add(&nfit_mem->list, &acpi_desc->dimms); + } + + list_for_each_entry(nfit_dcr, &acpi_desc->dcrs, list) { + if (nfit_dcr->dcr->region_index != dcr) + continue; + /* + * Record the control region for the dimm. For + * the ACPI 6.1 case, where there are separate + * control regions for the pmem vs blk + * interfaces, be sure to record the extended + * blk details. + */ + if (!nfit_mem->dcr) + nfit_mem->dcr = nfit_dcr->dcr; + else if (nfit_mem->dcr->windows == 0 + && nfit_dcr->dcr->windows) + nfit_mem->dcr = nfit_dcr->dcr; + break; + } + + if (dcr && !nfit_mem->dcr) { + dev_err(acpi_desc->dev, "SPA %d missing DCR %d\n", + spa->range_index, dcr); + return -ENODEV; } if (type == NFIT_SPA_DCR) { @@ -594,6 +599,7 @@ static int nfit_mem_dcr_init(struct acpi_nfit_desc *acpi_desc, nfit_mem->idt_dcr = nfit_idt->idt; break; } + nfit_mem_init_bdw(acpi_desc, nfit_mem, spa); } else { /* * A single dimm may belong to multiple SPA-PM @@ -602,13 +608,6 @@ static int nfit_mem_dcr_init(struct acpi_nfit_desc *acpi_desc, */ nfit_mem->memdev_pmem = nfit_memdev->memdev; } - - if (found) - continue; - - rc = nfit_mem_add(acpi_desc, nfit_mem, spa); - if (rc) - return rc; } return 0; From 4f29e5445382110d6801fedaa9b0582c00beccc8 Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Mon, 11 Jan 2016 13:04:28 +0000 Subject: [PATCH 246/418] dmaengine: dw: fix cyclic transfer setup commit df3bb8a0e619d501cd13334c3e0586edcdcbc716 upstream. Commit 61e183f83069 ("dmaengine/dw_dmac: Reconfigure interrupt and chan_cfg register on resume") moved some channel initialisation to a new function which must be called before starting a transfer. This updates dw_dma_cyclic_start() to use dwc_dostart() like the other modes, thus ensuring dwc_initialize() gets called and removing some code duplication. Fixes: 61e183f83069 ("dmaengine/dw_dmac: Reconfigure interrupt and chan_cfg register on resume") Signed-off-by: Mans Rullgard Reviewed-by: Viresh Kumar Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/dw/core.c | 23 +---------------------- 1 file changed, 1 insertion(+), 22 deletions(-) diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c index 7067b6ddc1db..af2b92f8501e 100644 --- a/drivers/dma/dw/core.c +++ b/drivers/dma/dw/core.c @@ -1245,7 +1245,6 @@ static void dwc_free_chan_resources(struct dma_chan *chan) int dw_dma_cyclic_start(struct dma_chan *chan) { struct dw_dma_chan *dwc = to_dw_dma_chan(chan); - struct dw_dma *dw = to_dw_dma(dwc->chan.device); unsigned long flags; if (!test_bit(DW_DMA_IS_CYCLIC, &dwc->flags)) { @@ -1254,27 +1253,7 @@ int dw_dma_cyclic_start(struct dma_chan *chan) } spin_lock_irqsave(&dwc->lock, flags); - - /* Assert channel is idle */ - if (dma_readl(dw, CH_EN) & dwc->mask) { - dev_err(chan2dev(&dwc->chan), - "%s: BUG: Attempted to start non-idle channel\n", - __func__); - dwc_dump_chan_regs(dwc); - spin_unlock_irqrestore(&dwc->lock, flags); - return -EBUSY; - } - - dma_writel(dw, CLEAR.ERROR, dwc->mask); - dma_writel(dw, CLEAR.XFER, dwc->mask); - - /* Setup DMAC channel registers */ - channel_writel(dwc, LLP, dwc->cdesc->desc[0]->txd.phys); - channel_writel(dwc, CTL_LO, DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN); - channel_writel(dwc, CTL_HI, 0); - - channel_set_bit(dw, CH_EN, dwc->mask); - + dwc_dostart(dwc, dwc->cdesc->desc[0]); spin_unlock_irqrestore(&dwc->lock, flags); return 0; From 7ed338d4a9f58d88cd9fda055425bc6e0b8f3865 Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Mon, 11 Jan 2016 13:04:29 +0000 Subject: [PATCH 247/418] dmaengine: dw: fix cyclic transfer callbacks commit 2895b2cad6e7a95104cf396e5330054453382ae1 upstream. Cyclic transfer callbacks rely on block completion interrupts which were disabled in commit ff7b05f29fd4 ("dmaengine/dw_dmac: Don't handle block interrupts"). This re-enables block interrupts so the cyclic callbacks can work. Other transfer types are not affected as they set the INT_EN bit only on the last block. Fixes: ff7b05f29fd4 ("dmaengine/dw_dmac: Don't handle block interrupts") Signed-off-by: Mans Rullgard Reviewed-by: Viresh Kumar Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/dw/core.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c index af2b92f8501e..b92662722404 100644 --- a/drivers/dma/dw/core.c +++ b/drivers/dma/dw/core.c @@ -156,6 +156,7 @@ static void dwc_initialize(struct dw_dma_chan *dwc) /* Enable interrupts */ channel_set_bit(dw, MASK.XFER, dwc->mask); + channel_set_bit(dw, MASK.BLOCK, dwc->mask); channel_set_bit(dw, MASK.ERROR, dwc->mask); dwc->initialized = true; @@ -536,16 +537,17 @@ EXPORT_SYMBOL(dw_dma_get_dst_addr); /* Called with dwc->lock held and all DMAC interrupts disabled */ static void dwc_handle_cyclic(struct dw_dma *dw, struct dw_dma_chan *dwc, - u32 status_err, u32 status_xfer) + u32 status_block, u32 status_err, u32 status_xfer) { unsigned long flags; - if (dwc->mask) { + if (status_block & dwc->mask) { void (*callback)(void *param); void *callback_param; dev_vdbg(chan2dev(&dwc->chan), "new cyclic period llp 0x%08x\n", channel_readl(dwc, LLP)); + dma_writel(dw, CLEAR.BLOCK, dwc->mask); callback = dwc->cdesc->period_callback; callback_param = dwc->cdesc->period_callback_param; @@ -577,6 +579,7 @@ static void dwc_handle_cyclic(struct dw_dma *dw, struct dw_dma_chan *dwc, channel_writel(dwc, CTL_LO, 0); channel_writel(dwc, CTL_HI, 0); + dma_writel(dw, CLEAR.BLOCK, dwc->mask); dma_writel(dw, CLEAR.ERROR, dwc->mask); dma_writel(dw, CLEAR.XFER, dwc->mask); @@ -593,10 +596,12 @@ static void dw_dma_tasklet(unsigned long data) { struct dw_dma *dw = (struct dw_dma *)data; struct dw_dma_chan *dwc; + u32 status_block; u32 status_xfer; u32 status_err; int i; + status_block = dma_readl(dw, RAW.BLOCK); status_xfer = dma_readl(dw, RAW.XFER); status_err = dma_readl(dw, RAW.ERROR); @@ -605,7 +610,8 @@ static void dw_dma_tasklet(unsigned long data) for (i = 0; i < dw->dma.chancnt; i++) { dwc = &dw->chan[i]; if (test_bit(DW_DMA_IS_CYCLIC, &dwc->flags)) - dwc_handle_cyclic(dw, dwc, status_err, status_xfer); + dwc_handle_cyclic(dw, dwc, status_block, status_err, + status_xfer); else if (status_err & (1 << i)) dwc_handle_error(dw, dwc); else if (status_xfer & (1 << i)) @@ -616,6 +622,7 @@ static void dw_dma_tasklet(unsigned long data) * Re-enable interrupts. */ channel_set_bit(dw, MASK.XFER, dw->all_chan_mask); + channel_set_bit(dw, MASK.BLOCK, dw->all_chan_mask); channel_set_bit(dw, MASK.ERROR, dw->all_chan_mask); } @@ -635,6 +642,7 @@ static irqreturn_t dw_dma_interrupt(int irq, void *dev_id) * softirq handler. */ channel_clear_bit(dw, MASK.XFER, dw->all_chan_mask); + channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask); channel_clear_bit(dw, MASK.ERROR, dw->all_chan_mask); status = dma_readl(dw, STATUS_INT); @@ -645,6 +653,7 @@ static irqreturn_t dw_dma_interrupt(int irq, void *dev_id) /* Try to recover */ channel_clear_bit(dw, MASK.XFER, (1 << 8) - 1); + channel_clear_bit(dw, MASK.BLOCK, (1 << 8) - 1); channel_clear_bit(dw, MASK.SRC_TRAN, (1 << 8) - 1); channel_clear_bit(dw, MASK.DST_TRAN, (1 << 8) - 1); channel_clear_bit(dw, MASK.ERROR, (1 << 8) - 1); @@ -1111,6 +1120,7 @@ static void dw_dma_off(struct dw_dma *dw) dma_writel(dw, CFG, 0); channel_clear_bit(dw, MASK.XFER, dw->all_chan_mask); + channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask); channel_clear_bit(dw, MASK.SRC_TRAN, dw->all_chan_mask); channel_clear_bit(dw, MASK.DST_TRAN, dw->all_chan_mask); channel_clear_bit(dw, MASK.ERROR, dw->all_chan_mask); @@ -1216,6 +1226,7 @@ static void dwc_free_chan_resources(struct dma_chan *chan) /* Disable interrupts */ channel_clear_bit(dw, MASK.XFER, dwc->mask); + channel_clear_bit(dw, MASK.BLOCK, dwc->mask); channel_clear_bit(dw, MASK.ERROR, dwc->mask); spin_unlock_irqrestore(&dwc->lock, flags); @@ -1458,6 +1469,7 @@ void dw_dma_cyclic_free(struct dma_chan *chan) dwc_chan_disable(dw, dwc); + dma_writel(dw, CLEAR.BLOCK, dwc->mask); dma_writel(dw, CLEAR.ERROR, dwc->mask); dma_writel(dw, CLEAR.XFER, dwc->mask); @@ -1546,9 +1558,6 @@ int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata) /* Force dma off, just in case */ dw_dma_off(dw); - /* Disable BLOCK interrupts as well */ - channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask); - /* Create a pool of consistent memory blocks for hardware descriptors */ dw->desc_pool = dmam_pool_create("dw_dmac_desc_pool", chip->dev, sizeof(struct dw_desc), 4, 0); From f4fa3d6f9e84d5009d78c732a0a81251b8bfe0b4 Mon Sep 17 00:00:00 2001 From: Songjun Wu Date: Mon, 18 Jan 2016 11:14:44 +0100 Subject: [PATCH 248/418] dmaengine: at_xdmac: fix resume for cyclic transfers commit 611dcadb01c89d1d3521450c05a4ded332e5a32d upstream. When having cyclic transfers, the channel was paused when performing suspend but was not correctly resumed. Signed-off-by: Songjun Wu Signed-off-by: Ludovic Desroches Fixes: e1f7c9eee707 ("dmaengine: at_xdmac: creation of the atmel eXtended DMA Controller driver") Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/at_xdmac.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index 370c661c7d7b..fa00f3a186da 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -1688,6 +1688,7 @@ static int at_xdmac_device_terminate_all(struct dma_chan *chan) list_for_each_entry_safe(desc, _desc, &atchan->xfers_list, xfer_node) at_xdmac_remove_xfer(atchan, desc); + clear_bit(AT_XDMAC_CHAN_IS_PAUSED, &atchan->status); clear_bit(AT_XDMAC_CHAN_IS_CYCLIC, &atchan->status); spin_unlock_irqrestore(&atchan->lock, flags); @@ -1820,6 +1821,8 @@ static int atmel_xdmac_resume(struct device *dev) atchan = to_at_xdmac_chan(chan); at_xdmac_chan_write(atchan, AT_XDMAC_CC, atchan->save_cc); if (at_xdmac_chan_is_cyclic(atchan)) { + if (at_xdmac_chan_is_paused(atchan)) + at_xdmac_device_resume(chan); at_xdmac_chan_write(atchan, AT_XDMAC_CNDA, atchan->save_cnda); at_xdmac_chan_write(atchan, AT_XDMAC_CNDC, atchan->save_cndc); at_xdmac_chan_write(atchan, AT_XDMAC_CIE, atchan->save_cim); From 556dfd8dae7d66b35121ffa8198465e55a34d64d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 10 Feb 2016 15:59:42 +0200 Subject: [PATCH 249/418] dmaengine: dw: disable BLOCK IRQs for non-cyclic xfer commit ee1cdcdae59563535485a5f56ee72c894ab7d7ad upstream. The commit 2895b2cad6e7 ("dmaengine: dw: fix cyclic transfer callbacks") re-enabled BLOCK interrupts with regard to make cyclic transfers work. However, this change becomes a regression for non-cyclic transfers as interrupt counters under stress test had been grown enormously (approximately per 4-5 bytes in the UART loop back test). Taking into consideration above enable BLOCK interrupts if and only if channel is programmed to perform cyclic transfer. Fixes: 2895b2cad6e7 ("dmaengine: dw: fix cyclic transfer callbacks") Signed-off-by: Andy Shevchenko Acked-by: Mans Rullgard Tested-by: Mans Rullgard Acked-by: Viresh Kumar Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/dw/core.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c index b92662722404..4f099ea29f83 100644 --- a/drivers/dma/dw/core.c +++ b/drivers/dma/dw/core.c @@ -156,7 +156,6 @@ static void dwc_initialize(struct dw_dma_chan *dwc) /* Enable interrupts */ channel_set_bit(dw, MASK.XFER, dwc->mask); - channel_set_bit(dw, MASK.BLOCK, dwc->mask); channel_set_bit(dw, MASK.ERROR, dwc->mask); dwc->initialized = true; @@ -588,6 +587,9 @@ static void dwc_handle_cyclic(struct dw_dma *dw, struct dw_dma_chan *dwc, spin_unlock_irqrestore(&dwc->lock, flags); } + + /* Re-enable interrupts */ + channel_set_bit(dw, MASK.BLOCK, dwc->mask); } /* ------------------------------------------------------------------------- */ @@ -618,11 +620,8 @@ static void dw_dma_tasklet(unsigned long data) dwc_scan_descriptors(dw, dwc); } - /* - * Re-enable interrupts. - */ + /* Re-enable interrupts */ channel_set_bit(dw, MASK.XFER, dw->all_chan_mask); - channel_set_bit(dw, MASK.BLOCK, dw->all_chan_mask); channel_set_bit(dw, MASK.ERROR, dw->all_chan_mask); } @@ -1256,6 +1255,7 @@ static void dwc_free_chan_resources(struct dma_chan *chan) int dw_dma_cyclic_start(struct dma_chan *chan) { struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + struct dw_dma *dw = to_dw_dma(chan->device); unsigned long flags; if (!test_bit(DW_DMA_IS_CYCLIC, &dwc->flags)) { @@ -1264,7 +1264,12 @@ int dw_dma_cyclic_start(struct dma_chan *chan) } spin_lock_irqsave(&dwc->lock, flags); + + /* Enable interrupts to perform cyclic transfer */ + channel_set_bit(dw, MASK.BLOCK, dwc->mask); + dwc_dostart(dwc, dwc->cdesc->desc[0]); + spin_unlock_irqrestore(&dwc->lock, flags); return 0; From e97bff5116d8ba6a25848691063d3370b939a4af Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 1 Jan 2016 13:17:46 +0100 Subject: [PATCH 250/418] IB/cm: Fix a recently introduced deadlock commit 4bfdf635c668869c69fd18ece37ec66fb6f38fcf upstream. ib_send_cm_drep() calls cm_enter_timewait() while holding a spinlock that can be locked from inside an interrupt handler. Hence do not enable interrupts inside cm_enter_timewait() if called with interrupts disabled. This patch fixes e.g. the following deadlock: Acked-by: Erez Shitrit ================================= [ INFO: inconsistent lock state ] 4.4.0-rc7+ #1 Tainted: G E --------------------------------- inconsistent {HARDIRQ-ON-W} -> {IN-HARDIRQ-W} usage. swapper/8/0 [HC1[1]:SC0[0]:HE0:SE1] takes: (&(&cm_id_priv->lock)->rlock){?.+...}, at: [] cm_establish+0x 74/0x1b0 [ib_cm] {HARDIRQ-ON-W} state was registered at: [] mark_held_locks+0x71/0x90 [] trace_hardirqs_on_caller+0xa7/0x1c0 [] trace_hardirqs_on+0xd/0x10 [] _raw_spin_unlock_irq+0x2b/0x40 [] cm_enter_timewait+0xae/0x100 [ib_cm] [] ib_send_cm_drep+0xb6/0x190 [ib_cm] [] srp_cm_handler+0x128/0x1a0 [ib_srp] [] cm_process_work+0x20/0xf0 [ib_cm] [] cm_dreq_handler+0x135/0x2c0 [ib_cm] [] cm_work_handler+0x75/0xd0 [ib_cm] [] process_one_work+0x1bd/0x460 [] worker_thread+0x118/0x420 [] kthread+0xe4/0x100 [] ret_from_fork+0x3f/0x70 irq event stamp: 1672286 hardirqs last enabled at (1672283): [] poll_idle+0x10/0x80 hardirqs last disabled at (1672284): [] common_interrupt+0x84/0x89 softirqs last enabled at (1672286): [] _local_bh_enable+0x1c/0x50 softirqs last disabled at (1672285): [] irq_enter+0x47/0x70 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&(&cm_id_priv->lock)->rlock); lock(&(&cm_id_priv->lock)->rlock); *** DEADLOCK *** no locks held by swapper/8/0. stack backtrace: CPU: 8 PID: 0 Comm: swapper/8 Tainted: G E 4.4.0-rc7+ #1 Hardware name: Dell Inc. PowerEdge R430/03XKDV, BIOS 1.0.2 11/17/2014 ffff88045af5e950 ffff88046e503a88 ffffffff81251c1b 0000000000000007 0000000000000006 0000000000000003 ffff88045af5ddc0 ffff88046e503ad8 ffffffff810a32f4 0000000000000000 0000000000000000 0000000000000001 Call Trace: [] dump_stack+0x4f/0x74 [] print_usage_bug+0x184/0x190 [] mark_lock_irq+0xf2/0x290 [] mark_lock+0x115/0x1b0 [] mark_irqflags+0x15c/0x170 [] __lock_acquire+0x1ef/0x560 [] lock_acquire+0x62/0x80 [] _raw_spin_lock_irqsave+0x43/0x60 [] cm_establish+0x74/0x1b0 [ib_cm] [] ib_cm_notify+0x31/0x100 [ib_cm] [] srpt_qp_event+0x54/0xd0 [ib_srpt] [] mlx4_ib_qp_event+0x72/0xc0 [mlx4_ib] [] mlx4_qp_event+0x69/0xd0 [mlx4_core] [] mlx4_eq_int+0x51e/0xd50 [mlx4_core] [] mlx4_msi_x_interrupt+0xf/0x20 [mlx4_core] [] handle_irq_event_percpu+0x40/0x110 [] handle_irq_event+0x3f/0x70 [] handle_edge_irq+0x79/0x120 [] handle_irq+0x5d/0x130 [] do_IRQ+0x6d/0x130 [] common_interrupt+0x89/0x89 [] cpuidle_enter_state+0xcf/0x200 [] cpuidle_enter+0x12/0x20 [] call_cpuidle+0x36/0x60 [] cpuidle_idle_call+0x63/0x110 [] cpu_idle_loop+0xfa/0x130 [] cpu_startup_entry+0xe/0x10 [] start_secondary+0x83/0x90 Fixes: commit be4b499323bf ("IB/cm: Do not queue work to a device that's going away") Signed-off-by: Bart Van Assche Cc: Erez Shitrit Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/cm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 0a26dd6d9b19..d6d2b3582910 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -782,11 +782,11 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv) wait_time = cm_convert_to_ms(cm_id_priv->av.timeout); /* Check if the device started its remove_one */ - spin_lock_irq(&cm.lock); + spin_lock_irqsave(&cm.lock, flags); if (!cm_dev->going_down) queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work, msecs_to_jiffies(wait_time)); - spin_unlock_irq(&cm.lock); + spin_unlock_irqrestore(&cm.lock, flags); cm_id_priv->timewait_info = NULL; } From 7bf68a0afc0f367118ac765081f19c8cfd93823f Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Thu, 7 Jan 2016 16:44:10 -0500 Subject: [PATCH 251/418] IB/qib: fix mcast detach when qp not attached commit 09dc9cd6528f5b52bcbd3292a6312e762c85260f upstream. The code produces the following trace: [1750924.419007] general protection fault: 0000 [#3] SMP [1750924.420364] Modules linked in: nfnetlink autofs4 rpcsec_gss_krb5 nfsv4 dcdbas rfcomm bnep bluetooth nfsd auth_rpcgss nfs_acl dm_multipath nfs lockd scsi_dh sunrpc fscache radeon ttm drm_kms_helper drm serio_raw parport_pc ppdev i2c_algo_bit lpc_ich ipmi_si ib_mthca ib_qib dca lp parport ib_ipoib mac_hid ib_cm i3000_edac ib_sa ib_uverbs edac_core ib_umad ib_mad ib_core ib_addr tg3 ptp dm_mirror dm_region_hash dm_log psmouse pps_core [1750924.420364] CPU: 1 PID: 8401 Comm: python Tainted: G D 3.13.0-39-generic #66-Ubuntu [1750924.420364] Hardware name: Dell Computer Corporation PowerEdge 860/0XM089, BIOS A04 07/24/2007 [1750924.420364] task: ffff8800366a9800 ti: ffff88007af1c000 task.ti: ffff88007af1c000 [1750924.420364] RIP: 0010:[] [] qib_mcast_qp_free+0x11/0x50 [ib_qib] [1750924.420364] RSP: 0018:ffff88007af1dd70 EFLAGS: 00010246 [1750924.420364] RAX: 0000000000000001 RBX: ffff88007b822688 RCX: 000000000000000f [1750924.420364] RDX: ffff88007b822688 RSI: ffff8800366c15a0 RDI: 6764697200000000 [1750924.420364] RBP: ffff88007af1dd78 R08: 0000000000000001 R09: 0000000000000000 [1750924.420364] R10: 0000000000000011 R11: 0000000000000246 R12: ffff88007baa1d98 [1750924.420364] R13: ffff88003ecab000 R14: ffff88007b822660 R15: 0000000000000000 [1750924.420364] FS: 00007ffff7fd8740(0000) GS:ffff88007fc80000(0000) knlGS:0000000000000000 [1750924.420364] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1750924.420364] CR2: 00007ffff597c750 CR3: 000000006860b000 CR4: 00000000000007e0 [1750924.420364] Stack: [1750924.420364] ffff88007b822688 ffff88007af1ddf0 ffffffffa0132429 000000007af1de20 [1750924.420364] ffff88007baa1dc8 ffff88007baa0000 ffff88007af1de70 ffffffffa00cb313 [1750924.420364] 00007fffffffde88 0000000000000000 0000000000000008 ffff88003ecab000 [1750924.420364] Call Trace: [1750924.420364] [] qib_multicast_detach+0x1e9/0x350 [ib_qib] [1750924.568035] [] ? ib_uverbs_modify_qp+0x323/0x3d0 [ib_uverbs] [1750924.568035] [] ib_detach_mcast+0x31/0x50 [ib_core] [1750924.568035] [] ib_uverbs_detach_mcast+0x93/0x170 [ib_uverbs] [1750924.568035] [] ib_uverbs_write+0xc6/0x2c0 [ib_uverbs] [1750924.568035] [] ? apparmor_file_permission+0x18/0x20 [1750924.568035] [] ? security_file_permission+0x23/0xa0 [1750924.568035] [] vfs_write+0xb4/0x1f0 [1750924.568035] [] SyS_write+0x49/0xa0 [1750924.568035] [] system_call_fastpath+0x1a/0x1f [1750924.568035] Code: 66 2e 0f 1f 84 00 00 00 00 00 31 c0 5d c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 0f 1f 44 00 00 55 48 89 e5 53 48 89 fb 48 8b 7f 10 ff 8f 40 01 00 00 74 0e 48 89 df e8 8e f8 06 e1 5b 5d c3 0f [1750924.568035] RIP [] qib_mcast_qp_free+0x11/0x50 [ib_qib] [1750924.568035] RSP [1750924.650439] ---[ end trace 73d5d4b3f8ad4851 ] The fix is to note the qib_mcast_qp that was found. If none is found, then return EINVAL indicating the error. Reviewed-by: Dennis Dalessandro Reported-by: Jason Gunthorpe Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/qib/qib_verbs_mcast.c | 35 +++++++++------------ 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib_verbs_mcast.c b/drivers/infiniband/hw/qib/qib_verbs_mcast.c index f8ea069a3eaf..b2fb5286dbd9 100644 --- a/drivers/infiniband/hw/qib/qib_verbs_mcast.c +++ b/drivers/infiniband/hw/qib/qib_verbs_mcast.c @@ -286,15 +286,13 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) struct qib_ibdev *dev = to_idev(ibqp->device); struct qib_ibport *ibp = to_iport(ibqp->device, qp->port_num); struct qib_mcast *mcast = NULL; - struct qib_mcast_qp *p, *tmp; + struct qib_mcast_qp *p, *tmp, *delp = NULL; struct rb_node *n; int last = 0; int ret; - if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) { - ret = -EINVAL; - goto bail; - } + if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) + return -EINVAL; spin_lock_irq(&ibp->lock); @@ -303,8 +301,7 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) while (1) { if (n == NULL) { spin_unlock_irq(&ibp->lock); - ret = -EINVAL; - goto bail; + return -EINVAL; } mcast = rb_entry(n, struct qib_mcast, rb_node); @@ -328,6 +325,7 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) */ list_del_rcu(&p->list); mcast->n_attached--; + delp = p; /* If this was the last attached QP, remove the GID too. */ if (list_empty(&mcast->qp_list)) { @@ -338,15 +336,16 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) } spin_unlock_irq(&ibp->lock); + /* QP not attached */ + if (!delp) + return -EINVAL; + /* + * Wait for any list walkers to finish before freeing the + * list element. + */ + wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1); + qib_mcast_qp_free(delp); - if (p) { - /* - * Wait for any list walkers to finish before freeing the - * list element. - */ - wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1); - qib_mcast_qp_free(p); - } if (last) { atomic_dec(&mcast->refcount); wait_event(mcast->wait, !atomic_read(&mcast->refcount)); @@ -355,11 +354,7 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) dev->n_mcast_grps_allocated--; spin_unlock_irq(&dev->n_mcast_grps_lock); } - - ret = 0; - -bail: - return ret; + return 0; } int qib_mcast_tree_empty(struct qib_ibport *ibp) From e759d3185f48fbeb6e4e3f741120c7689741decd Mon Sep 17 00:00:00 2001 From: Vinit Agnihotri Date: Mon, 11 Jan 2016 12:57:25 -0500 Subject: [PATCH 252/418] IB/qib: Support creating qps with GFP_NOIO flag commit fbbeb8632bf0b46ab44cfcedc4654cd7831b7161 upstream. The current code is problematic when the QP creation and ipoib is used to support NFS and NFS desires to do IO for paging purposes. In that case, the GFP_KERNEL allocation in qib_qp.c causes a deadlock in tight memory situations. This fix adds support to create queue pair with GFP_NOIO flag for connected mode only to cleanly fail the create queue pair in those situations. Reviewed-by: Mike Marciniszyn Signed-off-by: Vinit Agnihotri Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/qib/qib_qp.c | 46 +++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index 40f85bb3e0d3..3eff35c2d453 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -100,9 +100,10 @@ static u32 credit_table[31] = { 32768 /* 1E */ }; -static void get_map_page(struct qib_qpn_table *qpt, struct qpn_map *map) +static void get_map_page(struct qib_qpn_table *qpt, struct qpn_map *map, + gfp_t gfp) { - unsigned long page = get_zeroed_page(GFP_KERNEL); + unsigned long page = get_zeroed_page(gfp); /* * Free the page if someone raced with us installing it. @@ -121,7 +122,7 @@ static void get_map_page(struct qib_qpn_table *qpt, struct qpn_map *map) * zero/one for QP type IB_QPT_SMI/IB_QPT_GSI. */ static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt, - enum ib_qp_type type, u8 port) + enum ib_qp_type type, u8 port, gfp_t gfp) { u32 i, offset, max_scan, qpn; struct qpn_map *map; @@ -151,7 +152,7 @@ static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt, max_scan = qpt->nmaps - !offset; for (i = 0;;) { if (unlikely(!map->page)) { - get_map_page(qpt, map); + get_map_page(qpt, map, gfp); if (unlikely(!map->page)) break; } @@ -983,13 +984,21 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd, size_t sz; size_t sg_list_sz; struct ib_qp *ret; + gfp_t gfp; + if (init_attr->cap.max_send_sge > ib_qib_max_sges || init_attr->cap.max_send_wr > ib_qib_max_qp_wrs || - init_attr->create_flags) { - ret = ERR_PTR(-EINVAL); - goto bail; - } + init_attr->create_flags & ~(IB_QP_CREATE_USE_GFP_NOIO)) + return ERR_PTR(-EINVAL); + + /* GFP_NOIO is applicable in RC QPs only */ + if (init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO && + init_attr->qp_type != IB_QPT_RC) + return ERR_PTR(-EINVAL); + + gfp = init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO ? + GFP_NOIO : GFP_KERNEL; /* Check receive queue parameters if no SRQ is specified. */ if (!init_attr->srq) { @@ -1021,7 +1030,8 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd, sz = sizeof(struct qib_sge) * init_attr->cap.max_send_sge + sizeof(struct qib_swqe); - swq = vmalloc((init_attr->cap.max_send_wr + 1) * sz); + swq = __vmalloc((init_attr->cap.max_send_wr + 1) * sz, + gfp, PAGE_KERNEL); if (swq == NULL) { ret = ERR_PTR(-ENOMEM); goto bail; @@ -1037,13 +1047,13 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd, } else if (init_attr->cap.max_recv_sge > 1) sg_list_sz = sizeof(*qp->r_sg_list) * (init_attr->cap.max_recv_sge - 1); - qp = kzalloc(sz + sg_list_sz, GFP_KERNEL); + qp = kzalloc(sz + sg_list_sz, gfp); if (!qp) { ret = ERR_PTR(-ENOMEM); goto bail_swq; } RCU_INIT_POINTER(qp->next, NULL); - qp->s_hdr = kzalloc(sizeof(*qp->s_hdr), GFP_KERNEL); + qp->s_hdr = kzalloc(sizeof(*qp->s_hdr), gfp); if (!qp->s_hdr) { ret = ERR_PTR(-ENOMEM); goto bail_qp; @@ -1058,8 +1068,16 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd, qp->r_rq.max_sge = init_attr->cap.max_recv_sge; sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) + sizeof(struct qib_rwqe); - qp->r_rq.wq = vmalloc_user(sizeof(struct qib_rwq) + - qp->r_rq.size * sz); + if (gfp != GFP_NOIO) + qp->r_rq.wq = vmalloc_user( + sizeof(struct qib_rwq) + + qp->r_rq.size * sz); + else + qp->r_rq.wq = __vmalloc( + sizeof(struct qib_rwq) + + qp->r_rq.size * sz, + gfp, PAGE_KERNEL); + if (!qp->r_rq.wq) { ret = ERR_PTR(-ENOMEM); goto bail_qp; @@ -1090,7 +1108,7 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd, dev = to_idev(ibpd->device); dd = dd_from_dev(dev); err = alloc_qpn(dd, &dev->qpn_table, init_attr->qp_type, - init_attr->port_num); + init_attr->port_num, gfp); if (err < 0) { ret = ERR_PTR(err); vfree(qp->r_rq.wq); From 2f59395e47008b7a6fafbf56b657222c57b1a92a Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 14 Jan 2016 08:11:40 +0200 Subject: [PATCH 253/418] IB/mlx5: Expose correct maximum number of CQE capacity commit 9f17768611ebf81dfac69948dd12622b6f2e45fc upstream. Maximum number of EQE capacity per CQ was mistakenly exposed as CQE. Fix that. Fixes: 938fe83c8dcb ("net/mlx5_core: New device capabilities handling") Signed-off-by: Leon Romanovsky Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/mlx5/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 7e97cb55a6bf..c4e091528390 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -275,7 +275,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->max_sge = min(max_rq_sg, max_sq_sg); props->max_sge_rd = props->max_sge; props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq); - props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_eq_sz)) - 1; + props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1; props->max_mr = 1 << MLX5_CAP_GEN(mdev, log_max_mkey); props->max_pd = 1 << MLX5_CAP_GEN(mdev, log_max_pd); props->max_qp_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_req_qp); From 774ac8b7eff69e0786970157de2157e68b22f456 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Fri, 30 Oct 2015 16:31:47 +0800 Subject: [PATCH 254/418] Thermal: initialize thermal zone device correctly commit bb431ba26c5cd0a17c941ca6c3a195a3a6d5d461 upstream. After thermal zone device registered, as we have not read any temperature before, thus tz->temperature should not be 0, which actually means 0C, and thermal trend is not available. In this case, we need specially handling for the first thermal_zone_device_update(). Both thermal core framework and step_wise governor is enhanced to handle this. And since the step_wise governor is the only one that uses trends, so it's the only thermal governor that needs to be updated. Tested-by: Manuel Krause Tested-by: szegad Tested-by: prash Tested-by: amish Tested-by: Matthias Reviewed-by: Javi Merino Signed-off-by: Zhang Rui Signed-off-by: Chen Yu Signed-off-by: Greg Kroah-Hartman --- drivers/thermal/step_wise.c | 17 +++++++++++++++-- drivers/thermal/thermal_core.c | 19 +++++++++++++++++-- drivers/thermal/thermal_core.h | 1 + include/linux/thermal.h | 3 +++ 4 files changed, 36 insertions(+), 4 deletions(-) diff --git a/drivers/thermal/step_wise.c b/drivers/thermal/step_wise.c index 2f9f7086ac3d..ea9366ad3e6b 100644 --- a/drivers/thermal/step_wise.c +++ b/drivers/thermal/step_wise.c @@ -63,6 +63,19 @@ static unsigned long get_target_state(struct thermal_instance *instance, next_target = instance->target; dev_dbg(&cdev->device, "cur_state=%ld\n", cur_state); + if (!instance->initialized) { + if (throttle) { + next_target = (cur_state + 1) >= instance->upper ? + instance->upper : + ((cur_state + 1) < instance->lower ? + instance->lower : (cur_state + 1)); + } else { + next_target = THERMAL_NO_TARGET; + } + + return next_target; + } + switch (trend) { case THERMAL_TREND_RAISING: if (throttle) { @@ -149,7 +162,7 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip) dev_dbg(&instance->cdev->device, "old_target=%d, target=%d\n", old_target, (int)instance->target); - if (old_target == instance->target) + if (instance->initialized && old_target == instance->target) continue; /* Activate a passive thermal instance */ @@ -161,7 +174,7 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip) instance->target == THERMAL_NO_TARGET) update_passive_instance(tz, trip_type, -1); - + instance->initialized = true; instance->cdev->updated = false; /* cdev needs update */ } diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index d9e525cc9c1c..682bc1ef9c37 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -532,8 +532,22 @@ static void update_temperature(struct thermal_zone_device *tz) mutex_unlock(&tz->lock); trace_thermal_temperature(tz); - dev_dbg(&tz->device, "last_temperature=%d, current_temperature=%d\n", - tz->last_temperature, tz->temperature); + if (tz->last_temperature == THERMAL_TEMP_INVALID) + dev_dbg(&tz->device, "last_temperature N/A, current_temperature=%d\n", + tz->temperature); + else + dev_dbg(&tz->device, "last_temperature=%d, current_temperature=%d\n", + tz->last_temperature, tz->temperature); +} + +static void thermal_zone_device_reset(struct thermal_zone_device *tz) +{ + struct thermal_instance *pos; + + tz->temperature = THERMAL_TEMP_INVALID; + tz->passive = 0; + list_for_each_entry(pos, &tz->thermal_instances, tz_node) + pos->initialized = false; } void thermal_zone_device_update(struct thermal_zone_device *tz) @@ -1900,6 +1914,7 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type, INIT_DELAYED_WORK(&(tz->poll_queue), thermal_zone_device_check); + thermal_zone_device_reset(tz); thermal_zone_device_update(tz); return tz; diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h index d7ac1fccd659..749d41abfbab 100644 --- a/drivers/thermal/thermal_core.h +++ b/drivers/thermal/thermal_core.h @@ -41,6 +41,7 @@ struct thermal_instance { struct thermal_zone_device *tz; struct thermal_cooling_device *cdev; int trip; + bool initialized; unsigned long upper; /* Highest cooling state for this trip point */ unsigned long lower; /* Lowest cooling state for this trip point */ unsigned long target; /* expected cooling state */ diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 613c29bd6baf..103fcbe6bdaf 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -43,6 +43,9 @@ /* Default weight of a bound cooling device */ #define THERMAL_WEIGHT_DEFAULT 0 +/* use value, which < 0K, to indicate an invalid/uninitialized temperature */ +#define THERMAL_TEMP_INVALID -274000 + /* Unit conversion macros */ #define DECI_KELVIN_TO_CELSIUS(t) ({ \ long _t = (t); \ From a67208e94d945ad890f8dc4734ec5fdb3219cc03 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Fri, 30 Oct 2015 16:31:58 +0800 Subject: [PATCH 255/418] Thermal: handle thermal zone device properly during system sleep commit ff140fea847e1c2002a220571ab106c2456ed252 upstream. Current thermal code does not handle system sleep well because 1. the cooling device cooling state may be changed during suspend 2. the previous temperature reading becomes invalid after resumed because it is got before system sleep 3. updating thermal zone device during suspending/resuming is wrong because some devices may have already been suspended or may have not been resumed. Thus, the proper way to do this is to cancel all thermal zone device update requirements during suspend/resume, and after all the devices have been resumed, reset and update every registered thermal zone devices. This also fixes a regression introduced by: Commit 19593a1fb1f6 ("ACPI / fan: convert to platform driver") Because, with above commit applied, all the fan devices are attached to the acpi_general_pm_domain, and they are turned on by the pm_domain automatically after resume, without the awareness of thermal core. Reference: https://bugzilla.kernel.org/show_bug.cgi?id=78201 Reference: https://bugzilla.kernel.org/show_bug.cgi?id=91411 Tested-by: Manuel Krause Tested-by: szegad Tested-by: prash Tested-by: amish Tested-by: Matthias Reviewed-by: Javi Merino Signed-off-by: Zhang Rui Signed-off-by: Chen Yu Signed-off-by: Greg Kroah-Hartman --- drivers/thermal/thermal_core.c | 42 ++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 682bc1ef9c37..9aae767bf39b 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -37,6 +37,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include @@ -59,6 +60,8 @@ static LIST_HEAD(thermal_governor_list); static DEFINE_MUTEX(thermal_list_lock); static DEFINE_MUTEX(thermal_governor_lock); +static atomic_t in_suspend; + static struct thermal_governor *def_governor; static struct thermal_governor *__find_governor(const char *name) @@ -554,6 +557,9 @@ void thermal_zone_device_update(struct thermal_zone_device *tz) { int count; + if (atomic_read(&in_suspend)) + return; + if (!tz->ops->get_temp) return; @@ -2155,6 +2161,36 @@ static void thermal_unregister_governors(void) thermal_gov_power_allocator_unregister(); } +static int thermal_pm_notify(struct notifier_block *nb, + unsigned long mode, void *_unused) +{ + struct thermal_zone_device *tz; + + switch (mode) { + case PM_HIBERNATION_PREPARE: + case PM_RESTORE_PREPARE: + case PM_SUSPEND_PREPARE: + atomic_set(&in_suspend, 1); + break; + case PM_POST_HIBERNATION: + case PM_POST_RESTORE: + case PM_POST_SUSPEND: + atomic_set(&in_suspend, 0); + list_for_each_entry(tz, &thermal_tz_list, node) { + thermal_zone_device_reset(tz); + thermal_zone_device_update(tz); + } + break; + default: + break; + } + return 0; +} + +static struct notifier_block thermal_pm_nb = { + .notifier_call = thermal_pm_notify, +}; + static int __init thermal_init(void) { int result; @@ -2175,6 +2211,11 @@ static int __init thermal_init(void) if (result) goto exit_netlink; + result = register_pm_notifier(&thermal_pm_nb); + if (result) + pr_warn("Thermal: Can not register suspend notifier, return %d\n", + result); + return 0; exit_netlink: @@ -2194,6 +2235,7 @@ error: static void __exit thermal_exit(void) { + unregister_pm_notifier(&thermal_pm_nb); of_thermal_destroy_zones(); genetlink_exit(); class_unregister(&thermal_class); From 27f356149d599d1ee55318641f9d3ed69e66174a Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Fri, 30 Oct 2015 16:32:10 +0800 Subject: [PATCH 256/418] Thermal: do thermal zone update after a cooling device registered commit 4511f7166a2deb5f7a578cf87fd2fe1ae83527e3 upstream. When a new cooling device is registered, we need to update the thermal zone to set the new registered cooling device to a proper state. This fixes a problem that the system is cool, while the fan devices are left running on full speed after boot, if fan device is registered after thermal zone device. Here is the history of why current patch looks like this: https://patchwork.kernel.org/patch/7273041/ Reference:https://bugzilla.kernel.org/show_bug.cgi?id=92431 Tested-by: Manuel Krause Tested-by: szegad Tested-by: prash Tested-by: amish Reviewed-by: Javi Merino Signed-off-by: Zhang Rui Signed-off-by: Chen Yu Signed-off-by: Greg Kroah-Hartman --- drivers/thermal/thermal_core.c | 14 +++++++++++++- include/linux/thermal.h | 2 ++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 9aae767bf39b..ba08b5521382 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -1341,6 +1341,7 @@ int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz, if (!result) { list_add_tail(&dev->tz_node, &tz->thermal_instances); list_add_tail(&dev->cdev_node, &cdev->thermal_instances); + atomic_set(&tz->need_update, 1); } mutex_unlock(&cdev->lock); mutex_unlock(&tz->lock); @@ -1450,6 +1451,7 @@ __thermal_cooling_device_register(struct device_node *np, const struct thermal_cooling_device_ops *ops) { struct thermal_cooling_device *cdev; + struct thermal_zone_device *pos = NULL; int result; if (type && strlen(type) >= THERMAL_NAME_LENGTH) @@ -1494,6 +1496,12 @@ __thermal_cooling_device_register(struct device_node *np, /* Update binding information for 'this' new cdev */ bind_cdev(cdev); + mutex_lock(&thermal_list_lock); + list_for_each_entry(pos, &thermal_tz_list, node) + if (atomic_cmpxchg(&pos->need_update, 1, 0)) + thermal_zone_device_update(pos); + mutex_unlock(&thermal_list_lock); + return cdev; } @@ -1826,6 +1834,8 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type, tz->trips = trips; tz->passive_delay = passive_delay; tz->polling_delay = polling_delay; + /* A new thermal zone needs to be updated anyway. */ + atomic_set(&tz->need_update, 1); dev_set_name(&tz->device, "thermal_zone%d", tz->id); result = device_register(&tz->device); @@ -1921,7 +1931,9 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type, INIT_DELAYED_WORK(&(tz->poll_queue), thermal_zone_device_check); thermal_zone_device_reset(tz); - thermal_zone_device_update(tz); + /* Update the new thermal zone and mark it as already updated. */ + if (atomic_cmpxchg(&tz->need_update, 1, 0)) + thermal_zone_device_update(tz); return tz; diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 103fcbe6bdaf..e13a1ace50e9 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -170,6 +170,7 @@ struct thermal_attr { * @forced_passive: If > 0, temperature at which to switch on all ACPI * processor cooling devices. Currently only used by the * step-wise governor. + * @need_update: if equals 1, thermal_zone_device_update needs to be invoked. * @ops: operations this &thermal_zone_device supports * @tzp: thermal zone parameters * @governor: pointer to the governor for this thermal zone @@ -197,6 +198,7 @@ struct thermal_zone_device { int emul_temperature; int passive; unsigned int forced_passive; + atomic_t need_update; struct thermal_zone_device_ops *ops; struct thermal_zone_params *tzp; struct thermal_governor *governor; From 0ff7850139c4e104b974edc09d5cfdb0dc616da9 Mon Sep 17 00:00:00 2001 From: Thorsten Leemhuis Date: Sun, 17 Jan 2016 16:03:04 +0100 Subject: [PATCH 257/418] hwmon: (dell-smm) Blacklist Dell Studio XPS 8000 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 6220f4ebd7b4db499238c2dc91268a9c473fd01c upstream. Since Linux 4.0 the CPU fan speed is going up and down on Dell Studio XPS 8000 and 8100 for unknown reasons. The 8100 was already blacklisted in commit a4b45b25f18d ("hwmon: (dell-smm) Blacklist Dell Studio XPS 8100"). This patch blacklists the XPS 8000. Without further debugging on the affected machine, it is not possible to find the problem. For more details see https://bugzilla.kernel.org/show_bug.cgi?id=100121 Signed-off-by: Thorsten Leemhuis Acked-by: Pali Rohár Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/dell-smm-hwmon.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c index c8487894b312..c43318d3416e 100644 --- a/drivers/hwmon/dell-smm-hwmon.c +++ b/drivers/hwmon/dell-smm-hwmon.c @@ -930,6 +930,17 @@ static struct dmi_system_id i8k_dmi_table[] __initdata = { MODULE_DEVICE_TABLE(dmi, i8k_dmi_table); static struct dmi_system_id i8k_blacklist_dmi_table[] __initdata = { + { + /* + * CPU fan speed going up and down on Dell Studio XPS 8000 + * for unknown reasons. + */ + .ident = "Dell Studio XPS 8000", + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Studio XPS 8000"), + }, + }, { /* * CPU fan speed going up and down on Dell Studio XPS 8100 From 4bbd7acd014a4432b8d15372840184dce62b1002 Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Fri, 19 Feb 2016 18:09:51 -0600 Subject: [PATCH 258/418] hwmon: (gpio-fan) Remove un-necessary speed_index lookup for thermal hook commit 000e0949148382c4962489593a2f05504c2a6771 upstream. Thermal hook gpio_fan_get_cur_state is only interested in knowing the current speed index that was setup in the system, this is already available as part of fan_data->speed_index which is always set by set_fan_speed. Using get_fan_speed_index is useful when we have no idea about the fan speed configuration (for example during fan_ctrl_init). When thermal framework invokes gpio_fan_get_cur_state=>get_fan_speed_index via gpio_fan_get_cur_state especially in a polled configuration for thermal governor, we basically hog the i2c interface to the extent that other functions fail to get any traffic out :(. Instead, just provide the last state set in the driver - since the gpio fan driver is responsible for the fan state immaterial of override, the fan_data->speed_index should accurately reflect the state. Fixes: b5cf88e46bad ("(gpio-fan): Add thermal control hooks") Reported-by: Tony Lindgren Cc: Guenter Roeck Cc: Eduardo Valentin Signed-off-by: Nishanth Menon Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/gpio-fan.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/hwmon/gpio-fan.c b/drivers/hwmon/gpio-fan.c index 82de3deeb18a..685568b1236d 100644 --- a/drivers/hwmon/gpio-fan.c +++ b/drivers/hwmon/gpio-fan.c @@ -406,16 +406,11 @@ static int gpio_fan_get_cur_state(struct thermal_cooling_device *cdev, unsigned long *state) { struct gpio_fan_data *fan_data = cdev->devdata; - int r; if (!fan_data) return -EINVAL; - r = get_fan_speed_index(fan_data); - if (r < 0) - return r; - - *state = r; + *state = fan_data->speed_index; return 0; } From 5c2bd0c61ca8a5af85370c71f9c7e05e1e434132 Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Thu, 18 Feb 2016 14:07:52 +0100 Subject: [PATCH 259/418] hwmon: (ads1015) Handle negative conversion values correctly commit acc146943957d7418a6846f06e029b2c5e87e0d5 upstream. Make the divisor signed as DIV_ROUND_CLOSEST is undefined for negative dividends when the divisor is unsigned. Signed-off-by: Peter Rosin Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/ads1015.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/ads1015.c b/drivers/hwmon/ads1015.c index f155b8380481..2b3105c8aed3 100644 --- a/drivers/hwmon/ads1015.c +++ b/drivers/hwmon/ads1015.c @@ -126,7 +126,7 @@ static int ads1015_reg_to_mv(struct i2c_client *client, unsigned int channel, struct ads1015_data *data = i2c_get_clientdata(client); unsigned int pga = data->channel_data[channel].pga; int fullscale = fullscale_table[pga]; - const unsigned mask = data->id == ads1115 ? 0x7fff : 0x7ff0; + const int mask = data->id == ads1115 ? 0x7fff : 0x7ff0; return DIV_ROUND_CLOSEST(reg * fullscale, mask); } From 286ab25d69068cef7c97e305af347288b9f1ab4a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 25 Jan 2016 16:44:38 +0100 Subject: [PATCH 260/418] cpufreq: pxa2xx: fix pxa_cpufreq_change_voltage prototype commit fb2a24a1c6457d21df9fae0dd66b20c63ba56077 upstream. There are two definitions of pxa_cpufreq_change_voltage, with slightly different prototypes after one of them had its argument marked 'const'. Now the other one (for !CONFIG_REGULATOR) produces a harmless warning: drivers/cpufreq/pxa2xx-cpufreq.c: In function 'pxa_set_target': drivers/cpufreq/pxa2xx-cpufreq.c:291:36: warning: passing argument 1 of 'pxa_cpufreq_change_voltage' discards 'const' qualifier from pointer target type [-Wdiscarded-qualifiers] ret = pxa_cpufreq_change_voltage(&pxa_freq_settings[idx]); ^ drivers/cpufreq/pxa2xx-cpufreq.c:205:12: note: expected 'struct pxa_freqs *' but argument is of type 'const struct pxa_freqs *' static int pxa_cpufreq_change_voltage(struct pxa_freqs *pxa_freq) ^ This changes the prototype in the same way as the other, which avoids the warning. Fixes: 03c229906311 (cpufreq: pxa: make pxa_freqs arrays const) Signed-off-by: Arnd Bergmann Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/pxa2xx-cpufreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/pxa2xx-cpufreq.c b/drivers/cpufreq/pxa2xx-cpufreq.c index 1d99c97defa9..096377232747 100644 --- a/drivers/cpufreq/pxa2xx-cpufreq.c +++ b/drivers/cpufreq/pxa2xx-cpufreq.c @@ -202,7 +202,7 @@ static void __init pxa_cpufreq_init_voltages(void) } } #else -static int pxa_cpufreq_change_voltage(struct pxa_freqs *pxa_freq) +static int pxa_cpufreq_change_voltage(const struct pxa_freqs *pxa_freq) { return 0; } From 0ac4a2a8bae9838d69be6dec3212e3630fb5773c Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 25 Jan 2016 22:33:46 +0530 Subject: [PATCH 261/418] cpufreq: Fix NULL reference crash while accessing policy->governor_data commit e4b133cc4b30b48d488e4e4fffb132f173ce4358 upstream. There is a race discovered by Juri, where we are able to: - create and read a sysfs file before policy->governor_data is being set to a non NULL value. OR - set policy->governor_data to NULL, and reading a file before being destroyed. And so such a crash is reported: Unable to handle kernel NULL pointer dereference at virtual address 0000000c pgd = edfc8000 [0000000c] *pgd=bfc8c835 Internal error: Oops: 17 [#1] SMP ARM Modules linked in: CPU: 4 PID: 1730 Comm: cat Not tainted 4.5.0-rc1+ #463 Hardware name: ARM-Versatile Express task: ee8e8480 ti: ee930000 task.ti: ee930000 PC is at show_ignore_nice_load_gov_pol+0x24/0x34 LR is at show+0x4c/0x60 pc : [] lr : [] psr: a0070013 sp : ee931dd0 ip : ee931de0 fp : ee931ddc r10: ee4bc290 r9 : 00001000 r8 : ef2cb000 r7 : ee4bc200 r6 : ef2cb000 r5 : c0af57b0 r4 : ee4bc2e0 r3 : 00000000 r2 : 00000000 r1 : c0928df4 r0 : ef2cb000 Flags: NzCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none Control: 10c5387d Table: adfc806a DAC: 00000051 Process cat (pid: 1730, stack limit = 0xee930210) Stack: (0xee931dd0 to 0xee932000) 1dc0: ee931dfc ee931de0 c058ae88 c058f1a4 1de0: edce3bc0 c07bfca4 edce3ac0 00001000 ee931e24 ee931e00 c01fcb90 c058ae48 1e00: 00000001 edce3bc0 00000000 00000001 ee931e50 ee8ff480 ee931e34 ee931e28 1e20: c01fb33c c01fcb0c ee931e8c ee931e38 c01a5210 c01fb314 ee931e9c ee931e48 1e40: 00000000 edce3bf0 befe4a00 ee931f78 00000000 00000000 000001e4 00000000 1e60: c00545a8 edce3ac0 00001000 00001000 befe4a00 ee931f78 00000000 00001000 1e80: ee931ed4 ee931e90 c01fbed8 c01a5038 ed085a58 00020000 00000000 00000000 1ea0: c0ad72e4 ee931f78 ee8ff488 ee8ff480 c077f3fc 00001000 befe4a00 ee931f78 1ec0: 00000000 00001000 ee931f44 ee931ed8 c017c328 c01fbdc4 00001000 00000000 1ee0: ee8ff480 00001000 ee931f44 ee931ef8 c017c65c c03deb10 ee931fac ee931f08 1f00: c0009270 c001f290 c0a8d968 ef2cb000 ef2cb000 ee8ff480 00000020 ee8ff480 1f20: ee8ff480 befe4a00 00001000 ee931f78 00000000 00000000 ee931f74 ee931f48 1f40: c017d1ec c017c2f8 c019c724 c019c684 ee8ff480 ee8ff480 00001000 befe4a00 1f60: 00000000 00000000 ee931fa4 ee931f78 c017d2a8 c017d160 00000000 00000000 1f80: 000a9f20 00001000 befe4a00 00000003 c000ffe4 ee930000 00000000 ee931fa8 1fa0: c000fe40 c017d264 000a9f20 00001000 00000003 befe4a00 00001000 00000000 Unable to handle kernel NULL pointer dereference at virtual address 0000000c 1fc0: 000a9f20 00001000 befe4a00 00000003 00000000 00000000 00000003 00000001 pgd = edfc4000 [0000000c] *pgd=bfcac835 1fe0: 00000000 befe49dc 000197f8 b6e35dfc 60070010 00000003 3065b49d 134ac2c9 [] (show_ignore_nice_load_gov_pol) from [] (show+0x4c/0x60) [] (show) from [] (sysfs_kf_seq_show+0x90/0xfc) [] (sysfs_kf_seq_show) from [] (kernfs_seq_show+0x34/0x38) [] (kernfs_seq_show) from [] (seq_read+0x1e4/0x4e4) [] (seq_read) from [] (kernfs_fop_read+0x120/0x1a0) [] (kernfs_fop_read) from [] (__vfs_read+0x3c/0xe0) [] (__vfs_read) from [] (vfs_read+0x98/0x104) [] (vfs_read) from [] (SyS_read+0x50/0x90) [] (SyS_read) from [] (ret_fast_syscall+0x0/0x1c) Code: e5903044 e1a00001 e3081df4 e34c1092 (e593300c) ---[ end trace 5994b9a5111f35ee ]--- Fix that by making sure, policy->governor_data is updated at the right places only. Reported-and-tested-by: Juri Lelli Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/cpufreq_governor.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index b260576ddb12..d994b0f652d3 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -356,16 +356,18 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy, if (!have_governor_per_policy()) cdata->gdbs_data = dbs_data; + policy->governor_data = dbs_data; + ret = sysfs_create_group(get_governor_parent_kobj(policy), get_sysfs_attr(dbs_data)); if (ret) goto reset_gdbs_data; - policy->governor_data = dbs_data; - return 0; reset_gdbs_data: + policy->governor_data = NULL; + if (!have_governor_per_policy()) cdata->gdbs_data = NULL; cdata->exit(dbs_data, !policy->governor->initialized); @@ -386,16 +388,19 @@ static int cpufreq_governor_exit(struct cpufreq_policy *policy, if (!cdbs->shared || cdbs->shared->policy) return -EBUSY; - policy->governor_data = NULL; if (!--dbs_data->usage_count) { sysfs_remove_group(get_governor_parent_kobj(policy), get_sysfs_attr(dbs_data)); + policy->governor_data = NULL; + if (!have_governor_per_policy()) cdata->gdbs_data = NULL; cdata->exit(dbs_data, policy->governor->initialized == 1); kfree(dbs_data); + } else { + policy->governor_data = NULL; } free_common_dbs_info(policy, cdata); From df86161e4460cfd4ba504d5c5fd6ba218c75d4aa Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Sat, 26 Dec 2015 06:00:48 +0100 Subject: [PATCH 262/418] seccomp: always propagate NO_NEW_PRIVS on tsync commit 103502a35cfce0710909da874f092cb44823ca03 upstream. Before this patch, a process with some permissive seccomp filter that was applied by root without NO_NEW_PRIVS was able to add more filters to itself without setting NO_NEW_PRIVS by setting the new filter from a throwaway thread with NO_NEW_PRIVS. Signed-off-by: Jann Horn Signed-off-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- kernel/seccomp.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 580ac2d4024f..15a1795bbba1 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -316,24 +316,24 @@ static inline void seccomp_sync_threads(void) put_seccomp_filter(thread); smp_store_release(&thread->seccomp.filter, caller->seccomp.filter); + + /* + * Don't let an unprivileged task work around + * the no_new_privs restriction by creating + * a thread that sets it up, enters seccomp, + * then dies. + */ + if (task_no_new_privs(caller)) + task_set_no_new_privs(thread); + /* * Opt the other thread into seccomp if needed. * As threads are considered to be trust-realm * equivalent (see ptrace_may_access), it is safe to * allow one thread to transition the other. */ - if (thread->seccomp.mode == SECCOMP_MODE_DISABLED) { - /* - * Don't let an unprivileged task work around - * the no_new_privs restriction by creating - * a thread that sets it up, enters seccomp, - * then dies. - */ - if (task_no_new_privs(caller)) - task_set_no_new_privs(thread); - + if (thread->seccomp.mode == SECCOMP_MODE_DISABLED) seccomp_assign_mode(thread, SECCOMP_MODE_FILTER); - } } } From ee834473805f5fd77a3d2625a40552159641029e Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 28 Dec 2015 13:18:34 +0300 Subject: [PATCH 263/418] libceph: fix ceph_msg_revoke() commit 67645d7619738e51c668ca69f097cb90b5470422 upstream. There are a number of problems with revoking a "was sending" message: (1) We never make any attempt to revoke data - only kvecs contibute to con->out_skip. However, once the header (envelope) is written to the socket, our peer learns data_len and sets itself to expect at least data_len bytes to follow front or front+middle. If ceph_msg_revoke() is called while the messenger is sending message's data portion, anything we send after that call is counted by the OSD towards the now revoked message's data portion. The effects vary, the most common one is the eventual hang - higher layers get stuck waiting for the reply to the message that was sent out after ceph_msg_revoke() returned and treated by the OSD as a bunch of data bytes. This is what Matt ran into. (2) Flat out zeroing con->out_kvec_bytes worth of bytes to handle kvecs is wrong. If ceph_msg_revoke() is called before the tag is sent out or while the messenger is sending the header, we will get a connection reset, either due to a bad tag (0 is not a valid tag) or a bad header CRC, which kind of defeats the purpose of revoke. Currently the kernel client refuses to work with header CRCs disabled, but that will likely change in the future, making this even worse. (3) con->out_skip is not reset on connection reset, leading to one or more spurious connection resets if we happen to get a real one between con->out_skip is set in ceph_msg_revoke() and before it's cleared in write_partial_skip(). Fixing (1) and (3) is trivial. The idea behind fixing (2) is to never zero the tag or the header, i.e. send out tag+header regardless of when ceph_msg_revoke() is called. That way the header is always correct, no unnecessary resets are induced and revoke stands ready for disabled CRCs. Since ceph_msg_revoke() rips out con->out_msg, introduce a new "message out temp" and copy the header into it before sending. Reported-by: Matt Conner Signed-off-by: Ilya Dryomov Tested-by: Matt Conner Reviewed-by: Sage Weil Signed-off-by: Greg Kroah-Hartman --- include/linux/ceph/messenger.h | 2 +- net/ceph/messenger.c | 76 ++++++++++++++++++++++++++-------- 2 files changed, 59 insertions(+), 19 deletions(-) diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 71b1d6cdcb5d..8dbd7879fdc6 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -220,6 +220,7 @@ struct ceph_connection { struct ceph_entity_addr actual_peer_addr; /* message out temps */ + struct ceph_msg_header out_hdr; struct ceph_msg *out_msg; /* sending message (== tail of out_sent) */ bool out_msg_done; @@ -229,7 +230,6 @@ struct ceph_connection { int out_kvec_left; /* kvec's left in out_kvec */ int out_skip; /* skip this many bytes */ int out_kvec_bytes; /* total bytes left */ - bool out_kvec_is_msg; /* kvec refers to out_msg */ int out_more; /* there is more data after the kvecs */ __le64 out_temp_ack; /* for writing an ack */ struct ceph_timespec out_temp_keepalive2; /* for writing keepalive2 diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 9981039ef4ff..a145180150ef 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -672,6 +672,8 @@ static void reset_connection(struct ceph_connection *con) } con->in_seq = 0; con->in_seq_acked = 0; + + con->out_skip = 0; } /* @@ -771,6 +773,8 @@ static u32 get_global_seq(struct ceph_messenger *msgr, u32 gt) static void con_out_kvec_reset(struct ceph_connection *con) { + BUG_ON(con->out_skip); + con->out_kvec_left = 0; con->out_kvec_bytes = 0; con->out_kvec_cur = &con->out_kvec[0]; @@ -779,9 +783,9 @@ static void con_out_kvec_reset(struct ceph_connection *con) static void con_out_kvec_add(struct ceph_connection *con, size_t size, void *data) { - int index; + int index = con->out_kvec_left; - index = con->out_kvec_left; + BUG_ON(con->out_skip); BUG_ON(index >= ARRAY_SIZE(con->out_kvec)); con->out_kvec[index].iov_len = size; @@ -790,6 +794,27 @@ static void con_out_kvec_add(struct ceph_connection *con, con->out_kvec_bytes += size; } +/* + * Chop off a kvec from the end. Return residual number of bytes for + * that kvec, i.e. how many bytes would have been written if the kvec + * hadn't been nuked. + */ +static int con_out_kvec_skip(struct ceph_connection *con) +{ + int off = con->out_kvec_cur - con->out_kvec; + int skip = 0; + + if (con->out_kvec_bytes > 0) { + skip = con->out_kvec[off + con->out_kvec_left - 1].iov_len; + BUG_ON(con->out_kvec_bytes < skip); + BUG_ON(!con->out_kvec_left); + con->out_kvec_bytes -= skip; + con->out_kvec_left--; + } + + return skip; +} + #ifdef CONFIG_BLOCK /* @@ -1197,7 +1222,6 @@ static void prepare_write_message_footer(struct ceph_connection *con) m->footer.flags |= CEPH_MSG_FOOTER_COMPLETE; dout("prepare_write_message_footer %p\n", con); - con->out_kvec_is_msg = true; con->out_kvec[v].iov_base = &m->footer; if (con->peer_features & CEPH_FEATURE_MSG_AUTH) { if (con->ops->sign_message) @@ -1225,7 +1249,6 @@ static void prepare_write_message(struct ceph_connection *con) u32 crc; con_out_kvec_reset(con); - con->out_kvec_is_msg = true; con->out_msg_done = false; /* Sneak an ack in there first? If we can get it into the same @@ -1265,18 +1288,19 @@ static void prepare_write_message(struct ceph_connection *con) /* tag + hdr + front + middle */ con_out_kvec_add(con, sizeof (tag_msg), &tag_msg); - con_out_kvec_add(con, sizeof (m->hdr), &m->hdr); + con_out_kvec_add(con, sizeof(con->out_hdr), &con->out_hdr); con_out_kvec_add(con, m->front.iov_len, m->front.iov_base); if (m->middle) con_out_kvec_add(con, m->middle->vec.iov_len, m->middle->vec.iov_base); - /* fill in crc (except data pages), footer */ + /* fill in hdr crc and finalize hdr */ crc = crc32c(0, &m->hdr, offsetof(struct ceph_msg_header, crc)); con->out_msg->hdr.crc = cpu_to_le32(crc); - con->out_msg->footer.flags = 0; + memcpy(&con->out_hdr, &con->out_msg->hdr, sizeof(con->out_hdr)); + /* fill in front and middle crc, footer */ crc = crc32c(0, m->front.iov_base, m->front.iov_len); con->out_msg->footer.front_crc = cpu_to_le32(crc); if (m->middle) { @@ -1288,6 +1312,7 @@ static void prepare_write_message(struct ceph_connection *con) dout("%s front_crc %u middle_crc %u\n", __func__, le32_to_cpu(con->out_msg->footer.front_crc), le32_to_cpu(con->out_msg->footer.middle_crc)); + con->out_msg->footer.flags = 0; /* is there a data payload? */ con->out_msg->footer.data_crc = 0; @@ -1492,7 +1517,6 @@ static int write_partial_kvec(struct ceph_connection *con) } } con->out_kvec_left = 0; - con->out_kvec_is_msg = false; ret = 1; out: dout("write_partial_kvec %p %d left in %d kvecs ret = %d\n", con, @@ -1584,6 +1608,7 @@ static int write_partial_skip(struct ceph_connection *con) { int ret; + dout("%s %p %d left\n", __func__, con, con->out_skip); while (con->out_skip > 0) { size_t size = min(con->out_skip, (int) PAGE_CACHE_SIZE); @@ -2506,13 +2531,13 @@ more: more_kvec: /* kvec data queued? */ - if (con->out_skip) { - ret = write_partial_skip(con); + if (con->out_kvec_left) { + ret = write_partial_kvec(con); if (ret <= 0) goto out; } - if (con->out_kvec_left) { - ret = write_partial_kvec(con); + if (con->out_skip) { + ret = write_partial_skip(con); if (ret <= 0) goto out; } @@ -3050,16 +3075,31 @@ void ceph_msg_revoke(struct ceph_msg *msg) ceph_msg_put(msg); } if (con->out_msg == msg) { - dout("%s %p msg %p - was sending\n", __func__, con, msg); - con->out_msg = NULL; - if (con->out_kvec_is_msg) { - con->out_skip = con->out_kvec_bytes; - con->out_kvec_is_msg = false; + BUG_ON(con->out_skip); + /* footer */ + if (con->out_msg_done) { + con->out_skip += con_out_kvec_skip(con); + } else { + BUG_ON(!msg->data_length); + if (con->peer_features & CEPH_FEATURE_MSG_AUTH) + con->out_skip += sizeof(msg->footer); + else + con->out_skip += sizeof(msg->old_footer); } - msg->hdr.seq = 0; + /* data, middle, front */ + if (msg->data_length) + con->out_skip += msg->cursor.total_resid; + if (msg->middle) + con->out_skip += con_out_kvec_skip(con); + con->out_skip += con_out_kvec_skip(con); + dout("%s %p msg %p - was sending, will write %d skip %d\n", + __func__, con, msg, con->out_kvec_bytes, con->out_skip); + msg->hdr.seq = 0; + con->out_msg = NULL; ceph_msg_put(msg); } + mutex_unlock(&con->mutex); } From 50c6a283a713c62e6430e6dcc27ecaa91c46ba80 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 17 Feb 2016 20:04:08 +0100 Subject: [PATCH 264/418] libceph: don't bail early from try_read() when skipping a message commit e7a88e82fe380459b864e05b372638aeacb0f52d upstream. The contract between try_read() and try_write() is that when called each processes as much data as possible. When instructed by osd_client to skip a message, try_read() is violating this contract by returning after receiving and discarding a single message instead of checking for more. try_write() then gets a chance to write out more requests, generating more replies/skips for try_read() to handle, forcing the messenger into a starvation loop. Reported-by: Varada Kari Signed-off-by: Ilya Dryomov Tested-by: Varada Kari Reviewed-by: Alex Elder Signed-off-by: Greg Kroah-Hartman --- net/ceph/messenger.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index a145180150ef..93e3083f4006 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -2340,7 +2340,7 @@ static int read_partial_message(struct ceph_connection *con) con->in_base_pos = -front_len - middle_len - data_len - sizeof(m->footer); con->in_tag = CEPH_MSGR_TAG_READY; - return 0; + return 1; } else if ((s64)seq - (s64)con->in_seq > 1) { pr_err("read_partial_message bad seq %lld expected %lld\n", seq, con->in_seq + 1); @@ -2366,7 +2366,7 @@ static int read_partial_message(struct ceph_connection *con) sizeof(m->footer); con->in_tag = CEPH_MSGR_TAG_READY; con->in_seq++; - return 0; + return 1; } BUG_ON(!con->in_msg); From 10dada9dad8fbc36840ef5266419bb0fce5945a0 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 19 Feb 2016 11:38:57 +0100 Subject: [PATCH 265/418] libceph: use the right footer size when skipping a message commit dbc0d3caff5b7591e0cf8e34ca686ca6f4479ee1 upstream. ceph_msg_footer is 21 bytes long, while ceph_msg_footer_old is only 13. Don't skip too much when CEPH_FEATURE_MSG_AUTH isn't negotiated. Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder Signed-off-by: Greg Kroah-Hartman --- net/ceph/messenger.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 93e3083f4006..63ae5dd24fc5 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -1200,6 +1200,13 @@ static bool ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor, return new_piece; } +static size_t sizeof_footer(struct ceph_connection *con) +{ + return (con->peer_features & CEPH_FEATURE_MSG_AUTH) ? + sizeof(struct ceph_msg_footer) : + sizeof(struct ceph_msg_footer_old); +} + static void prepare_message_data(struct ceph_msg *msg, u32 data_len) { BUG_ON(!msg); @@ -2338,7 +2345,7 @@ static int read_partial_message(struct ceph_connection *con) ceph_pr_addr(&con->peer_addr.in_addr), seq, con->in_seq + 1); con->in_base_pos = -front_len - middle_len - data_len - - sizeof(m->footer); + sizeof_footer(con); con->in_tag = CEPH_MSGR_TAG_READY; return 1; } else if ((s64)seq - (s64)con->in_seq > 1) { @@ -2363,7 +2370,7 @@ static int read_partial_message(struct ceph_connection *con) /* skip this message */ dout("alloc_msg said skip message\n"); con->in_base_pos = -front_len - middle_len - data_len - - sizeof(m->footer); + sizeof_footer(con); con->in_tag = CEPH_MSGR_TAG_READY; con->in_seq++; return 1; From 01c3c0f921c8a2743e3d066108081c14618ee98c Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 19 Feb 2016 11:38:57 +0100 Subject: [PATCH 266/418] libceph: don't spam dmesg with stray reply warnings commit cd8140c673d9ba9be3591220e1b2226d9e1e40d3 upstream. Commit d15f9d694b77 ("libceph: check data_len in ->alloc_msg()") mistakenly bumped the log level on the "tid %llu unknown, skipping" message. Turn it back into a dout() - stray replies are perfectly normal when OSDs flap, crash, get killed for testing purposes, etc. Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder Signed-off-by: Greg Kroah-Hartman --- net/ceph/osd_client.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index f8f235930d88..a28e47ff1b1b 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -2843,8 +2843,8 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, mutex_lock(&osdc->request_mutex); req = __lookup_request(osdc, tid); if (!req) { - pr_warn("%s osd%d tid %llu unknown, skipping\n", - __func__, osd->o_osd, tid); + dout("%s osd%d tid %llu unknown, skipping\n", __func__, + osd->o_osd, tid); m = NULL; *skip = 1; goto out; From f313f1d809fb055eb9707e6dfd2c77fb4d9cc4ee Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 20 Jan 2016 11:01:23 -0500 Subject: [PATCH 267/418] sd: Optimal I/O size is in bytes, not sectors commit d0eb20a863ba7dc1d3f4b841639671f134560be2 upstream. Commit ca369d51b3e1 ("block/sd: Fix device-imposed transfer length limits") accidentally switched optimal I/O size reporting from bytes to block layer sectors. Signed-off-by: Martin K. Petersen Reported-by: Christian Borntraeger Tested-by: Christian Borntraeger Fixes: ca369d51b3e1649be4a72addd6d6a168cfb3f537 Reviewed-by: James E.J. Bottomley Reviewed-by: Ewan D. Milne Reviewed-by: Matthew R. Ochs Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/sd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 84fa4c46eaa6..bb669d32ccd0 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2893,7 +2893,7 @@ static int sd_revalidate_disk(struct gendisk *disk) sdkp->opt_xfer_blocks <= SD_DEF_XFER_BLOCKS && sdkp->opt_xfer_blocks * sdp->sector_size >= PAGE_CACHE_SIZE) rw_max = q->limits.io_opt = - logical_to_sectors(sdp, sdkp->opt_xfer_blocks); + sdkp->opt_xfer_blocks * sdp->sector_size; else rw_max = BLK_DEF_MAX_SECTORS; From 16d9f52201f330b4438467724a0b0691dcf45cc3 Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Fri, 15 Jan 2016 00:47:41 +0100 Subject: [PATCH 268/418] Staging: speakup: Fix getting port information commit 327b882d3bcc1fba82dbd39b5cf5a838c81218e2 upstream. Commit f79b0d9c223c ("staging: speakup: Fixed warning instead of ") broke the port information in the speakup driver: SERIAL_PORT_DFNS only gets defined if asm/serial.h is included, and no other header includes asm/serial.h. We here make sure serialio.c does get the arch-specific definition of SERIAL_PORT_DFNS from asm/serial.h, if any. Along the way, this makes sure that we do have information for the requested serial port number (index) Fixes: f79b0d9c223c ("staging: speakup: Fixed warning instead of ") Signed-off-by: Samuel Thibault Signed-off-by: Greg Kroah-Hartman --- drivers/staging/speakup/serialio.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/staging/speakup/serialio.c b/drivers/staging/speakup/serialio.c index 3b5835b28128..a5bbb338f275 100644 --- a/drivers/staging/speakup/serialio.c +++ b/drivers/staging/speakup/serialio.c @@ -6,6 +6,11 @@ #include "spk_priv.h" #include "serialio.h" +#include +/* WARNING: Do not change this to without testing that + * SERIAL_PORT_DFNS does get defined to the appropriate value. */ +#include + #ifndef SERIAL_PORT_DFNS #define SERIAL_PORT_DFNS #endif @@ -23,9 +28,15 @@ const struct old_serial_port *spk_serial_init(int index) int baud = 9600, quot = 0; unsigned int cval = 0; int cflag = CREAD | HUPCL | CLOCAL | B9600 | CS8; - const struct old_serial_port *ser = rs_table + index; + const struct old_serial_port *ser; int err; + if (index >= ARRAY_SIZE(rs_table)) { + pr_info("no port info for ttyS%d\n", index); + return NULL; + } + ser = rs_table + index; + /* Divisor, bytesize and parity */ quot = ser->baud_base / baud; cval = cflag & (CSIZE | CSTOPB); From f021f05c1223ce0424d9bd9bddb013aa68b88801 Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Sat, 23 Jan 2016 14:49:20 +0530 Subject: [PATCH 269/418] Revert "Staging: panel: usleep_range is preferred over udelay" commit b64a1cbef6df47c986ad622b5b2e4d3d1940070c upstream. This reverts commit ebd43516d3879f882a403836bba8bc5791f26a28. We should not be sleeping inside spin_lock. Fixes: ebd43516d387 ("Staging: panel: usleep_range is preferred over udelay") Cc: Sirnam Swetha Signed-off-by: Sudip Mukherjee Reported-by: Huang, Ying Tested-by: Huang, Ying Signed-off-by: Greg Kroah-Hartman --- drivers/staging/panel/panel.c | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/drivers/staging/panel/panel.c b/drivers/staging/panel/panel.c index 79ac19246548..70b8f4fabfad 100644 --- a/drivers/staging/panel/panel.c +++ b/drivers/staging/panel/panel.c @@ -825,8 +825,7 @@ static void lcd_write_cmd_s(int cmd) lcd_send_serial(0x1F); /* R/W=W, RS=0 */ lcd_send_serial(cmd & 0x0F); lcd_send_serial((cmd >> 4) & 0x0F); - /* the shortest command takes at least 40 us */ - usleep_range(40, 100); + udelay(40); /* the shortest command takes at least 40 us */ spin_unlock_irq(&pprt_lock); } @@ -837,8 +836,7 @@ static void lcd_write_data_s(int data) lcd_send_serial(0x5F); /* R/W=W, RS=1 */ lcd_send_serial(data & 0x0F); lcd_send_serial((data >> 4) & 0x0F); - /* the shortest data takes at least 40 us */ - usleep_range(40, 100); + udelay(40); /* the shortest data takes at least 40 us */ spin_unlock_irq(&pprt_lock); } @@ -848,20 +846,19 @@ static void lcd_write_cmd_p8(int cmd) spin_lock_irq(&pprt_lock); /* present the data to the data port */ w_dtr(pprt, cmd); - /* maintain the data during 20 us before the strobe */ - usleep_range(20, 100); + udelay(20); /* maintain the data during 20 us before the strobe */ bits.e = BIT_SET; bits.rs = BIT_CLR; bits.rw = BIT_CLR; set_ctrl_bits(); - usleep_range(40, 100); /* maintain the strobe during 40 us */ + udelay(40); /* maintain the strobe during 40 us */ bits.e = BIT_CLR; set_ctrl_bits(); - usleep_range(120, 500); /* the shortest command takes at least 120 us */ + udelay(120); /* the shortest command takes at least 120 us */ spin_unlock_irq(&pprt_lock); } @@ -871,20 +868,19 @@ static void lcd_write_data_p8(int data) spin_lock_irq(&pprt_lock); /* present the data to the data port */ w_dtr(pprt, data); - /* maintain the data during 20 us before the strobe */ - usleep_range(20, 100); + udelay(20); /* maintain the data during 20 us before the strobe */ bits.e = BIT_SET; bits.rs = BIT_SET; bits.rw = BIT_CLR; set_ctrl_bits(); - usleep_range(40, 100); /* maintain the strobe during 40 us */ + udelay(40); /* maintain the strobe during 40 us */ bits.e = BIT_CLR; set_ctrl_bits(); - usleep_range(45, 100); /* the shortest data takes at least 45 us */ + udelay(45); /* the shortest data takes at least 45 us */ spin_unlock_irq(&pprt_lock); } @@ -894,7 +890,7 @@ static void lcd_write_cmd_tilcd(int cmd) spin_lock_irq(&pprt_lock); /* present the data to the control port */ w_ctr(pprt, cmd); - usleep_range(60, 120); + udelay(60); spin_unlock_irq(&pprt_lock); } @@ -904,7 +900,7 @@ static void lcd_write_data_tilcd(int data) spin_lock_irq(&pprt_lock); /* present the data to the data port */ w_dtr(pprt, data); - usleep_range(60, 120); + udelay(60); spin_unlock_irq(&pprt_lock); } @@ -947,7 +943,7 @@ static void lcd_clear_fast_s(void) lcd_send_serial(0x5F); /* R/W=W, RS=1 */ lcd_send_serial(' ' & 0x0F); lcd_send_serial((' ' >> 4) & 0x0F); - usleep_range(40, 100); /* the shortest data takes at least 40 us */ + udelay(40); /* the shortest data takes at least 40 us */ } spin_unlock_irq(&pprt_lock); @@ -971,7 +967,7 @@ static void lcd_clear_fast_p8(void) w_dtr(pprt, ' '); /* maintain the data during 20 us before the strobe */ - usleep_range(20, 100); + udelay(20); bits.e = BIT_SET; bits.rs = BIT_SET; @@ -979,13 +975,13 @@ static void lcd_clear_fast_p8(void) set_ctrl_bits(); /* maintain the strobe during 40 us */ - usleep_range(40, 100); + udelay(40); bits.e = BIT_CLR; set_ctrl_bits(); /* the shortest data takes at least 45 us */ - usleep_range(45, 100); + udelay(45); } spin_unlock_irq(&pprt_lock); @@ -1007,7 +1003,7 @@ static void lcd_clear_fast_tilcd(void) for (pos = 0; pos < lcd.height * lcd.hwidth; pos++) { /* present the data to the data port */ w_dtr(pprt, ' '); - usleep_range(60, 120); + udelay(60); } spin_unlock_irq(&pprt_lock); From 19964740ebb3f69fb65a8e10dadb4863dd8a8dfe Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Mon, 18 Jan 2016 15:45:18 +0100 Subject: [PATCH 270/418] cdc-acm:exclude Samsung phone 04e8:685d commit e912e685f372ab62a2405a1acd923597f524e94a upstream. This phone needs to be handled by a specialised firmware tool and is reported to crash irrevocably if cdc-acm takes it. Signed-off-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index e4c70dce3e7c..fa4e23930614 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1841,6 +1841,11 @@ static const struct usb_device_id acm_ids[] = { }, #endif + /*Samsung phone in firmware update mode */ + { USB_DEVICE(0x04e8, 0x685d), + .driver_info = IGNORE_DEVICE, + }, + /* Exclude Infineon Flash Loader utility */ { USB_DEVICE(0x058b, 0x0041), .driver_info = IGNORE_DEVICE, From e43193617315070ca9a6ca55794760e328fe4e9b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 20 Jan 2016 12:56:34 +0100 Subject: [PATCH 271/418] perf stat: Do not clean event's private stats commit 3f416f22d1e21709a631189ba169f76fd267b374 upstream. Mel reported stddev reporting was broken due to following commit: 106a94a0f8c2 ("perf stat: Introduce read_counters function") This commit merged interval and overall counters reading into single read_counters function. The old interval code cleaned the stddev data for some reason (it's never displayed in interval mode) and the mentioned commit kept on cleaning the stddev data in merged function, which resulted in the stddev not being displayed. Removing the wrong stddev data cleanup init_stats call. Reported-and-Tested-by: Mel Gorman Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Fixes: 106a94a0f8c2 ("perf stat: Introduce read_counters function") Link: http://lkml.kernel.org/r/1453290995-18485-4-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/stat.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 2d9d8306dbd3..4a3a72cb5805 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -310,7 +310,6 @@ int perf_stat_process_counter(struct perf_stat_config *config, int i, ret; aggr->val = aggr->ena = aggr->run = 0; - init_stats(ps->res_stats); if (counter->per_pkg) zero_per_pkg(counter); From d024d46ec5203664fac872a20f7d75125db952da Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Wed, 27 Jan 2016 19:26:07 +0800 Subject: [PATCH 272/418] tick/nohz: Set the correct expiry when switching to nohz/lowres mode commit 1ca8ec532fc2d986f1f4a319857bb18e0c9739b4 upstream. commit 0ff53d096422 sets the next tick interrupt to the last jiffies update, i.e. in the past, because the forward operation is invoked before the set operation. There is no resulting damage (yet), but we get an extra pointless tick interrupt. Revert the order so we get the next tick interrupt in the future. Fixes: commit 0ff53d096422 "tick: sched: Force tick interrupt and get rid of softirq magic" Signed-off-by: Wanpeng Li Cc: Peter Zijlstra Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/1453893967-3458-1-git-send-email-wanpeng.li@hotmail.com Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- kernel/time/tick-sched.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 7c7ec4515983..22c57e191a23 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -977,9 +977,9 @@ static void tick_nohz_switch_to_nohz(void) /* Get the next period */ next = tick_init_jiffy_update(); - hrtimer_forward_now(&ts->sched_timer, tick_period); hrtimer_set_expires(&ts->sched_timer, next); - tick_program_event(next, 1); + hrtimer_forward_now(&ts->sched_timer, tick_period); + tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1); tick_nohz_activate(ts, NOHZ_MODE_LOWRES); } From 33bf18e4aa06f4b31eb21c1f77d4286f49dfc72e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 26 Jan 2016 11:29:03 +0100 Subject: [PATCH 273/418] rfkill: fix rfkill_fop_read wait_event usage commit 6736fde9672ff6717ac576e9bba2fd5f3dfec822 upstream. The code within wait_event_interruptible() is called with !TASK_RUNNING, so mustn't call any functions that can sleep, like mutex_lock(). Since we re-check the list_empty() in a loop after the wait, it's safe to simply use list_empty() without locking. This bug has existed forever, but was only discovered now because all userspace implementations, including the default 'rfkill' tool, use poll() or select() to get a readable fd before attempting to read. Fixes: c64fb01627e24 ("rfkill: create useful userspace interface") Reported-by: Dmitry Vyukov Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/rfkill/core.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/net/rfkill/core.c b/net/rfkill/core.c index f53bf3b6558b..cf5b69ab1829 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -1095,17 +1095,6 @@ static unsigned int rfkill_fop_poll(struct file *file, poll_table *wait) return res; } -static bool rfkill_readable(struct rfkill_data *data) -{ - bool r; - - mutex_lock(&data->mtx); - r = !list_empty(&data->events); - mutex_unlock(&data->mtx); - - return r; -} - static ssize_t rfkill_fop_read(struct file *file, char __user *buf, size_t count, loff_t *pos) { @@ -1122,8 +1111,11 @@ static ssize_t rfkill_fop_read(struct file *file, char __user *buf, goto out; } mutex_unlock(&data->mtx); + /* since we re-check and it just compares pointers, + * using !list_empty() without locking isn't a problem + */ ret = wait_event_interruptible(data->read_wait, - rfkill_readable(data)); + !list_empty(&data->events)); mutex_lock(&data->mtx); if (ret) From 81bb655358726b0abf888c54d33135dae3a07c12 Mon Sep 17 00:00:00 2001 From: Sachin Kulkarni Date: Tue, 12 Jan 2016 14:30:19 +0530 Subject: [PATCH 274/418] mac80211: Requeue work after scan complete for all VIF types. commit 4fa11ec726a32ea6dd768dbb2e2af3453a98ec0a upstream. During a sw scan ieee80211_iface_work ignores work items for all vifs. However after the scan complete work is requeued only for STA, ADHOC and MESH iftypes. This occasionally results in event processing getting delayed/not processed for iftype AP when it coexists with a STA. This can result in data halt and eventually disconnection on the AP interface. Signed-off-by: Sachin Kulkarni Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/ibss.c | 1 - net/mac80211/mesh.c | 11 ----------- net/mac80211/mesh.h | 4 ---- net/mac80211/mlme.c | 2 -- net/mac80211/scan.c | 12 +++++++++++- 5 files changed, 11 insertions(+), 19 deletions(-) diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 337bb5d78003..6a12b0f5cac8 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -1732,7 +1732,6 @@ void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local) if (sdata->vif.type != NL80211_IFTYPE_ADHOC) continue; sdata->u.ibss.last_scan_completed = jiffies; - ieee80211_queue_work(&local->hw, &sdata->work); } mutex_unlock(&local->iflist_mtx); } diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index fa28500f28fd..6f85b6ab8e51 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -1370,17 +1370,6 @@ out: sdata_unlock(sdata); } -void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local) -{ - struct ieee80211_sub_if_data *sdata; - - rcu_read_lock(); - list_for_each_entry_rcu(sdata, &local->interfaces, list) - if (ieee80211_vif_is_mesh(&sdata->vif) && - ieee80211_sdata_running(sdata)) - ieee80211_queue_work(&local->hw, &sdata->work); - rcu_read_unlock(); -} void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata) { diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index a1596344c3ba..4a8019f79fb2 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -362,14 +362,10 @@ static inline bool mesh_path_sel_is_hwmp(struct ieee80211_sub_if_data *sdata) return sdata->u.mesh.mesh_pp_id == IEEE80211_PATH_PROTOCOL_HWMP; } -void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local); - void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata); void mesh_sync_adjust_tbtt(struct ieee80211_sub_if_data *sdata); void ieee80211s_stop(void); #else -static inline void -ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local) {} static inline bool mesh_path_sel_is_hwmp(struct ieee80211_sub_if_data *sdata) { return false; } static inline void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 3aa04344942b..83097c3832d1 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -4003,8 +4003,6 @@ static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata) if (!ieee80211_hw_check(&sdata->local->hw, CONNECTION_MONITOR)) ieee80211_queue_work(&sdata->local->hw, &sdata->u.mgd.monitor_work); - /* and do all the other regular work too */ - ieee80211_queue_work(&sdata->local->hw, &sdata->work); } } diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index a413e52f7691..acbe182b75d1 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -314,6 +314,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) bool was_scanning = local->scanning; struct cfg80211_scan_request *scan_req; struct ieee80211_sub_if_data *scan_sdata; + struct ieee80211_sub_if_data *sdata; lockdep_assert_held(&local->mtx); @@ -373,7 +374,16 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) ieee80211_mlme_notify_scan_completed(local); ieee80211_ibss_notify_scan_completed(local); - ieee80211_mesh_notify_scan_completed(local); + + /* Requeue all the work that might have been ignored while + * the scan was in progress; if there was none this will + * just be a no-op for the particular interface. + */ + list_for_each_entry_rcu(sdata, &local->interfaces, list) { + if (ieee80211_sdata_running(sdata)) + ieee80211_queue_work(&sdata->local->hw, &sdata->work); + } + if (was_scanning) ieee80211_start_next_roc(local); } From 21b34b4574f8619907eb751b37c1831bfa3f2440 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 3 Feb 2016 13:54:25 -0500 Subject: [PATCH 275/418] workqueue: handle NUMA_NO_NODE for unbound pool_workqueue lookup commit d6e022f1d207a161cd88e08ef0371554680ffc46 upstream. When looking up the pool_workqueue to use for an unbound workqueue, workqueue assumes that the target CPU is always bound to a valid NUMA node. However, currently, when a CPU goes offline, the mapping is destroyed and cpu_to_node() returns NUMA_NO_NODE. This has always been broken but hasn't triggered often enough before 874bbfe600a6 ("workqueue: make sure delayed work run in local cpu"). After the commit, workqueue forcifully assigns the local CPU for delayed work items without explicit target CPU to fix a different issue. This widens the window where CPU can go offline while a delayed work item is pending causing delayed work items dispatched with target CPU set to an already offlined CPU. The resulting NUMA_NO_NODE mapping makes workqueue try to queue the work item on a NULL pool_workqueue and thus crash. While 874bbfe600a6 has been reverted for a different reason making the bug less visible again, it can still happen. Fix it by mapping NUMA_NO_NODE to the default pool_workqueue from unbound_pwq_by_node(). This is a temporary workaround. The long term solution is keeping CPU -> NODE mapping stable across CPU off/online cycles which is being worked on. Signed-off-by: Tejun Heo Reported-by: Mike Galbraith Cc: Tang Chen Cc: Rafael J. Wysocki Cc: Len Brown Link: http://lkml.kernel.org/g/1454424264.11183.46.camel@gmail.com Link: http://lkml.kernel.org/g/1453702100-2597-1-git-send-email-tangchen@cn.fujitsu.com Signed-off-by: Greg Kroah-Hartman --- kernel/workqueue.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index c579dbab2e36..ad98ea967323 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -568,6 +568,16 @@ static struct pool_workqueue *unbound_pwq_by_node(struct workqueue_struct *wq, int node) { assert_rcu_or_wq_mutex_or_pool_mutex(wq); + + /* + * XXX: @node can be NUMA_NO_NODE if CPU goes offline while a + * delayed item is pending. The plan is to keep CPU -> NODE + * mapping valid and stable across CPU on/offlines. Once that + * happens, this workaround can be removed. + */ + if (unlikely(node == NUMA_NO_NODE)) + return wq->dfl_pwq; + return rcu_dereference_raw(wq->numa_pwq_tbl[node]); } From 6684710434d07dfd7e512e5ecc77eefb5a30151e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 9 Feb 2016 16:11:26 -0500 Subject: [PATCH 276/418] Revert "workqueue: make sure delayed work run in local cpu" commit 041bd12e272c53a35c54c13875839bcb98c999ce upstream. This reverts commit 874bbfe600a660cba9c776b3957b1ce393151b76. Workqueue used to implicity guarantee that work items queued without explicit CPU specified are put on the local CPU. Recent changes in timer broke the guarantee and led to vmstat breakage which was fixed by 176bed1de5bf ("vmstat: explicitly schedule per-cpu work on the CPU we need it to run on"). vmstat is the most likely to expose the issue and it's quite possible that there are other similar problems which are a lot more difficult to trigger. As a preventive measure, 874bbfe600a6 ("workqueue: make sure delayed work run in local cpu") was applied to restore the local CPU guarnatee. Unfortunately, the change exposed a bug in timer code which got fixed by 22b886dd1018 ("timers: Use proper base migration in add_timer_on()"). Due to code restructuring, the commit couldn't be backported beyond certain point and stable kernels which only had 874bbfe600a6 started crashing. The local CPU guarantee was accidental more than anything else and we want to get rid of it anyway. As, with the vmstat case fixed, 874bbfe600a6 is causing more problems than it's fixing, it has been decided to take the chance and officially break the guarantee by reverting the commit. A debug feature will be added to force foreign CPU assignment to expose cases relying on the guarantee and fixes for the individual cases will be backported to stable as necessary. Signed-off-by: Tejun Heo Fixes: 874bbfe600a6 ("workqueue: make sure delayed work run in local cpu") Link: http://lkml.kernel.org/g/20160120211926.GJ10810@quack.suse.cz Cc: Mike Galbraith Cc: Henrique de Moraes Holschuh Cc: Daniel Bilik Cc: Jan Kara Cc: Shaohua Li Cc: Sasha Levin Cc: Ben Hutchings Cc: Thomas Gleixner Cc: Daniel Bilik Cc: Jiri Slaby Cc: Michal Hocko Signed-off-by: Greg Kroah-Hartman --- kernel/workqueue.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index ad98ea967323..450c21fd0e6e 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1468,13 +1468,13 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq, timer_stats_timer_set_start_info(&dwork->timer); dwork->wq = wq; - /* timer isn't guaranteed to run in this cpu, record earlier */ - if (cpu == WORK_CPU_UNBOUND) - cpu = raw_smp_processor_id(); dwork->cpu = cpu; timer->expires = jiffies + delay; - add_timer_on(timer, cpu); + if (unlikely(cpu != WORK_CPU_UNBOUND)) + add_timer_on(timer, cpu); + else + add_timer(timer); } /** From 1436e689caee2b92ea30813d587598afea1d0e6b Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 22 Feb 2016 15:18:13 +0100 Subject: [PATCH 277/418] ALSA: hda - Apply clock gate workaround to Skylake, too commit 7e31a0159461818a1bda49662921b98a29c1187b upstream. Some Skylake machines show the codec probe errors in certain situations, e.g. HP Z240 desktop fails to probe the onboard Realtek codec at reloading the snd-hda-intel module like: snd_hda_intel 0000:00:1f.3: spurious response 0x200:0x2, last cmd=0x000000 snd_hda_intel 0000:00:1f.3: azx_get_response timeout, switching to polling mode: lastcmd=0x000f0000 snd_hda_intel 0000:00:1f.3: No response from codec, disabling MSI: last cmd=0x000f0000 snd_hda_intel 0000:00:1f.3: Codec #0 probe error; disabling it... hdaudio hdaudioC0D2: no AFG or MFG node found snd_hda_intel 0000:00:1f.3: no codecs initialized Also, HP G470 G3 suffers from the similar problem, as reported in bugzilla below. On this machine, the codec probe error appears even at a fresh boot. As Libin suggested, the same workaround used for Broxton in the commit [6639484ddaf6: ALSA: hda - disable dynamic clock gating on Broxton before reset] can be applied for Skylake in order to fix this problem. The Intel HW team also confirmed that this is needed for SKL. This patch makes the workaround applied to both SKL and BXT platforms. The referred macros are moved and one superfluous macro (IS_BROXTON()) is another one (IS_BXT()) as well. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=112731 Suggested-by: Libin Yang Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_intel.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 2c13298e80b7..2ff692dd2c5f 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -357,7 +357,10 @@ enum { ((pci)->device == 0x0d0c) || \ ((pci)->device == 0x160c)) -#define IS_BROXTON(pci) ((pci)->device == 0x5a98) +#define IS_SKL(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa170) +#define IS_SKL_LP(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x9d70) +#define IS_BXT(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x5a98) +#define IS_SKL_PLUS(pci) (IS_SKL(pci) || IS_SKL_LP(pci) || IS_BXT(pci)) static char *driver_short_names[] = { [AZX_DRIVER_ICH] = "HDA Intel", @@ -534,13 +537,13 @@ static void hda_intel_init_chip(struct azx *chip, bool full_reset) if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL) snd_hdac_set_codec_wakeup(bus, true); - if (IS_BROXTON(pci)) { + if (IS_SKL_PLUS(pci)) { pci_read_config_dword(pci, INTEL_HDA_CGCTL, &val); val = val & ~INTEL_HDA_CGCTL_MISCBDCGE; pci_write_config_dword(pci, INTEL_HDA_CGCTL, val); } azx_init_chip(chip, full_reset); - if (IS_BROXTON(pci)) { + if (IS_SKL_PLUS(pci)) { pci_read_config_dword(pci, INTEL_HDA_CGCTL, &val); val = val | INTEL_HDA_CGCTL_MISCBDCGE; pci_write_config_dword(pci, INTEL_HDA_CGCTL, val); @@ -549,7 +552,7 @@ static void hda_intel_init_chip(struct azx *chip, bool full_reset) snd_hdac_set_codec_wakeup(bus, false); /* reduce dma latency to avoid noise */ - if (IS_BROXTON(pci)) + if (IS_BXT(pci)) bxt_reduce_dma_latency(chip); } @@ -971,11 +974,6 @@ static int azx_resume(struct device *dev) /* put codec down to D3 at hibernation for Intel SKL+; * otherwise BIOS may still access the codec and screw up the driver */ -#define IS_SKL(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa170) -#define IS_SKL_LP(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x9d70) -#define IS_BXT(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x5a98) -#define IS_SKL_PLUS(pci) (IS_SKL(pci) || IS_SKL_LP(pci) || IS_BXT(pci)) - static int azx_freeze_noirq(struct device *dev) { struct pci_dev *pci = to_pci_dev(dev); From 7cb32ae09a6490c27bc3c110ee42d808a5670142 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Thu, 25 Feb 2016 15:19:38 +0800 Subject: [PATCH 278/418] ALSA: hda - Fixing background noise on Dell Inspiron 3162 commit 3b43b71f05d3ecd01c4116254666d9492301697d upstream. After login to the desktop on Dell Inspiron 3162, there's a very loud background noise comes from the builtin speaker. The noise does not go away even if the speaker is muted. The noise disappears after using the aamix fixup. Codec: Realtek ALC3234 Address: 0 AFG Function Id: 0x1 (unsol 1) Vendor Id: 0x10ec0255 Subsystem Id: 0x10280725 Revision Id: 0x100002 No Modem Function Group found BugLink: http://bugs.launchpad.net/bugs/1549620 Signed-off-by: Kai-Heng Feng Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index efd4980cffb8..72fa58dd7723 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4749,6 +4749,7 @@ enum { ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE, ALC293_FIXUP_LENOVO_SPK_NOISE, ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY, + ALC255_FIXUP_DELL_SPK_NOISE, }; static const struct hda_fixup alc269_fixups[] = { @@ -5368,6 +5369,12 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc233_fixup_lenovo_line2_mic_hotkey, }, + [ALC255_FIXUP_DELL_SPK_NOISE] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc_fixup_disable_aamix, + .chained = true, + .chain_id = ALC255_FIXUP_DELL1_MIC_NO_PRESENCE + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -5410,6 +5417,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x06df, "Dell", ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK), SND_PCI_QUIRK(0x1028, 0x06e0, "Dell", ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK), SND_PCI_QUIRK(0x1028, 0x0704, "Dell XPS 13", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE), + SND_PCI_QUIRK(0x1028, 0x0725, "Dell Inspiron 3162", ALC255_FIXUP_DELL_SPK_NOISE), SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2), From 3b493f9f06d19056669fe94b0933aaea06f3fee6 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Mon, 11 Jan 2016 21:31:09 -0800 Subject: [PATCH 279/418] target: Fix LUN_RESET active I/O handling for ACK_KREF commit febe562c20dfa8f33bee7d419c6b517986a5aa33 upstream. This patch fixes a NULL pointer se_cmd->cmd_kref < 0 refcount bug during TMR LUN_RESET with active se_cmd I/O, that can be triggered during se_cmd descriptor shutdown + release via core_tmr_drain_state_list() code. To address this bug, add common __target_check_io_state() helper for ABORT_TASK + LUN_RESET w/ CMD_T_COMPLETE checking, and set CMD_T_ABORTED + obtain ->cmd_kref for both cases ahead of last target_put_sess_cmd() after TFO->aborted_task() -> transport_cmd_finish_abort() callback has completed. It also introduces SCF_ACK_KREF to determine when transport_cmd_finish_abort() needs to drop the second extra reference, ahead of calling target_put_sess_cmd() for the final kref_put(&se_cmd->cmd_kref). It also updates transport_cmd_check_stop() to avoid holding se_cmd->t_state_lock while dropping se_cmd device state via target_remove_from_state_list(), now that core_tmr_drain_state_list() is holding the se_device lock while checking se_cmd state from within TMR logic. Finally, move transport_put_cmd() release of SGL + TMR + extended CDB memory into target_free_cmd_mem() in order to avoid potential resource leaks in TMR ABORT_TASK + LUN_RESET code-paths. Also update target_release_cmd_kref() accordingly. Reviewed-by: Quinn Tran Cc: Himanshu Madhani Cc: Sagi Grimberg Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Andy Grover Cc: Mike Christie Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/target/target_core_tmr.c | 69 ++++++++++++++++--------- drivers/target/target_core_transport.c | 71 +++++++++++--------------- include/target/target_core_base.h | 1 + 3 files changed, 78 insertions(+), 63 deletions(-) diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c index 28fb3016370f..6545df43c9a9 100644 --- a/drivers/target/target_core_tmr.c +++ b/drivers/target/target_core_tmr.c @@ -107,6 +107,34 @@ static int target_check_cdb_and_preempt(struct list_head *list, return 1; } +static bool __target_check_io_state(struct se_cmd *se_cmd) +{ + struct se_session *sess = se_cmd->se_sess; + + assert_spin_locked(&sess->sess_cmd_lock); + WARN_ON_ONCE(!irqs_disabled()); + /* + * If command already reached CMD_T_COMPLETE state within + * target_complete_cmd(), this se_cmd has been passed to + * fabric driver and will not be aborted. + * + * Otherwise, obtain a local se_cmd->cmd_kref now for TMR + * ABORT_TASK + LUN_RESET for CMD_T_ABORTED processing as + * long as se_cmd->cmd_kref is still active unless zero. + */ + spin_lock(&se_cmd->t_state_lock); + if (se_cmd->transport_state & CMD_T_COMPLETE) { + pr_debug("Attempted to abort io tag: %llu already complete," + " skipping\n", se_cmd->tag); + spin_unlock(&se_cmd->t_state_lock); + return false; + } + se_cmd->transport_state |= CMD_T_ABORTED; + spin_unlock(&se_cmd->t_state_lock); + + return kref_get_unless_zero(&se_cmd->cmd_kref); +} + void core_tmr_abort_task( struct se_device *dev, struct se_tmr_req *tmr, @@ -130,34 +158,22 @@ void core_tmr_abort_task( if (tmr->ref_task_tag != ref_tag) continue; - if (!kref_get_unless_zero(&se_cmd->cmd_kref)) - continue; - printk("ABORT_TASK: Found referenced %s task_tag: %llu\n", se_cmd->se_tfo->get_fabric_name(), ref_tag); - spin_lock(&se_cmd->t_state_lock); - if (se_cmd->transport_state & CMD_T_COMPLETE) { - printk("ABORT_TASK: ref_tag: %llu already complete," - " skipping\n", ref_tag); - spin_unlock(&se_cmd->t_state_lock); + if (!__target_check_io_state(se_cmd)) { spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); - target_put_sess_cmd(se_cmd); - goto out; } - se_cmd->transport_state |= CMD_T_ABORTED; - spin_unlock(&se_cmd->t_state_lock); - list_del_init(&se_cmd->se_cmd_list); spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); cancel_work_sync(&se_cmd->work); transport_wait_for_tasks(se_cmd); - target_put_sess_cmd(se_cmd); transport_cmd_finish_abort(se_cmd, true); + target_put_sess_cmd(se_cmd); printk("ABORT_TASK: Sending TMR_FUNCTION_COMPLETE for" " ref_tag: %llu\n", ref_tag); @@ -242,8 +258,10 @@ static void core_tmr_drain_state_list( struct list_head *preempt_and_abort_list) { LIST_HEAD(drain_task_list); + struct se_session *sess; struct se_cmd *cmd, *next; unsigned long flags; + int rc; /* * Complete outstanding commands with TASK_ABORTED SAM status. @@ -282,6 +300,16 @@ static void core_tmr_drain_state_list( if (prout_cmd == cmd) continue; + sess = cmd->se_sess; + if (WARN_ON_ONCE(!sess)) + continue; + + spin_lock(&sess->sess_cmd_lock); + rc = __target_check_io_state(cmd); + spin_unlock(&sess->sess_cmd_lock); + if (!rc) + continue; + list_move_tail(&cmd->state_list, &drain_task_list); cmd->state_active = false; } @@ -289,7 +317,7 @@ static void core_tmr_drain_state_list( while (!list_empty(&drain_task_list)) { cmd = list_entry(drain_task_list.next, struct se_cmd, state_list); - list_del(&cmd->state_list); + list_del_init(&cmd->state_list); pr_debug("LUN_RESET: %s cmd: %p" " ITT/CmdSN: 0x%08llx/0x%08x, i_state: %d, t_state: %d" @@ -313,16 +341,11 @@ static void core_tmr_drain_state_list( * loop above, but we do it down here given that * cancel_work_sync may block. */ - if (cmd->t_state == TRANSPORT_COMPLETE) - cancel_work_sync(&cmd->work); - - spin_lock_irqsave(&cmd->t_state_lock, flags); - target_stop_cmd(cmd, &flags); - - cmd->transport_state |= CMD_T_ABORTED; - spin_unlock_irqrestore(&cmd->t_state_lock, flags); + cancel_work_sync(&cmd->work); + transport_wait_for_tasks(cmd); core_tmr_handle_tas_abort(tmr_nacl, cmd, tas); + target_put_sess_cmd(cmd); } } diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 4fdcee2006d1..3b549f7ca80f 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -528,9 +528,6 @@ void transport_deregister_session(struct se_session *se_sess) } EXPORT_SYMBOL(transport_deregister_session); -/* - * Called with cmd->t_state_lock held. - */ static void target_remove_from_state_list(struct se_cmd *cmd) { struct se_device *dev = cmd->se_dev; @@ -555,10 +552,6 @@ static int transport_cmd_check_stop(struct se_cmd *cmd, bool remove_from_lists, { unsigned long flags; - spin_lock_irqsave(&cmd->t_state_lock, flags); - if (write_pending) - cmd->t_state = TRANSPORT_WRITE_PENDING; - if (remove_from_lists) { target_remove_from_state_list(cmd); @@ -568,6 +561,10 @@ static int transport_cmd_check_stop(struct se_cmd *cmd, bool remove_from_lists, cmd->se_lun = NULL; } + spin_lock_irqsave(&cmd->t_state_lock, flags); + if (write_pending) + cmd->t_state = TRANSPORT_WRITE_PENDING; + /* * Determine if frontend context caller is requesting the stopping of * this command for frontend exceptions. @@ -621,6 +618,8 @@ static void transport_lun_remove_cmd(struct se_cmd *cmd) void transport_cmd_finish_abort(struct se_cmd *cmd, int remove) { + bool ack_kref = (cmd->se_cmd_flags & SCF_ACK_KREF); + if (cmd->se_cmd_flags & SCF_SE_LUN_CMD) transport_lun_remove_cmd(cmd); /* @@ -632,7 +631,7 @@ void transport_cmd_finish_abort(struct se_cmd *cmd, int remove) if (transport_cmd_check_stop_to_fabric(cmd)) return; - if (remove) + if (remove && ack_kref) transport_put_cmd(cmd); } @@ -700,7 +699,7 @@ void target_complete_cmd(struct se_cmd *cmd, u8 scsi_status) * Check for case where an explicit ABORT_TASK has been received * and transport_wait_for_tasks() will be waiting for completion.. */ - if (cmd->transport_state & CMD_T_ABORTED && + if (cmd->transport_state & CMD_T_ABORTED || cmd->transport_state & CMD_T_STOP) { spin_unlock_irqrestore(&cmd->t_state_lock, flags); complete_all(&cmd->t_transport_stop_comp); @@ -2212,28 +2211,6 @@ static inline void transport_free_pages(struct se_cmd *cmd) cmd->t_bidi_data_nents = 0; } -/** - * transport_release_cmd - free a command - * @cmd: command to free - * - * This routine unconditionally frees a command, and reference counting - * or list removal must be done in the caller. - */ -static int transport_release_cmd(struct se_cmd *cmd) -{ - BUG_ON(!cmd->se_tfo); - - if (cmd->se_cmd_flags & SCF_SCSI_TMR_CDB) - core_tmr_release_req(cmd->se_tmr_req); - if (cmd->t_task_cdb != cmd->__t_task_cdb) - kfree(cmd->t_task_cdb); - /* - * If this cmd has been setup with target_get_sess_cmd(), drop - * the kref and call ->release_cmd() in kref callback. - */ - return target_put_sess_cmd(cmd); -} - /** * transport_put_cmd - release a reference to a command * @cmd: command to release @@ -2242,8 +2219,12 @@ static int transport_release_cmd(struct se_cmd *cmd) */ static int transport_put_cmd(struct se_cmd *cmd) { - transport_free_pages(cmd); - return transport_release_cmd(cmd); + BUG_ON(!cmd->se_tfo); + /* + * If this cmd has been setup with target_get_sess_cmd(), drop + * the kref and call ->release_cmd() in kref callback. + */ + return target_put_sess_cmd(cmd); } void *transport_kmap_data_sg(struct se_cmd *cmd) @@ -2443,14 +2424,13 @@ static void transport_write_pending_qf(struct se_cmd *cmd) int transport_generic_free_cmd(struct se_cmd *cmd, int wait_for_tasks) { - unsigned long flags; int ret = 0; if (!(cmd->se_cmd_flags & SCF_SE_LUN_CMD)) { if (wait_for_tasks && (cmd->se_cmd_flags & SCF_SCSI_TMR_CDB)) - transport_wait_for_tasks(cmd); + transport_wait_for_tasks(cmd); - ret = transport_release_cmd(cmd); + ret = transport_put_cmd(cmd); } else { if (wait_for_tasks) transport_wait_for_tasks(cmd); @@ -2459,11 +2439,8 @@ int transport_generic_free_cmd(struct se_cmd *cmd, int wait_for_tasks) * has already added se_cmd to state_list, but fabric has * failed command before I/O submission. */ - if (cmd->state_active) { - spin_lock_irqsave(&cmd->t_state_lock, flags); + if (cmd->state_active) target_remove_from_state_list(cmd); - spin_unlock_irqrestore(&cmd->t_state_lock, flags); - } if (cmd->se_lun) transport_lun_remove_cmd(cmd); @@ -2508,6 +2485,16 @@ out: } EXPORT_SYMBOL(target_get_sess_cmd); +static void target_free_cmd_mem(struct se_cmd *cmd) +{ + transport_free_pages(cmd); + + if (cmd->se_cmd_flags & SCF_SCSI_TMR_CDB) + core_tmr_release_req(cmd->se_tmr_req); + if (cmd->t_task_cdb != cmd->__t_task_cdb) + kfree(cmd->t_task_cdb); +} + static void target_release_cmd_kref(struct kref *kref) { struct se_cmd *se_cmd = container_of(kref, struct se_cmd, cmd_kref); @@ -2517,17 +2504,20 @@ static void target_release_cmd_kref(struct kref *kref) spin_lock_irqsave(&se_sess->sess_cmd_lock, flags); if (list_empty(&se_cmd->se_cmd_list)) { spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); + target_free_cmd_mem(se_cmd); se_cmd->se_tfo->release_cmd(se_cmd); return; } if (se_sess->sess_tearing_down && se_cmd->cmd_wait_set) { spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); + target_free_cmd_mem(se_cmd); complete(&se_cmd->cmd_wait_comp); return; } list_del(&se_cmd->se_cmd_list); spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); + target_free_cmd_mem(se_cmd); se_cmd->se_tfo->release_cmd(se_cmd); } @@ -2539,6 +2529,7 @@ int target_put_sess_cmd(struct se_cmd *se_cmd) struct se_session *se_sess = se_cmd->se_sess; if (!se_sess) { + target_free_cmd_mem(se_cmd); se_cmd->se_tfo->release_cmd(se_cmd); return 1; } diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index aabf0aca0171..02a4d02d881d 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -138,6 +138,7 @@ enum se_cmd_flags_table { SCF_COMPARE_AND_WRITE = 0x00080000, SCF_COMPARE_AND_WRITE_POST = 0x00100000, SCF_PASSTHROUGH_PROT_SG_TO_MEM_NOALLOC = 0x00200000, + SCF_ACK_KREF = 0x00400000, }; /* struct se_dev_entry->lun_flags and struct se_lun->lun_access */ From 91583a21641d148b281ae48e909a56c23c5978f6 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Mon, 11 Jan 2016 21:53:05 -0800 Subject: [PATCH 280/418] target: Fix LUN_RESET active TMR descriptor handling commit a6d9bb1c9605cd4f44e2d8290dc4d0e88f20292d upstream. This patch fixes a NULL pointer se_cmd->cmd_kref < 0 refcount bug during TMR LUN_RESET with active TMRs, triggered during se_cmd + se_tmr_req descriptor shutdown + release via core_tmr_drain_tmr_list(). To address this bug, go ahead and obtain a local kref_get_unless_zero(&se_cmd->cmd_kref) for active I/O to set CMD_T_ABORTED, and transport_wait_for_tasks() followed by the final target_put_sess_cmd() to drop the local ->cmd_kref. Also add two new checks within target_tmr_work() to avoid CMD_T_ABORTED -> TFO->queue_tm_rsp() callbacks ahead of invoking the backend -> fabric put in transport_cmd_check_stop_to_fabric(). For good measure, also change core_tmr_release_req() to use list_del_init() ahead of se_tmr_req memory free. Reviewed-by: Quinn Tran Cc: Himanshu Madhani Cc: Sagi Grimberg Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Andy Grover Cc: Mike Christie Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/target/target_core_tmr.c | 22 +++++++++++++++++++++- drivers/target/target_core_transport.c | 17 +++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c index 6545df43c9a9..3e3b12e263fc 100644 --- a/drivers/target/target_core_tmr.c +++ b/drivers/target/target_core_tmr.c @@ -68,7 +68,7 @@ void core_tmr_release_req(struct se_tmr_req *tmr) if (dev) { spin_lock_irqsave(&dev->se_tmr_lock, flags); - list_del(&tmr->tmr_list); + list_del_init(&tmr->tmr_list); spin_unlock_irqrestore(&dev->se_tmr_lock, flags); } @@ -194,9 +194,11 @@ static void core_tmr_drain_tmr_list( struct list_head *preempt_and_abort_list) { LIST_HEAD(drain_tmr_list); + struct se_session *sess; struct se_tmr_req *tmr_p, *tmr_pp; struct se_cmd *cmd; unsigned long flags; + bool rc; /* * Release all pending and outgoing TMRs aside from the received * LUN_RESET tmr.. @@ -222,17 +224,31 @@ static void core_tmr_drain_tmr_list( if (target_check_cdb_and_preempt(preempt_and_abort_list, cmd)) continue; + sess = cmd->se_sess; + if (WARN_ON_ONCE(!sess)) + continue; + + spin_lock(&sess->sess_cmd_lock); spin_lock(&cmd->t_state_lock); if (!(cmd->transport_state & CMD_T_ACTIVE)) { spin_unlock(&cmd->t_state_lock); + spin_unlock(&sess->sess_cmd_lock); continue; } if (cmd->t_state == TRANSPORT_ISTATE_PROCESSING) { spin_unlock(&cmd->t_state_lock); + spin_unlock(&sess->sess_cmd_lock); continue; } + cmd->transport_state |= CMD_T_ABORTED; spin_unlock(&cmd->t_state_lock); + rc = kref_get_unless_zero(&cmd->cmd_kref); + spin_unlock(&sess->sess_cmd_lock); + if (!rc) { + printk("LUN_RESET TMR: non-zero kref_get_unless_zero\n"); + continue; + } list_move_tail(&tmr_p->tmr_list, &drain_tmr_list); } spin_unlock_irqrestore(&dev->se_tmr_lock, flags); @@ -246,7 +262,11 @@ static void core_tmr_drain_tmr_list( (preempt_and_abort_list) ? "Preempt" : "", tmr_p, tmr_p->function, tmr_p->response, cmd->t_state); + cancel_work_sync(&cmd->work); + transport_wait_for_tasks(cmd); + transport_cmd_finish_abort(cmd, 1); + target_put_sess_cmd(cmd); } } diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 3b549f7ca80f..2abec2a17d02 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -2891,8 +2891,17 @@ static void target_tmr_work(struct work_struct *work) struct se_cmd *cmd = container_of(work, struct se_cmd, work); struct se_device *dev = cmd->se_dev; struct se_tmr_req *tmr = cmd->se_tmr_req; + unsigned long flags; int ret; + spin_lock_irqsave(&cmd->t_state_lock, flags); + if (cmd->transport_state & CMD_T_ABORTED) { + tmr->response = TMR_FUNCTION_REJECTED; + spin_unlock_irqrestore(&cmd->t_state_lock, flags); + goto check_stop; + } + spin_unlock_irqrestore(&cmd->t_state_lock, flags); + switch (tmr->function) { case TMR_ABORT_TASK: core_tmr_abort_task(dev, tmr, cmd->se_sess); @@ -2925,9 +2934,17 @@ static void target_tmr_work(struct work_struct *work) break; } + spin_lock_irqsave(&cmd->t_state_lock, flags); + if (cmd->transport_state & CMD_T_ABORTED) { + spin_unlock_irqrestore(&cmd->t_state_lock, flags); + goto check_stop; + } cmd->t_state = TRANSPORT_ISTATE_PROCESSING; + spin_unlock_irqrestore(&cmd->t_state_lock, flags); + cmd->se_tfo->queue_tm_rsp(cmd); +check_stop: transport_cmd_check_stop_to_fabric(cmd); } From 547551e5a591ce744845202ecfa8b54366832b60 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Sat, 16 Jan 2016 12:49:49 -0800 Subject: [PATCH 281/418] target: Fix TAS handling for multi-session se_node_acls commit ebde1ca5a908b10312db4ecd7553e3ba039319ab upstream. This patch fixes a bug in TMR task aborted status (TAS) handling when multiple sessions are connected to the same target WWPN endpoint and se_node_acl descriptor, resulting in TASK_ABORTED status to not be generated for aborted se_cmds on the remote port. This is due to core_tmr_handle_tas_abort() incorrectly comparing se_node_acl instead of se_session, for which the multi-session case is expected to be sharing the same se_node_acl. Instead, go ahead and update core_tmr_handle_tas_abort() to compare tmr_sess + cmd->se_sess in order to determine if the LUN_RESET was received on a different I_T nexus, and TASK_ABORTED status response needs to be generated. Reviewed-by: Christoph Hellwig Cc: Quinn Tran Cc: Himanshu Madhani Cc: Sagi Grimberg Cc: Hannes Reinecke Cc: Andy Grover Cc: Mike Christie Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/target/target_core_tmr.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c index 3e3b12e263fc..0e137f628a33 100644 --- a/drivers/target/target_core_tmr.c +++ b/drivers/target/target_core_tmr.c @@ -76,7 +76,7 @@ void core_tmr_release_req(struct se_tmr_req *tmr) } static void core_tmr_handle_tas_abort( - struct se_node_acl *tmr_nacl, + struct se_session *tmr_sess, struct se_cmd *cmd, int tas) { @@ -84,7 +84,7 @@ static void core_tmr_handle_tas_abort( /* * TASK ABORTED status (TAS) bit support */ - if ((tmr_nacl && (tmr_nacl != cmd->se_sess->se_node_acl)) && tas) { + if (tmr_sess && tmr_sess != cmd->se_sess && tas) { remove = false; transport_send_task_abort(cmd); } @@ -273,7 +273,7 @@ static void core_tmr_drain_tmr_list( static void core_tmr_drain_state_list( struct se_device *dev, struct se_cmd *prout_cmd, - struct se_node_acl *tmr_nacl, + struct se_session *tmr_sess, int tas, struct list_head *preempt_and_abort_list) { @@ -364,7 +364,7 @@ static void core_tmr_drain_state_list( cancel_work_sync(&cmd->work); transport_wait_for_tasks(cmd); - core_tmr_handle_tas_abort(tmr_nacl, cmd, tas); + core_tmr_handle_tas_abort(tmr_sess, cmd, tas); target_put_sess_cmd(cmd); } } @@ -377,6 +377,7 @@ int core_tmr_lun_reset( { struct se_node_acl *tmr_nacl = NULL; struct se_portal_group *tmr_tpg = NULL; + struct se_session *tmr_sess = NULL; int tas; /* * TASK_ABORTED status bit, this is configurable via ConfigFS @@ -395,8 +396,9 @@ int core_tmr_lun_reset( * or struct se_device passthrough.. */ if (tmr && tmr->task_cmd && tmr->task_cmd->se_sess) { - tmr_nacl = tmr->task_cmd->se_sess->se_node_acl; - tmr_tpg = tmr->task_cmd->se_sess->se_tpg; + tmr_sess = tmr->task_cmd->se_sess; + tmr_nacl = tmr_sess->se_node_acl; + tmr_tpg = tmr_sess->se_tpg; if (tmr_nacl && tmr_tpg) { pr_debug("LUN_RESET: TMR caller fabric: %s" " initiator port %s\n", @@ -409,7 +411,7 @@ int core_tmr_lun_reset( dev->transport->name, tas); core_tmr_drain_tmr_list(dev, tmr, preempt_and_abort_list); - core_tmr_drain_state_list(dev, prout_cmd, tmr_nacl, tas, + core_tmr_drain_state_list(dev, prout_cmd, tmr_sess, tas, preempt_and_abort_list); /* From fb6a326e303451078ee93c1293918aa2e044cff8 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Tue, 19 Jan 2016 15:23:02 -0800 Subject: [PATCH 282/418] target: Fix remote-port TMR ABORT + se_cmd fabric stop commit 0f4a943168f31d29a1701908931acaba518b131a upstream. To address the bug where fabric driver level shutdown of se_cmd occurs at the same time when TMR CMD_T_ABORTED is happening resulting in a -1 ->cmd_kref, this patch adds a CMD_T_FABRIC_STOP bit that is used to determine when TMR + driver I_T nexus shutdown is happening concurrently. It changes target_sess_cmd_list_set_waiting() to obtain se_cmd->cmd_kref + set CMD_T_FABRIC_STOP, and drop local reference in target_wait_for_sess_cmds() and invoke extra target_put_sess_cmd() during Task Aborted Status (TAS) when necessary. Also, it adds a new target_wait_free_cmd() wrapper around transport_wait_for_tasks() for the special case within transport_generic_free_cmd() to set CMD_T_FABRIC_STOP, and is now aware of CMD_T_ABORTED + CMD_T_TAS status bits to know when an extra transport_put_cmd() during TAS is required. Note transport_generic_free_cmd() is expected to block on cmd->cmd_wait_comp in order to follow what iscsi-target expects during iscsi_conn context se_cmd shutdown. Cc: Quinn Tran Cc: Himanshu Madhani Cc: Sagi Grimberg Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Andy Grover Cc: Mike Christie Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/target/target_core_tmr.c | 54 ++++++--- drivers/target/target_core_transport.c | 159 ++++++++++++++++++------- include/target/target_core_base.h | 2 + 3 files changed, 157 insertions(+), 58 deletions(-) diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c index 0e137f628a33..88029cc6de5e 100644 --- a/drivers/target/target_core_tmr.c +++ b/drivers/target/target_core_tmr.c @@ -75,16 +75,18 @@ void core_tmr_release_req(struct se_tmr_req *tmr) kfree(tmr); } -static void core_tmr_handle_tas_abort( - struct se_session *tmr_sess, - struct se_cmd *cmd, - int tas) +static void core_tmr_handle_tas_abort(struct se_cmd *cmd, int tas) { - bool remove = true; + unsigned long flags; + bool remove = true, send_tas; /* * TASK ABORTED status (TAS) bit support */ - if (tmr_sess && tmr_sess != cmd->se_sess && tas) { + spin_lock_irqsave(&cmd->t_state_lock, flags); + send_tas = (cmd->transport_state & CMD_T_TAS); + spin_unlock_irqrestore(&cmd->t_state_lock, flags); + + if (send_tas) { remove = false; transport_send_task_abort(cmd); } @@ -107,7 +109,8 @@ static int target_check_cdb_and_preempt(struct list_head *list, return 1; } -static bool __target_check_io_state(struct se_cmd *se_cmd) +static bool __target_check_io_state(struct se_cmd *se_cmd, + struct se_session *tmr_sess, int tas) { struct se_session *sess = se_cmd->se_sess; @@ -115,21 +118,32 @@ static bool __target_check_io_state(struct se_cmd *se_cmd) WARN_ON_ONCE(!irqs_disabled()); /* * If command already reached CMD_T_COMPLETE state within - * target_complete_cmd(), this se_cmd has been passed to - * fabric driver and will not be aborted. + * target_complete_cmd() or CMD_T_FABRIC_STOP due to shutdown, + * this se_cmd has been passed to fabric driver and will + * not be aborted. * * Otherwise, obtain a local se_cmd->cmd_kref now for TMR * ABORT_TASK + LUN_RESET for CMD_T_ABORTED processing as * long as se_cmd->cmd_kref is still active unless zero. */ spin_lock(&se_cmd->t_state_lock); - if (se_cmd->transport_state & CMD_T_COMPLETE) { - pr_debug("Attempted to abort io tag: %llu already complete," + if (se_cmd->transport_state & (CMD_T_COMPLETE | CMD_T_FABRIC_STOP)) { + pr_debug("Attempted to abort io tag: %llu already complete or" + " fabric stop, skipping\n", se_cmd->tag); + spin_unlock(&se_cmd->t_state_lock); + return false; + } + if (sess->sess_tearing_down || se_cmd->cmd_wait_set) { + pr_debug("Attempted to abort io tag: %llu already shutdown," " skipping\n", se_cmd->tag); spin_unlock(&se_cmd->t_state_lock); return false; } se_cmd->transport_state |= CMD_T_ABORTED; + + if ((tmr_sess != se_cmd->se_sess) && tas) + se_cmd->transport_state |= CMD_T_TAS; + spin_unlock(&se_cmd->t_state_lock); return kref_get_unless_zero(&se_cmd->cmd_kref); @@ -161,7 +175,7 @@ void core_tmr_abort_task( printk("ABORT_TASK: Found referenced %s task_tag: %llu\n", se_cmd->se_tfo->get_fabric_name(), ref_tag); - if (!__target_check_io_state(se_cmd)) { + if (!__target_check_io_state(se_cmd, se_sess, 0)) { spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); target_put_sess_cmd(se_cmd); goto out; @@ -230,7 +244,8 @@ static void core_tmr_drain_tmr_list( spin_lock(&sess->sess_cmd_lock); spin_lock(&cmd->t_state_lock); - if (!(cmd->transport_state & CMD_T_ACTIVE)) { + if (!(cmd->transport_state & CMD_T_ACTIVE) || + (cmd->transport_state & CMD_T_FABRIC_STOP)) { spin_unlock(&cmd->t_state_lock); spin_unlock(&sess->sess_cmd_lock); continue; @@ -240,15 +255,22 @@ static void core_tmr_drain_tmr_list( spin_unlock(&sess->sess_cmd_lock); continue; } + if (sess->sess_tearing_down || cmd->cmd_wait_set) { + spin_unlock(&cmd->t_state_lock); + spin_unlock(&sess->sess_cmd_lock); + continue; + } cmd->transport_state |= CMD_T_ABORTED; spin_unlock(&cmd->t_state_lock); rc = kref_get_unless_zero(&cmd->cmd_kref); - spin_unlock(&sess->sess_cmd_lock); if (!rc) { printk("LUN_RESET TMR: non-zero kref_get_unless_zero\n"); + spin_unlock(&sess->sess_cmd_lock); continue; } + spin_unlock(&sess->sess_cmd_lock); + list_move_tail(&tmr_p->tmr_list, &drain_tmr_list); } spin_unlock_irqrestore(&dev->se_tmr_lock, flags); @@ -325,7 +347,7 @@ static void core_tmr_drain_state_list( continue; spin_lock(&sess->sess_cmd_lock); - rc = __target_check_io_state(cmd); + rc = __target_check_io_state(cmd, tmr_sess, tas); spin_unlock(&sess->sess_cmd_lock); if (!rc) continue; @@ -364,7 +386,7 @@ static void core_tmr_drain_state_list( cancel_work_sync(&cmd->work); transport_wait_for_tasks(cmd); - core_tmr_handle_tas_abort(tmr_sess, cmd, tas); + core_tmr_handle_tas_abort(cmd, tas); target_put_sess_cmd(cmd); } } diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 2abec2a17d02..c1f4cb4103f7 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -2422,18 +2422,33 @@ static void transport_write_pending_qf(struct se_cmd *cmd) } } +static bool +__transport_wait_for_tasks(struct se_cmd *, bool, bool *, bool *, + unsigned long *flags); + +static void target_wait_free_cmd(struct se_cmd *cmd, bool *aborted, bool *tas) +{ + unsigned long flags; + + spin_lock_irqsave(&cmd->t_state_lock, flags); + __transport_wait_for_tasks(cmd, true, aborted, tas, &flags); + spin_unlock_irqrestore(&cmd->t_state_lock, flags); +} + int transport_generic_free_cmd(struct se_cmd *cmd, int wait_for_tasks) { int ret = 0; + bool aborted = false, tas = false; if (!(cmd->se_cmd_flags & SCF_SE_LUN_CMD)) { if (wait_for_tasks && (cmd->se_cmd_flags & SCF_SCSI_TMR_CDB)) - transport_wait_for_tasks(cmd); + target_wait_free_cmd(cmd, &aborted, &tas); - ret = transport_put_cmd(cmd); + if (!aborted || tas) + ret = transport_put_cmd(cmd); } else { if (wait_for_tasks) - transport_wait_for_tasks(cmd); + target_wait_free_cmd(cmd, &aborted, &tas); /* * Handle WRITE failure case where transport_generic_new_cmd() * has already added se_cmd to state_list, but fabric has @@ -2445,7 +2460,20 @@ int transport_generic_free_cmd(struct se_cmd *cmd, int wait_for_tasks) if (cmd->se_lun) transport_lun_remove_cmd(cmd); - ret = transport_put_cmd(cmd); + if (!aborted || tas) + ret = transport_put_cmd(cmd); + } + /* + * If the task has been internally aborted due to TMR ABORT_TASK + * or LUN_RESET, target_core_tmr.c is responsible for performing + * the remaining calls to target_put_sess_cmd(), and not the + * callers of this function. + */ + if (aborted) { + pr_debug("Detected CMD_T_ABORTED for ITT: %llu\n", cmd->tag); + wait_for_completion(&cmd->cmd_wait_comp); + cmd->se_tfo->release_cmd(cmd); + ret = 1; } return ret; } @@ -2500,6 +2528,7 @@ static void target_release_cmd_kref(struct kref *kref) struct se_cmd *se_cmd = container_of(kref, struct se_cmd, cmd_kref); struct se_session *se_sess = se_cmd->se_sess; unsigned long flags; + bool fabric_stop; spin_lock_irqsave(&se_sess->sess_cmd_lock, flags); if (list_empty(&se_cmd->se_cmd_list)) { @@ -2508,13 +2537,19 @@ static void target_release_cmd_kref(struct kref *kref) se_cmd->se_tfo->release_cmd(se_cmd); return; } - if (se_sess->sess_tearing_down && se_cmd->cmd_wait_set) { + + spin_lock(&se_cmd->t_state_lock); + fabric_stop = (se_cmd->transport_state & CMD_T_FABRIC_STOP); + spin_unlock(&se_cmd->t_state_lock); + + if (se_cmd->cmd_wait_set || fabric_stop) { + list_del_init(&se_cmd->se_cmd_list); spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); target_free_cmd_mem(se_cmd); complete(&se_cmd->cmd_wait_comp); return; } - list_del(&se_cmd->se_cmd_list); + list_del_init(&se_cmd->se_cmd_list); spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); target_free_cmd_mem(se_cmd); @@ -2546,6 +2581,7 @@ void target_sess_cmd_list_set_waiting(struct se_session *se_sess) { struct se_cmd *se_cmd; unsigned long flags; + int rc; spin_lock_irqsave(&se_sess->sess_cmd_lock, flags); if (se_sess->sess_tearing_down) { @@ -2555,8 +2591,15 @@ void target_sess_cmd_list_set_waiting(struct se_session *se_sess) se_sess->sess_tearing_down = 1; list_splice_init(&se_sess->sess_cmd_list, &se_sess->sess_wait_list); - list_for_each_entry(se_cmd, &se_sess->sess_wait_list, se_cmd_list) - se_cmd->cmd_wait_set = 1; + list_for_each_entry(se_cmd, &se_sess->sess_wait_list, se_cmd_list) { + rc = kref_get_unless_zero(&se_cmd->cmd_kref); + if (rc) { + se_cmd->cmd_wait_set = 1; + spin_lock(&se_cmd->t_state_lock); + se_cmd->transport_state |= CMD_T_FABRIC_STOP; + spin_unlock(&se_cmd->t_state_lock); + } + } spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); } @@ -2569,15 +2612,25 @@ void target_wait_for_sess_cmds(struct se_session *se_sess) { struct se_cmd *se_cmd, *tmp_cmd; unsigned long flags; + bool tas; list_for_each_entry_safe(se_cmd, tmp_cmd, &se_sess->sess_wait_list, se_cmd_list) { - list_del(&se_cmd->se_cmd_list); + list_del_init(&se_cmd->se_cmd_list); pr_debug("Waiting for se_cmd: %p t_state: %d, fabric state:" " %d\n", se_cmd, se_cmd->t_state, se_cmd->se_tfo->get_cmd_state(se_cmd)); + spin_lock_irqsave(&se_cmd->t_state_lock, flags); + tas = (se_cmd->transport_state & CMD_T_TAS); + spin_unlock_irqrestore(&se_cmd->t_state_lock, flags); + + if (!target_put_sess_cmd(se_cmd)) { + if (tas) + target_put_sess_cmd(se_cmd); + } + wait_for_completion(&se_cmd->cmd_wait_comp); pr_debug("After cmd_wait_comp: se_cmd: %p t_state: %d" " fabric state: %d\n", se_cmd, se_cmd->t_state, @@ -2599,6 +2652,58 @@ void transport_clear_lun_ref(struct se_lun *lun) wait_for_completion(&lun->lun_ref_comp); } +static bool +__transport_wait_for_tasks(struct se_cmd *cmd, bool fabric_stop, + bool *aborted, bool *tas, unsigned long *flags) + __releases(&cmd->t_state_lock) + __acquires(&cmd->t_state_lock) +{ + + assert_spin_locked(&cmd->t_state_lock); + WARN_ON_ONCE(!irqs_disabled()); + + if (fabric_stop) + cmd->transport_state |= CMD_T_FABRIC_STOP; + + if (cmd->transport_state & CMD_T_ABORTED) + *aborted = true; + + if (cmd->transport_state & CMD_T_TAS) + *tas = true; + + if (!(cmd->se_cmd_flags & SCF_SE_LUN_CMD) && + !(cmd->se_cmd_flags & SCF_SCSI_TMR_CDB)) + return false; + + if (!(cmd->se_cmd_flags & SCF_SUPPORTED_SAM_OPCODE) && + !(cmd->se_cmd_flags & SCF_SCSI_TMR_CDB)) + return false; + + if (!(cmd->transport_state & CMD_T_ACTIVE)) + return false; + + if (fabric_stop && *aborted) + return false; + + cmd->transport_state |= CMD_T_STOP; + + pr_debug("wait_for_tasks: Stopping %p ITT: 0x%08llx i_state: %d," + " t_state: %d, CMD_T_STOP\n", cmd, cmd->tag, + cmd->se_tfo->get_cmd_state(cmd), cmd->t_state); + + spin_unlock_irqrestore(&cmd->t_state_lock, *flags); + + wait_for_completion(&cmd->t_transport_stop_comp); + + spin_lock_irqsave(&cmd->t_state_lock, *flags); + cmd->transport_state &= ~(CMD_T_ACTIVE | CMD_T_STOP); + + pr_debug("wait_for_tasks: Stopped wait_for_completion(&cmd->" + "t_transport_stop_comp) for ITT: 0x%08llx\n", cmd->tag); + + return true; +} + /** * transport_wait_for_tasks - wait for completion to occur * @cmd: command to wait @@ -2609,43 +2714,13 @@ void transport_clear_lun_ref(struct se_lun *lun) bool transport_wait_for_tasks(struct se_cmd *cmd) { unsigned long flags; + bool ret, aborted = false, tas = false; spin_lock_irqsave(&cmd->t_state_lock, flags); - if (!(cmd->se_cmd_flags & SCF_SE_LUN_CMD) && - !(cmd->se_cmd_flags & SCF_SCSI_TMR_CDB)) { - spin_unlock_irqrestore(&cmd->t_state_lock, flags); - return false; - } - - if (!(cmd->se_cmd_flags & SCF_SUPPORTED_SAM_OPCODE) && - !(cmd->se_cmd_flags & SCF_SCSI_TMR_CDB)) { - spin_unlock_irqrestore(&cmd->t_state_lock, flags); - return false; - } - - if (!(cmd->transport_state & CMD_T_ACTIVE)) { - spin_unlock_irqrestore(&cmd->t_state_lock, flags); - return false; - } - - cmd->transport_state |= CMD_T_STOP; - - pr_debug("wait_for_tasks: Stopping %p ITT: 0x%08llx i_state: %d, t_state: %d, CMD_T_STOP\n", - cmd, cmd->tag, cmd->se_tfo->get_cmd_state(cmd), cmd->t_state); - + ret = __transport_wait_for_tasks(cmd, false, &aborted, &tas, &flags); spin_unlock_irqrestore(&cmd->t_state_lock, flags); - wait_for_completion(&cmd->t_transport_stop_comp); - - spin_lock_irqsave(&cmd->t_state_lock, flags); - cmd->transport_state &= ~(CMD_T_ACTIVE | CMD_T_STOP); - - pr_debug("wait_for_tasks: Stopped wait_for_completion(&cmd->t_transport_stop_comp) for ITT: 0x%08llx\n", - cmd->tag); - - spin_unlock_irqrestore(&cmd->t_state_lock, flags); - - return true; + return ret; } EXPORT_SYMBOL(transport_wait_for_tasks); diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 02a4d02d881d..689f4d207122 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -491,6 +491,8 @@ struct se_cmd { #define CMD_T_DEV_ACTIVE (1 << 7) #define CMD_T_REQUEST_STOP (1 << 8) #define CMD_T_BUSY (1 << 9) +#define CMD_T_TAS (1 << 10) +#define CMD_T_FABRIC_STOP (1 << 11) spinlock_t t_state_lock; struct kref cmd_kref; struct completion t_transport_stop_comp; From b9d992021215ef0361105c3735aa5625ad261937 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Fri, 5 Feb 2016 14:51:36 -0800 Subject: [PATCH 283/418] target: Fix race with SCF_SEND_DELAYED_TAS handling commit 310d3d314be7f0a84011ebdc4bdccbcae9755a87 upstream. This patch fixes a race between setting of SCF_SEND_DELAYED_TAS in transport_send_task_abort(), and check of the same bit in transport_check_aborted_status(). It adds a __transport_check_aborted_status() version that is used by target_execute_cmd() when se_cmd->t_state_lock is held, and a transport_check_aborted_status() wrapper for all other existing callers. Also, it handles the case where the check happens before transport_send_task_abort() gets called. For this, go ahead and set SCF_SEND_DELAYED_TAS early when necessary, and have transport_send_task_abort() send the abort. Cc: Quinn Tran Cc: Himanshu Madhani Cc: Sagi Grimberg Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Andy Grover Cc: Mike Christie Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/target/target_core_transport.c | 53 ++++++++++++++++++++------ 1 file changed, 41 insertions(+), 12 deletions(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index c1f4cb4103f7..94f4ffac723f 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -1849,19 +1849,21 @@ static bool target_handle_task_attr(struct se_cmd *cmd) return true; } +static int __transport_check_aborted_status(struct se_cmd *, int); + void target_execute_cmd(struct se_cmd *cmd) { - /* - * If the received CDB has aleady been aborted stop processing it here. - */ - if (transport_check_aborted_status(cmd, 1)) - return; - /* * Determine if frontend context caller is requesting the stopping of * this command for frontend exceptions. + * + * If the received CDB has aleady been aborted stop processing it here. */ spin_lock_irq(&cmd->t_state_lock); + if (__transport_check_aborted_status(cmd, 1)) { + spin_unlock_irq(&cmd->t_state_lock); + return; + } if (cmd->transport_state & CMD_T_STOP) { pr_debug("%s:%d CMD_T_STOP for ITT: 0x%08llx\n", __func__, __LINE__, cmd->tag); @@ -2902,28 +2904,49 @@ transport_send_check_condition_and_sense(struct se_cmd *cmd, } EXPORT_SYMBOL(transport_send_check_condition_and_sense); -int transport_check_aborted_status(struct se_cmd *cmd, int send_status) +static int __transport_check_aborted_status(struct se_cmd *cmd, int send_status) + __releases(&cmd->t_state_lock) + __acquires(&cmd->t_state_lock) { + assert_spin_locked(&cmd->t_state_lock); + WARN_ON_ONCE(!irqs_disabled()); + if (!(cmd->transport_state & CMD_T_ABORTED)) return 0; - /* * If cmd has been aborted but either no status is to be sent or it has * already been sent, just return */ - if (!send_status || !(cmd->se_cmd_flags & SCF_SEND_DELAYED_TAS)) + if (!send_status || !(cmd->se_cmd_flags & SCF_SEND_DELAYED_TAS)) { + if (send_status) + cmd->se_cmd_flags |= SCF_SEND_DELAYED_TAS; return 1; + } - pr_debug("Sending delayed SAM_STAT_TASK_ABORTED status for CDB: 0x%02x ITT: 0x%08llx\n", - cmd->t_task_cdb[0], cmd->tag); + pr_debug("Sending delayed SAM_STAT_TASK_ABORTED status for CDB:" + " 0x%02x ITT: 0x%08llx\n", cmd->t_task_cdb[0], cmd->tag); cmd->se_cmd_flags &= ~SCF_SEND_DELAYED_TAS; cmd->scsi_status = SAM_STAT_TASK_ABORTED; trace_target_cmd_complete(cmd); + + spin_unlock_irq(&cmd->t_state_lock); cmd->se_tfo->queue_status(cmd); + spin_lock_irq(&cmd->t_state_lock); return 1; } + +int transport_check_aborted_status(struct se_cmd *cmd, int send_status) +{ + int ret; + + spin_lock_irq(&cmd->t_state_lock); + ret = __transport_check_aborted_status(cmd, send_status); + spin_unlock_irq(&cmd->t_state_lock); + + return ret; +} EXPORT_SYMBOL(transport_check_aborted_status); void transport_send_task_abort(struct se_cmd *cmd) @@ -2945,11 +2968,17 @@ void transport_send_task_abort(struct se_cmd *cmd) */ if (cmd->data_direction == DMA_TO_DEVICE) { if (cmd->se_tfo->write_pending_status(cmd) != 0) { - cmd->transport_state |= CMD_T_ABORTED; + spin_lock_irqsave(&cmd->t_state_lock, flags); + if (cmd->se_cmd_flags & SCF_SEND_DELAYED_TAS) { + spin_unlock_irqrestore(&cmd->t_state_lock, flags); + goto send_abort; + } cmd->se_cmd_flags |= SCF_SEND_DELAYED_TAS; + spin_unlock_irqrestore(&cmd->t_state_lock, flags); return; } } +send_abort: cmd->scsi_status = SAM_STAT_TASK_ABORTED; transport_lun_remove_cmd(cmd); From 44c7d7625f76a987f59b588fcb8727c7b286b99d Mon Sep 17 00:00:00 2001 From: Cyrille Pitchen Date: Wed, 27 Jan 2016 17:48:32 +0100 Subject: [PATCH 284/418] spi: atmel: fix gpio chip-select in case of non-DT platform commit 70f340df24518d36eeaefb6652d492f250115c19 upstream. The non-DT platform that uses this driver (actually the AVR32) was taking a bad branch for determining if the IP would use gpio for CS. Adding the presence of DT as a condition fixes this issue. Fixes: 4820303480a1 ("spi: atmel: add support for the internal chip-select of the spi controller") Reported-by: Mans Rullgard Signed-off-by: Cyrille Pitchen [nicolas.ferre@atmel.com: extract from ml discussion] Signed-off-by: Nicolas Ferre Tested-by: Mans Rullgard Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-atmel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/spi-atmel.c b/drivers/spi/spi-atmel.c index aebad36391c9..8feac599e9ab 100644 --- a/drivers/spi/spi-atmel.c +++ b/drivers/spi/spi-atmel.c @@ -1571,6 +1571,7 @@ static int atmel_spi_probe(struct platform_device *pdev) as->use_cs_gpios = true; if (atmel_spi_is_v2(as) && + pdev->dev.of_node && !of_get_property(pdev->dev.of_node, "cs-gpios", NULL)) { as->use_cs_gpios = false; master->num_chipselect = 4; From 921d439c6720c22ad94da1c3354ee4522781809d Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Thu, 4 Feb 2016 11:45:16 -0500 Subject: [PATCH 285/418] qla2xxx: Fix stale pointer access. commit cb43285ff7039fe3c4b0bc476e6d6569c31104f3 upstream. [ Upstream Commit 84e32a06f4f8756ce9ec3c8dc7e97896575f0771 ] Commit 84e32a0 ("qla2xxx: Use pci_enable_msix_range() instead of pci_enable_msix()") introduced a regression when target mode is enabled. In qla24xx_enable_msix(), ha->max_rsp_queues was incorrectly set to a value higher than the number of response queues allocated causing an invalid dereference. Specifically here in qla2x00_init_rings(): *rsp->in_ptr = 0; Add additional check to make sure the pointer is valid. following call stack will be seen ---- 8< ---- RIP: 0010:[] [] qla2x00_init_rings+0xdc/0x320 [qla2xxx] RSP: 0018:ffff880429447dd8 EFLAGS: 00010082 .... Call Trace: [] qla2x00_abort_isp+0x170/0x6b0 [qla2xxx] [] qla2x00_do_dpc+0x357/0x7f0 [qla2xxx] [] ? qla2x00_relogin+0x260/0x260 [qla2xxx] [] kthread+0xc9/0xe0 [] ? flush_kthread_worker+0x90/0x90 [] ret_from_fork+0x3f/0x70 [] ? flush_kthread_worker+0x90/0x90 ---- 8< ---- Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Signed-off-by: Nicholas Bellinger Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_init.c | 10 +++++----- drivers/scsi/qla2xxx/qla_isr.c | 4 ++-- drivers/scsi/qla2xxx/qla_mid.c | 4 ++-- drivers/scsi/qla2xxx/qla_os.c | 6 ++++++ drivers/scsi/qla2xxx/qla_tmpl.c | 16 ++++++++++++++++ 5 files changed, 31 insertions(+), 9 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 16a1935cc9c1..e197c6f39de2 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -2192,7 +2192,7 @@ qla2x00_init_rings(scsi_qla_host_t *vha) /* Clear outstanding commands array. */ for (que = 0; que < ha->max_req_queues; que++) { req = ha->req_q_map[que]; - if (!req) + if (!req || !test_bit(que, ha->req_qid_map)) continue; req->out_ptr = (void *)(req->ring + req->length); *req->out_ptr = 0; @@ -2209,7 +2209,7 @@ qla2x00_init_rings(scsi_qla_host_t *vha) for (que = 0; que < ha->max_rsp_queues; que++) { rsp = ha->rsp_q_map[que]; - if (!rsp) + if (!rsp || !test_bit(que, ha->rsp_qid_map)) continue; rsp->in_ptr = (void *)(rsp->ring + rsp->length); *rsp->in_ptr = 0; @@ -4961,7 +4961,7 @@ qla25xx_init_queues(struct qla_hw_data *ha) for (i = 1; i < ha->max_rsp_queues; i++) { rsp = ha->rsp_q_map[i]; - if (rsp) { + if (rsp && test_bit(i, ha->rsp_qid_map)) { rsp->options &= ~BIT_0; ret = qla25xx_init_rsp_que(base_vha, rsp); if (ret != QLA_SUCCESS) @@ -4976,8 +4976,8 @@ qla25xx_init_queues(struct qla_hw_data *ha) } for (i = 1; i < ha->max_req_queues; i++) { req = ha->req_q_map[i]; - if (req) { - /* Clear outstanding commands array. */ + if (req && test_bit(i, ha->req_qid_map)) { + /* Clear outstanding commands array. */ req->options &= ~BIT_0; ret = qla25xx_init_req_que(base_vha, req); if (ret != QLA_SUCCESS) diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index ccf6a7f99024..0e59731f95ad 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -3018,9 +3018,9 @@ qla24xx_enable_msix(struct qla_hw_data *ha, struct rsp_que *rsp) "MSI-X: Failed to enable support " "-- %d/%d\n Retry with %d vectors.\n", ha->msix_count, ret, ret); + ha->msix_count = ret; + ha->max_rsp_queues = ha->msix_count - 1; } - ha->msix_count = ret; - ha->max_rsp_queues = ha->msix_count - 1; ha->msix_entries = kzalloc(sizeof(struct qla_msix_entry) * ha->msix_count, GFP_KERNEL); if (!ha->msix_entries) { diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c index c5dd594f6c31..cf7ba52bae66 100644 --- a/drivers/scsi/qla2xxx/qla_mid.c +++ b/drivers/scsi/qla2xxx/qla_mid.c @@ -600,7 +600,7 @@ qla25xx_delete_queues(struct scsi_qla_host *vha) /* Delete request queues */ for (cnt = 1; cnt < ha->max_req_queues; cnt++) { req = ha->req_q_map[cnt]; - if (req) { + if (req && test_bit(cnt, ha->req_qid_map)) { ret = qla25xx_delete_req_que(vha, req); if (ret != QLA_SUCCESS) { ql_log(ql_log_warn, vha, 0x00ea, @@ -614,7 +614,7 @@ qla25xx_delete_queues(struct scsi_qla_host *vha) /* Delete response queues */ for (cnt = 1; cnt < ha->max_rsp_queues; cnt++) { rsp = ha->rsp_q_map[cnt]; - if (rsp) { + if (rsp && test_bit(cnt, ha->rsp_qid_map)) { ret = qla25xx_delete_rsp_que(vha, rsp); if (ret != QLA_SUCCESS) { ql_log(ql_log_warn, vha, 0x00eb, diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index bfa9a64c316b..fc6674db4f2d 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -397,6 +397,9 @@ static void qla2x00_free_queues(struct qla_hw_data *ha) int cnt; for (cnt = 0; cnt < ha->max_req_queues; cnt++) { + if (!test_bit(cnt, ha->req_qid_map)) + continue; + req = ha->req_q_map[cnt]; qla2x00_free_req_que(ha, req); } @@ -404,6 +407,9 @@ static void qla2x00_free_queues(struct qla_hw_data *ha) ha->req_q_map = NULL; for (cnt = 0; cnt < ha->max_rsp_queues; cnt++) { + if (!test_bit(cnt, ha->rsp_qid_map)) + continue; + rsp = ha->rsp_q_map[cnt]; qla2x00_free_rsp_que(ha, rsp); } diff --git a/drivers/scsi/qla2xxx/qla_tmpl.c b/drivers/scsi/qla2xxx/qla_tmpl.c index ddbe2e7ac14d..c3e622524604 100644 --- a/drivers/scsi/qla2xxx/qla_tmpl.c +++ b/drivers/scsi/qla2xxx/qla_tmpl.c @@ -395,6 +395,10 @@ qla27xx_fwdt_entry_t263(struct scsi_qla_host *vha, if (ent->t263.queue_type == T263_QUEUE_TYPE_REQ) { for (i = 0; i < vha->hw->max_req_queues; i++) { struct req_que *req = vha->hw->req_q_map[i]; + + if (!test_bit(i, vha->hw->req_qid_map)) + continue; + if (req || !buf) { length = req ? req->length : REQUEST_ENTRY_CNT_24XX; @@ -408,6 +412,10 @@ qla27xx_fwdt_entry_t263(struct scsi_qla_host *vha, } else if (ent->t263.queue_type == T263_QUEUE_TYPE_RSP) { for (i = 0; i < vha->hw->max_rsp_queues; i++) { struct rsp_que *rsp = vha->hw->rsp_q_map[i]; + + if (!test_bit(i, vha->hw->rsp_qid_map)) + continue; + if (rsp || !buf) { length = rsp ? rsp->length : RESPONSE_ENTRY_CNT_MQ; @@ -634,6 +642,10 @@ qla27xx_fwdt_entry_t274(struct scsi_qla_host *vha, if (ent->t274.queue_type == T274_QUEUE_TYPE_REQ_SHAD) { for (i = 0; i < vha->hw->max_req_queues; i++) { struct req_que *req = vha->hw->req_q_map[i]; + + if (!test_bit(i, vha->hw->req_qid_map)) + continue; + if (req || !buf) { qla27xx_insert16(i, buf, len); qla27xx_insert16(1, buf, len); @@ -645,6 +657,10 @@ qla27xx_fwdt_entry_t274(struct scsi_qla_host *vha, } else if (ent->t274.queue_type == T274_QUEUE_TYPE_RSP_SHAD) { for (i = 0; i < vha->hw->max_rsp_queues; i++) { struct rsp_que *rsp = vha->hw->rsp_q_map[i]; + + if (!test_bit(i, vha->hw->rsp_qid_map)) + continue; + if (rsp || !buf) { qla27xx_insert16(i, buf, len); qla27xx_insert16(1, buf, len); From aff4514876f47a45a84118aeaa2b81b33d6f0e29 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 1 Feb 2016 11:33:21 -0500 Subject: [PATCH 286/418] libata: fix sff host state machine locking while polling commit 8eee1d3ed5b6fc8e14389567c9a6f53f82bb7224 upstream. The bulk of ATA host state machine is implemented by ata_sff_hsm_move(). The function is called from either the interrupt handler or, if polling, a work item. Unlike from the interrupt path, the polling path calls the function without holding the host lock and ata_sff_hsm_move() selectively grabs the lock. This is completely broken. If an IRQ triggers while polling is in progress, the two can easily race and end up accessing the hardware and updating state machine state at the same time. This can put the state machine in an illegal state and lead to a crash like the following. kernel BUG at drivers/ata/libata-sff.c:1302! invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN Modules linked in: CPU: 1 PID: 10679 Comm: syz-executor Not tainted 4.5.0-rc1+ #300 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 task: ffff88002bd00000 ti: ffff88002e048000 task.ti: ffff88002e048000 RIP: 0010:[] [] ata_sff_hsm_move+0x619/0x1c60 ... Call Trace: [] __ata_sff_port_intr+0x1e1/0x3a0 drivers/ata/libata-sff.c:1584 [] ata_bmdma_port_intr+0x71/0x400 drivers/ata/libata-sff.c:2877 [< inline >] __ata_sff_interrupt drivers/ata/libata-sff.c:1629 [] ata_bmdma_interrupt+0x253/0x580 drivers/ata/libata-sff.c:2902 [] handle_irq_event_percpu+0x108/0x7e0 kernel/irq/handle.c:157 [] handle_irq_event+0xa7/0x140 kernel/irq/handle.c:205 [] handle_edge_irq+0x1e3/0x8d0 kernel/irq/chip.c:623 [< inline >] generic_handle_irq_desc include/linux/irqdesc.h:146 [] handle_irq+0x10c/0x2a0 arch/x86/kernel/irq_64.c:78 [] do_IRQ+0x7d/0x1a0 arch/x86/kernel/irq.c:240 [] common_interrupt+0x8c/0x8c arch/x86/entry/entry_64.S:520 [< inline >] rcu_lock_acquire include/linux/rcupdate.h:490 [< inline >] rcu_read_lock include/linux/rcupdate.h:874 [] filemap_map_pages+0x131/0xba0 mm/filemap.c:2145 [< inline >] do_fault_around mm/memory.c:2943 [< inline >] do_read_fault mm/memory.c:2962 [< inline >] do_fault mm/memory.c:3133 [< inline >] handle_pte_fault mm/memory.c:3308 [< inline >] __handle_mm_fault mm/memory.c:3418 [] handle_mm_fault+0x2516/0x49a0 mm/memory.c:3447 [] __do_page_fault+0x376/0x960 arch/x86/mm/fault.c:1238 [] trace_do_page_fault+0xe8/0x420 arch/x86/mm/fault.c:1331 [] do_async_page_fault+0x14/0xd0 arch/x86/kernel/kvm.c:264 [] async_page_fault+0x28/0x30 arch/x86/entry/entry_64.S:986 Fix it by ensuring that the polling path is holding the host lock before entering ata_sff_hsm_move() so that all hardware accesses and state updates are performed under the host lock. Signed-off-by: Tejun Heo Reported-and-tested-by: Dmitry Vyukov Link: http://lkml.kernel.org/g/CACT4Y+b_JsOxJu2EZyEf+mOXORc_zid5V1-pLZSroJVxyWdSpw@mail.gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-sff.c | 32 +++++++++++--------------------- 1 file changed, 11 insertions(+), 21 deletions(-) diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index cdf6215a9a22..7dbba387d12a 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -997,12 +997,9 @@ static inline int ata_hsm_ok_in_wq(struct ata_port *ap, static void ata_hsm_qc_complete(struct ata_queued_cmd *qc, int in_wq) { struct ata_port *ap = qc->ap; - unsigned long flags; if (ap->ops->error_handler) { if (in_wq) { - spin_lock_irqsave(ap->lock, flags); - /* EH might have kicked in while host lock is * released. */ @@ -1014,8 +1011,6 @@ static void ata_hsm_qc_complete(struct ata_queued_cmd *qc, int in_wq) } else ata_port_freeze(ap); } - - spin_unlock_irqrestore(ap->lock, flags); } else { if (likely(!(qc->err_mask & AC_ERR_HSM))) ata_qc_complete(qc); @@ -1024,10 +1019,8 @@ static void ata_hsm_qc_complete(struct ata_queued_cmd *qc, int in_wq) } } else { if (in_wq) { - spin_lock_irqsave(ap->lock, flags); ata_sff_irq_on(ap); ata_qc_complete(qc); - spin_unlock_irqrestore(ap->lock, flags); } else ata_qc_complete(qc); } @@ -1048,9 +1041,10 @@ int ata_sff_hsm_move(struct ata_port *ap, struct ata_queued_cmd *qc, { struct ata_link *link = qc->dev->link; struct ata_eh_info *ehi = &link->eh_info; - unsigned long flags = 0; int poll_next; + lockdep_assert_held(ap->lock); + WARN_ON_ONCE((qc->flags & ATA_QCFLAG_ACTIVE) == 0); /* Make sure ata_sff_qc_issue() does not throw things @@ -1112,14 +1106,6 @@ fsm_start: } } - /* Send the CDB (atapi) or the first data block (ata pio out). - * During the state transition, interrupt handler shouldn't - * be invoked before the data transfer is complete and - * hsm_task_state is changed. Hence, the following locking. - */ - if (in_wq) - spin_lock_irqsave(ap->lock, flags); - if (qc->tf.protocol == ATA_PROT_PIO) { /* PIO data out protocol. * send first data block. @@ -1135,9 +1121,6 @@ fsm_start: /* send CDB */ atapi_send_cdb(ap, qc); - if (in_wq) - spin_unlock_irqrestore(ap->lock, flags); - /* if polling, ata_sff_pio_task() handles the rest. * otherwise, interrupt handler takes over from here. */ @@ -1361,12 +1344,14 @@ static void ata_sff_pio_task(struct work_struct *work) u8 status; int poll_next; + spin_lock_irq(ap->lock); + BUG_ON(ap->sff_pio_task_link == NULL); /* qc can be NULL if timeout occurred */ qc = ata_qc_from_tag(ap, link->active_tag); if (!qc) { ap->sff_pio_task_link = NULL; - return; + goto out_unlock; } fsm_start: @@ -1381,11 +1366,14 @@ fsm_start: */ status = ata_sff_busy_wait(ap, ATA_BUSY, 5); if (status & ATA_BUSY) { + spin_unlock_irq(ap->lock); ata_msleep(ap, 2); + spin_lock_irq(ap->lock); + status = ata_sff_busy_wait(ap, ATA_BUSY, 10); if (status & ATA_BUSY) { ata_sff_queue_pio_task(link, ATA_SHORT_PAUSE); - return; + goto out_unlock; } } @@ -1402,6 +1390,8 @@ fsm_start: */ if (poll_next) goto fsm_start; +out_unlock: + spin_unlock_irq(ap->lock); } /** From 0bdce40ce0402ab4d46e33f452eb0c29470e0dc6 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 8 Jan 2016 12:29:10 +0530 Subject: [PATCH 287/418] ARCv2: STAR 9000950267: Handle return from intr to Delay Slot #2 commit cbfe74a753e877b49dc54e9b04d5d42230ca0aed upstream. Returning to delay slot, riding an interrupti, had one loose end. AUX_USER_SP used for restoring user mode SP upon RTIE was not being setup from orig task's saved value, causing task to use wrong SP, leading to ProtV errors. The reason being: - INTERRUPT_EPILOGUE returns to a kernel trampoline, thus not expected to restore it - EXCEPTION_EPILOGUE is not used at all Fix that by restoring AUX_USER_SP explicitly in the trampoline. This was broken in the original workaround, but the error scenarios got reduced considerably since v3.14 due to following: 1. The Linuxthreads.old based userspace at the time caused many more exceptions in delay slot than the current NPTL based one. Infact with current userspace the error doesn't happen at all. 2. Return from interrupt (delay slot or otherwise) doesn't get exercised much after commit 4de0e52867d8 ("Really Re-enable interrupts to avoid deadlocks") since IRQ_ACTIVE.active being clear means most returns are as if from pure kernel (even for active interrupts) Infact the issue only happened in an experimental branch where I was tinkering with reverted 4de0e52867d8 Fixes: 4255b07f2c9c ("ARCv2: STAR 9000793984: Handle return from intr to Delay Slot") Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/kernel/entry-arcv2.S | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/arch/arc/kernel/entry-arcv2.S b/arch/arc/kernel/entry-arcv2.S index cbfec79137bf..b17830294706 100644 --- a/arch/arc/kernel/entry-arcv2.S +++ b/arch/arc/kernel/entry-arcv2.S @@ -211,7 +211,11 @@ debug_marker_syscall: ; (since IRQ NOT allowed in DS in ARCv2, this can only happen if orig ; entry was via Exception in DS which got preempted in kernel). ; -; IRQ RTIE won't reliably restore DE bit and/or BTA, needs handling +; IRQ RTIE won't reliably restore DE bit and/or BTA, needs workaround +; +; Solution is return from Intr w/o any delay slot quirks into a kernel trampoline +; and from pure kernel mode return to delay slot which handles DS bit/BTA correctly + .Lintr_ret_to_delay_slot: debug_marker_ds: @@ -222,18 +226,23 @@ debug_marker_ds: ld r2, [sp, PT_ret] ld r3, [sp, PT_status32] + ; STAT32 for Int return created from scratch + ; (No delay dlot, disable Further intr in trampoline) + bic r0, r3, STATUS_U_MASK|STATUS_DE_MASK|STATUS_IE_MASK|STATUS_L_MASK st r0, [sp, PT_status32] mov r1, .Lintr_ret_to_delay_slot_2 st r1, [sp, PT_ret] + ; Orig exception PC/STAT32 safekept @orig_r0 and @event stack slots st r2, [sp, 0] st r3, [sp, 4] b .Lisr_ret_fast_path .Lintr_ret_to_delay_slot_2: + ; Trampoline to restore orig exception PC/STAT32/BTA/AUX_USER_SP sub sp, sp, SZ_PT_REGS st r9, [sp, -4] @@ -243,11 +252,19 @@ debug_marker_ds: ld r9, [sp, 4] sr r9, [erstatus] + ; restore AUX_USER_SP if returning to U mode + bbit0 r9, STATUS_U_BIT, 1f + ld r9, [sp, PT_sp] + sr r9, [AUX_USER_SP] + +1: ld r9, [sp, 8] sr r9, [erbta] ld r9, [sp, -4] add sp, sp, SZ_PT_REGS + + ; return from pure kernel mode to delay slot rtie END(ret_from_exception) From 1de8f1bcb5321bdc35b64bafe4f4a9c389942167 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 23 Feb 2016 11:55:16 +0530 Subject: [PATCH 288/418] ARCv2: SMP: Emulate IPI to self using software triggered interrupt commit bb143f814ea488769ca2e79e0b376139cb5f134b upstream. ARConnect/MCIP Inter-Core-Interrupt module can't send interrupt to local core. So use core intc capability to trigger software interrupt to self, using an unsued IRQ #21. This showed up as csd deadlock with LTP trace_sched on a dual core system. This test acts as scheduler fuzzer, triggering all sorts of schedulting activity. Trouble starts with IPI to self, which doesn't get delivered (effectively lost due to H/w capability), but the msg intended to be sent remain enqueued in per-cpu @ipi_data. All subsequent IPIs to this core from other cores get elided due to the IPI coalescing optimization in ipi_send_msg_one() where a pending msg implies an IPI already sent and assumes other core is yet to ack it. After the elided IPI, other core simply goes into csd_lock_wait() but never comes out as this core never sees the interrupt. Fixes STAR 9001008624 Cc: Peter Zijlstra Signed-off-by: Vineet Gupta Signed-off-by: Greg Kroah-Hartman --- arch/arc/include/asm/irqflags-arcv2.h | 11 +++++++++++ arch/arc/kernel/entry-arcv2.S | 11 ++++++----- arch/arc/kernel/mcip.c | 15 +++++++++++++++ 3 files changed, 32 insertions(+), 5 deletions(-) diff --git a/arch/arc/include/asm/irqflags-arcv2.h b/arch/arc/include/asm/irqflags-arcv2.h index 258b0e5ad332..68b6092349d7 100644 --- a/arch/arc/include/asm/irqflags-arcv2.h +++ b/arch/arc/include/asm/irqflags-arcv2.h @@ -22,6 +22,7 @@ #define AUX_IRQ_CTRL 0x00E #define AUX_IRQ_ACT 0x043 /* Active Intr across all levels */ #define AUX_IRQ_LVL_PEND 0x200 /* Pending Intr across all levels */ +#define AUX_IRQ_HINT 0x201 /* For generating Soft Interrupts */ #define AUX_IRQ_PRIORITY 0x206 #define ICAUSE 0x40a #define AUX_IRQ_SELECT 0x40b @@ -112,6 +113,16 @@ static inline int arch_irqs_disabled(void) return arch_irqs_disabled_flags(arch_local_save_flags()); } +static inline void arc_softirq_trigger(int irq) +{ + write_aux_reg(AUX_IRQ_HINT, irq); +} + +static inline void arc_softirq_clear(int irq) +{ + write_aux_reg(AUX_IRQ_HINT, 0); +} + #else .macro IRQ_DISABLE scratch diff --git a/arch/arc/kernel/entry-arcv2.S b/arch/arc/kernel/entry-arcv2.S index b17830294706..c1264607bbff 100644 --- a/arch/arc/kernel/entry-arcv2.S +++ b/arch/arc/kernel/entry-arcv2.S @@ -45,11 +45,12 @@ VECTOR reserved ; Reserved slots VECTOR handle_interrupt ; (16) Timer0 VECTOR handle_interrupt ; unused (Timer1) VECTOR handle_interrupt ; unused (WDT) -VECTOR handle_interrupt ; (19) ICI (inter core interrupt) -VECTOR handle_interrupt -VECTOR handle_interrupt -VECTOR handle_interrupt -VECTOR handle_interrupt ; (23) End of fixed IRQs +VECTOR handle_interrupt ; (19) Inter core Interrupt (IPI) +VECTOR handle_interrupt ; (20) perf Interrupt +VECTOR handle_interrupt ; (21) Software Triggered Intr (Self IPI) +VECTOR handle_interrupt ; unused +VECTOR handle_interrupt ; (23) unused +# End of fixed IRQs .rept CONFIG_ARC_NUMBER_OF_INTERRUPTS - 8 VECTOR handle_interrupt diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c index bd237acdf4f2..30d806ce0c78 100644 --- a/arch/arc/kernel/mcip.c +++ b/arch/arc/kernel/mcip.c @@ -11,9 +11,12 @@ #include #include #include +#include #include #include +#define SOFTIRQ_IRQ 21 + static char smp_cpuinfo_buf[128]; static int idu_detected; @@ -22,6 +25,7 @@ static DEFINE_RAW_SPINLOCK(mcip_lock); static void mcip_setup_per_cpu(int cpu) { smp_ipi_irq_setup(cpu, IPI_IRQ); + smp_ipi_irq_setup(cpu, SOFTIRQ_IRQ); } static void mcip_ipi_send(int cpu) @@ -29,6 +33,12 @@ static void mcip_ipi_send(int cpu) unsigned long flags; int ipi_was_pending; + /* ARConnect can only send IPI to others */ + if (unlikely(cpu == raw_smp_processor_id())) { + arc_softirq_trigger(SOFTIRQ_IRQ); + return; + } + /* * NOTE: We must spin here if the other cpu hasn't yet * serviced a previous message. This can burn lots @@ -63,6 +73,11 @@ static void mcip_ipi_clear(int irq) unsigned long flags; unsigned int __maybe_unused copy; + if (unlikely(irq == SOFTIRQ_IRQ)) { + arc_softirq_clear(irq); + return; + } + raw_spin_lock_irqsave(&mcip_lock, flags); /* Who sent the IPI */ From a2d25804cd78be1c4f824de13827f3a0e8bf26e1 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 25 Jan 2016 10:08:00 -0600 Subject: [PATCH 289/418] PCI/AER: Flush workqueue on device remove to avoid use-after-free commit 4ae2182b1e3407de369f8c5d799543b7db74221b upstream. A Root Port's AER structure (rpc) contains a queue of events. aer_irq() enqueues AER status information and schedules aer_isr() to dequeue and process it. When we remove a device, aer_remove() waits for the queue to be empty, then frees the rpc struct. But aer_isr() references the rpc struct after dequeueing and possibly emptying the queue, which can cause a use-after-free error as in the following scenario with two threads, aer_isr() on the left and a concurrent aer_remove() on the right: Thread A Thread B -------- -------- aer_irq(): rpc->prod_idx++ aer_remove(): wait_event(rpc->prod_idx == rpc->cons_idx) # now blocked until queue becomes empty aer_isr(): # ... rpc->cons_idx++ # unblocked because queue is now empty ... kfree(rpc) mutex_unlock(&rpc->rpc_mutex) To prevent this problem, use flush_work() to wait until the last scheduled instance of aer_isr() has completed before freeing the rpc struct in aer_remove(). I reproduced this use-after-free by flashing a device FPGA and re-enumerating the bus to find the new device. With SLUB debug, this crashes with 0x6b bytes (POISON_FREE, the use-after-free magic number) in GPR25: pcieport 0000:00:00.0: AER: Multiple Corrected error received: id=0000 Unable to handle kernel paging request for data at address 0x27ef9e3e Workqueue: events aer_isr GPR24: dd6aa000 6b6b6b6b 605f8378 605f8360 d99b12c0 604fc674 606b1704 d99b12c0 NIP [602f5328] pci_walk_bus+0xd4/0x104 [bhelgaas: changelog, stable tag] Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Bjorn Helgaas Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pcie/aer/aerdrv.c | 4 +--- drivers/pci/pcie/aer/aerdrv.h | 1 - drivers/pci/pcie/aer/aerdrv_core.c | 2 -- 3 files changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c index 0bf82a20a0fb..48d21e0edd56 100644 --- a/drivers/pci/pcie/aer/aerdrv.c +++ b/drivers/pci/pcie/aer/aerdrv.c @@ -262,7 +262,6 @@ static struct aer_rpc *aer_alloc_rpc(struct pcie_device *dev) rpc->rpd = dev; INIT_WORK(&rpc->dpc_handler, aer_isr); mutex_init(&rpc->rpc_mutex); - init_waitqueue_head(&rpc->wait_release); /* Use PCIe bus function to store rpc into PCIe device */ set_service_data(dev, rpc); @@ -285,8 +284,7 @@ static void aer_remove(struct pcie_device *dev) if (rpc->isr) free_irq(dev->irq, dev); - wait_event(rpc->wait_release, rpc->prod_idx == rpc->cons_idx); - + flush_work(&rpc->dpc_handler); aer_disable_rootport(rpc); kfree(rpc); set_service_data(dev, NULL); diff --git a/drivers/pci/pcie/aer/aerdrv.h b/drivers/pci/pcie/aer/aerdrv.h index 84420b7c9456..945c939a86c5 100644 --- a/drivers/pci/pcie/aer/aerdrv.h +++ b/drivers/pci/pcie/aer/aerdrv.h @@ -72,7 +72,6 @@ struct aer_rpc { * recovery on the same * root port hierarchy */ - wait_queue_head_t wait_release; }; struct aer_broadcast_data { diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index fba785e9df75..4e14de0f0f98 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -811,8 +811,6 @@ void aer_isr(struct work_struct *work) while (get_e_source(rpc, &e_src)) aer_isr_one_error(p_device, &e_src); mutex_unlock(&rpc->rpc_mutex); - - wake_up(&rpc->wait_release); } /** From fff4dc84e72419196623f118312f571a2e057196 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 19 Jan 2016 12:18:41 -0500 Subject: [PATCH 290/418] cpuset: make mm migration asynchronous commit e93ad19d05648397ef3bcb838d26aec06c245dc0 upstream. If "cpuset.memory_migrate" is set, when a process is moved from one cpuset to another with a different memory node mask, pages in used by the process are migrated to the new set of nodes. This was performed synchronously in the ->attach() callback, which is synchronized against process management. Recently, the synchronization was changed from per-process rwsem to global percpu rwsem for simplicity and optimization. Combined with the synchronous mm migration, this led to deadlocks because mm migration could schedule a work item which may in turn try to create a new worker blocking on the process management lock held from cgroup process migration path. This heavy an operation shouldn't be performed synchronously from that deep inside cgroup migration in the first place. This patch punts the actual migration to an ordered workqueue and updates cgroup process migration and cpuset config update paths to flush the workqueue after all locks are released. This way, the operations still seem synchronous to userland without entangling mm migration with process management synchronization. CPU hotplug can also invoke mm migration but there's no reason for it to wait for mm migrations and thus doesn't synchronize against their completions. Signed-off-by: Tejun Heo Reported-and-tested-by: Christian Borntraeger Signed-off-by: Greg Kroah-Hartman --- include/linux/cpuset.h | 6 ++++ kernel/cgroup.c | 3 +- kernel/cpuset.c | 71 +++++++++++++++++++++++++++++------------- 3 files changed, 57 insertions(+), 23 deletions(-) diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 85a868ccb493..fea160ee5803 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -137,6 +137,8 @@ static inline void set_mems_allowed(nodemask_t nodemask) task_unlock(current); } +extern void cpuset_post_attach_flush(void); + #else /* !CONFIG_CPUSETS */ static inline bool cpusets_enabled(void) { return false; } @@ -243,6 +245,10 @@ static inline bool read_mems_allowed_retry(unsigned int seq) return false; } +static inline void cpuset_post_attach_flush(void) +{ +} + #endif /* !CONFIG_CPUSETS */ #endif /* _LINUX_CPUSET_H */ diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 470f6536b9e8..0798f7bdfdc4 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -57,7 +57,7 @@ #include /* TODO: replace with more sophisticated array */ #include #include - +#include #include /* @@ -2764,6 +2764,7 @@ out_unlock_rcu: out_unlock_threadgroup: percpu_up_write(&cgroup_threadgroup_rwsem); cgroup_kn_unlock(of->kn); + cpuset_post_attach_flush(); return ret ?: nbytes; } diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 02a8ea5c9963..2ade632197d5 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -286,6 +286,8 @@ static struct cpuset top_cpuset = { static DEFINE_MUTEX(cpuset_mutex); static DEFINE_SPINLOCK(callback_lock); +static struct workqueue_struct *cpuset_migrate_mm_wq; + /* * CPU / memory hotplug is handled asynchronously. */ @@ -971,31 +973,51 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, } /* - * cpuset_migrate_mm - * - * Migrate memory region from one set of nodes to another. - * - * Temporarilly set tasks mems_allowed to target nodes of migration, - * so that the migration code can allocate pages on these nodes. - * - * While the mm_struct we are migrating is typically from some - * other task, the task_struct mems_allowed that we are hacking - * is for our current task, which must allocate new pages for that - * migrating memory region. + * Migrate memory region from one set of nodes to another. This is + * performed asynchronously as it can be called from process migration path + * holding locks involved in process management. All mm migrations are + * performed in the queued order and can be waited for by flushing + * cpuset_migrate_mm_wq. */ +struct cpuset_migrate_mm_work { + struct work_struct work; + struct mm_struct *mm; + nodemask_t from; + nodemask_t to; +}; + +static void cpuset_migrate_mm_workfn(struct work_struct *work) +{ + struct cpuset_migrate_mm_work *mwork = + container_of(work, struct cpuset_migrate_mm_work, work); + + /* on a wq worker, no need to worry about %current's mems_allowed */ + do_migrate_pages(mwork->mm, &mwork->from, &mwork->to, MPOL_MF_MOVE_ALL); + mmput(mwork->mm); + kfree(mwork); +} + static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from, const nodemask_t *to) { - struct task_struct *tsk = current; + struct cpuset_migrate_mm_work *mwork; - tsk->mems_allowed = *to; + mwork = kzalloc(sizeof(*mwork), GFP_KERNEL); + if (mwork) { + mwork->mm = mm; + mwork->from = *from; + mwork->to = *to; + INIT_WORK(&mwork->work, cpuset_migrate_mm_workfn); + queue_work(cpuset_migrate_mm_wq, &mwork->work); + } else { + mmput(mm); + } +} - do_migrate_pages(mm, from, to, MPOL_MF_MOVE_ALL); - - rcu_read_lock(); - guarantee_online_mems(task_cs(tsk), &tsk->mems_allowed); - rcu_read_unlock(); +void cpuset_post_attach_flush(void) +{ + flush_workqueue(cpuset_migrate_mm_wq); } /* @@ -1096,7 +1118,8 @@ static void update_tasks_nodemask(struct cpuset *cs) mpol_rebind_mm(mm, &cs->mems_allowed); if (migrate) cpuset_migrate_mm(mm, &cs->old_mems_allowed, &newmems); - mmput(mm); + else + mmput(mm); } css_task_iter_end(&it); @@ -1541,11 +1564,11 @@ static void cpuset_attach(struct cgroup_taskset *tset) * @old_mems_allowed is the right nodesets that we * migrate mm from. */ - if (is_memory_migrate(cs)) { + if (is_memory_migrate(cs)) cpuset_migrate_mm(mm, &oldcs->old_mems_allowed, &cpuset_attach_nodemask_to); - } - mmput(mm); + else + mmput(mm); } } @@ -1710,6 +1733,7 @@ out_unlock: mutex_unlock(&cpuset_mutex); kernfs_unbreak_active_protection(of->kn); css_put(&cs->css); + flush_workqueue(cpuset_migrate_mm_wq); return retval ?: nbytes; } @@ -2355,6 +2379,9 @@ void __init cpuset_init_smp(void) top_cpuset.effective_mems = node_states[N_MEMORY]; register_hotmemory_notifier(&cpuset_track_online_nodes_nb); + + cpuset_migrate_mm_wq = alloc_ordered_workqueue("cpuset_migrate_mm", 0); + BUG_ON(!cpuset_migrate_mm_wq); } /** From 4cbd196324c05809338c7f118b6f374d3c2db7a0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 21 Jan 2016 15:31:11 -0500 Subject: [PATCH 291/418] cgroup: make sure a parent css isn't offlined before its children commit aa226ff4a1ce79f229c6b7a4c0a14e17fececd01 upstream. There are three subsystem callbacks in css shutdown path - css_offline(), css_released() and css_free(). Except for css_released(), cgroup core didn't guarantee the order of invocation. css_offline() or css_free() could be called on a parent css before its children. This behavior is unexpected and led to bugs in cpu and memory controller. This patch updates offline path so that a parent css is never offlined before its children. Each css keeps online_cnt which reaches zero iff itself and all its children are offline and offline_css() is invoked only after online_cnt reaches zero. This fixes the memory controller bug and allows the fix for cpu controller. Signed-off-by: Tejun Heo Reported-and-tested-by: Christian Borntraeger Reported-by: Brian Christiansen Link: http://lkml.kernel.org/g/5698A023.9070703@de.ibm.com Link: http://lkml.kernel.org/g/CAKB58ikDkzc8REt31WBkD99+hxNzjK4+FBmhkgS+NVrC9vjMSg@mail.gmail.com Cc: Heiko Carstens Cc: Peter Zijlstra Signed-off-by: Greg Kroah-Hartman --- include/linux/cgroup-defs.h | 6 ++++++ kernel/cgroup.c | 22 +++++++++++++++++----- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 06b77f9dd3f2..8e30faeab183 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -133,6 +133,12 @@ struct cgroup_subsys_state { */ u64 serial_nr; + /* + * Incremented by online self and children. Used to guarantee that + * parents are not offlined before their children. + */ + atomic_t online_cnt; + /* percpu_ref killing and RCU release */ struct rcu_head rcu_head; struct work_struct destroy_work; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 0798f7bdfdc4..fb1ecfd2decd 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -4784,6 +4784,7 @@ static void init_and_link_css(struct cgroup_subsys_state *css, INIT_LIST_HEAD(&css->sibling); INIT_LIST_HEAD(&css->children); css->serial_nr = css_serial_nr_next++; + atomic_set(&css->online_cnt, 0); if (cgroup_parent(cgrp)) { css->parent = cgroup_css(cgroup_parent(cgrp), ss); @@ -4806,6 +4807,10 @@ static int online_css(struct cgroup_subsys_state *css) if (!ret) { css->flags |= CSS_ONLINE; rcu_assign_pointer(css->cgroup->subsys[ss->id], css); + + atomic_inc(&css->online_cnt); + if (css->parent) + atomic_inc(&css->parent->online_cnt); } return ret; } @@ -5037,10 +5042,15 @@ static void css_killed_work_fn(struct work_struct *work) container_of(work, struct cgroup_subsys_state, destroy_work); mutex_lock(&cgroup_mutex); - offline_css(css); - mutex_unlock(&cgroup_mutex); - css_put(css); + do { + offline_css(css); + css_put(css); + /* @css can't go away while we're holding cgroup_mutex */ + css = css->parent; + } while (css && atomic_dec_and_test(&css->online_cnt)); + + mutex_unlock(&cgroup_mutex); } /* css kill confirmation processing requires process context, bounce */ @@ -5049,8 +5059,10 @@ static void css_killed_ref_fn(struct percpu_ref *ref) struct cgroup_subsys_state *css = container_of(ref, struct cgroup_subsys_state, refcnt); - INIT_WORK(&css->destroy_work, css_killed_work_fn); - queue_work(cgroup_destroy_wq, &css->destroy_work); + if (atomic_dec_and_test(&css->online_cnt)) { + INIT_WORK(&css->destroy_work, css_killed_work_fn); + queue_work(cgroup_destroy_wq, &css->destroy_work); + } } /** From 7c465723d0b6f2621f6c712035b117d744a51a8b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 16 Feb 2016 11:34:07 -0700 Subject: [PATCH 292/418] writeback: keep superblock pinned during cgroup writeback association switches commit 5ff8eaac1636bf6deae86491f4818c4c69d1a9ac upstream. If cgroup writeback is in use, an inode is associated with a cgroup for writeback. If the inode's main dirtier changes to another cgroup, the association gets updated asynchronously. Nothing was pinning the superblock while such switches are in progress and superblock could go away while async switching is pending or in progress leading to crashes like the following. kernel BUG at fs/jbd2/transaction.c:319! invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC CPU: 1 PID: 29158 Comm: kworker/1:10 Not tainted 4.5.0-rc3 #51 Hardware name: Google Google, BIOS Google 01/01/2011 Workqueue: events inode_switch_wbs_work_fn task: ffff880213dbbd40 ti: ffff880209264000 task.ti: ffff880209264000 RIP: 0010:[] [] start_this_handle+0x382/0x3e0 RSP: 0018:ffff880209267c30 EFLAGS: 00010202 ... Call Trace: [] jbd2__journal_start+0xf4/0x190 [] __ext4_journal_start_sb+0x4e/0x70 [] ext4_evict_inode+0x12c/0x3d0 [] evict+0xbb/0x190 [] iput+0x130/0x190 [] inode_switch_wbs_work_fn+0x343/0x4c0 [] process_one_work+0x129/0x300 [] worker_thread+0x126/0x480 [] kthread+0xc4/0xe0 [] ret_from_fork+0x3f/0x70 Fix it by bumping s_active while cgroup association switching is in flight. Signed-off-by: Tejun Heo Reported-and-tested-by: Tahsin Erdogan Link: http://lkml.kernel.org/g/CAAeU0aNCq7LGODvVGRU-oU_o-6enii5ey0p1c26D1ZzYwkDc5A@mail.gmail.com Fixes: d10c80955265 ("writeback: implement foreign cgroup inode bdi_writeback switching") Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- fs/fs-writeback.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 023f6a1f23cd..e5232bbcbe3d 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -317,6 +317,7 @@ static void inode_switch_wbs_work_fn(struct work_struct *work) struct inode_switch_wbs_context *isw = container_of(work, struct inode_switch_wbs_context, work); struct inode *inode = isw->inode; + struct super_block *sb = inode->i_sb; struct address_space *mapping = inode->i_mapping; struct bdi_writeback *old_wb = inode->i_wb; struct bdi_writeback *new_wb = isw->new_wb; @@ -423,6 +424,7 @@ skip_switch: wb_put(new_wb); iput(inode); + deactivate_super(sb); kfree(isw); } @@ -469,11 +471,14 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id) /* while holding I_WB_SWITCH, no one else can update the association */ spin_lock(&inode->i_lock); + if (inode->i_state & (I_WB_SWITCH | I_FREEING) || - inode_to_wb(inode) == isw->new_wb) { - spin_unlock(&inode->i_lock); - goto out_free; - } + inode_to_wb(inode) == isw->new_wb) + goto out_unlock; + + if (!atomic_inc_not_zero(&inode->i_sb->s_active)) + goto out_unlock; + inode->i_state |= I_WB_SWITCH; spin_unlock(&inode->i_lock); @@ -489,6 +494,8 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id) call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn); return; +out_unlock: + spin_unlock(&inode->i_lock); out_free: if (isw->new_wb) wb_put(isw->new_wb); From be683dfd4c253e11a8e9d9263f5dd0fed8c21de3 Mon Sep 17 00:00:00 2001 From: Shawn Lin Date: Thu, 28 Jan 2016 16:14:18 +0800 Subject: [PATCH 293/418] phy: core: fix wrong err handle for phy_power_on commit b82fcabe212a11698fd4b3e604d2f81d929d22f6 upstream. If phy_pm_runtime_get_sync failed but we already enable regulator, current code return directly without doing regulator_disable. This patch fix this problem and cleanup err handle of phy_power_on to be more readable. Fixes: 3be88125d85d ("phy: core: Support regulator ...") Cc: Roger Quadros Cc: Axel Lin Signed-off-by: Shawn Lin Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Greg Kroah-Hartman --- drivers/phy/phy-core.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/phy/phy-core.c b/drivers/phy/phy-core.c index 8c7f27db6ad3..e7e574dc667a 100644 --- a/drivers/phy/phy-core.c +++ b/drivers/phy/phy-core.c @@ -275,20 +275,21 @@ EXPORT_SYMBOL_GPL(phy_exit); int phy_power_on(struct phy *phy) { - int ret; + int ret = 0; if (!phy) - return 0; + goto out; if (phy->pwr) { ret = regulator_enable(phy->pwr); if (ret) - return ret; + goto out; } ret = phy_pm_runtime_get_sync(phy); if (ret < 0 && ret != -ENOTSUPP) - return ret; + goto err_pm_sync; + ret = 0; /* Override possible ret == -ENOTSUPP */ mutex_lock(&phy->mutex); @@ -296,19 +297,20 @@ int phy_power_on(struct phy *phy) ret = phy->ops->power_on(phy); if (ret < 0) { dev_err(&phy->dev, "phy poweron failed --> %d\n", ret); - goto out; + goto err_pwr_on; } } ++phy->power_count; mutex_unlock(&phy->mutex); return 0; -out: +err_pwr_on: mutex_unlock(&phy->mutex); phy_pm_runtime_put_sync(phy); +err_pm_sync: if (phy->pwr) regulator_disable(phy->pwr); - +out: return ret; } EXPORT_SYMBOL_GPL(phy_power_on); From 023e29e467fc46f7f19c6ac96ee5135c2a56e51e Mon Sep 17 00:00:00 2001 From: Alexandra Yates Date: Wed, 17 Feb 2016 18:21:21 -0800 Subject: [PATCH 294/418] i2c: i801: Adding Intel Lewisburg support for iTCO commit 1a1503c5396eb7f2edf4b8ef6067853014478c0c upstream. Starting from Intel Sunrisepoint (Skylake PCH) the iTCO watchdog resources have been moved to reside under the i801 SMBus host controller whereas previously they were under the LPC device. This patch adds Intel lewisburg SMBus support for iTCO device. It allows to load watchdog dynamically when the hardware is present. Fixes: cdc5a3110e7c ("i2c: i801: add Intel Lewisburg device IDs") Reviewed-by: Jean Delvare Signed-off-by: Alexandra Yates Signed-off-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-i801.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index f62d69799a9c..27fa0cb09538 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -1271,6 +1271,8 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id) switch (dev->device) { case PCI_DEVICE_ID_INTEL_SUNRISEPOINT_H_SMBUS: case PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_SMBUS: + case PCI_DEVICE_ID_INTEL_LEWISBURG_SMBUS: + case PCI_DEVICE_ID_INTEL_LEWISBURG_SSKU_SMBUS: case PCI_DEVICE_ID_INTEL_DNV_SMBUS: priv->features |= FEATURE_I2C_BLOCK_READ; priv->features |= FEATURE_IRQ; From 3e643b5cbef9d7d3112e3f64c64e3f3f7b024979 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 12 Feb 2016 09:39:15 +0100 Subject: [PATCH 295/418] bio: return EINTR if copying to user space got interrupted commit 2d99b55d378c996b9692a0c93dd25f4ed5d58934 upstream. Commit 35dc248383bbab0a7203fca4d722875bc81ef091 introduced a check for current->mm to see if we have a user space context and only copies data if we do. Now if an IO gets interrupted by a signal data isn't copied into user space any more (as we don't have a user space context) but user space isn't notified about it. This patch modifies the behaviour to return -EINTR from bio_uncopy_user() to notify userland that a signal has interrupted the syscall, otherwise it could lead to a situation where the caller may get a buffer with no data returned. This can be reproduced by issuing SG_IO ioctl()s in one thread while constantly sending signals to it. Fixes: 35dc248 [SCSI] sg: Fix user memory corruption when SG_IO is interrupted by a signal Signed-off-by: Johannes Thumshirn Signed-off-by: Hannes Reinecke Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/bio.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/block/bio.c b/block/bio.c index 4f184d938942..d4d144363250 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1090,9 +1090,12 @@ int bio_uncopy_user(struct bio *bio) if (!bio_flagged(bio, BIO_NULL_MAPPED)) { /* * if we're in a workqueue, the request is orphaned, so - * don't copy into a random user address space, just free. + * don't copy into a random user address space, just free + * and return -EINTR so user space doesn't expect any data. */ - if (current->mm && bio_data_dir(bio) == READ) + if (!current->mm) + ret = -EINTR; + else if (bio_data_dir(bio) == READ) ret = bio_copy_to_iter(bio, bmd->iter); if (bmd->is_our_pages) bio_free_pages(bio); From 68a9185526f3b0738118c0c87abea9cda38be841 Mon Sep 17 00:00:00 2001 From: Mike Krinkin Date: Sat, 30 Jan 2016 19:09:59 +0300 Subject: [PATCH 296/418] block: fix use-after-free in dio_bio_complete commit 7ddc971f86aa0a4cee9f6886c356a052461957ae upstream. kasan reported the following error when i ran xfstest: [ 701.826854] ================================================================== [ 701.826864] BUG: KASAN: use-after-free in dio_bio_complete+0x41a/0x600 at addr ffff880080b95f94 [ 701.826870] Read of size 4 by task loop2/3874 [ 701.826879] page:ffffea000202e540 count:0 mapcount:0 mapping: (null) index:0x0 [ 701.826890] flags: 0x100000000000000() [ 701.826895] page dumped because: kasan: bad access detected [ 701.826904] CPU: 3 PID: 3874 Comm: loop2 Tainted: G B W L 4.5.0-rc1-next-20160129 #83 [ 701.826910] Hardware name: LENOVO 23205NG/23205NG, BIOS G2ET95WW (2.55 ) 07/09/2013 [ 701.826917] ffff88008fadf800 ffff88008fadf758 ffffffff81ca67bb 0000000041b58ab3 [ 701.826941] ffffffff830d1e74 ffffffff81ca6724 ffff88008fadf748 ffffffff8161c05c [ 701.826963] 0000000000000282 ffff88008fadf800 ffffed0010172bf2 ffffea000202e540 [ 701.826987] Call Trace: [ 701.826997] [] dump_stack+0x97/0xdc [ 701.827005] [] ? _atomic_dec_and_lock+0xc4/0xc4 [ 701.827014] [] ? __dump_page+0x32c/0x490 [ 701.827023] [] kasan_report_error+0x5f3/0x8b0 [ 701.827033] [] ? dio_bio_complete+0x41a/0x600 [ 701.827040] [] __asan_report_load4_noabort+0x59/0x80 [ 701.827048] [] ? dio_bio_complete+0x41a/0x600 [ 701.827053] [] dio_bio_complete+0x41a/0x600 [ 701.827057] [] ? blk_queue_exit+0x108/0x270 [ 701.827060] [] dio_bio_end_aio+0xa0/0x4d0 [ 701.827063] [] ? dio_bio_complete+0x600/0x600 [ 701.827067] [] ? blk_account_io_completion+0x316/0x5d0 [ 701.827070] [] bio_endio+0x79/0x200 [ 701.827074] [] blk_update_request+0x1df/0xc50 [ 701.827078] [] blk_mq_end_request+0x57/0x120 [ 701.827081] [] __blk_mq_complete_request+0x310/0x590 [ 701.827084] [] ? set_next_entity+0x2f8/0x2ed0 [ 701.827088] [] ? put_prev_entity+0x22d/0x2a70 [ 701.827091] [] blk_mq_complete_request+0x5b/0x80 [ 701.827094] [] loop_queue_work+0x273/0x19d0 [ 701.827098] [] ? finish_task_switch+0x1c8/0x8e0 [ 701.827101] [] ? trace_hardirqs_on_caller+0x18/0x6c0 [ 701.827104] [] ? lo_read_simple+0x890/0x890 [ 701.827108] [] ? debug_check_no_locks_freed+0x350/0x350 [ 701.827111] [] ? __hrtick_start+0x130/0x130 [ 701.827115] [] ? __schedule+0x936/0x20b0 [ 701.827118] [] ? kthread_worker_fn+0x3ed/0x8d0 [ 701.827121] [] ? kthread_worker_fn+0x21d/0x8d0 [ 701.827125] [] ? trace_hardirqs_on_caller+0x18/0x6c0 [ 701.827128] [] kthread_worker_fn+0x2af/0x8d0 [ 701.827132] [] ? __init_kthread_worker+0x170/0x170 [ 701.827135] [] ? _raw_spin_unlock_irqrestore+0x36/0x60 [ 701.827138] [] ? __init_kthread_worker+0x170/0x170 [ 701.827141] [] ? __init_kthread_worker+0x170/0x170 [ 701.827144] [] kthread+0x24b/0x3a0 [ 701.827148] [] ? kthread_create_on_node+0x4c0/0x4c0 [ 701.827151] [] ? trace_hardirqs_on+0xd/0x10 [ 701.827155] [] ? do_group_exit+0xdd/0x350 [ 701.827158] [] ? kthread_create_on_node+0x4c0/0x4c0 [ 701.827161] [] ret_from_fork+0x3f/0x70 [ 701.827165] [] ? kthread_create_on_node+0x4c0/0x4c0 [ 701.827167] Memory state around the buggy address: [ 701.827170] ffff880080b95e80: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff [ 701.827172] ffff880080b95f00: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff [ 701.827175] >ffff880080b95f80: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff [ 701.827177] ^ [ 701.827179] ffff880080b96000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff [ 701.827182] ffff880080b96080: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff [ 701.827183] ================================================================== The problem is that bio_check_pages_dirty calls bio_put, so we must not access bio fields after bio_check_pages_dirty. Fixes: 9b81c842355ac96097ba ("block: don't access bio->bi_error after bio_put()"). Signed-off-by: Mike Krinkin Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- fs/direct-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/direct-io.c b/fs/direct-io.c index 602e8441bc0f..01171d8a6ee9 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -472,8 +472,8 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio) dio->io_error = -EIO; if (dio->is_async && dio->rw == READ && dio->should_dirty) { - bio_check_pages_dirty(bio); /* transfers ownership */ err = bio->bi_error; + bio_check_pages_dirty(bio); /* transfers ownership */ } else { bio_for_each_segment_all(bvec, bio, i) { struct page *page = bvec->bv_page; From fd921e575636438b8996df1e3d885dea9be5b25c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 8 Feb 2016 21:11:50 +0100 Subject: [PATCH 297/418] nfs: fix nfs_size_to_loff_t commit 50ab8ec74a153eb30db26529088bc57dd700b24c upstream. See http: //www.infradead.org/rpr.html X-Evolution-Source: 1451162204.2173.11@leira.trondhjem.org Content-Transfer-Encoding: 8bit Mime-Version: 1.0 We support OFFSET_MAX just fine, so don't round down below it. Also switch to using min_t to make the helper more readable. Signed-off-by: Christoph Hellwig Fixes: 433c92379d9c ("NFS: Clean up nfs_size_to_loff_t()") Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- include/linux/nfs_fs.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index c0e961474a52..5455b660bd88 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -544,9 +544,7 @@ extern int nfs_readpage_async(struct nfs_open_context *, struct inode *, static inline loff_t nfs_size_to_loff_t(__u64 size) { - if (size > (__u64) OFFSET_MAX - 1) - return OFFSET_MAX - 1; - return (loff_t) size; + return min_t(u64, size, OFFSET_MAX); } static inline ino_t From b8558ada696e26f51cbde1d24cdf291bbc7ec79b Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Wed, 17 Feb 2016 10:41:41 -0500 Subject: [PATCH 298/418] NFSv4: Fix a dentry leak on alias use commit d9dfd8d741683347ee159d25f5b50c346a0df557 upstream. In the case where d_add_unique() finds an appropriate alias to use it will have already incremented the reference count. An additional dget() to swap the open context's dentry is unnecessary and will leak a reference. Signed-off-by: Benjamin Coddington Fixes: 275bb307865a3 ("NFSv4: Move dentry instantiation into the NFSv4-...") Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4proc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f496ed721d27..98a44157353a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2461,9 +2461,9 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, dentry = d_add_unique(dentry, igrab(state->inode)); if (dentry == NULL) { dentry = opendata->dentry; - } else if (dentry != ctx->dentry) { + } else { dput(ctx->dentry); - ctx->dentry = dget(dentry); + ctx->dentry = dentry; } nfs_set_verifier(dentry, nfs_save_change_attribute(d_inode(opendata->dir))); From e1d61091db0ba3ef805510bb5fee46dedd7c8734 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 9 Feb 2016 11:04:45 +0000 Subject: [PATCH 299/418] of/irq: Fix msi-map calculation for nonzero rid-base commit 5d589d81acf974d23af98044aac56c6339d659f8 upstream. The existing msi-map code is fine for shifting the entire RID space upwards, but attempting finer-grained remapping reveals a bug. It turns out that we are mistakenly treating the msi-base part as an offset, not as a new base to remap onto, so things get squiffy when rid-base is nonzero. Fix this, and at the same time add a sanity check against having msi-map-mask clash with a nonzero rid-base, as that's another thing one can easily get wrong. Signed-off-by: Robin Murphy Reviewed-by: Marc Zyngier Tested-by: Stuart Yoder Acked-by: Mark Rutland Acked-by: David Daney Signed-off-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- drivers/of/irq.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/of/irq.c b/drivers/of/irq.c index 4fa916dffc91..72a2c1969646 100644 --- a/drivers/of/irq.c +++ b/drivers/of/irq.c @@ -636,6 +636,13 @@ static u32 __of_msi_map_rid(struct device *dev, struct device_node **np, msi_base = be32_to_cpup(msi_map + 2); rid_len = be32_to_cpup(msi_map + 3); + if (rid_base & ~map_mask) { + dev_err(parent_dev, + "Invalid msi-map translation - msi-map-mask (0x%x) ignores rid-base (0x%x)\n", + map_mask, rid_base); + return rid_out; + } + msi_controller_node = of_find_node_by_phandle(phandle); matched = (masked_rid >= rid_base && @@ -655,7 +662,7 @@ static u32 __of_msi_map_rid(struct device *dev, struct device_node **np, if (!matched) return rid_out; - rid_out = masked_rid + msi_base; + rid_out = masked_rid - rid_base + msi_base; dev_dbg(dev, "msi-map at: %s, using mask %08x, rid-base: %08x, msi-base: %08x, length: %08x, rid: %08x -> %08x\n", dev_name(parent_dev), map_mask, rid_base, msi_base, From 85212a3690a3eb53ae02219e3153e9d14216e164 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 19 Feb 2016 13:11:46 +0100 Subject: [PATCH 300/418] KVM: async_pf: do not warn on page allocation failures commit d7444794a02ff655eda87e3cc54e86b940e7736f upstream. In async_pf we try to allocate with NOWAIT to get an element quickly or fail. This code also handle failures gracefully. Lets silence potential page allocation failures under load. qemu-system-s39: page allocation failure: order:0,mode:0x2200000 [...] Call Trace: ([<00000000001146b8>] show_trace+0xf8/0x148) [<000000000011476a>] show_stack+0x62/0xe8 [<00000000004a36b8>] dump_stack+0x70/0x98 [<0000000000272c3a>] warn_alloc_failed+0xd2/0x148 [<000000000027709e>] __alloc_pages_nodemask+0x94e/0xb38 [<00000000002cd36a>] new_slab+0x382/0x400 [<00000000002cf7ac>] ___slab_alloc.constprop.30+0x2dc/0x378 [<00000000002d03d0>] kmem_cache_alloc+0x160/0x1d0 [<0000000000133db4>] kvm_setup_async_pf+0x6c/0x198 [<000000000013dee8>] kvm_arch_vcpu_ioctl_run+0xd48/0xd58 [<000000000012fcaa>] kvm_vcpu_ioctl+0x372/0x690 [<00000000002f66f6>] do_vfs_ioctl+0x3be/0x510 [<00000000002f68ec>] SyS_ioctl+0xa4/0xb8 [<0000000000781c5e>] system_call+0xd6/0x264 [<000003ffa24fa06a>] 0x3ffa24fa06a Signed-off-by: Christian Borntraeger Reviewed-by: Dominik Dingel Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- virt/kvm/async_pf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 77d42be6970e..4f70d12e392d 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -173,7 +173,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva, * do alloc nowait since if we are going to sleep anyway we * may as well sleep faulting in page */ - work = kmem_cache_zalloc(async_pf_cache, GFP_NOWAIT); + work = kmem_cache_zalloc(async_pf_cache, GFP_NOWAIT | __GFP_NOWARN); if (!work) return 0; From d62cca1106558209021273b68118a4b102ac07cf Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 16 Feb 2016 14:47:31 +0000 Subject: [PATCH 301/418] KVM: arm/arm64: vgic: Ensure bitmaps are long enough commit 236cf17c2502007a9d2dda3c39fb0d9a6bd03cc2 upstream. When we allocate bitmaps in vgic_vcpu_init_maps, we divide the number of bits we need by 8 to figure out how many bytes to allocate. However, bitmap elements are always accessed as unsigned longs, and if we didn't happen to allocate a size such that size % sizeof(unsigned long) == 0, bitmap accesses may go past the end of the allocation. When using KASAN (which does byte-granular access checks), this results in a continuous stream of BUGs whenever these bitmaps are accessed: ============================================================================= BUG kmalloc-128 (Tainted: G B ): kasan: bad access detected ----------------------------------------------------------------------------- INFO: Allocated in vgic_init.part.25+0x55c/0x990 age=7493 cpu=3 pid=1730 INFO: Slab 0xffffffbde6d5da40 objects=16 used=15 fp=0xffffffc935769700 flags=0x4000000000000080 INFO: Object 0xffffffc935769500 @offset=1280 fp=0x (null) Bytes b4 ffffffc9357694f0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object ffffffc935769500: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object ffffffc935769510: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object ffffffc935769520: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object ffffffc935769530: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object ffffffc935769540: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object ffffffc935769550: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object ffffffc935769560: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Object ffffffc935769570: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Padding ffffffc9357695b0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Padding ffffffc9357695c0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Padding ffffffc9357695d0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Padding ffffffc9357695e0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ Padding ffffffc9357695f0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ CPU: 3 PID: 1740 Comm: kvm-vcpu-0 Tainted: G B 4.4.0+ #17 Hardware name: ARM Juno development board (r1) (DT) Call trace: [] dump_backtrace+0x0/0x280 [] show_stack+0x14/0x20 [] dump_stack+0x100/0x188 [] print_trailer+0xfc/0x168 [] object_err+0x3c/0x50 [] kasan_report_error+0x244/0x558 [] __asan_report_load8_noabort+0x48/0x50 [] __bitmap_or+0xc0/0xc8 [] kvm_vgic_flush_hwstate+0x1bc/0x650 [] kvm_arch_vcpu_ioctl_run+0x2ec/0xa60 [] kvm_vcpu_ioctl+0x474/0xa68 [] do_vfs_ioctl+0x5b8/0xcb0 [] SyS_ioctl+0x8c/0xa0 [] el0_svc_naked+0x24/0x28 Memory state around the buggy address: ffffffc935769400: 00 00 fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffffffc935769480: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffffffc935769500: 04 fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ^ ffffffc935769580: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffffffc935769600: 00 00 00 00 fc fc fc fc fc fc fc fc fc fc fc fc ================================================================== Fix the issue by always allocating a multiple of sizeof(unsigned long), as we do elsewhere in the vgic code. Fixes: c1bfb577a ("arm/arm64: KVM: vgic: switch to dynamic allocation") Acked-by: Marc Zyngier Acked-by: Christoffer Dall Signed-off-by: Mark Rutland Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- virt/kvm/arm/vgic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 7a2f449bd85d..5d10f104f3eb 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1875,8 +1875,8 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - - int sz = (nr_irqs - VGIC_NR_PRIVATE_IRQS) / 8; + int nr_longs = BITS_TO_LONGS(nr_irqs - VGIC_NR_PRIVATE_IRQS); + int sz = nr_longs * sizeof(unsigned long); vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL); vgic_cpu->active_shared = kzalloc(sz, GFP_KERNEL); vgic_cpu->pend_act_shared = kzalloc(sz, GFP_KERNEL); From fc90441e728aa461a8ed1cfede08b0b9efef43fb Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 10 Feb 2016 17:50:23 +0100 Subject: [PATCH 302/418] KVM: x86: fix missed hardware breakpoints commit 172b2386ed16a9143d9a456aae5ec87275c61489 upstream. Sometimes when setting a breakpoint a process doesn't stop on it. This is because the debug registers are not loaded correctly on VCPU load. The following simple reproducer from Oleg Nesterov tries using debug registers in two threads. To see the bug, run a 2-VCPU guest with "taskset -c 0" and run "./bp 0 1" inside the guest. #include #include #include #include #include #include #include #include #include #define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER) unsigned long encode_dr7(int drnum, int enable, unsigned int type, unsigned int len) { unsigned long dr7; dr7 = ((len | type) & 0xf) << (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE); if (enable) dr7 |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE)); return dr7; } int write_dr(int pid, int dr, unsigned long val) { return ptrace(PTRACE_POKEUSER, pid, offsetof (struct user, u_debugreg[dr]), val); } void set_bp(pid_t pid, void *addr) { unsigned long dr7; assert(write_dr(pid, 0, (long)addr) == 0); dr7 = encode_dr7(0, 1, DR_RW_EXECUTE, DR_LEN_1); assert(write_dr(pid, 7, dr7) == 0); } void *get_rip(int pid) { return (void*)ptrace(PTRACE_PEEKUSER, pid, offsetof(struct user, regs.rip), 0); } void test(int nr) { void *bp_addr = &&label + nr, *bp_hit; int pid; printf("test bp %d\n", nr); assert(nr < 16); // see 16 asm nops below pid = fork(); if (!pid) { assert(ptrace(PTRACE_TRACEME, 0,0,0) == 0); kill(getpid(), SIGSTOP); for (;;) { label: asm ( "nop; nop; nop; nop;" "nop; nop; nop; nop;" "nop; nop; nop; nop;" "nop; nop; nop; nop;" ); } } assert(pid == wait(NULL)); set_bp(pid, bp_addr); for (;;) { assert(ptrace(PTRACE_CONT, pid, 0, 0) == 0); assert(pid == wait(NULL)); bp_hit = get_rip(pid); if (bp_hit != bp_addr) fprintf(stderr, "ERR!! hit wrong bp %ld != %d\n", bp_hit - &&label, nr); } } int main(int argc, const char *argv[]) { while (--argc) { int nr = atoi(*++argv); if (!fork()) test(nr); } while (wait(NULL) > 0) ; return 0; } Suggested-by: Nadav Amit Reported-by: Andrey Wagin Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9a2ed8904513..6ef3856aab4b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2736,6 +2736,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) } kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); + vcpu->arch.switch_db_regs |= KVM_DEBUGREG_RELOAD; } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) From de5b55f616a7ba97e72e33f52ac0cde6c8c47527 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 19 Feb 2016 18:07:21 +0100 Subject: [PATCH 303/418] KVM: x86: fix conversion of addresses to linear in 32-bit protected mode commit 0c1d77f4ba5cc9c05a29adca3d6466cdf4969b70 upstream. Commit e8dd2d2d641c ("Silence compiler warning in arch/x86/kvm/emulate.c", 2015-09-06) broke boot of the Hurd. The bug is that the "default:" case actually could modify "la", but after the patch this change is not reflected in *linear. The bug is visible whenever a non-zero segment base causes the linear address to wrap around the 4GB mark. Fixes: e8dd2d2d641cb2724ee10e76c0ad02e04289c017 Reported-by: Aurelien Jarno Tested-by: Aurelien Jarno Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/emulate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 1505587d06e9..b9b09fec173b 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -650,10 +650,10 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt, u16 sel; la = seg_base(ctxt, addr.seg) + addr.ea; - *linear = la; *max_size = 0; switch (mode) { case X86EMUL_MODE_PROT64: + *linear = la; if (is_noncanonical_address(la)) goto bad; @@ -662,6 +662,7 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt, goto bad; break; default: + *linear = la = (u32)la; usable = ctxt->ops->get_segment(ctxt, &sel, &desc, NULL, addr.seg); if (!usable) @@ -689,7 +690,6 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt, if (size > *max_size) goto bad; } - la &= (u32)-1; break; } if (insn_aligned(ctxt, size) && ((la & (size - 1)) != 0)) From 4aa584e4200f63450b513900ef5a61a8bc04e245 Mon Sep 17 00:00:00 2001 From: Mike Krinkin Date: Wed, 24 Feb 2016 21:02:31 +0300 Subject: [PATCH 304/418] KVM: x86: MMU: fix ubsan index-out-of-range warning commit 17e4bce0ae63c7e03f3c7fa8d80890e7af3d4971 upstream. Ubsan reports the following warning due to a typo in update_accessed_dirty_bits template, the patch fixes the typo: [ 168.791851] ================================================================================ [ 168.791862] UBSAN: Undefined behaviour in arch/x86/kvm/paging_tmpl.h:252:15 [ 168.791866] index 4 is out of range for type 'u64 [4]' [ 168.791871] CPU: 0 PID: 2950 Comm: qemu-system-x86 Tainted: G O L 4.5.0-rc5-next-20160222 #7 [ 168.791873] Hardware name: LENOVO 23205NG/23205NG, BIOS G2ET95WW (2.55 ) 07/09/2013 [ 168.791876] 0000000000000000 ffff8801cfcaf208 ffffffff81c9f780 0000000041b58ab3 [ 168.791882] ffffffff82eb2cc1 ffffffff81c9f6b4 ffff8801cfcaf230 ffff8801cfcaf1e0 [ 168.791886] 0000000000000004 0000000000000001 0000000000000000 ffffffffa1981600 [ 168.791891] Call Trace: [ 168.791899] [] dump_stack+0xcc/0x12c [ 168.791904] [] ? _atomic_dec_and_lock+0xc4/0xc4 [ 168.791910] [] ubsan_epilogue+0xd/0x8a [ 168.791914] [] __ubsan_handle_out_of_bounds+0x15c/0x1a3 [ 168.791918] [] ? __ubsan_handle_shift_out_of_bounds+0x2bd/0x2bd [ 168.791922] [] ? get_user_pages_fast+0x2bf/0x360 [ 168.791954] [] ? kvm_largepages_enabled+0x30/0x30 [kvm] [ 168.791958] [] ? __get_user_pages_fast+0x360/0x360 [ 168.791987] [] paging64_walk_addr_generic+0x1b28/0x2600 [kvm] [ 168.792014] [] ? init_kvm_mmu+0x1100/0x1100 [kvm] [ 168.792019] [] ? debug_check_no_locks_freed+0x350/0x350 [ 168.792044] [] ? init_kvm_mmu+0x1100/0x1100 [kvm] [ 168.792076] [] paging64_gva_to_gpa+0x7d/0x110 [kvm] [ 168.792121] [] ? paging64_walk_addr_generic+0x2600/0x2600 [kvm] [ 168.792130] [] ? debug_lockdep_rcu_enabled+0x7b/0x90 [ 168.792178] [] emulator_read_write_onepage+0x27a/0x1150 [kvm] [ 168.792208] [] ? __kvm_read_guest_page+0x54/0x70 [kvm] [ 168.792234] [] ? kvm_task_switch+0x160/0x160 [kvm] [ 168.792238] [] ? debug_lockdep_rcu_enabled+0x7b/0x90 [ 168.792263] [] emulator_read_write+0xe7/0x6d0 [kvm] [ 168.792290] [] ? em_cr_write+0x230/0x230 [kvm] [ 168.792314] [] emulator_write_emulated+0x15/0x20 [kvm] [ 168.792340] [] segmented_write+0xf8/0x130 [kvm] [ 168.792367] [] ? em_lgdt+0x20/0x20 [kvm] [ 168.792374] [] ? vmx_read_guest_seg_ar+0x42/0x1e0 [kvm_intel] [ 168.792400] [] writeback+0x3f2/0x700 [kvm] [ 168.792424] [] ? em_sidt+0xa0/0xa0 [kvm] [ 168.792449] [] ? x86_decode_insn+0x1b3d/0x4f70 [kvm] [ 168.792474] [] x86_emulate_insn+0x572/0x3010 [kvm] [ 168.792499] [] x86_emulate_instruction+0x3bd/0x2110 [kvm] [ 168.792524] [] ? reexecute_instruction.part.110+0x2e0/0x2e0 [kvm] [ 168.792532] [] handle_ept_misconfig+0x61/0x460 [kvm_intel] [ 168.792539] [] ? handle_pause+0x450/0x450 [kvm_intel] [ 168.792546] [] vmx_handle_exit+0xd6a/0x1ad0 [kvm_intel] [ 168.792572] [] ? kvm_arch_vcpu_ioctl_run+0xbdc/0x6090 [kvm] [ 168.792597] [] kvm_arch_vcpu_ioctl_run+0xd3d/0x6090 [kvm] [ 168.792621] [] ? kvm_arch_vcpu_ioctl_run+0xbdc/0x6090 [kvm] [ 168.792627] [] ? __ww_mutex_lock_interruptible+0x1630/0x1630 [ 168.792651] [] ? kvm_arch_vcpu_runnable+0x4f0/0x4f0 [kvm] [ 168.792656] [] ? preempt_notifier_unregister+0x190/0x190 [ 168.792681] [] ? kvm_arch_vcpu_load+0x127/0x650 [kvm] [ 168.792704] [] kvm_vcpu_ioctl+0x553/0xda0 [kvm] [ 168.792727] [] ? vcpu_put+0x40/0x40 [kvm] [ 168.792732] [] ? debug_check_no_locks_freed+0x350/0x350 [ 168.792735] [] ? _raw_spin_unlock+0x27/0x40 [ 168.792740] [] ? handle_mm_fault+0x1673/0x2e40 [ 168.792744] [] ? trace_hardirqs_on_caller+0x478/0x6c0 [ 168.792747] [] ? trace_hardirqs_on+0xd/0x10 [ 168.792751] [] ? debug_lockdep_rcu_enabled+0x7b/0x90 [ 168.792756] [] do_vfs_ioctl+0x1b0/0x12b0 [ 168.792759] [] ? ioctl_preallocate+0x210/0x210 [ 168.792763] [] ? __fget+0x273/0x4a0 [ 168.792766] [] ? __fget+0x50/0x4a0 [ 168.792770] [] ? __fget_light+0x96/0x2b0 [ 168.792773] [] SyS_ioctl+0x79/0x90 [ 168.792777] [] entry_SYSCALL_64_fastpath+0x23/0xc1 [ 168.792780] ================================================================================ Signed-off-by: Mike Krinkin Reviewed-by: Xiao Guangrong Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/paging_tmpl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 3058a22a658d..7be8a251363e 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -249,7 +249,7 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu, return ret; kvm_vcpu_mark_page_dirty(vcpu, table_gfn); - walker->ptes[level] = pte; + walker->ptes[level - 1] = pte; } return 0; } From 222473f70b2c913445c3173ddcf7bbc5375b93b7 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 12 Feb 2016 16:03:05 +1100 Subject: [PATCH 305/418] powerpc/eeh: Fix partial hotplug criterion commit f6bf0fa14cf848ae770e0b7842c9b11ce2f01645 upstream. During error recovery, the device could be removed as part of the partial hotplug. The criterion used to come with partial hotplug is: if the device driver provides error_detected(), slot_reset() and resume() callbacks, it's immune from hotplug. Otherwise, it's going to experience partial hotplug during EEH recovery. But the criterion isn't correct enough: mlx4_core driver for Mellanox adapters provides error_detected(), slot_reset() callbacks, but resume() isn't there. Those Mellanox adapters won't be to involved in the partial hotplug. This fixes the criterion to a practical one: adpater with driver that provides error_detected(), slot_reset() will be immune from partial hotplug. resume() isn't mandatory. Fixes: f2da4ccf ("powerpc/eeh: More relaxed hotplug criterion") Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/eeh_driver.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index f69ecaa7ce33..52c1e273f8cd 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -418,8 +418,7 @@ static void *eeh_rmv_device(void *data, void *userdata) eeh_pcid_put(dev); if (driver->err_handler && driver->err_handler->error_detected && - driver->err_handler->slot_reset && - driver->err_handler->resume) + driver->err_handler->slot_reset) return NULL; } From 36b53e8b2abf1f514e83e2c3207e36e71c8176de Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 24 Feb 2016 09:04:24 -0500 Subject: [PATCH 306/418] tracing: Fix showing function event in available_events commit d045437a169f899dfb0f6f7ede24cc042543ced9 upstream. The ftrace:function event is only displayed for parsing the function tracer data. It is not used to enable function tracing, and does not include an "enable" file in its event directory. Originally, this event was kept separate from other events because it did not have a ->reg parameter. But perf added a "reg" parameter for its use which caused issues, because it made the event available to functions where it was not compatible for. Commit 9b63776fa3ca9 "tracing: Do not enable function event with enable" added a TRACE_EVENT_FL_IGNORE_ENABLE flag that prevented the function event from being enabled by normal trace events. But this commit missed keeping the function event from being displayed by the "available_events" directory, which is used to show what events can be enabled by set_event. One documented way to enable all events is to: cat available_events > set_event But because the function event is displayed in the available_events, this now causes an INVALID error: cat: write error: Invalid argument Reported-by: Chunyu Hu Fixes: 9b63776fa3ca9 "tracing: Do not enable function event with enable" Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_events.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 4f6ef6912e00..debf6e878076 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -869,7 +869,8 @@ t_next(struct seq_file *m, void *v, loff_t *pos) * The ftrace subsystem is for showing formats only. * They can not be enabled or disabled via the event files. */ - if (call->class && call->class->reg) + if (call->class && call->class->reg && + !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) return file; } From dc16b4393fc6226af463fd8c1d92411e5c349cf0 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Thu, 18 Feb 2016 18:55:54 +0000 Subject: [PATCH 307/418] sunrpc/cache: fix off-by-one in qword_get() commit b7052cd7bcf3c1478796e93e3dff2b44c9e82943 upstream. The qword_get() function NUL-terminates its output buffer. If the input string is in hex format \xXXXX... and the same length as the output buffer, there is an off-by-one: int qword_get(char **bpp, char *dest, int bufsize) { ... while (len < bufsize) { ... *dest++ = (h << 4) | l; len++; } ... *dest = '\0'; return len; } This patch ensures the NUL terminator doesn't fall outside the output buffer. Signed-off-by: Stefan Hajnoczi Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- net/sunrpc/cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 5e4f815c2b34..21e20353178e 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1225,7 +1225,7 @@ int qword_get(char **bpp, char *dest, int bufsize) if (bp[0] == '\\' && bp[1] == 'x') { /* HEX STRING */ bp += 2; - while (len < bufsize) { + while (len < bufsize - 1) { int h, l; h = hex_to_bin(bp[0]); From f634ac98bd9218feb31868b43f56e7a5999ce98e Mon Sep 17 00:00:00 2001 From: Simon Guinot Date: Thu, 10 Sep 2015 00:15:18 +0200 Subject: [PATCH 308/418] kernel/resource.c: fix muxed resource handling in __request_region() commit 59ceeaaf355fa0fb16558ef7c24413c804932ada upstream. In __request_region, if a conflict with a BUSY and MUXED resource is detected, then the caller goes to sleep and waits for the resource to be released. A pointer on the conflicting resource is kept. At wake-up this pointer is used as a parent to retry to request the region. A first problem is that this pointer might well be invalid (if for example the conflicting resource have already been freed). Another problem is that the next call to __request_region() fails to detect a remaining conflict. The previously conflicting resource is passed as a parameter and __request_region() will look for a conflict among the children of this resource and not at the resource itself. It is likely to succeed anyway, even if there is still a conflict. Instead, the parent of the conflicting resource should be passed to __request_region(). As a fix, this patch doesn't update the parent resource pointer in the case we have to wait for a muxed region right after. Reported-and-tested-by: Vincent Pelletier Signed-off-by: Simon Guinot Tested-by: Vincent Donnefort Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/resource.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/resource.c b/kernel/resource.c index f150dbbe6f62..249b1eb1e6e1 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -1083,9 +1083,10 @@ struct resource * __request_region(struct resource *parent, if (!conflict) break; if (conflict != parent) { - parent = conflict; - if (!(conflict->flags & IORESOURCE_BUSY)) + if (!(conflict->flags & IORESOURCE_BUSY)) { + parent = conflict; continue; + } } if (conflict->flags & flags & IORESOURCE_MUXED) { add_wait_queue(&muxed_resource_wait, &wait); From 80d18c0026eb6fc9cd9fb5a36540bfaa8a00fb47 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 27 Feb 2016 19:17:33 -0500 Subject: [PATCH 309/418] do_last(): don't let a bogus return value from ->open() et.al. to confuse us commit c80567c82ae4814a41287618e315a60ecf513be6 upstream. ... into returning a positive to path_openat(), which would interpret that as "symlink had been encountered" and proceed to corrupt memory, etc. It can only happen due to a bug in some ->open() instance or in some LSM hook, etc., so we report any such event *and* make sure it doesn't trick us into further unpleasantness. Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/namei.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/namei.c b/fs/namei.c index c9eb7e84d05f..f26cecbd6e73 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3210,6 +3210,10 @@ opened: goto exit_fput; } out: + if (unlikely(error > 0)) { + WARN_ON(1); + error = -EINVAL; + } if (got_write) mnt_drop_write(nd->path.mnt); path_put(&save_parent); From 8b78924f123e7cbb08d9dd25cb793c2a2e58741d Mon Sep 17 00:00:00 2001 From: Ivaylo Dimitrov Date: Fri, 5 Feb 2016 16:37:08 +0200 Subject: [PATCH 310/418] ARM: OMAP2+: Fix onenand initialization to avoid filesystem corruption commit 3f315c5b850fa7aff73f50de8e316b98f611a32b upstream. Commit e7b11dc7b77b ("ARM: OMAP2+: Fix onenand rate detection to avoid filesystem corruption") partially fixed onenand configuration when GPMC module is reset. Finish the job by also providing the correct values in ONENAND_REG_SYS_CFG1 register. Fixes: e7b11dc7b77b ("ARM: OMAP2+: Fix onenand rate detection to avoid filesystem corruption") Signed-off-by: Ivaylo Dimitrov Tested-by: Aaro Koskinen Signed-off-by: Tony Lindgren Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-omap2/gpmc-onenand.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/mach-omap2/gpmc-onenand.c b/arch/arm/mach-omap2/gpmc-onenand.c index 7b76ce01c21d..8633c703546a 100644 --- a/arch/arm/mach-omap2/gpmc-onenand.c +++ b/arch/arm/mach-omap2/gpmc-onenand.c @@ -101,10 +101,8 @@ static void omap2_onenand_set_async_mode(void __iomem *onenand_base) static void set_onenand_cfg(void __iomem *onenand_base) { - u32 reg; + u32 reg = ONENAND_SYS_CFG1_RDY | ONENAND_SYS_CFG1_INT; - reg = readw(onenand_base + ONENAND_REG_SYS_CFG1); - reg &= ~((0x7 << ONENAND_SYS_CFG1_BRL_SHIFT) | (0x7 << 9)); reg |= (latency << ONENAND_SYS_CFG1_BRL_SHIFT) | ONENAND_SYS_CFG1_BL_16; if (onenand_flags & ONENAND_FLAG_SYNCREAD) @@ -123,6 +121,7 @@ static void set_onenand_cfg(void __iomem *onenand_base) reg |= ONENAND_SYS_CFG1_VHF; else reg &= ~ONENAND_SYS_CFG1_VHF; + writew(reg, onenand_base + ONENAND_REG_SYS_CFG1); } @@ -289,6 +288,7 @@ static int omap2_onenand_setup_async(void __iomem *onenand_base) } } + onenand_async.sync_write = true; omap2_onenand_calc_async_timings(&t); ret = gpmc_cs_program_settings(gpmc_onenand_data->cs, &onenand_async); From 362deccacfef46e1c78acc79ba9721829605a883 Mon Sep 17 00:00:00 2001 From: Ludovic Desroches Date: Fri, 19 Feb 2016 20:21:17 +0100 Subject: [PATCH 311/418] ARM: at91/dt: fix typo in sama5d2 pinmux descriptions commit 5e45a2589d24573c564630990c88ac93659f8fe4 upstream. PIN_PA15 macro has the same value as PIN_PA14 so we were overriding PA14 mux/configuration. Signed-off-by: Ludovic Desroches Reported-by: Cyrille Pitchen Fixes: 7f16cb676c00 ("ARM: at91/dt: add sama5d2 pinmux") Signed-off-by: Alexandre Belloni Signed-off-by: Olof Johansson Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/sama5d2-pinfunc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/sama5d2-pinfunc.h b/arch/arm/boot/dts/sama5d2-pinfunc.h index 1afe24629d1f..b0c912feaa2f 100644 --- a/arch/arm/boot/dts/sama5d2-pinfunc.h +++ b/arch/arm/boot/dts/sama5d2-pinfunc.h @@ -90,7 +90,7 @@ #define PIN_PA14__I2SC1_MCK PINMUX_PIN(PIN_PA14, 4, 2) #define PIN_PA14__FLEXCOM3_IO2 PINMUX_PIN(PIN_PA14, 5, 1) #define PIN_PA14__D9 PINMUX_PIN(PIN_PA14, 6, 2) -#define PIN_PA15 14 +#define PIN_PA15 15 #define PIN_PA15__GPIO PINMUX_PIN(PIN_PA15, 0, 0) #define PIN_PA15__SPI0_MOSI PINMUX_PIN(PIN_PA15, 1, 1) #define PIN_PA15__TF1 PINMUX_PIN(PIN_PA15, 2, 1) From 392abe33d5e5d3d9b822149558eb3da5debc9cd2 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Mon, 8 Feb 2016 16:02:06 +0000 Subject: [PATCH 312/418] xen/arm: correctly handle DMA mapping of compound pages commit 52ba0746b3b44c86aee121babf3b2fd9b8f84090 upstream. Currently xen_dma_map_page concludes that DMA to anything other than the head page of a compound page must be foreign, since the PFN of the page is that of the head. Fix the check to instead consider the whole of a compound page to be local if the PFN of the head passes the 1:1 check. We can never see a compound page which is a mixture of foreign and local sub-pages. The comment already correctly described the intention, but fixup the spelling and some grammar. This fixes the various SSH protocol errors which we have been seeing on the cubietrucks in our automated test infrastructure. This has been broken since commit 3567258d281b ("xen/arm: use hypercall to flush caches in map_page"), which was in v3.19-rc1. NB arch/arm64/.../xen/page-coherent.h also includes this file. Signed-off-by: Ian Campbell Reviewed-by: Stefano Stabellini Cc: xen-devel@lists.xenproject.org Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/xen/page-coherent.h | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/arch/arm/include/asm/xen/page-coherent.h b/arch/arm/include/asm/xen/page-coherent.h index 0375c8caa061..9408a994cc91 100644 --- a/arch/arm/include/asm/xen/page-coherent.h +++ b/arch/arm/include/asm/xen/page-coherent.h @@ -35,14 +35,21 @@ static inline void xen_dma_map_page(struct device *hwdev, struct page *page, dma_addr_t dev_addr, unsigned long offset, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs) { - bool local = XEN_PFN_DOWN(dev_addr) == page_to_xen_pfn(page); + unsigned long page_pfn = page_to_xen_pfn(page); + unsigned long dev_pfn = XEN_PFN_DOWN(dev_addr); + unsigned long compound_pages = + (1<map_page(hwdev, page, offset, size, dir, attrs); From e32b123feea78479f8a60a9abf1a645f9c3ee728 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 8 Feb 2016 15:30:18 +0100 Subject: [PATCH 313/418] xen/scsiback: correct frontend counting commit f285aa8db7cc4432c1a03f8b55ff34fe96317c11 upstream. When adding a new frontend to xen-scsiback don't decrement the number of active frontends in case of no error. Doing so results in a failure when trying to remove the xen-pvscsi nexus even if no domain is using it. Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Signed-off-by: David Vrabel Signed-off-by: Greg Kroah-Hartman --- drivers/xen/xen-scsiback.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c index ad4eb1024d1f..51387d75c7bf 100644 --- a/drivers/xen/xen-scsiback.c +++ b/drivers/xen/xen-scsiback.c @@ -939,12 +939,12 @@ out: spin_unlock_irqrestore(&info->v2p_lock, flags); out_free: - mutex_lock(&tpg->tv_tpg_mutex); - tpg->tv_tpg_fe_count--; - mutex_unlock(&tpg->tv_tpg_mutex); - - if (err) + if (err) { + mutex_lock(&tpg->tv_tpg_mutex); + tpg->tv_tpg_fe_count--; + mutex_unlock(&tpg->tv_tpg_mutex); kfree(new); + } return err; } From d52a24819677bbb45eb1ce93a42daa1ae6c4d61d Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 11 Feb 2016 16:10:23 -0500 Subject: [PATCH 314/418] xen/pciback: Check PF instead of VF for PCI_COMMAND_MEMORY commit 8d47065f7d1980dde52abb874b301054f3013602 upstream. Commit 408fb0e5aa7fda0059db282ff58c3b2a4278baa0 (xen/pciback: Don't allow MSI-X ops if PCI_COMMAND_MEMORY is not set) prevented enabling MSI-X on passed-through virtual functions, because it checked the VF for PCI_COMMAND_MEMORY but this is not a valid bit for VFs. Instead, check the physical function for PCI_COMMAND_MEMORY. Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Jan Beulich Signed-off-by: David Vrabel Signed-off-by: Greg Kroah-Hartman --- drivers/xen/xen-pciback/pciback_ops.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/xen/xen-pciback/pciback_ops.c b/drivers/xen/xen-pciback/pciback_ops.c index 73dafdc494aa..1078e8d631c9 100644 --- a/drivers/xen/xen-pciback/pciback_ops.c +++ b/drivers/xen/xen-pciback/pciback_ops.c @@ -227,8 +227,9 @@ int xen_pcibk_enable_msix(struct xen_pcibk_device *pdev, /* * PCI_COMMAND_MEMORY must be enabled, otherwise we may not be able * to access the BARs where the MSI-X entries reside. + * But VF devices are unique in which the PF needs to be checked. */ - pci_read_config_word(dev, PCI_COMMAND, &cmd); + pci_read_config_word(pci_physfn(dev), PCI_COMMAND, &cmd); if (dev->msi_enabled || !(cmd & PCI_COMMAND_MEMORY)) return -ENXIO; From 4cf5aa2ffe17403385d75a5b1d9d97071500ea18 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 11 Feb 2016 16:10:24 -0500 Subject: [PATCH 315/418] xen/pciback: Save the number of MSI-X entries to be copied later. commit d159457b84395927b5a52adb72f748dd089ad5e5 upstream. Commit 8135cf8b092723dbfcc611fe6fdcb3a36c9951c5 (xen/pciback: Save xen_pci_op commands before processing it) broke enabling MSI-X because it would never copy the resulting vectors into the response. The number of vectors requested was being overwritten by the return value (typically zero for success). Save the number of vectors before processing the op, so the correct number of vectors are copied afterwards. Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Jan Beulich Signed-off-by: David Vrabel Signed-off-by: Greg Kroah-Hartman --- drivers/xen/xen-pciback/pciback_ops.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/xen/xen-pciback/pciback_ops.c b/drivers/xen/xen-pciback/pciback_ops.c index 1078e8d631c9..fb0221434f81 100644 --- a/drivers/xen/xen-pciback/pciback_ops.c +++ b/drivers/xen/xen-pciback/pciback_ops.c @@ -333,6 +333,9 @@ void xen_pcibk_do_op(struct work_struct *data) struct xen_pcibk_dev_data *dev_data = NULL; struct xen_pci_op *op = &pdev->op; int test_intx = 0; +#ifdef CONFIG_PCI_MSI + unsigned int nr = 0; +#endif *op = pdev->sh_info->op; barrier(); @@ -361,6 +364,7 @@ void xen_pcibk_do_op(struct work_struct *data) op->err = xen_pcibk_disable_msi(pdev, dev, op); break; case XEN_PCI_OP_enable_msix: + nr = op->value; op->err = xen_pcibk_enable_msix(pdev, dev, op); break; case XEN_PCI_OP_disable_msix: @@ -383,7 +387,7 @@ void xen_pcibk_do_op(struct work_struct *data) if (op->cmd == XEN_PCI_OP_enable_msix && op->err == 0) { unsigned int i; - for (i = 0; i < op->value; i++) + for (i = 0; i < nr; i++) pdev->sh_info->op.msix_entries[i].vector = op->msix_entries[i].vector; } From 9108b130f74d13a37e67a96cc0a8642464578a0a Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 11 Feb 2016 16:10:26 -0500 Subject: [PATCH 316/418] xen/pcifront: Fix mysterious crashes when NUMA locality information was extracted. commit 4d8c8bd6f2062c9988817183a91fe2e623c8aa5e upstream. Occasionaly PV guests would crash with: pciback 0000:00:00.1: Xen PCI mapped GSI0 to IRQ16 BUG: unable to handle kernel paging request at 0000000d1a8c0be0 .. snip.. ] find_next_bit+0xb/0x10 [] cpumask_next_and+0x22/0x40 [] pci_device_probe+0xb8/0x120 [] ? driver_sysfs_add+0x77/0xa0 [] driver_probe_device+0x1a4/0x2d0 [] ? pci_match_device+0xdd/0x110 [] __device_attach_driver+0xa7/0xb0 [] ? __driver_attach+0xa0/0xa0 [] bus_for_each_drv+0x62/0x90 [] __device_attach+0xbd/0x110 [] device_attach+0xb/0x10 [] pci_bus_add_device+0x3c/0x70 [] pci_bus_add_devices+0x38/0x80 [] pcifront_scan_root+0x13e/0x1a0 [] pcifront_backend_changed+0x262/0x60b [] ? xenbus_gather+0xd6/0x160 [] ? put_object+0x2f/0x50 [] xenbus_otherend_changed+0x9d/0xa0 [] backend_changed+0xe/0x10 [] xenwatch_thread+0xc8/0x190 [] ? woken_wake_function+0x10/0x10 which was the result of two things: When we call pci_scan_root_bus we would pass in 'sd' (sysdata) pointer which was an 'pcifront_sd' structure. However in the pci_device_add it expects that the 'sd' is 'struct sysdata' and sets the dev->node to what is in sd->node (offset 4): set_dev_node(&dev->dev, pcibus_to_node(bus)); __pcibus_to_node(const struct pci_bus *bus) { const struct pci_sysdata *sd = bus->sysdata; return sd->node; } However our structure was pcifront_sd which had nothing at that offset: struct pcifront_sd { int domain; /* 0 4 */ /* XXX 4 bytes hole, try to pack */ struct pcifront_device * pdev; /* 8 8 */ } That is an hole - filled with garbage as we used kmalloc instead of kzalloc (the second problem). This patch fixes the issue by: 1) Use kzalloc to initialize to a well known state. 2) Put 'struct pci_sysdata' at the start of 'pcifront_sd'. That way access to the 'node' will access the right offset. Signed-off-by: Konrad Rzeszutek Wilk Reviewed-by: Boris Ostrovsky Signed-off-by: David Vrabel Signed-off-by: Greg Kroah-Hartman --- drivers/pci/xen-pcifront.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c index c777b97207d5..5f70fee59a94 100644 --- a/drivers/pci/xen-pcifront.c +++ b/drivers/pci/xen-pcifront.c @@ -53,7 +53,7 @@ struct pcifront_device { }; struct pcifront_sd { - int domain; + struct pci_sysdata sd; struct pcifront_device *pdev; }; @@ -67,7 +67,9 @@ static inline void pcifront_init_sd(struct pcifront_sd *sd, unsigned int domain, unsigned int bus, struct pcifront_device *pdev) { - sd->domain = domain; + /* Because we do not expose that information via XenBus. */ + sd->sd.node = first_online_node; + sd->sd.domain = domain; sd->pdev = pdev; } @@ -468,8 +470,8 @@ static int pcifront_scan_root(struct pcifront_device *pdev, dev_info(&pdev->xdev->dev, "Creating PCI Frontend Bus %04x:%02x\n", domain, bus); - bus_entry = kmalloc(sizeof(*bus_entry), GFP_KERNEL); - sd = kmalloc(sizeof(*sd), GFP_KERNEL); + bus_entry = kzalloc(sizeof(*bus_entry), GFP_KERNEL); + sd = kzalloc(sizeof(*sd), GFP_KERNEL); if (!bus_entry || !sd) { err = -ENOMEM; goto err_out; From 6f4b352f004c696439c5b5d75a4edc3d0823e770 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 27 Feb 2016 19:31:01 -0500 Subject: [PATCH 317/418] should_follow_link(): validate ->d_seq after having decided to follow commit a7f775428b8f5808815c0e3004020cedb94cbe3b upstream. ... otherwise d_is_symlink() above might have nothing to do with the inode value we've got. Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/namei.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/namei.c b/fs/namei.c index f26cecbd6e73..930b51d6308b 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1711,6 +1711,11 @@ static inline int should_follow_link(struct nameidata *nd, struct path *link, return 0; if (!follow) return 0; + /* make sure that d_is_symlink above matches inode */ + if (nd->flags & LOOKUP_RCU) { + if (read_seqcount_retry(&link->dentry->d_seq, seq)) + return -ECHILD; + } return pick_link(nd, link, inode, seq); } From d57c0477f842af4fdcfa93454bc2ad94d73b7158 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 27 Feb 2016 19:37:37 -0500 Subject: [PATCH 318/418] do_last(): ELOOP failure exit should be done after leaving RCU mode commit 5129fa482b16615fd4464d2f5d23acb1b7056c66 upstream. ... or we risk seeing a bogus value of d_is_symlink() there. Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/namei.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 930b51d6308b..d8ee4da93650 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3149,11 +3149,6 @@ finish_lookup: if (unlikely(error)) return error; - if (unlikely(d_is_symlink(path.dentry)) && !(open_flag & O_PATH)) { - path_to_nameidata(&path, nd); - return -ELOOP; - } - if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != path.mnt) { path_to_nameidata(&path, nd); } else { @@ -3172,6 +3167,10 @@ finish_open: return error; } audit_inode(nd->name, nd->path.dentry, 0); + if (unlikely(d_is_symlink(nd->path.dentry)) && !(open_flag & O_PATH)) { + error = -ELOOP; + goto out; + } error = -EISDIR; if ((open_flag & O_CREAT) && d_is_dir(nd->path.dentry)) goto out; From 04d946904c63586c577139964e77601097acbbbd Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 25 Feb 2016 18:17:38 +0100 Subject: [PATCH 319/418] hpfs: don't truncate the file when delete fails commit b6853f78e763d42c7a158d8de3549c9827c604ab upstream. The delete opration can allocate additional space on the HPFS filesystem due to btree split. The HPFS driver checks in advance if there is available space, so that it won't corrupt the btree if we run out of space during splitting. If there is not enough available space, the HPFS driver attempted to truncate the file, but this results in a deadlock since the commit 7dd29d8d865efdb00c0542a5d2c87af8c52ea6c7 ("HPFS: Introduce a global mutex and lock it on every callback from VFS"). This patch removes the code that tries to truncate the file and -ENOSPC is returned instead. If the user hits -ENOSPC on delete, he should try to delete other files (that are stored in a leaf btree node), so that the delete operation will make some space for deleting the file stored in non-leaf btree node. Reported-by: Al Viro Signed-off-by: Mikulas Patocka Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/hpfs/namei.c | 31 +++---------------------------- 1 file changed, 3 insertions(+), 28 deletions(-) diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index ae4d5a1fa4c9..bffb908acbd4 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c @@ -375,12 +375,11 @@ static int hpfs_unlink(struct inode *dir, struct dentry *dentry) struct inode *inode = d_inode(dentry); dnode_secno dno; int r; - int rep = 0; int err; hpfs_lock(dir->i_sb); hpfs_adjust_length(name, &len); -again: + err = -ENOENT; de = map_dirent(dir, hpfs_i(dir)->i_dno, name, len, &dno, &qbh); if (!de) @@ -400,33 +399,9 @@ again: hpfs_error(dir->i_sb, "there was error when removing dirent"); err = -EFSERROR; break; - case 2: /* no space for deleting, try to truncate file */ - + case 2: /* no space for deleting */ err = -ENOSPC; - if (rep++) - break; - - dentry_unhash(dentry); - if (!d_unhashed(dentry)) { - hpfs_unlock(dir->i_sb); - return -ENOSPC; - } - if (generic_permission(inode, MAY_WRITE) || - !S_ISREG(inode->i_mode) || - get_write_access(inode)) { - d_rehash(dentry); - } else { - struct iattr newattrs; - /*pr_info("truncating file before delete.\n");*/ - newattrs.ia_size = 0; - newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; - err = notify_change(dentry, &newattrs, NULL); - put_write_access(inode); - if (!err) - goto again; - } - hpfs_unlock(dir->i_sb); - return -ENOSPC; + break; default: drop_nlink(inode); err = 0; From 04100683e8f2792d9c715d5a253a260cb30fa3e3 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 26 Feb 2016 18:55:31 +0000 Subject: [PATCH 320/418] x86/mpx: Fix off-by-one comparison with nr_registers commit 9bf148cb0812595bfdf5100bd2c07e9bec9c6ef5 upstream. In the unlikely event that regno == nr_registers then we get an array overrun on regoff because the invalid register check is currently off-by-one. Fix this with a check that regno is >= nr_registers instead. Detected with static analysis using CoverityScan. Fixes: fcc7ffd67991 "x86, mpx: Decode MPX instruction to get bound violation information" Signed-off-by: Colin Ian King Acked-by: Dave Hansen Cc: Borislav Petkov Cc: "Kirill A . Shutemov" Link: http://lkml.kernel.org/r/1456512931-3388-1-git-send-email-colin.king@canonical.com Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/mpx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index b2fd67da1701..ef05755a1900 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -123,7 +123,7 @@ static int get_reg_offset(struct insn *insn, struct pt_regs *regs, break; } - if (regno > nr_registers) { + if (regno >= nr_registers) { WARN_ONCE(1, "decoded an instruction with an invalid register"); return -EINVAL; } From eeb241d4f8a1d0c343e0a19c99490e66b7eff346 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 24 Feb 2016 12:18:49 -0800 Subject: [PATCH 321/418] x86/entry/compat: Add missing CLAC to entry_INT80_32 commit 3d44d51bd339766f0178f0cf2e8d048b4a4872aa upstream. This doesn't seem to fix a regression -- I don't think the CLAC was ever there. I double-checked in a debugger: entries through the int80 gate do not automatically clear AC. Stable maintainers: I can provide a backport to 4.3 and earlier if needed. This needs to be backported all the way to 3.10. Reported-by: Brian Gerst Signed-off-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 63bcff2a307b ("x86, smap: Add STAC and CLAC instructions to control user space access") Link: http://lkml.kernel.org/r/b02b7e71ae54074be01fc171cbd4b72517055c0e.1456345086.git.luto@kernel.org Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_64_compat.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 6a1ae3751e82..15cfebaa7688 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -267,6 +267,7 @@ ENTRY(entry_INT80_compat) * Interrupts are off on entry. */ PARAVIRT_ADJUST_EXCEPTION_FRAME + ASM_CLAC /* Do this early to minimize exposure */ SWAPGS /* From 7df19ad4a9f2e4845dce66b6049bbc7a796a7895 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 14 Jan 2016 08:43:38 +0100 Subject: [PATCH 322/418] x86/irq: Call chip->irq_set_affinity in proper context commit e23b257c293ce4bcc8cabb2aa3097b6ed8a8261a upstream. setup_ioapic_dest() calls irqchip->irq_set_affinity() completely unprotected. That's wrong in several aspects: - it opens a race window where irq_set_affinity() can be interrupted and the irq chip left in unconsistent state. - it triggers a lockdep splat when we fix the vector race for 4.3+ because vector lock is taken with interrupts enabled. The proper calling convention is irq descriptor lock held and interrupts disabled. Reported-and-tested-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: Jiang Liu Cc: Jeremiah Mahler Cc: andy.shevchenko@gmail.com Cc: Guenter Roeck Cc: Joe Lawrence Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1601140919420.3575@nanos Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/apic/io_apic.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index f25321894ad2..fdb0fbfb1197 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2521,6 +2521,7 @@ void __init setup_ioapic_dest(void) { int pin, ioapic, irq, irq_entry; const struct cpumask *mask; + struct irq_desc *desc; struct irq_data *idata; struct irq_chip *chip; @@ -2536,7 +2537,9 @@ void __init setup_ioapic_dest(void) if (irq < 0 || !mp_init_irq_at_boot(ioapic, irq)) continue; - idata = irq_get_irq_data(irq); + desc = irq_to_desc(irq); + raw_spin_lock_irq(&desc->lock); + idata = irq_desc_get_irq_data(desc); /* * Honour affinities which have been set in early boot @@ -2550,6 +2553,7 @@ void __init setup_ioapic_dest(void) /* Might be lapic_chip for irq 0 */ if (chip->irq_set_affinity) chip->irq_set_affinity(idata, mask, false); + raw_spin_unlock_irq(&desc->lock); } } #endif From e4d1544b1e35ae7d3c6a5b5ed2af1328fce2c9ec Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Thu, 31 Dec 2015 16:30:44 +0000 Subject: [PATCH 323/418] x86/irq: Fix a race in x86_vector_free_irqs() commit 111abeba67e0dbdc26537429de9155e4f1d807d8 upstream. There's a race condition between x86_vector_free_irqs() { free_apic_chip_data(irq_data->chip_data); xxxxx //irq_data->chip_data has been freed, but the pointer //hasn't been reset yet irq_domain_reset_irq_data(irq_data); } and smp_irq_move_cleanup_interrupt() { raw_spin_lock(&vector_lock); data = apic_chip_data(irq_desc_get_irq_data(desc)); access data->xxxx // may access freed memory raw_spin_unlock(&desc->lock); } which may cause smp_irq_move_cleanup_interrupt() to access freed memory. Call irq_domain_reset_irq_data(), which clears the pointer with vector lock held. [ tglx: Free memory outside of lock held region. ] Signed-off-by: Jiang Liu Tested-by: Borislav Petkov Tested-by: Joe Lawrence Cc: Jeremiah Mahler Cc: andy.shevchenko@gmail.com Cc: Guenter Roeck Link: http://lkml.kernel.org/r/1450880014-11741-3-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/apic/vector.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 861bc59c8f25..6bd9cb941bfa 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -224,10 +224,8 @@ static int assign_irq_vector_policy(int irq, int node, static void clear_irq_vector(int irq, struct apic_chip_data *data) { struct irq_desc *desc; - unsigned long flags; int cpu, vector; - raw_spin_lock_irqsave(&vector_lock, flags); BUG_ON(!data->cfg.vector); vector = data->cfg.vector; @@ -237,10 +235,8 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data) data->cfg.vector = 0; cpumask_clear(data->domain); - if (likely(!data->move_in_progress)) { - raw_spin_unlock_irqrestore(&vector_lock, flags); + if (likely(!data->move_in_progress)) return; - } desc = irq_to_desc(irq); for_each_cpu_and(cpu, data->old_domain, cpu_online_mask) { @@ -253,7 +249,6 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data) } } data->move_in_progress = 0; - raw_spin_unlock_irqrestore(&vector_lock, flags); } void init_irq_alloc_info(struct irq_alloc_info *info, @@ -274,19 +269,24 @@ void copy_irq_alloc_info(struct irq_alloc_info *dst, struct irq_alloc_info *src) static void x86_vector_free_irqs(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs) { + struct apic_chip_data *apic_data; struct irq_data *irq_data; + unsigned long flags; int i; for (i = 0; i < nr_irqs; i++) { irq_data = irq_domain_get_irq_data(x86_vector_domain, virq + i); if (irq_data && irq_data->chip_data) { + raw_spin_lock_irqsave(&vector_lock, flags); clear_irq_vector(virq + i, irq_data->chip_data); - free_apic_chip_data(irq_data->chip_data); + apic_data = irq_data->chip_data; + irq_domain_reset_irq_data(irq_data); + raw_spin_unlock_irqrestore(&vector_lock, flags); + free_apic_chip_data(apic_data); #ifdef CONFIG_X86_IO_APIC if (virq + i < nr_legacy_irqs()) legacy_irq_data[virq + i] = NULL; #endif - irq_domain_reset_irq_data(irq_data); } } } From 053c8ecc225a14ff2522f8d493ebc94c1ca7dc48 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 31 Dec 2015 16:30:45 +0000 Subject: [PATCH 324/418] x86/irq: Validate that irq descriptor is still active commit 36f34c8c63da3e272fd66f91089228c22d2b6e8b upstream. In fixup_irqs() we unconditionally dereference the irq chip of an irq descriptor. The descriptor might still be valid, but already cleaned up, i.e. the chip removed. Add a check for this condition. Signed-off-by: Thomas Gleixner Cc: Jiang Liu Cc: Joe Lawrence Cc: Jeremiah Mahler Cc: Borislav Petkov Cc: andy.shevchenko@gmail.com Cc: Guenter Roeck Link: http://lkml.kernel.org/r/20151231160106.236423282@linutronix.de Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/irq.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index f8062aaf5df9..c0b58dd1ca04 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -470,6 +470,15 @@ void fixup_irqs(void) } chip = irq_data_get_irq_chip(data); + /* + * The interrupt descriptor might have been cleaned up + * already, but it is not yet removed from the radix tree + */ + if (!chip) { + raw_spin_unlock(&desc->lock); + continue; + } + if (!irqd_can_move_in_process_context(data) && chip->irq_mask) chip->irq_mask(data); From cf2e82af2d53bf7894911a9a11fa2fdf0d17ab91 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Thu, 31 Dec 2015 16:30:46 +0000 Subject: [PATCH 325/418] x86/irq: Do not use apic_chip_data.old_domain as temporary buffer commit 8a580f70f6936ec095da217018cdeeb5835c0207 upstream. Function __assign_irq_vector() makes use of apic_chip_data.old_domain as a temporary buffer, which is in the way of using apic_chip_data.old_domain for synchronizing the vector cleanup with the vector assignement code. Use a proper temporary cpumask for this. [ tglx: Renamed the mask to searched_cpumask for clarity ] Signed-off-by: Jiang Liu Tested-by: Borislav Petkov Tested-by: Joe Lawrence Cc: Jeremiah Mahler Cc: andy.shevchenko@gmail.com Cc: Guenter Roeck Link: http://lkml.kernel.org/r/1450880014-11741-1-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/apic/vector.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 6bd9cb941bfa..ea2970eb58a5 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -30,7 +30,7 @@ struct apic_chip_data { struct irq_domain *x86_vector_domain; static DEFINE_RAW_SPINLOCK(vector_lock); -static cpumask_var_t vector_cpumask; +static cpumask_var_t vector_cpumask, searched_cpumask; static struct irq_chip lapic_controller; #ifdef CONFIG_X86_IO_APIC static struct apic_chip_data *legacy_irq_data[NR_IRQS_LEGACY]; @@ -124,6 +124,7 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d, /* Only try and allocate irqs on cpus that are present */ err = -ENOSPC; cpumask_clear(d->old_domain); + cpumask_clear(searched_cpumask); cpu = cpumask_first_and(mask, cpu_online_mask); while (cpu < nr_cpu_ids) { int new_cpu, vector, offset; @@ -157,9 +158,9 @@ next: } if (unlikely(current_vector == vector)) { - cpumask_or(d->old_domain, d->old_domain, + cpumask_or(searched_cpumask, searched_cpumask, vector_cpumask); - cpumask_andnot(vector_cpumask, mask, d->old_domain); + cpumask_andnot(vector_cpumask, mask, searched_cpumask); cpu = cpumask_first_and(vector_cpumask, cpu_online_mask); continue; @@ -404,6 +405,7 @@ int __init arch_early_irq_init(void) arch_init_htirq_domain(x86_vector_domain); BUG_ON(!alloc_cpumask_var(&vector_cpumask, GFP_KERNEL)); + BUG_ON(!alloc_cpumask_var(&searched_cpumask, GFP_KERNEL)); return arch_early_ioapic_init(); } From f6e9ce2b061408d16ba052e6d7612b4660dbc1c8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 31 Dec 2015 16:30:46 +0000 Subject: [PATCH 326/418] x86/irq: Reorganize the return path in assign_irq_vector commit 433cbd57d190a1cdd02f243df41c3d7f55ec4b94 upstream. Use an explicit goto for the cases where we have success in the search/update and return -ENOSPC if the search loop ends due to no space. Preparatory patch for fixes. No functional change. Signed-off-by: Thomas Gleixner Tested-by: Borislav Petkov Tested-by: Joe Lawrence Cc: Jiang Liu Cc: Jeremiah Mahler Cc: andy.shevchenko@gmail.com Cc: Guenter Roeck Link: http://lkml.kernel.org/r/20151231160106.403491024@linutronix.de Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/apic/vector.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index ea2970eb58a5..74f7dd6f69b1 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -116,13 +116,12 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d, */ static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START; static int current_offset = VECTOR_OFFSET_START % 16; - int cpu, err; + int cpu; if (d->move_in_progress) return -EBUSY; /* Only try and allocate irqs on cpus that are present */ - err = -ENOSPC; cpumask_clear(d->old_domain); cpumask_clear(searched_cpumask); cpu = cpumask_first_and(mask, cpu_online_mask); @@ -132,9 +131,8 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d, apic->vector_allocation_domain(cpu, vector_cpumask, mask); if (cpumask_subset(vector_cpumask, d->domain)) { - err = 0; if (cpumask_equal(vector_cpumask, d->domain)) - break; + goto success; /* * New cpumask using the vector is a proper subset of * the current in use mask. So cleanup the vector @@ -145,7 +143,7 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d, d->move_in_progress = cpumask_intersects(d->old_domain, cpu_online_mask); cpumask_and(d->domain, d->domain, vector_cpumask); - break; + goto success; } vector = current_vector; @@ -185,17 +183,13 @@ next: per_cpu(vector_irq, new_cpu)[vector] = irq_to_desc(irq); d->cfg.vector = vector; cpumask_copy(d->domain, vector_cpumask); - err = 0; - break; + goto success; } + return -ENOSPC; - if (!err) { - /* cache destination APIC IDs into cfg->dest_apicid */ - err = apic->cpu_mask_to_apicid_and(mask, d->domain, - &d->cfg.dest_apicid); - } - - return err; +success: + /* cache destination APIC IDs into cfg->dest_apicid */ + return apic->cpu_mask_to_apicid_and(mask, d->domain, &d->cfg.dest_apicid); } static int assign_irq_vector(int irq, struct apic_chip_data *data, From cb987d4822d38f8540b7cd580c33628858d38120 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 31 Dec 2015 16:30:47 +0000 Subject: [PATCH 327/418] x86/irq: Reorganize the search in assign_irq_vector commit 95ffeb4b5baca266e1d0d2bc90f1513e6f419cdd upstream. Split out the code which advances the target cpu for the search so we can reuse it for the next patch which adds an early validation check for the vectormask which we get from the apic. Add comments while at it. Signed-off-by: Thomas Gleixner Tested-by: Borislav Petkov Tested-by: Joe Lawrence Cc: Jiang Liu Cc: Jeremiah Mahler Cc: andy.shevchenko@gmail.com Cc: Guenter Roeck Link: http://lkml.kernel.org/r/20151231160106.484562040@linutronix.de Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/apic/vector.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 74f7dd6f69b1..60bab79abbe5 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -155,14 +155,9 @@ next: vector = FIRST_EXTERNAL_VECTOR + offset; } - if (unlikely(current_vector == vector)) { - cpumask_or(searched_cpumask, searched_cpumask, - vector_cpumask); - cpumask_andnot(vector_cpumask, mask, searched_cpumask); - cpu = cpumask_first_and(vector_cpumask, - cpu_online_mask); - continue; - } + /* If the search wrapped around, try the next cpu */ + if (unlikely(current_vector == vector)) + goto next_cpu; if (test_bit(vector, used_vectors)) goto next; @@ -184,6 +179,19 @@ next: d->cfg.vector = vector; cpumask_copy(d->domain, vector_cpumask); goto success; + +next_cpu: + /* + * We exclude the current @vector_cpumask from the requested + * @mask and try again with the next online cpu in the + * result. We cannot modify @mask, so we use @vector_cpumask + * as a temporary buffer here as it will be reassigned when + * calling apic->vector_allocation_domain() above. + */ + cpumask_or(searched_cpumask, searched_cpumask, vector_cpumask); + cpumask_andnot(vector_cpumask, mask, searched_cpumask); + cpu = cpumask_first_and(vector_cpumask, cpu_online_mask); + continue; } return -ENOSPC; From 4af6a215154b213835420c6cd2442c0436abbad5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 31 Dec 2015 16:30:48 +0000 Subject: [PATCH 328/418] x86/irq: Check vector allocation early commit 3716fd27a604d61a91cda47083504971486b80f1 upstream. __assign_irq_vector() uses the vector_cpumask which is assigned by apic->vector_allocation_domain() without doing basic sanity checks. That can result in a situation where the final assignement of a newly found vector fails in apic->cpu_mask_to_apicid_and(). So we have to do rollbacks for no reason. apic->cpu_mask_to_apicid_and() only fails if vector_cpumask & requested_cpumask & cpu_online_mask is empty. Check for this condition right away and if the result is empty try immediately the next possible cpu in the requested mask. So in case of a failure the old setting is unchanged and we can remove the rollback code. Signed-off-by: Thomas Gleixner Tested-by: Borislav Petkov Tested-by: Joe Lawrence Cc: Jiang Liu Cc: Jeremiah Mahler Cc: andy.shevchenko@gmail.com Cc: Guenter Roeck Link: http://lkml.kernel.org/r/20151231160106.561877324@linutronix.de Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/apic/vector.c | 38 +++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 60bab79abbe5..d96b43c02ba7 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -30,7 +30,7 @@ struct apic_chip_data { struct irq_domain *x86_vector_domain; static DEFINE_RAW_SPINLOCK(vector_lock); -static cpumask_var_t vector_cpumask, searched_cpumask; +static cpumask_var_t vector_cpumask, vector_searchmask, searched_cpumask; static struct irq_chip lapic_controller; #ifdef CONFIG_X86_IO_APIC static struct apic_chip_data *legacy_irq_data[NR_IRQS_LEGACY]; @@ -128,8 +128,20 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d, while (cpu < nr_cpu_ids) { int new_cpu, vector, offset; + /* Get the possible target cpus for @mask/@cpu from the apic */ apic->vector_allocation_domain(cpu, vector_cpumask, mask); + /* + * Clear the offline cpus from @vector_cpumask for searching + * and verify whether the result overlaps with @mask. If true, + * then the call to apic->cpu_mask_to_apicid_and() will + * succeed as well. If not, no point in trying to find a + * vector in this mask. + */ + cpumask_and(vector_searchmask, vector_cpumask, cpu_online_mask); + if (!cpumask_intersects(vector_searchmask, mask)) + goto next_cpu; + if (cpumask_subset(vector_cpumask, d->domain)) { if (cpumask_equal(vector_cpumask, d->domain)) goto success; @@ -162,7 +174,7 @@ next: if (test_bit(vector, used_vectors)) goto next; - for_each_cpu_and(new_cpu, vector_cpumask, cpu_online_mask) { + for_each_cpu(new_cpu, vector_searchmask) { if (!IS_ERR_OR_NULL(per_cpu(vector_irq, new_cpu)[vector])) goto next; } @@ -174,7 +186,7 @@ next: d->move_in_progress = cpumask_intersects(d->old_domain, cpu_online_mask); } - for_each_cpu_and(new_cpu, vector_cpumask, cpu_online_mask) + for_each_cpu(new_cpu, vector_searchmask) per_cpu(vector_irq, new_cpu)[vector] = irq_to_desc(irq); d->cfg.vector = vector; cpumask_copy(d->domain, vector_cpumask); @@ -196,8 +208,14 @@ next_cpu: return -ENOSPC; success: - /* cache destination APIC IDs into cfg->dest_apicid */ - return apic->cpu_mask_to_apicid_and(mask, d->domain, &d->cfg.dest_apicid); + /* + * Cache destination APIC IDs into cfg->dest_apicid. This cannot fail + * as we already established, that mask & d->domain & cpu_online_mask + * is not empty. + */ + BUG_ON(apic->cpu_mask_to_apicid_and(mask, d->domain, + &d->cfg.dest_apicid)); + return 0; } static int assign_irq_vector(int irq, struct apic_chip_data *data, @@ -407,6 +425,7 @@ int __init arch_early_irq_init(void) arch_init_htirq_domain(x86_vector_domain); BUG_ON(!alloc_cpumask_var(&vector_cpumask, GFP_KERNEL)); + BUG_ON(!alloc_cpumask_var(&vector_searchmask, GFP_KERNEL)); BUG_ON(!alloc_cpumask_var(&searched_cpumask, GFP_KERNEL)); return arch_early_ioapic_init(); @@ -496,14 +515,7 @@ static int apic_set_affinity(struct irq_data *irq_data, return -EINVAL; err = assign_irq_vector(irq, data, dest); - if (err) { - if (assign_irq_vector(irq, data, - irq_data_get_affinity_mask(irq_data))) - pr_err("Failed to recover vector for irq %d\n", irq); - return err; - } - - return IRQ_SET_MASK_OK; + return err ? err : IRQ_SET_MASK_OK; } static struct irq_chip lapic_controller = { From 2636de2fc22c1f5e1b24ba7d9e6f6615ed9569a5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 31 Dec 2015 16:30:49 +0000 Subject: [PATCH 329/418] x86/irq: Copy vectormask instead of an AND operation commit 9ac15b7a8af4cf3337a101498c0ed690d23ade75 upstream. In the case that the new vector mask is a subset of the existing mask there is no point to do a AND operation of currentmask & newmask. The result is newmask. So we can simply copy the new mask to the current mask and be done with it. Preparatory patch for further consolidation. Signed-off-by: Thomas Gleixner Tested-by: Borislav Petkov Tested-by: Joe Lawrence Cc: Jiang Liu Cc: Jeremiah Mahler Cc: andy.shevchenko@gmail.com Cc: Guenter Roeck Link: http://lkml.kernel.org/r/20151231160106.640253454@linutronix.de Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/apic/vector.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index d96b43c02ba7..641d592f524a 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -154,7 +154,7 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d, vector_cpumask); d->move_in_progress = cpumask_intersects(d->old_domain, cpu_online_mask); - cpumask_and(d->domain, d->domain, vector_cpumask); + cpumask_copy(d->domain, vector_cpumask); goto success; } From c655fd016c2917c5f88c4c694bdcdf9e68f4f661 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 31 Dec 2015 16:30:49 +0000 Subject: [PATCH 330/418] x86/irq: Get rid of code duplication commit ab25ac02148b600e645f77cfb8b8ea415ed75bb4 upstream. Reusing an existing vector and assigning a new vector has duplicated code. Consolidate it. This is also a preparatory patch for finally plugging the cleanup race. Signed-off-by: Thomas Gleixner Tested-by: Borislav Petkov Tested-by: Joe Lawrence Cc: Jiang Liu Cc: Jeremiah Mahler Cc: andy.shevchenko@gmail.com Cc: Guenter Roeck Link: http://lkml.kernel.org/r/20151231160106.721599216@linutronix.de Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/apic/vector.c | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 641d592f524a..c604ca651b8a 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -116,7 +116,7 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d, */ static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START; static int current_offset = VECTOR_OFFSET_START % 16; - int cpu; + int cpu, vector; if (d->move_in_progress) return -EBUSY; @@ -126,7 +126,7 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d, cpumask_clear(searched_cpumask); cpu = cpumask_first_and(mask, cpu_online_mask); while (cpu < nr_cpu_ids) { - int new_cpu, vector, offset; + int new_cpu, offset; /* Get the possible target cpus for @mask/@cpu from the apic */ apic->vector_allocation_domain(cpu, vector_cpumask, mask); @@ -146,16 +146,12 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d, if (cpumask_equal(vector_cpumask, d->domain)) goto success; /* - * New cpumask using the vector is a proper subset of - * the current in use mask. So cleanup the vector - * allocation for the members that are not used anymore. + * Mark the cpus which are not longer in the mask for + * cleanup. */ - cpumask_andnot(d->old_domain, d->domain, - vector_cpumask); - d->move_in_progress = - cpumask_intersects(d->old_domain, cpu_online_mask); - cpumask_copy(d->domain, vector_cpumask); - goto success; + cpumask_andnot(d->old_domain, d->domain, vector_cpumask); + vector = d->cfg.vector; + goto update; } vector = current_vector; @@ -181,16 +177,12 @@ next: /* Found one! */ current_vector = vector; current_offset = offset; - if (d->cfg.vector) { + /* Schedule the old vector for cleanup on all cpus */ + if (d->cfg.vector) cpumask_copy(d->old_domain, d->domain); - d->move_in_progress = - cpumask_intersects(d->old_domain, cpu_online_mask); - } for_each_cpu(new_cpu, vector_searchmask) per_cpu(vector_irq, new_cpu)[vector] = irq_to_desc(irq); - d->cfg.vector = vector; - cpumask_copy(d->domain, vector_cpumask); - goto success; + goto update; next_cpu: /* @@ -207,6 +199,11 @@ next_cpu: } return -ENOSPC; +update: + /* Cleanup required ? */ + d->move_in_progress = cpumask_intersects(d->old_domain, cpu_online_mask); + d->cfg.vector = vector; + cpumask_copy(d->domain, vector_cpumask); success: /* * Cache destination APIC IDs into cfg->dest_apicid. This cannot fail From 550ac3f791f5cdf26021dfa6011adcb5f38856b6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 31 Dec 2015 16:30:50 +0000 Subject: [PATCH 331/418] x86/irq: Remove offline cpus from vector cleanup commit 847667ef10356b824a11c853fc8a8b1b437b6a8d upstream. No point of keeping offline cpus in the cleanup mask. Signed-off-by: Thomas Gleixner Tested-by: Borislav Petkov Tested-by: Joe Lawrence Cc: Jiang Liu Cc: Jeremiah Mahler Cc: andy.shevchenko@gmail.com Cc: Guenter Roeck Link: http://lkml.kernel.org/r/20151231160106.808642683@linutronix.de Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/apic/vector.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index c604ca651b8a..175d3ac4690e 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -200,8 +200,12 @@ next_cpu: return -ENOSPC; update: - /* Cleanup required ? */ - d->move_in_progress = cpumask_intersects(d->old_domain, cpu_online_mask); + /* + * Exclude offline cpus from the cleanup mask and set the + * move_in_progress flag when the result is not empty. + */ + cpumask_and(d->old_domain, d->old_domain, cpu_online_mask); + d->move_in_progress = !cpumask_empty(d->old_domain); d->cfg.vector = vector; cpumask_copy(d->domain, vector_cpumask); success: From c2b56b62c26ec51f64edea8d20b133efeddd2d0c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 31 Dec 2015 16:30:51 +0000 Subject: [PATCH 332/418] x86/irq: Clear move_in_progress before sending cleanup IPI commit c1684f5035b60e9f98566493e869496fb5de1d89 upstream. send_cleanup_vector() fiddles with the old_domain mask unprotected because it relies on the protection by the move_in_progress flag. But this is fatal, as the flag is reset after the IPI has been sent. So a cpu which receives the IPI can still see the flag set and therefor ignores the cleanup request. If no other cleanup request happens then the vector stays stale on that cpu and in case of an irq removal the vector still persists. That can lead to use after free when the next cleanup IPI happens. Protect the code with vector_lock and clear move_in_progress before sending the IPI. This does not plug the race which Joe reported because: CPU0 CPU1 CPU2 lock_vector() data->move_in_progress=0 sendIPI() unlock_vector() set_affinity() assign_irq_vector() lock_vector() handle_IPI move_in_progress = 1 lock_vector() unlock_vector() move_in_progress == 1 The full fix comes with a later patch. Reported-and-tested-by: Joe Lawrence Signed-off-by: Thomas Gleixner Tested-by: Borislav Petkov Cc: Jiang Liu Cc: Jeremiah Mahler Cc: andy.shevchenko@gmail.com Cc: Guenter Roeck Link: http://lkml.kernel.org/r/20151231160106.892412198@linutronix.de Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/apic/vector.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 175d3ac4690e..f4c50d3f60e7 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -530,6 +530,8 @@ static void __send_cleanup_vector(struct apic_chip_data *data) { cpumask_var_t cleanup_mask; + raw_spin_lock(&vector_lock); + data->move_in_progress = 0; if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) { unsigned int i; @@ -541,7 +543,7 @@ static void __send_cleanup_vector(struct apic_chip_data *data) apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); free_cpumask_var(cleanup_mask); } - data->move_in_progress = 0; + raw_spin_unlock(&vector_lock); } void send_cleanup_vector(struct irq_cfg *cfg) From 00bb447126d363fbf401d973548b193828c83fb2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 31 Dec 2015 16:30:52 +0000 Subject: [PATCH 333/418] x86/irq: Remove the cpumask allocation from send_cleanup_vector() commit 5da0c1217f05d2ccc9a8ed6e6e5c23a8a1d24dd6 upstream. There is no need to allocate a new cpumask for sending the cleanup vector. The old_domain mask is now protected by the vector_lock, so we can safely remove the offline cpus from it and send the IPI with the resulting mask. Signed-off-by: Thomas Gleixner Tested-by: Borislav Petkov Tested-by: Joe Lawrence Cc: Jiang Liu Cc: Jeremiah Mahler Cc: andy.shevchenko@gmail.com Cc: Guenter Roeck Link: http://lkml.kernel.org/r/20151231160106.967993932@linutronix.de Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/apic/vector.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index f4c50d3f60e7..a1938a62d10a 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -528,21 +528,11 @@ static struct irq_chip lapic_controller = { #ifdef CONFIG_SMP static void __send_cleanup_vector(struct apic_chip_data *data) { - cpumask_var_t cleanup_mask; - raw_spin_lock(&vector_lock); + cpumask_and(data->old_domain, data->old_domain, cpu_online_mask); data->move_in_progress = 0; - if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) { - unsigned int i; - - for_each_cpu_and(i, data->old_domain, cpu_online_mask) - apic->send_IPI_mask(cpumask_of(i), - IRQ_MOVE_CLEANUP_VECTOR); - } else { - cpumask_and(cleanup_mask, data->old_domain, cpu_online_mask); - apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); - free_cpumask_var(cleanup_mask); - } + if (!cpumask_empty(data->old_domain)) + apic->send_IPI_mask(data->old_domain, IRQ_MOVE_CLEANUP_VECTOR); raw_spin_unlock(&vector_lock); } From 950c362bdfb191aa3b149493c0447c89845f3a87 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 31 Dec 2015 16:30:52 +0000 Subject: [PATCH 334/418] x86/irq: Remove outgoing CPU from vector cleanup mask commit 56d7d2f4bbd00fb198b7907cb3ab657d06115a42 upstream. We want to synchronize new vector assignments with a pending cleanup. Remove a dying cpu from a pending cleanup mask. Signed-off-by: Thomas Gleixner Tested-by: Borislav Petkov Tested-by: Joe Lawrence Cc: Jiang Liu Cc: Jeremiah Mahler Cc: andy.shevchenko@gmail.com Cc: Guenter Roeck Link: http://lkml.kernel.org/r/20151231160107.045961667@linutronix.de Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/apic/vector.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index a1938a62d10a..cbb6c10c3187 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -631,9 +631,23 @@ void irq_complete_move(struct irq_cfg *cfg) void irq_force_complete_move(int irq) { struct irq_cfg *cfg = irq_cfg(irq); + struct apic_chip_data *data; - if (cfg) - __irq_complete_move(cfg, cfg->vector); + if (!cfg) + return; + + __irq_complete_move(cfg, cfg->vector); + + /* + * Remove this cpu from the cleanup mask. The IPI might have been sent + * just before the cpu was removed from the offline mask, but has not + * been processed because the CPU has interrupts disabled and is on + * the way out. + */ + raw_spin_lock(&vector_lock); + data = container_of(cfg, struct apic_chip_data, cfg); + cpumask_clear_cpu(smp_processor_id(), data->old_domain); + raw_spin_unlock(&vector_lock); } #endif From 4f45a0edf314c4f7f4be658bf58e0b7bc477723c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 31 Dec 2015 16:30:53 +0000 Subject: [PATCH 335/418] x86/irq: Call irq_force_move_complete with irq descriptor commit 90a2282e23f0522e4b3f797ad447c5e91bf7fe32 upstream. First of all there is no point in looking up the irq descriptor again, but we also need the descriptor for the final cleanup race fix in the next patch. Make that change seperate. No functional difference. Signed-off-by: Thomas Gleixner Tested-by: Borislav Petkov Tested-by: Joe Lawrence Cc: Jiang Liu Cc: Jeremiah Mahler Cc: andy.shevchenko@gmail.com Cc: Guenter Roeck Link: http://lkml.kernel.org/r/20151231160107.125211743@linutronix.de Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/irq.h | 5 +++-- arch/x86/kernel/apic/vector.c | 11 +++++++---- arch/x86/kernel/irq.c | 2 +- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index 881b4768644a..e7de5c9a4fbd 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -23,11 +23,13 @@ extern void irq_ctx_init(int cpu); #define __ARCH_HAS_DO_SOFTIRQ +struct irq_desc; + #ifdef CONFIG_HOTPLUG_CPU #include extern int check_irq_vectors_for_cpu_disable(void); extern void fixup_irqs(void); -extern void irq_force_complete_move(int); +extern void irq_force_complete_move(struct irq_desc *desc); #endif #ifdef CONFIG_HAVE_KVM @@ -37,7 +39,6 @@ extern void kvm_set_posted_intr_wakeup_handler(void (*handler)(void)); extern void (*x86_platform_ipi_callback)(void); extern void native_init_IRQ(void); -struct irq_desc; extern bool handle_irq(struct irq_desc *desc, struct pt_regs *regs); extern __visible unsigned int do_IRQ(struct pt_regs *regs); diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index cbb6c10c3187..6783f313970d 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -628,10 +628,14 @@ void irq_complete_move(struct irq_cfg *cfg) __irq_complete_move(cfg, ~get_irq_regs()->orig_ax); } -void irq_force_complete_move(int irq) +/* + * Called with @desc->lock held and interrupts disabled. + */ +void irq_force_complete_move(struct irq_desc *desc) { - struct irq_cfg *cfg = irq_cfg(irq); - struct apic_chip_data *data; + struct irq_data *irqdata = irq_desc_get_irq_data(desc); + struct apic_chip_data *data = apic_chip_data(irqdata); + struct irq_cfg *cfg = data ? &data->cfg : NULL; if (!cfg) return; @@ -645,7 +649,6 @@ void irq_force_complete_move(int irq) * the way out. */ raw_spin_lock(&vector_lock); - data = container_of(cfg, struct apic_chip_data, cfg); cpumask_clear_cpu(smp_processor_id(), data->old_domain); raw_spin_unlock(&vector_lock); } diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index c0b58dd1ca04..61521dc19c10 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -462,7 +462,7 @@ void fixup_irqs(void) * non intr-remapping case, we can't wait till this interrupt * arrives at this cpu before completing the irq move. */ - irq_force_complete_move(irq); + irq_force_complete_move(desc); if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { break_affinity = 1; From 996c591227d988ed82e76080ebf4ed0f1f33e0f1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 31 Dec 2015 16:30:54 +0000 Subject: [PATCH 336/418] x86/irq: Plug vector cleanup race commit 98229aa36caa9c769b13565523de9b813013c703 upstream. We still can end up with a stale vector due to the following: CPU0 CPU1 CPU2 lock_vector() data->move_in_progress=0 sendIPI() unlock_vector() set_affinity() assign_irq_vector() lock_vector() handle_IPI move_in_progress = 1 lock_vector() unlock_vector() move_in_progress == 1 So we need to serialize the vector assignment against a pending cleanup. The solution is rather simple now. We not only check for the move_in_progress flag in assign_irq_vector(), we also check whether there is still a cleanup pending in the old_domain cpumask. If so, we return -EBUSY to the caller and let him deal with it. Though we have to be careful in the cpu unplug case. If the cleanout has not yet completed then the following setaffinity() call would return -EBUSY. Add code which prevents this. Full context is here: http://lkml.kernel.org/r/5653B688.4050809@stratus.com Reported-and-tested-by: Joe Lawrence Signed-off-by: Thomas Gleixner Tested-by: Borislav Petkov Cc: Jiang Liu Cc: Jeremiah Mahler Cc: andy.shevchenko@gmail.com Cc: Guenter Roeck Link: http://lkml.kernel.org/r/20151231160107.207265407@linutronix.de Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/apic/vector.c | 63 +++++++++++++++++++++++++++++------ 1 file changed, 53 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 6783f313970d..a35f6b5473f4 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -118,7 +118,12 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d, static int current_offset = VECTOR_OFFSET_START % 16; int cpu, vector; - if (d->move_in_progress) + /* + * If there is still a move in progress or the previous move has not + * been cleaned up completely, tell the caller to come back later. + */ + if (d->move_in_progress || + cpumask_intersects(d->old_domain, cpu_online_mask)) return -EBUSY; /* Only try and allocate irqs on cpus that are present */ @@ -257,7 +262,12 @@ static void clear_irq_vector(int irq, struct apic_chip_data *data) data->cfg.vector = 0; cpumask_clear(data->domain); - if (likely(!data->move_in_progress)) + /* + * If move is in progress or the old_domain mask is not empty, + * i.e. the cleanup IPI has not been processed yet, we need to remove + * the old references to desc from all cpus vector tables. + */ + if (!data->move_in_progress && cpumask_empty(data->old_domain)) return; desc = irq_to_desc(irq); @@ -577,12 +587,25 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void) goto unlock; /* - * Check if the irq migration is in progress. If so, we - * haven't received the cleanup request yet for this irq. + * Nothing to cleanup if irq migration is in progress + * or this cpu is not set in the cleanup mask. */ - if (data->move_in_progress) + if (data->move_in_progress || + !cpumask_test_cpu(me, data->old_domain)) goto unlock; + /* + * We have two cases to handle here: + * 1) vector is unchanged but the target mask got reduced + * 2) vector and the target mask has changed + * + * #1 is obvious, but in #2 we have two vectors with the same + * irq descriptor: the old and the new vector. So we need to + * make sure that we only cleanup the old vector. The new + * vector has the current @vector number in the config and + * this cpu is part of the target mask. We better leave that + * one alone. + */ if (vector == data->cfg.vector && cpumask_test_cpu(me, data->domain)) goto unlock; @@ -600,6 +623,7 @@ asmlinkage __visible void smp_irq_move_cleanup_interrupt(void) goto unlock; } __this_cpu_write(vector_irq[vector], VECTOR_UNUSED); + cpumask_clear_cpu(me, data->old_domain); unlock: raw_spin_unlock(&desc->lock); } @@ -643,13 +667,32 @@ void irq_force_complete_move(struct irq_desc *desc) __irq_complete_move(cfg, cfg->vector); /* - * Remove this cpu from the cleanup mask. The IPI might have been sent - * just before the cpu was removed from the offline mask, but has not - * been processed because the CPU has interrupts disabled and is on - * the way out. + * This is tricky. If the cleanup of @data->old_domain has not been + * done yet, then the following setaffinity call will fail with + * -EBUSY. This can leave the interrupt in a stale state. + * + * The cleanup cannot make progress because we hold @desc->lock. So in + * case @data->old_domain is not yet cleaned up, we need to drop the + * lock and acquire it again. @desc cannot go away, because the + * hotplug code holds the sparse irq lock. */ raw_spin_lock(&vector_lock); - cpumask_clear_cpu(smp_processor_id(), data->old_domain); + /* Clean out all offline cpus (including ourself) first. */ + cpumask_and(data->old_domain, data->old_domain, cpu_online_mask); + while (!cpumask_empty(data->old_domain)) { + raw_spin_unlock(&vector_lock); + raw_spin_unlock(&desc->lock); + cpu_relax(); + raw_spin_lock(&desc->lock); + /* + * Reevaluate apic_chip_data. It might have been cleared after + * we dropped @desc->lock. + */ + data = apic_chip_data(irqdata); + if (!data) + return; + raw_spin_lock(&vector_lock); + } raw_spin_unlock(&vector_lock); } #endif From 9315bf18bec590aa0c4be5b54de55da21d31ac96 Mon Sep 17 00:00:00 2001 From: Matan Barak Date: Thu, 7 Jan 2016 11:19:29 +0200 Subject: [PATCH 337/418] IB/cma: Fix RDMA port validation for iWarp commit 649367735ee5dedb128d9fac0b86ba7e0fe7ae3b upstream. cma_validate_port wrongly assumed that Ethernet devices are RoCE devices and thus their ndev should be matched in the GID table. This broke the iWarp support. Fixing that matching the ndev only if we work on a RoCE port. Cc: # 4.4.x- Fixes: abae1b71dd37 ('IB/cma: cma_validate_port should verify the port and netdevice') Reported-by: Hariprasad Shenai Tested-by: Hariprasad Shenai Signed-off-by: Matan Barak Reviewed-by: Steve Wise Signed-off-by: Doug Ledford Signed-off-by: Steve Wise Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/core/cma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 2d762a2ecd81..17a15c56028c 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -453,7 +453,7 @@ static inline int cma_validate_port(struct ib_device *device, u8 port, if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port)) return ret; - if (dev_type == ARPHRD_ETHER) + if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) ndev = dev_get_by_index(&init_net, bound_if_index); ret = ib_find_cached_gid_by_port(device, gid, port, ndev, NULL); From e4a5a335105a557b5a78c1513650d1cf7c9f2edb Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Wed, 20 Jan 2016 15:00:01 -0800 Subject: [PATCH 338/418] security: let security modules use PTRACE_MODE_* with bitmasks commit 3dfb7d8cdbc7ea0c2970450e60818bb3eefbad69 upstream. It looks like smack and yama weren't aware that the ptrace mode can have flags ORed into it - PTRACE_MODE_NOAUDIT until now, but only for /proc/$pid/stat, and with the PTRACE_MODE_*CREDS patch, all modes have flags ORed into them. Signed-off-by: Jann Horn Acked-by: Kees Cook Acked-by: Casey Schaufler Cc: Oleg Nesterov Cc: Ingo Molnar Cc: James Morris Cc: "Serge E. Hallyn" Cc: Andy Shevchenko Cc: Andy Lutomirski Cc: Al Viro Cc: "Eric W. Biederman" Cc: Willy Tarreau Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- security/smack/smack_lsm.c | 8 +++----- security/yama/yama_lsm.c | 4 ++-- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index ff81026f6ddb..7c57c7fcf5a2 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -398,12 +398,10 @@ static int smk_copy_relabel(struct list_head *nhead, struct list_head *ohead, */ static inline unsigned int smk_ptrace_mode(unsigned int mode) { - switch (mode) { - case PTRACE_MODE_READ: - return MAY_READ; - case PTRACE_MODE_ATTACH: + if (mode & PTRACE_MODE_ATTACH) return MAY_READWRITE; - } + if (mode & PTRACE_MODE_READ) + return MAY_READ; return 0; } diff --git a/security/yama/yama_lsm.c b/security/yama/yama_lsm.c index d3c19c970a06..cb6ed10816d4 100644 --- a/security/yama/yama_lsm.c +++ b/security/yama/yama_lsm.c @@ -281,7 +281,7 @@ static int yama_ptrace_access_check(struct task_struct *child, int rc = 0; /* require ptrace target be a child of ptracer on attach */ - if (mode == PTRACE_MODE_ATTACH) { + if (mode & PTRACE_MODE_ATTACH) { switch (ptrace_scope) { case YAMA_SCOPE_DISABLED: /* No additional restrictions. */ @@ -307,7 +307,7 @@ static int yama_ptrace_access_check(struct task_struct *child, } } - if (rc) { + if (rc && (mode & PTRACE_MODE_NOAUDIT) == 0) { printk_ratelimited(KERN_NOTICE "ptrace of pid %d was attempted by: %s (pid %d)\n", child->pid, current->comm, current->pid); From d19573e8499198a75c2f7a0b22053bdd8eb48050 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sun, 20 Dec 2015 08:45:40 +0200 Subject: [PATCH 339/418] iwlwifi: dvm: fix WoWLAN commit a1cdb1c59c8c203de2731fc6910598ed19c97e41 upstream. My commit below introduced a mutex in the transport to prevent concurrent operations. To do so, it added a flag (is_down) to make sure the transport is in the right state. This uncoverred an bug that didn't cause any harm until now: iwldvm calls stop_device and then starts the firmware without calling start_hw in between. While this flow is fine from the device configuration point of view (register, etc...), it is now forbidden by the new is_down flag. This led to this error to appear: iwlwifi 0000:05:00.0: Can't start_fw since the HW hasn't been started and the suspend would fail. This fixes: https://bugzilla.kernel.org/show_bug.cgi?id=109591 Reported-by: Bogdan Bogush Fixes=fa9f3281cbb1 ("iwlwifi: pcie: lock start_hw / start_fw / stop_device") Signed-off-by: Emmanuel Grumbach Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/iwlwifi/dvm/lib.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/iwlwifi/dvm/lib.c b/drivers/net/wireless/iwlwifi/dvm/lib.c index e18629a16fb0..0961f33de05e 100644 --- a/drivers/net/wireless/iwlwifi/dvm/lib.c +++ b/drivers/net/wireless/iwlwifi/dvm/lib.c @@ -1154,6 +1154,9 @@ int iwlagn_suspend(struct iwl_priv *priv, struct cfg80211_wowlan *wowlan) priv->ucode_loaded = false; iwl_trans_stop_device(priv->trans); + ret = iwl_trans_start_hw(priv->trans); + if (ret) + goto out; priv->wowlan = true; From 8a55831546e7d558e7cb14e1c40c6d403d779c10 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Tue, 5 Jan 2016 15:25:43 +0200 Subject: [PATCH 340/418] iwlwifi: pcie: properly configure the debug buffer size for 8000 commit 62d7476d958ce06d7a10b02bdb30006870286fe2 upstream. 8000 device family has a new debug engine that needs to be configured differently than 7000's. The debug engine's DMA works in chunks of memory and the size of the buffer really means the start of the last chunk. Since one chunk is 256-byte long, we should configure the device to write to buffer_size - 256. This fixes a situation were the device would write to memory it is not allowed to access. Signed-off-by: Emmanuel Grumbach Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/iwlwifi/pcie/trans.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/pcie/trans.c b/drivers/net/wireless/iwlwifi/pcie/trans.c index 90283453073c..8c7204738aa3 100644 --- a/drivers/net/wireless/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/iwlwifi/pcie/trans.c @@ -7,6 +7,7 @@ * * Copyright(c) 2007 - 2015 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH + * Copyright(c) 2016 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -33,6 +34,7 @@ * * Copyright(c) 2005 - 2015 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH + * Copyright(c) 2016 Intel Deutschland GmbH * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -924,9 +926,16 @@ monitor: if (dest->monitor_mode == EXTERNAL_MODE && trans_pcie->fw_mon_size) { iwl_write_prph(trans, le32_to_cpu(dest->base_reg), trans_pcie->fw_mon_phys >> dest->base_shift); - iwl_write_prph(trans, le32_to_cpu(dest->end_reg), - (trans_pcie->fw_mon_phys + - trans_pcie->fw_mon_size) >> dest->end_shift); + if (trans->cfg->device_family == IWL_DEVICE_FAMILY_8000) + iwl_write_prph(trans, le32_to_cpu(dest->end_reg), + (trans_pcie->fw_mon_phys + + trans_pcie->fw_mon_size - 256) >> + dest->end_shift); + else + iwl_write_prph(trans, le32_to_cpu(dest->end_reg), + (trans_pcie->fw_mon_phys + + trans_pcie->fw_mon_size) >> + dest->end_shift); } } From 3beb469074ec66079f3fcbed1c6cf06ca8bbc873 Mon Sep 17 00:00:00 2001 From: Oren Givon Date: Thu, 17 Dec 2015 14:17:00 +0200 Subject: [PATCH 341/418] iwlwifi: update and fix 7265 series PCI IDs commit 006bda75d81fd27a583a3b310e9444fea2aa6ef2 upstream. Update and fix some 7265 PCI IDs entries. Signed-off-by: Oren Givon Signed-off-by: Emmanuel Grumbach Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/iwlwifi/pcie/drv.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/pcie/drv.c b/drivers/net/wireless/iwlwifi/pcie/drv.c index 639761fb2bfb..d58c094f2f04 100644 --- a/drivers/net/wireless/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/iwlwifi/pcie/drv.c @@ -384,6 +384,7 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x095B, 0x5310, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095B, 0x5302, iwl7265_n_cfg)}, {IWL_PCI_DEVICE(0x095B, 0x5210, iwl7265_2ac_cfg)}, + {IWL_PCI_DEVICE(0x095A, 0x5C10, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x5012, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x5412, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x5410, iwl7265_2ac_cfg)}, @@ -401,10 +402,10 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x095A, 0x900A, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x9110, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x9112, iwl7265_2ac_cfg)}, - {IWL_PCI_DEVICE(0x095A, 0x9210, iwl7265_2ac_cfg)}, + {IWL_PCI_DEVICE(0x095B, 0x9210, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095B, 0x9200, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x9510, iwl7265_2ac_cfg)}, - {IWL_PCI_DEVICE(0x095A, 0x9310, iwl7265_2ac_cfg)}, + {IWL_PCI_DEVICE(0x095B, 0x9310, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x9410, iwl7265_2ac_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x5020, iwl7265_2n_cfg)}, {IWL_PCI_DEVICE(0x095A, 0x502A, iwl7265_2n_cfg)}, From 74e579764007b3ef63d791116c7d252e0032cc1e Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Tue, 2 Feb 2016 15:11:15 +0200 Subject: [PATCH 342/418] iwlwifi: mvm: don't allow sched scans without matches to be started commit 5e56276e7555b34550d51459a801ff75eca8b907 upstream. The firmware can perform a scheduled scan with not matchsets passed, but it can't send notification that results were found. Since the userspace then cannot know when we got new results and the firmware wouldn't trigger a wake in case we are sleeping, it's better not to allow scans without matchsets. This fixes https://bugzilla.kernel.org/show_bug.cgi?id=110831 Signed-off-by: Luca Coelho Signed-off-by: Emmanuel Grumbach Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/iwlwifi/mvm/scan.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/wireless/iwlwifi/mvm/scan.c b/drivers/net/wireless/iwlwifi/mvm/scan.c index d6e0c1b5c20c..8215d7405f64 100644 --- a/drivers/net/wireless/iwlwifi/mvm/scan.c +++ b/drivers/net/wireless/iwlwifi/mvm/scan.c @@ -1267,6 +1267,10 @@ int iwl_mvm_sched_scan_start(struct iwl_mvm *mvm, return -EBUSY; } + /* we don't support "match all" in the firmware */ + if (!req->n_match_sets) + return -EOPNOTSUPP; + ret = iwl_mvm_check_running_scans(mvm, type); if (ret) return ret; From c252409a688a831385fb71097db7a86ffe595b74 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 3 Mar 2016 15:10:04 -0800 Subject: [PATCH 343/418] Linux 4.4.4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 802be10c40c5..344bc6f27ea1 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 3 +SUBLEVEL = 4 EXTRAVERSION = NAME = Blurry Fish Butt From 53729dbbd20a517c4ecb994cfe994bc832b30523 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 29 Feb 2016 12:12:46 -0500 Subject: [PATCH 344/418] use ->d_seq to get coherency between ->d_inode and ->d_flags commit a528aca7f359f4b0b1d72ae406097e491a5ba9ea upstream. Games with ordering and barriers are way too brittle. Just bump ->d_seq before and after updating ->d_inode and ->d_flags type bits, so that verifying ->d_seq would guarantee they are coherent. Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/dcache.c | 20 +++++--------------- include/linux/dcache.h | 4 +--- 2 files changed, 6 insertions(+), 18 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 5c33aeb0f68f..877bcbbd03ff 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -269,9 +269,6 @@ static inline int dname_external(const struct dentry *dentry) return dentry->d_name.name != dentry->d_iname; } -/* - * Make sure other CPUs see the inode attached before the type is set. - */ static inline void __d_set_inode_and_type(struct dentry *dentry, struct inode *inode, unsigned type_flags) @@ -279,28 +276,18 @@ static inline void __d_set_inode_and_type(struct dentry *dentry, unsigned flags; dentry->d_inode = inode; - smp_wmb(); flags = READ_ONCE(dentry->d_flags); flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU); flags |= type_flags; WRITE_ONCE(dentry->d_flags, flags); } -/* - * Ideally, we want to make sure that other CPUs see the flags cleared before - * the inode is detached, but this is really a violation of RCU principles - * since the ordering suggests we should always set inode before flags. - * - * We should instead replace or discard the entire dentry - but that sucks - * performancewise on mass deletion/rename. - */ static inline void __d_clear_type_and_inode(struct dentry *dentry) { unsigned flags = READ_ONCE(dentry->d_flags); flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU); WRITE_ONCE(dentry->d_flags, flags); - smp_wmb(); dentry->d_inode = NULL; } @@ -370,9 +357,11 @@ static void dentry_unlink_inode(struct dentry * dentry) __releases(dentry->d_inode->i_lock) { struct inode *inode = dentry->d_inode; + + raw_write_seqcount_begin(&dentry->d_seq); __d_clear_type_and_inode(dentry); hlist_del_init(&dentry->d_u.d_alias); - dentry_rcuwalk_invalidate(dentry); + raw_write_seqcount_end(&dentry->d_seq); spin_unlock(&dentry->d_lock); spin_unlock(&inode->i_lock); if (!inode->i_nlink) @@ -1757,8 +1746,9 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) spin_lock(&dentry->d_lock); if (inode) hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry); + raw_write_seqcount_begin(&dentry->d_seq); __d_set_inode_and_type(dentry, inode, add_flags); - dentry_rcuwalk_invalidate(dentry); + raw_write_seqcount_end(&dentry->d_seq); spin_unlock(&dentry->d_lock); fsnotify_d_instantiate(dentry, inode); } diff --git a/include/linux/dcache.h b/include/linux/dcache.h index d67ae119cf4e..8a2e009c8a5a 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -409,9 +409,7 @@ static inline bool d_mountpoint(const struct dentry *dentry) */ static inline unsigned __d_entry_type(const struct dentry *dentry) { - unsigned type = READ_ONCE(dentry->d_flags); - smp_rmb(); - return type & DCACHE_ENTRY_TYPE; + return dentry->d_flags & DCACHE_ENTRY_TYPE; } static inline bool d_is_miss(const struct dentry *dentry) From ad1d8666ae4b68f5417afb4d990aef597e14206b Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 24 Feb 2016 09:43:23 +0100 Subject: [PATCH 345/418] drivers: sh: Restore legacy clock domain on SuperH platforms commit 0378ba4899d5fbd8494ed6580cbc81d7b44dbac6 upstream. CONFIG_ARCH_SHMOBILE is not only enabled for Renesas ARM platforms (which are DT based and multi-platform), but also on a select set of Renesas SuperH platforms (SH7722/SH7723/SH7724/SH7343/SH7366). Hence since commit 0ba58de231066e47 ("drivers: sh: Get rid of CONFIG_ARCH_SHMOBILE_MULTI"), the legacy clock domain is no longer installed on these SuperH platforms, and module clocks may not be enabled when needed, leading to driver failures. To fix this, add an additional check for CONFIG_OF. Fixes: 0ba58de231066e47 ("drivers: sh: Get rid of CONFIG_ARCH_SHMOBILE_MULTI"). Signed-off-by: Geert Uytterhoeven Signed-off-by: Simon Horman Signed-off-by: Greg Kroah-Hartman --- drivers/sh/pm_runtime.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/sh/pm_runtime.c b/drivers/sh/pm_runtime.c index 91a003011acf..a9bac3bf20de 100644 --- a/drivers/sh/pm_runtime.c +++ b/drivers/sh/pm_runtime.c @@ -34,7 +34,7 @@ static struct pm_clk_notifier_block platform_bus_notifier = { static int __init sh_pm_runtime_init(void) { - if (IS_ENABLED(CONFIG_ARCH_SHMOBILE)) { + if (IS_ENABLED(CONFIG_OF) && IS_ENABLED(CONFIG_ARCH_SHMOBILE)) { if (!of_find_compatible_node(NULL, NULL, "renesas,cpg-mstp-clocks")) return 0; From 2232d6dc0017502b02157000afea6815a6810c2a Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 15 Jan 2016 11:05:12 +0000 Subject: [PATCH 346/418] Btrfs: fix deadlock running delayed iputs at transaction commit time commit c2d6cb1636d235257086f939a8194ef0bf93af6e upstream. While running a stress test I ran into a deadlock when running the delayed iputs at transaction time, which produced the following report and trace: [ 886.399989] ============================================= [ 886.400871] [ INFO: possible recursive locking detected ] [ 886.401663] 4.4.0-rc6-btrfs-next-18+ #1 Not tainted [ 886.402384] --------------------------------------------- [ 886.403182] fio/8277 is trying to acquire lock: [ 886.403568] (&fs_info->delayed_iput_sem){++++..}, at: [] btrfs_run_delayed_iputs+0x36/0xbf [btrfs] [ 886.403568] [ 886.403568] but task is already holding lock: [ 886.403568] (&fs_info->delayed_iput_sem){++++..}, at: [] btrfs_run_delayed_iputs+0x36/0xbf [btrfs] [ 886.403568] [ 886.403568] other info that might help us debug this: [ 886.403568] Possible unsafe locking scenario: [ 886.403568] [ 886.403568] CPU0 [ 886.403568] ---- [ 886.403568] lock(&fs_info->delayed_iput_sem); [ 886.403568] lock(&fs_info->delayed_iput_sem); [ 886.403568] [ 886.403568] *** DEADLOCK *** [ 886.403568] [ 886.403568] May be due to missing lock nesting notation [ 886.403568] [ 886.403568] 3 locks held by fio/8277: [ 886.403568] #0: (sb_writers#11){.+.+.+}, at: [] __sb_start_write+0x5f/0xb0 [ 886.403568] #1: (&sb->s_type->i_mutex_key#15){+.+.+.}, at: [] btrfs_file_write_iter+0x73/0x408 [btrfs] [ 886.403568] #2: (&fs_info->delayed_iput_sem){++++..}, at: [] btrfs_run_delayed_iputs+0x36/0xbf [btrfs] [ 886.403568] [ 886.403568] stack backtrace: [ 886.403568] CPU: 6 PID: 8277 Comm: fio Not tainted 4.4.0-rc6-btrfs-next-18+ #1 [ 886.403568] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS by qemu-project.org 04/01/2014 [ 886.403568] 0000000000000000 ffff88009f80f770 ffffffff8125d4fd ffffffff82af1fc0 [ 886.403568] ffff88009f80f830 ffffffff8108e5f9 0000000200000000 ffff88009fd92290 [ 886.403568] 0000000000000000 ffffffff82af1fc0 ffffffff829cfb01 00042b216d008804 [ 886.403568] Call Trace: [ 886.403568] [] dump_stack+0x4e/0x79 [ 886.403568] [] __lock_acquire+0xd42/0xf0b [ 886.403568] [] ? __module_address+0xdf/0x108 [ 886.403568] [] lock_acquire+0x10d/0x194 [ 886.403568] [] ? lock_acquire+0x10d/0x194 [ 886.403568] [] ? btrfs_run_delayed_iputs+0x36/0xbf [btrfs] [ 886.489542] [] down_read+0x3e/0x4d [ 886.489542] [] ? btrfs_run_delayed_iputs+0x36/0xbf [btrfs] [ 886.489542] [] btrfs_run_delayed_iputs+0x36/0xbf [btrfs] [ 886.489542] [] btrfs_commit_transaction+0x8f5/0x96e [btrfs] [ 886.489542] [] flush_space+0x435/0x44a [btrfs] [ 886.489542] [] ? reserve_metadata_bytes+0x26a/0x384 [btrfs] [ 886.489542] [] reserve_metadata_bytes+0x28d/0x384 [btrfs] [ 886.489542] [] ? btrfs_block_rsv_refill+0x58/0x96 [btrfs] [ 886.489542] [] btrfs_block_rsv_refill+0x70/0x96 [btrfs] [ 886.489542] [] btrfs_evict_inode+0x394/0x55a [btrfs] [ 886.489542] [] evict+0xa7/0x15c [ 886.489542] [] iput+0x1d3/0x266 [ 886.489542] [] btrfs_run_delayed_iputs+0x8f/0xbf [btrfs] [ 886.489542] [] btrfs_commit_transaction+0x8f5/0x96e [btrfs] [ 886.489542] [] ? signal_pending_state+0x31/0x31 [ 886.489542] [] btrfs_alloc_data_chunk_ondemand+0x1d7/0x288 [btrfs] [ 886.489542] [] btrfs_check_data_free_space+0x40/0x59 [btrfs] [ 886.489542] [] btrfs_delalloc_reserve_space+0x1e/0x4e [btrfs] [ 886.489542] [] btrfs_direct_IO+0x10c/0x27e [btrfs] [ 886.489542] [] generic_file_direct_write+0xb3/0x128 [ 886.489542] [] btrfs_file_write_iter+0x229/0x408 [btrfs] [ 886.489542] [] ? __lock_is_held+0x38/0x50 [ 886.489542] [] __vfs_write+0x7c/0xa5 [ 886.489542] [] vfs_write+0xa0/0xe4 [ 886.489542] [] SyS_write+0x50/0x7e [ 886.489542] [] entry_SYSCALL_64_fastpath+0x12/0x6f [ 1081.852335] INFO: task fio:8244 blocked for more than 120 seconds. [ 1081.854348] Not tainted 4.4.0-rc6-btrfs-next-18+ #1 [ 1081.857560] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 1081.863227] fio D ffff880213f9bb28 0 8244 8240 0x00000000 [ 1081.868719] ffff880213f9bb28 00ffffff810fc6b0 ffffffff0000000a ffff88023ed55240 [ 1081.872499] ffff880206b5d400 ffff880213f9c000 ffff88020a4d5318 ffff880206b5d400 [ 1081.876834] ffffffff00000001 ffff880206b5d400 ffff880213f9bb40 ffffffff81482ba4 [ 1081.880782] Call Trace: [ 1081.881793] [] schedule+0x7f/0x97 [ 1081.883340] [] rwsem_down_write_failed+0x2d5/0x325 [ 1081.895525] [] ? trace_hardirqs_on_caller+0x16/0x1ab [ 1081.897419] [] call_rwsem_down_write_failed+0x13/0x20 [ 1081.899251] [] ? call_rwsem_down_write_failed+0x13/0x20 [ 1081.901063] [] ? __down_write_nested.isra.0+0x1f/0x21 [ 1081.902365] [] down_write+0x43/0x57 [ 1081.903846] [] ? btrfs_alloc_data_chunk_ondemand+0x1f6/0x288 [btrfs] [ 1081.906078] [] btrfs_alloc_data_chunk_ondemand+0x1f6/0x288 [btrfs] [ 1081.908846] [] ? mark_held_locks+0x56/0x6c [ 1081.910409] [] btrfs_check_data_free_space+0x40/0x59 [btrfs] [ 1081.912482] [] btrfs_delalloc_reserve_space+0x1e/0x4e [btrfs] [ 1081.914597] [] btrfs_direct_IO+0x10c/0x27e [btrfs] [ 1081.919037] [] generic_file_direct_write+0xb3/0x128 [ 1081.920754] [] btrfs_file_write_iter+0x229/0x408 [btrfs] [ 1081.922496] [] ? __lock_is_held+0x38/0x50 [ 1081.923922] [] __vfs_write+0x7c/0xa5 [ 1081.925275] [] vfs_write+0xa0/0xe4 [ 1081.926584] [] SyS_write+0x50/0x7e [ 1081.927968] [] entry_SYSCALL_64_fastpath+0x12/0x6f [ 1081.985293] INFO: lockdep is turned off. [ 1081.986132] INFO: task fio:8249 blocked for more than 120 seconds. [ 1081.987434] Not tainted 4.4.0-rc6-btrfs-next-18+ #1 [ 1081.988534] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 1081.990147] fio D ffff880218febbb8 0 8249 8240 0x00000000 [ 1081.991626] ffff880218febbb8 00ffffff81486b8e ffff88020000000b ffff88023ed75240 [ 1081.993258] ffff8802120a9a00 ffff880218fec000 ffff88020a4d5318 ffff8802120a9a00 [ 1081.994850] ffffffff00000001 ffff8802120a9a00 ffff880218febbd0 ffffffff81482ba4 [ 1081.996485] Call Trace: [ 1081.997037] [] schedule+0x7f/0x97 [ 1081.998017] [] rwsem_down_write_failed+0x2d5/0x325 [ 1081.999241] [] ? finish_wait+0x6d/0x76 [ 1082.000306] [] call_rwsem_down_write_failed+0x13/0x20 [ 1082.001533] [] ? call_rwsem_down_write_failed+0x13/0x20 [ 1082.002776] [] ? __down_write_nested.isra.0+0x1f/0x21 [ 1082.003995] [] down_write+0x43/0x57 [ 1082.005000] [] ? btrfs_alloc_data_chunk_ondemand+0x1f6/0x288 [btrfs] [ 1082.007403] [] btrfs_alloc_data_chunk_ondemand+0x1f6/0x288 [btrfs] [ 1082.008988] [] btrfs_fallocate+0x7c1/0xc2f [btrfs] [ 1082.010193] [] ? percpu_down_read+0x4e/0x77 [ 1082.011280] [] ? __sb_start_write+0x5f/0xb0 [ 1082.012265] [] ? __sb_start_write+0x5f/0xb0 [ 1082.013021] [] vfs_fallocate+0x170/0x1ff [ 1082.013738] [] ioctl_preallocate+0x89/0x9b [ 1082.014778] [] do_vfs_ioctl+0x40a/0x4ea [ 1082.015778] [] ? SYSC_newfstat+0x25/0x2e [ 1082.016806] [] ? __fget_light+0x4d/0x71 [ 1082.017789] [] SyS_ioctl+0x57/0x79 [ 1082.018706] [] entry_SYSCALL_64_fastpath+0x12/0x6f This happens because we can recursively acquire the semaphore fs_info->delayed_iput_sem when attempting to allocate space to satisfy a file write request as shown in the first trace above - when committing a transaction we acquire (down_read) the semaphore before running the delayed iputs, and when running a delayed iput() we can end up calling an inode's eviction handler, which in turn commits another transaction and attempts to acquire (down_read) again the semaphore to run more delayed iput operations. This results in a deadlock because if a task acquires multiple times a semaphore it should invoke down_read_nested() with a different lockdep class for each level of recursion. Fix this by simplifying the implementation and use a mutex instead that is acquired by the cleaner kthread before it runs the delayed iputs instead of always acquiring a semaphore before delayed references are run from anywhere. Fixes: d7c151717a1e (btrfs: Fix NO_SPACE bug caused by delayed-iput) Signed-off-by: Filipe Manana Signed-off-by: Chris Mason Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ctree.h | 2 +- fs/btrfs/disk-io.c | 5 ++++- fs/btrfs/extent-tree.c | 9 +++++---- fs/btrfs/inode.c | 4 ---- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 35489e7129a7..385b449fd7ed 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1572,7 +1572,7 @@ struct btrfs_fs_info { spinlock_t delayed_iput_lock; struct list_head delayed_iputs; - struct rw_semaphore delayed_iput_sem; + struct mutex cleaner_delayed_iput_mutex; /* this protects tree_mod_seq_list */ spinlock_t tree_mod_seq_lock; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4958360a44f7..41fb43183406 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1796,7 +1796,10 @@ static int cleaner_kthread(void *arg) goto sleep; } + mutex_lock(&root->fs_info->cleaner_delayed_iput_mutex); btrfs_run_delayed_iputs(root); + mutex_unlock(&root->fs_info->cleaner_delayed_iput_mutex); + again = btrfs_clean_one_deleted_snapshot(root); mutex_unlock(&root->fs_info->cleaner_mutex); @@ -2556,8 +2559,8 @@ int open_ctree(struct super_block *sb, mutex_init(&fs_info->delete_unused_bgs_mutex); mutex_init(&fs_info->reloc_mutex); mutex_init(&fs_info->delalloc_root_mutex); + mutex_init(&fs_info->cleaner_delayed_iput_mutex); seqlock_init(&fs_info->profiles_lock); - init_rwsem(&fs_info->delayed_iput_sem); INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); INIT_LIST_HEAD(&fs_info->space_info); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c4661db2b72a..470ea9b321a4 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4100,11 +4100,12 @@ commit_trans: if (ret) return ret; /* - * make sure that all running delayed iput are - * done + * The cleaner kthread might still be doing iput + * operations. Wait for it to finish so that + * more space is released. */ - down_write(&root->fs_info->delayed_iput_sem); - up_write(&root->fs_info->delayed_iput_sem); + mutex_lock(&root->fs_info->cleaner_delayed_iput_mutex); + mutex_unlock(&root->fs_info->cleaner_delayed_iput_mutex); goto again; } else { btrfs_end_transaction(trans, root); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 52fc1b5e9f03..4bc9dbf29a73 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3142,8 +3142,6 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root) if (empty) return; - down_read(&fs_info->delayed_iput_sem); - spin_lock(&fs_info->delayed_iput_lock); list_splice_init(&fs_info->delayed_iputs, &list); spin_unlock(&fs_info->delayed_iput_lock); @@ -3154,8 +3152,6 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root) iput(delayed->inode); kfree(delayed); } - - up_read(&root->fs_info->delayed_iput_sem); } /* From 08acfd9dd845dc052c5eae33e6c3976338070069 Mon Sep 17 00:00:00 2001 From: Zhao Lei Date: Tue, 1 Dec 2015 18:39:40 +0800 Subject: [PATCH 347/418] btrfs: Fix no_space in write and rm loop commit e1746e8381cd2af421f75557b5cae3604fc18b35 upstream. I see no_space in v4.4-rc1 again in xfstests generic/102. It happened randomly in some node only. (one of 4 phy-node, and a kvm with non-virtio block driver) By bisect, we can found the first-bad is: commit bdced438acd8 ("block: setup bi_phys_segments after splitting")' But above patch only triggered the bug by making bio operation faster(or slower). Main reason is in our space_allocating code, we need to commit page writeback before wait it complish, this patch fixed above bug. BTW, there is another reason for generic/102 fail, caused by disable default mixed-blockgroup, I'll fix it in xfstests. Signed-off-by: Zhao Lei Signed-off-by: Chris Mason Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent-tree.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 470ea9b321a4..2368cac1115a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4086,8 +4086,10 @@ commit_trans: !atomic_read(&root->fs_info->open_ioctl_trans)) { need_commit--; - if (need_commit > 0) + if (need_commit > 0) { + btrfs_start_delalloc_roots(fs_info, 0, -1); btrfs_wait_ordered_roots(fs_info, -1); + } trans = btrfs_join_transaction(root); if (IS_ERR(trans)) From d60a37490744e8dfc2dbd31319a658da7bd601c7 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 22 Jan 2016 09:28:38 +0800 Subject: [PATCH 348/418] btrfs: async-thread: Fix a use-after-free error for trace commit 0a95b851370b84a4b9d92ee6d1fa0926901d0454 upstream. Parameter of trace_btrfs_work_queued() can be freed in its workqueue. So no one use use that pointer after queue_work(). Fix the user-after-free bug by move the trace line before queue_work(). Reported-by: Dave Jones Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: Chris Mason Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/async-thread.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 3e36e4adc4a3..9aba42b78253 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -328,8 +328,8 @@ static inline void __btrfs_queue_work(struct __btrfs_workqueue *wq, list_add_tail(&work->ordered_list, &wq->ordered_list); spin_unlock_irqrestore(&wq->list_lock, flags); } - queue_work(wq->normal_wq, &work->normal_work); trace_btrfs_work_queued(work); + queue_work(wq->normal_wq, &work->normal_work); } void btrfs_queue_work(struct btrfs_workqueue *wq, From 3b26135a8834b726d773e56cd37dd9ff6e43eb6b Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sat, 30 Jan 2016 07:59:34 +0200 Subject: [PATCH 349/418] drm/amdgpu: mask out WC from BO on unsupported arches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit a187f17f0e15a046aa5d7263b35df55230d92779 upstream. Reviewed-by: Christian König Reviewed-by: Michel Dänzer Signed-off-by: Oded Gabbay Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index a2a16acee34d..b8fbbd7699e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "amdgpu.h" #include "amdgpu_trace.h" @@ -261,6 +262,13 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev, AMDGPU_GEM_DOMAIN_OA); bo->flags = flags; + + /* For architectures that don't support WC memory, + * mask out the WC flag from the BO + */ + if (!drm_arch_can_wc_memory()) + bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC; + amdgpu_fill_placement_to_bo(bo, placement); /* Kernel allocation are uninterruptible */ r = ttm_bo_init(&adev->mman.bdev, &bo->tbo, size, type, From 9ec190906d22e7d8bbe3234f37e31b3ce743a71b Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 10 Feb 2016 16:52:47 -0700 Subject: [PATCH 350/418] block: Initialize max_dev_sectors to 0 commit 5f009d3f8e6685fe8c6215082c1696a08b411220 upstream. The new queue limit is not used by the majority of block drivers, and should be initialized to 0 for the driver's requested settings to be used. Signed-off-by: Keith Busch Acked-by: Martin K. Petersen Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/blk-settings.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/blk-settings.c b/block/blk-settings.c index dd4973583978..c7bb666aafd1 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -91,8 +91,8 @@ void blk_set_default_limits(struct queue_limits *lim) lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK; lim->virt_boundary_mask = 0; lim->max_segment_size = BLK_MAX_SEGMENT_SIZE; - lim->max_sectors = lim->max_dev_sectors = lim->max_hw_sectors = - BLK_SAFE_MAX_SECTORS; + lim->max_sectors = lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS; + lim->max_dev_sectors = 0; lim->chunk_sectors = 0; lim->max_write_same_sectors = 0; lim->max_discard_sectors = 0; From 5bbc2b412a22acbf614e71f74e327a7a63a754ac Mon Sep 17 00:00:00 2001 From: Murali Karicheri Date: Mon, 29 Feb 2016 17:18:22 -0600 Subject: [PATCH 351/418] PCI: keystone: Fix MSI code that retrieves struct pcie_port pointer commit 79e3f4a853ed161cd4c06d84b50beebf961a47c6 upstream. Commit cbce7900598c ("PCI: designware: Make driver arch-agnostic") changed the host bridge sysdata pointer from the ARM pci_sys_data to the DesignWare pcie_port structure, and changed pcie-designware.c to reflect that. But it did not change the corresponding code in pci-keystone-dw.c, so it caused crashes on Keystone: Unable to handle kernel NULL pointer dereference at virtual address 00000030 pgd = c0003000 [00000030] *pgd=80000800004003, *pmd=00000000 Internal error: Oops: 206 [#1] PREEMPT SMP ARM CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.4.2-00139-gb74f926 #2 Hardware name: Keystone PC is at ks_dw_pcie_msi_irq_unmask+0x24/0x58 Change pci-keystone-dw.c to expect sysdata to be the struct pcie_port pointer. [bhelgaas: changelog] Fixes: cbce7900598c ("PCI: designware: Make driver arch-agnostic") Signed-off-by: Murali Karicheri Signed-off-by: Bjorn Helgaas CC: Zhou Wang Signed-off-by: Greg Kroah-Hartman --- drivers/pci/host/pci-keystone-dw.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/pci/host/pci-keystone-dw.c b/drivers/pci/host/pci-keystone-dw.c index ed34c9520a02..6153853ca9c3 100644 --- a/drivers/pci/host/pci-keystone-dw.c +++ b/drivers/pci/host/pci-keystone-dw.c @@ -58,11 +58,6 @@ #define to_keystone_pcie(x) container_of(x, struct keystone_pcie, pp) -static inline struct pcie_port *sys_to_pcie(struct pci_sys_data *sys) -{ - return sys->private_data; -} - static inline void update_reg_offset_bit_pos(u32 offset, u32 *reg_offset, u32 *bit_pos) { @@ -108,7 +103,7 @@ static void ks_dw_pcie_msi_irq_ack(struct irq_data *d) struct pcie_port *pp; msi = irq_data_get_msi_desc(d); - pp = sys_to_pcie(msi_desc_to_pci_sysdata(msi)); + pp = (struct pcie_port *) msi_desc_to_pci_sysdata(msi); ks_pcie = to_keystone_pcie(pp); offset = d->irq - irq_linear_revmap(pp->irq_domain, 0); update_reg_offset_bit_pos(offset, ®_offset, &bit_pos); @@ -146,7 +141,7 @@ static void ks_dw_pcie_msi_irq_mask(struct irq_data *d) u32 offset; msi = irq_data_get_msi_desc(d); - pp = sys_to_pcie(msi_desc_to_pci_sysdata(msi)); + pp = (struct pcie_port *) msi_desc_to_pci_sysdata(msi); ks_pcie = to_keystone_pcie(pp); offset = d->irq - irq_linear_revmap(pp->irq_domain, 0); @@ -167,7 +162,7 @@ static void ks_dw_pcie_msi_irq_unmask(struct irq_data *d) u32 offset; msi = irq_data_get_msi_desc(d); - pp = sys_to_pcie(msi_desc_to_pci_sysdata(msi)); + pp = (struct pcie_port *) msi_desc_to_pci_sysdata(msi); ks_pcie = to_keystone_pcie(pp); offset = d->irq - irq_linear_revmap(pp->irq_domain, 0); From 59dc7ab2e8ab1a0fd0ff104a67f7827436435a46 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Tue, 19 Jan 2016 16:08:49 +0100 Subject: [PATCH 352/418] parisc: Fix ptrace syscall number and return value modification commit 98e8b6c9ac9d1b1e9d1122dfa6783d5d566bb8f7 upstream. Mike Frysinger reported that his ptrace testcase showed strange behaviour on parisc: It was not possible to avoid a syscall and the return value of a syscall couldn't be changed. To modify a syscall number, we were missing to save the new syscall number to gr20 which is then picked up later in assembly again. The effect that the return value couldn't be changed is a side-effect of another bug in the assembly code. When a process is ptraced, userspace expects each syscall to report entrance and exit of a syscall. If a syscall number was given which doesn't exist, we jumped to the normal syscall exit code instead of informing userspace that the (non-existant) syscall exits. This unexpected behaviour confuses userspace and thus the bug was misinterpreted as if we can't change the return value. This patch fixes both problems and was tested on 64bit kernel with 32bit userspace. Signed-off-by: Helge Deller Cc: Mike Frysinger Tested-by: Mike Frysinger Signed-off-by: Greg Kroah-Hartman --- arch/parisc/kernel/ptrace.c | 16 +++++++++++----- arch/parisc/kernel/syscall.S | 5 ++++- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/arch/parisc/kernel/ptrace.c b/arch/parisc/kernel/ptrace.c index 9585c81f755f..ce0b2b4075c7 100644 --- a/arch/parisc/kernel/ptrace.c +++ b/arch/parisc/kernel/ptrace.c @@ -269,14 +269,19 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, long do_syscall_trace_enter(struct pt_regs *regs) { - long ret = 0; - /* Do the secure computing check first. */ secure_computing_strict(regs->gr[20]); if (test_thread_flag(TIF_SYSCALL_TRACE) && - tracehook_report_syscall_entry(regs)) - ret = -1L; + tracehook_report_syscall_entry(regs)) { + /* + * Tracing decided this syscall should not happen or the + * debugger stored an invalid system call number. Skip + * the system call and the system call restart handling. + */ + regs->gr[20] = -1UL; + goto out; + } #ifdef CONFIG_64BIT if (!is_compat_task()) @@ -290,7 +295,8 @@ long do_syscall_trace_enter(struct pt_regs *regs) regs->gr[24] & 0xffffffff, regs->gr[23] & 0xffffffff); - return ret ? : regs->gr[20]; +out: + return regs->gr[20]; } void do_syscall_trace_exit(struct pt_regs *regs) diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S index 3fbd7252a4b2..fbafa0d0e2bf 100644 --- a/arch/parisc/kernel/syscall.S +++ b/arch/parisc/kernel/syscall.S @@ -343,7 +343,7 @@ tracesys_next: #endif comiclr,>>= __NR_Linux_syscalls, %r20, %r0 - b,n .Lsyscall_nosys + b,n .Ltracesys_nosys LDREGX %r20(%r19), %r19 @@ -359,6 +359,9 @@ tracesys_next: be 0(%sr7,%r19) ldo R%tracesys_exit(%r2),%r2 +.Ltracesys_nosys: + ldo -ENOSYS(%r0),%r28 /* set errno */ + /* Do *not* call this function on the gateway page, because it makes a direct call to syscall_trace. */ From ce16d9cf0481aa71cf31fac9cd5826022c9f12fb Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sun, 28 Feb 2016 17:35:59 +0200 Subject: [PATCH 353/418] mips/kvm: fix ioctl error handling commit 0178fd7dcc4451fcb90bec5e91226586962478d2 upstream. Returning directly whatever copy_to_user(...) or copy_from_user(...) returns may not do the right thing if there's a pagefault: copy_to_user/copy_from_user return the number of bytes not copied in this case, but ioctls need to return -EFAULT instead. Fix up kvm on mips to do return copy_to_user(...)) ? -EFAULT : 0; and return copy_from_user(...)) ? -EFAULT : 0; everywhere. Signed-off-by: Michael S. Tsirkin Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/mips/kvm/mips.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index b9b803facdbf..2683d04fdda5 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -702,7 +702,7 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu, } else if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U128) { void __user *uaddr = (void __user *)(long)reg->addr; - return copy_to_user(uaddr, vs, 16); + return copy_to_user(uaddr, vs, 16) ? -EFAULT : 0; } else { return -EINVAL; } @@ -732,7 +732,7 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu, } else if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U128) { void __user *uaddr = (void __user *)(long)reg->addr; - return copy_from_user(vs, uaddr, 16); + return copy_from_user(vs, uaddr, 16) ? -EFAULT : 0; } else { return -EINVAL; } From 8cedd1c0796f849760ddd46fb45355c92e913c84 Mon Sep 17 00:00:00 2001 From: Owen Hofmann Date: Tue, 1 Mar 2016 13:36:13 -0800 Subject: [PATCH 354/418] kvm: x86: Update tsc multiplier on change. commit 2680d6da455b636dd006636780c0f235c6561d70 upstream. vmx.c writes the TSC_MULTIPLIER field in vmx_vcpu_load, but only when a vcpu has migrated physical cpus. Record the last value written and update in vmx_vcpu_load on any change, otherwise a cpu migration must occur for TSC frequency scaling to take effect. Fixes: ff2c3a1803775cc72dc6f624b59554956396b0ee Signed-off-by: Owen Hofmann Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/vmx.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 10e7693b3540..5fd846cd6e0e 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -595,6 +595,8 @@ struct vcpu_vmx { /* Support for PML */ #define PML_ENTITY_NUM 512 struct page *pml_pg; + + u64 current_tsc_ratio; }; enum segment_cache_field { @@ -2062,14 +2064,16 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp); vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */ - /* Setup TSC multiplier */ - if (cpu_has_vmx_tsc_scaling()) - vmcs_write64(TSC_MULTIPLIER, - vcpu->arch.tsc_scaling_ratio); - vmx->loaded_vmcs->cpu = cpu; } + /* Setup TSC multiplier */ + if (kvm_has_tsc_control && + vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio) { + vmx->current_tsc_ratio = vcpu->arch.tsc_scaling_ratio; + vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio); + } + vmx_vcpu_pi_load(vcpu, cpu); } From 2f553b9b6fe7ef636353d80e22d45dbe76c61fb9 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Mon, 15 Feb 2016 18:41:33 +0000 Subject: [PATCH 355/418] fbcon: set a default value to blink interval commit a1e533ec07d583d01349ef13c0c965b8633e1b91 upstream. Since commit 27a4c827c34ac4256a190cc9d24607f953c1c459 fbcon: use the cursor blink interval provided by vt two attempts have been made at fixing a possible hang caused by cursor_timer_handler. That function registers a timer to be triggered at "jiffies + fbcon_ops.cur_blink_jiffies". A new case had been encountered during initialisation of clcd-pl11x: fbcon_fb_registered do_fbcon_takeover -> do_register_con_driver fbcon_startup (A) add_cursor_timer (with cur_blink_jiffies = 0) -> do_bind_con_driver visual_init fbcon_init (B) cur_blink_jiffies = msecs_to_jiffies(vc->vc_cur_blink_ms); If we take an softirq anywhere between A and B (and we do), cursor_timer_handler executes indefinitely. Instead of patching all possible paths that lead to this case one at a time, fix the issue at the source and initialise cur_blink_jiffies to 200ms when allocating fbcon_ops. This was its default value before aforesaid commit. fbcon_cursor or fbcon_init will refine this value downstream. Signed-off-by: Jean-Philippe Brucker Tested-by: Scot Doyle Signed-off-by: Tomi Valkeinen Signed-off-by: Greg Kroah-Hartman --- drivers/video/console/fbcon.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c index 92f394927f24..6e92917ba77a 100644 --- a/drivers/video/console/fbcon.c +++ b/drivers/video/console/fbcon.c @@ -709,6 +709,7 @@ static int con2fb_acquire_newinfo(struct vc_data *vc, struct fb_info *info, } if (!err) { + ops->cur_blink_jiffies = HZ / 5; info->fbcon_par = ops; if (vc) @@ -956,6 +957,7 @@ static const char *fbcon_startup(void) ops->currcon = -1; ops->graphics = 1; ops->cur_rotate = -1; + ops->cur_blink_jiffies = HZ / 5; info->fbcon_par = ops; p->con_rotate = initial_rotation; set_blitting_type(vc, info); From 77f0cc0e5e9cd79f93acb3e7baae425638b8f293 Mon Sep 17 00:00:00 2001 From: Justin Maggard Date: Tue, 9 Feb 2016 15:52:08 -0800 Subject: [PATCH 356/418] cifs: fix out-of-bounds access in lease parsing commit deb7deff2f00bdbbcb3d560dad2a89ef37df837d upstream. When opening a file, SMB2_open() attempts to parse the lease state from the SMB2 CREATE Response. However, the parsing code was not careful to ensure that the create contexts are not empty or invalid, which can lead to out- of-bounds memory access. This can be seen easily by trying to read a file from a OSX 10.11 SMB3 server. Here is sample crash output: BUG: unable to handle kernel paging request at ffff8800a1a77cc6 IP: [] SMB2_open+0x804/0x960 PGD 8f77067 PUD 0 Oops: 0000 [#1] SMP Modules linked in: CPU: 3 PID: 2876 Comm: cp Not tainted 4.5.0-rc3.x86_64.1+ #14 Hardware name: NETGEAR ReadyNAS 314 /ReadyNAS 314 , BIOS 4.6.5 10/11/2012 task: ffff880073cdc080 ti: ffff88005b31c000 task.ti: ffff88005b31c000 RIP: 0010:[] [] SMB2_open+0x804/0x960 RSP: 0018:ffff88005b31fa08 EFLAGS: 00010282 RAX: 0000000000000015 RBX: 0000000000000000 RCX: 0000000000000006 RDX: 0000000000000000 RSI: 0000000000000246 RDI: ffff88007eb8c8b0 RBP: ffff88005b31fad8 R08: 666666203d206363 R09: 6131613030383866 R10: 3030383866666666 R11: 00000000000002b0 R12: ffff8800660fd800 R13: ffff8800a1a77cc2 R14: 00000000424d53fe R15: ffff88005f5a28c0 FS: 00007f7c8a2897c0(0000) GS:ffff88007eb80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: ffff8800a1a77cc6 CR3: 000000005b281000 CR4: 00000000000006e0 Stack: ffff88005b31fa70 ffffffff88278789 00000000000001d3 ffff88005f5a2a80 ffffffff00000003 ffff88005d029d00 ffff88006fde05a0 0000000000000000 ffff88005b31fc78 ffff88006fde0780 ffff88005b31fb2f 0000000100000fe0 Call Trace: [] ? cifsConvertToUTF16+0x159/0x2d0 [] smb2_open_file+0x98/0x210 [] ? __kmalloc+0x1c/0xe0 [] cifs_open+0x2a4/0x720 [] do_dentry_open+0x1ff/0x310 [] ? cifsFileInfo_get+0x30/0x30 [] vfs_open+0x52/0x60 [] path_openat+0x170/0xf70 [] ? remove_wait_queue+0x48/0x50 [] do_filp_open+0x79/0xd0 [] ? __alloc_fd+0x3a/0x170 [] do_sys_open+0x114/0x1e0 [] SyS_open+0x19/0x20 [] entry_SYSCALL_64_fastpath+0x12/0x6a Code: 4d 8d 6c 07 04 31 c0 4c 89 ee e8 47 6f e5 ff 31 c9 41 89 ce 44 89 f1 48 c7 c7 28 b1 bd 88 31 c0 49 01 cd 4c 89 ee e8 2b 6f e5 ff <45> 0f b7 75 04 48 c7 c7 31 b1 bd 88 31 c0 4d 01 ee 4c 89 f6 e8 RIP [] SMB2_open+0x804/0x960 RSP CR2: ffff8800a1a77cc6 ---[ end trace d9f69ba64feee469 ]--- Signed-off-by: Justin Maggard Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2pdu.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 767555518d40..373b5cd1c913 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1109,21 +1109,25 @@ parse_lease_state(struct TCP_Server_Info *server, struct smb2_create_rsp *rsp, { char *data_offset; struct create_context *cc; - unsigned int next = 0; + unsigned int next; + unsigned int remaining; char *name; data_offset = (char *)rsp + 4 + le32_to_cpu(rsp->CreateContextsOffset); + remaining = le32_to_cpu(rsp->CreateContextsLength); cc = (struct create_context *)data_offset; - do { - cc = (struct create_context *)((char *)cc + next); + while (remaining >= sizeof(struct create_context)) { name = le16_to_cpu(cc->NameOffset) + (char *)cc; - if (le16_to_cpu(cc->NameLength) != 4 || - strncmp(name, "RqLs", 4)) { - next = le32_to_cpu(cc->Next); - continue; - } - return server->ops->parse_lease_buf(cc, epoch); - } while (next != 0); + if (le16_to_cpu(cc->NameLength) == 4 && + strncmp(name, "RqLs", 4) == 0) + return server->ops->parse_lease_buf(cc, epoch); + + next = le32_to_cpu(cc->Next); + if (!next) + break; + remaining -= next; + cc = (struct create_context *)((char *)cc + next); + } return 0; } From b52a395883fa5b793137d57d90e1fd980e42748f Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Sat, 27 Feb 2016 11:58:18 +0300 Subject: [PATCH 357/418] CIFS: Fix SMB2+ interim response processing for read requests commit 6cc3b24235929b54acd5ecc987ef11a425bd209e upstream. For interim responses we only need to parse a header and update a number credits. Now it is done for all SMB2+ command except SMB2_READ which is wrong. Fix this by adding such processing. Signed-off-by: Pavel Shilovsky Tested-by: Shirish Pargaonkar Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/cifssmb.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 90b4f9f7de66..76fcb50295a3 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1396,11 +1396,10 @@ openRetry: * current bigbuf. */ static int -cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid) +discard_remaining_data(struct TCP_Server_Info *server) { unsigned int rfclen = get_rfc1002_length(server->smallbuf); int remaining = rfclen + 4 - server->total_read; - struct cifs_readdata *rdata = mid->callback_data; while (remaining > 0) { int length; @@ -1414,10 +1413,20 @@ cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid) remaining -= length; } - dequeue_mid(mid, rdata->result); return 0; } +static int +cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid) +{ + int length; + struct cifs_readdata *rdata = mid->callback_data; + + length = discard_remaining_data(server); + dequeue_mid(mid, rdata->result); + return length; +} + int cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) { @@ -1446,6 +1455,12 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) return length; server->total_read += length; + if (server->ops->is_status_pending && + server->ops->is_status_pending(buf, server, 0)) { + discard_remaining_data(server); + return -1; + } + /* Was the SMB read successful? */ rdata->result = server->ops->map_error(buf, false); if (rdata->result != 0) { From bbb3c5076be5c932609524db605e7911ecb95b47 Mon Sep 17 00:00:00 2001 From: Yadan Fan Date: Mon, 29 Feb 2016 14:44:57 +0800 Subject: [PATCH 358/418] Fix cifs_uniqueid_to_ino_t() function for s390x commit 1ee9f4bd1a97026a7b2d7ae9f1f74b45680d0003 upstream. This issue is caused by commit 02323db17e3a7 ("cifs: fix cifs_uniqueid_to_ino_t not to ever return 0"), when BITS_PER_LONG is 64 on s390x, the corresponding cifs_uniqueid_to_ino_t() function will cast 64-bit fileid to 32-bit by using (ino_t)fileid, because ino_t (typdefed __kernel_ino_t) is int type. It's defined in arch/s390/include/uapi/asm/posix_types.h #ifndef __s390x__ typedef unsigned long __kernel_ino_t; ... #else /* __s390x__ */ typedef unsigned int __kernel_ino_t; So the #ifdef condition is wrong for s390x, we can just still use one cifs_uniqueid_to_ino_t() function with comparing sizeof(ino_t) and sizeof(u64) to choose the correct execution accordingly. Signed-off-by: Yadan Fan Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/cifsfs.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index c3cc1609025f..44b3d4280abb 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -31,19 +31,15 @@ * so that it will fit. We use hash_64 to convert the value to 31 bits, and * then add 1, to ensure that we don't end up with a 0 as the value. */ -#if BITS_PER_LONG == 64 static inline ino_t cifs_uniqueid_to_ino_t(u64 fileid) { + if ((sizeof(ino_t)) < (sizeof(u64))) + return (ino_t)hash_64(fileid, (sizeof(ino_t) * 8) - 1) + 1; + return (ino_t)fileid; + } -#else -static inline ino_t -cifs_uniqueid_to_ino_t(u64 fileid) -{ - return (ino_t)hash_64(fileid, (sizeof(ino_t) * 8) - 1) + 1; -} -#endif extern struct file_system_type cifs_fs_type; extern const struct address_space_operations cifs_addr_ops; From 7931825da8fd57ceb8bae86d3dbe555a506a7edd Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sun, 28 Feb 2016 16:31:39 +0200 Subject: [PATCH 359/418] vfio: fix ioctl error handling commit 8160c4e455820d5008a1116d2dca35f0363bb062 upstream. Calling return copy_to_user(...) in an ioctl will not do the right thing if there's a pagefault: copy_to_user returns the number of bytes not copied in this case. Fix up vfio to do return copy_to_user(...)) ? -EFAULT : 0; everywhere. Signed-off-by: Michael S. Tsirkin Signed-off-by: Alex Williamson Signed-off-by: Greg Kroah-Hartman --- drivers/vfio/pci/vfio_pci.c | 9 ++++++--- drivers/vfio/platform/vfio_platform_common.c | 9 ++++++--- drivers/vfio/vfio_iommu_type1.c | 6 ++++-- 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 56bf6dbb93db..9982cb176ce8 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -446,7 +446,8 @@ static long vfio_pci_ioctl(void *device_data, info.num_regions = VFIO_PCI_NUM_REGIONS; info.num_irqs = VFIO_PCI_NUM_IRQS; - return copy_to_user((void __user *)arg, &info, minsz); + return copy_to_user((void __user *)arg, &info, minsz) ? + -EFAULT : 0; } else if (cmd == VFIO_DEVICE_GET_REGION_INFO) { struct pci_dev *pdev = vdev->pdev; @@ -520,7 +521,8 @@ static long vfio_pci_ioctl(void *device_data, return -EINVAL; } - return copy_to_user((void __user *)arg, &info, minsz); + return copy_to_user((void __user *)arg, &info, minsz) ? + -EFAULT : 0; } else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) { struct vfio_irq_info info; @@ -555,7 +557,8 @@ static long vfio_pci_ioctl(void *device_data, else info.flags |= VFIO_IRQ_INFO_NORESIZE; - return copy_to_user((void __user *)arg, &info, minsz); + return copy_to_user((void __user *)arg, &info, minsz) ? + -EFAULT : 0; } else if (cmd == VFIO_DEVICE_SET_IRQS) { struct vfio_irq_set hdr; diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c index 418cdd9ba3f4..e65b142d3422 100644 --- a/drivers/vfio/platform/vfio_platform_common.c +++ b/drivers/vfio/platform/vfio_platform_common.c @@ -219,7 +219,8 @@ static long vfio_platform_ioctl(void *device_data, info.num_regions = vdev->num_regions; info.num_irqs = vdev->num_irqs; - return copy_to_user((void __user *)arg, &info, minsz); + return copy_to_user((void __user *)arg, &info, minsz) ? + -EFAULT : 0; } else if (cmd == VFIO_DEVICE_GET_REGION_INFO) { struct vfio_region_info info; @@ -240,7 +241,8 @@ static long vfio_platform_ioctl(void *device_data, info.size = vdev->regions[info.index].size; info.flags = vdev->regions[info.index].flags; - return copy_to_user((void __user *)arg, &info, minsz); + return copy_to_user((void __user *)arg, &info, minsz) ? + -EFAULT : 0; } else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) { struct vfio_irq_info info; @@ -259,7 +261,8 @@ static long vfio_platform_ioctl(void *device_data, info.flags = vdev->irqs[info.index].flags; info.count = vdev->irqs[info.index].count; - return copy_to_user((void __user *)arg, &info, minsz); + return copy_to_user((void __user *)arg, &info, minsz) ? + -EFAULT : 0; } else if (cmd == VFIO_DEVICE_SET_IRQS) { struct vfio_irq_set hdr; diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 59d47cb638d5..ecb826eefe02 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -999,7 +999,8 @@ static long vfio_iommu_type1_ioctl(void *iommu_data, info.iova_pgsizes = vfio_pgsize_bitmap(iommu); - return copy_to_user((void __user *)arg, &info, minsz); + return copy_to_user((void __user *)arg, &info, minsz) ? + -EFAULT : 0; } else if (cmd == VFIO_IOMMU_MAP_DMA) { struct vfio_iommu_type1_dma_map map; @@ -1032,7 +1033,8 @@ static long vfio_iommu_type1_ioctl(void *iommu_data, if (ret) return ret; - return copy_to_user((void __user *)arg, &unmap, minsz); + return copy_to_user((void __user *)arg, &unmap, minsz) ? + -EFAULT : 0; } return -ENOTTY; From 25e8618619a5a46aae253e1cc68eeaa0d44d2f52 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 26 Feb 2016 12:28:40 +0100 Subject: [PATCH 360/418] KVM: x86: fix root cause for missed hardware breakpoints commit 70e4da7a8ff62f2775337b705f45c804bb450454 upstream. Commit 172b2386ed16 ("KVM: x86: fix missed hardware breakpoints", 2016-02-10) worked around a case where the debug registers are not loaded correctly on preemption and on the first entry to KVM_RUN. However, Xiao Guangrong pointed out that the root cause must be that KVM_DEBUGREG_BP_ENABLED is not being set correctly. This can indeed happen due to the lazy debug exit mechanism, which does not call kvm_update_dr7. Fix it by replacing the existing loop (more or less equivalent to kvm_update_dr0123) with calls to all the kvm_update_dr* functions. Fixes: 172b2386ed16a9143d9a456aae5ec87275c61489 Reviewed-by: Xiao Guangrong Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6ef3856aab4b..d2945024ed33 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2736,7 +2736,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) } kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); - vcpu->arch.switch_db_regs |= KVM_DEBUGREG_RELOAD; } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) @@ -6545,12 +6544,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) * KVM_DEBUGREG_WONT_EXIT again. */ if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) { - int i; - WARN_ON(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP); kvm_x86_ops->sync_dirty_debug_regs(vcpu); - for (i = 0; i < KVM_NR_DB_REGS; i++) - vcpu->arch.eff_db[i] = vcpu->arch.db[i]; + kvm_update_dr0123(vcpu); + kvm_update_dr6(vcpu); + kvm_update_dr7(vcpu); + vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD; } /* From d1c623c9c264c6cb045015900f8ce1e60b4d2c7d Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sun, 28 Feb 2016 17:32:07 +0200 Subject: [PATCH 361/418] arm/arm64: KVM: Fix ioctl error handling commit 4cad67fca3fc952d6f2ed9e799621f07666a560f upstream. Calling return copy_to_user(...) in an ioctl will not do the right thing if there's a pagefault: copy_to_user returns the number of bytes not copied in this case. Fix up kvm to do return copy_to_user(...)) ? -EFAULT : 0; everywhere. Acked-by: Christoffer Dall Signed-off-by: Michael S. Tsirkin Signed-off-by: Marc Zyngier Signed-off-by: Greg Kroah-Hartman --- arch/arm/kvm/guest.c | 2 +- arch/arm64/kvm/guest.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index 96e935bbc38c..3705fc2921c2 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -155,7 +155,7 @@ static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) u64 val; val = kvm_arm_timer_get_reg(vcpu, reg->id); - return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)); + return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)) ? -EFAULT : 0; } static unsigned long num_core_regs(void) diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index d250160d32bc..3039f080e2d5 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -186,7 +186,7 @@ static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) u64 val; val = kvm_arm_timer_get_reg(vcpu, reg->id); - return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)); + return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)) ? -EFAULT : 0; } /** From ad0f9dade9912224326fcac1e09ed4846b41f7c9 Mon Sep 17 00:00:00 2001 From: Jay Cornwall Date: Wed, 10 Feb 2016 15:48:01 -0600 Subject: [PATCH 362/418] iommu/amd: Apply workaround for ATS write permission check commit 358875fd52ab8f00f66328cbf1a1d2486f265829 upstream. The AMD Family 15h Models 30h-3Fh (Kaveri) BIOS and Kernel Developer's Guide omitted part of the BIOS IOMMU L2 register setup specification. Without this setup the IOMMU L2 does not fully respect write permissions when handling an ATS translation request. The IOMMU L2 will set PTE dirty bit when handling an ATS translation with write permission request, even when PTE RW bit is clear. This may occur by direct translation (which would cause a PPR) or by prefetch request from the ATC. This is observed in practice when the IOMMU L2 modifies a PTE which maps a pagecache page. The ext4 filesystem driver BUGs when asked to writeback these (non-modified) pages. Enable ATS write permission check in the Kaveri IOMMU L2 if BIOS has not. Signed-off-by: Jay Cornwall Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/amd_iommu_init.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 013bdfff2d4d..0c4257ee6ea6 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -1015,6 +1015,34 @@ static void amd_iommu_erratum_746_workaround(struct amd_iommu *iommu) pci_write_config_dword(iommu->dev, 0xf0, 0x90); } +/* + * Family15h Model 30h-3fh (IOMMU Mishandles ATS Write Permission) + * Workaround: + * BIOS should enable ATS write permission check by setting + * L2_DEBUG_3[AtsIgnoreIWDis](D0F2xF4_x47[0]) = 1b + */ +static void amd_iommu_ats_write_check_workaround(struct amd_iommu *iommu) +{ + u32 value; + + if ((boot_cpu_data.x86 != 0x15) || + (boot_cpu_data.x86_model < 0x30) || + (boot_cpu_data.x86_model > 0x3f)) + return; + + /* Test L2_DEBUG_3[AtsIgnoreIWDis] == 1 */ + value = iommu_read_l2(iommu, 0x47); + + if (value & BIT(0)) + return; + + /* Set L2_DEBUG_3[AtsIgnoreIWDis] = 1 */ + iommu_write_l2(iommu, 0x47, value | BIT(0)); + + pr_info("AMD-Vi: Applying ATS write check workaround for IOMMU at %s\n", + dev_name(&iommu->dev->dev)); +} + /* * This function clues the initialization function for one IOMMU * together and also allocates the command buffer and programs the @@ -1284,6 +1312,7 @@ static int iommu_init_pci(struct amd_iommu *iommu) } amd_iommu_erratum_746_workaround(iommu); + amd_iommu_ats_write_check_workaround(iommu); iommu->iommu_dev = iommu_device_create(&iommu->dev->dev, iommu, amd_iommu_groups, "ivhd%d", From 44792d5c907c9afa03ac1f6a8ace7df4db500d85 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Tue, 23 Feb 2016 13:03:30 +0100 Subject: [PATCH 363/418] iommu/amd: Fix boot warning when device 00:00.0 is not iommu covered commit 38e45d02ea9f194b89d6bf41e52ccafc8e2c2b47 upstream. The setup code for the performance counters in the AMD IOMMU driver tests whether the counters can be written. It tests to setup a counter for device 00:00.0, which fails on systems where this particular device is not covered by the IOMMU. Fix this by not relying on device 00:00.0 but only on the IOMMU being present. Signed-off-by: Suravee Suthikulpanit Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/amd_iommu_init.c | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 0c4257ee6ea6..bf4959f4225b 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -228,6 +228,10 @@ static int amd_iommu_enable_interrupts(void); static int __init iommu_go_to_state(enum iommu_init_state state); static void init_device_table_dma(void); +static int iommu_pc_get_set_reg_val(struct amd_iommu *iommu, + u8 bank, u8 cntr, u8 fxn, + u64 *value, bool is_write); + static inline void update_last_devid(u16 devid) { if (devid > amd_iommu_last_bdf) @@ -1170,8 +1174,8 @@ static void init_iommu_perf_ctr(struct amd_iommu *iommu) amd_iommu_pc_present = true; /* Check if the performance counters can be written to */ - if ((0 != amd_iommu_pc_get_set_reg_val(0, 0, 0, 0, &val, true)) || - (0 != amd_iommu_pc_get_set_reg_val(0, 0, 0, 0, &val2, false)) || + if ((0 != iommu_pc_get_set_reg_val(iommu, 0, 0, 0, &val, true)) || + (0 != iommu_pc_get_set_reg_val(iommu, 0, 0, 0, &val2, false)) || (val != val2)) { pr_err("AMD-Vi: Unable to write to IOMMU perf counter.\n"); amd_iommu_pc_present = false; @@ -2312,22 +2316,15 @@ u8 amd_iommu_pc_get_max_counters(u16 devid) } EXPORT_SYMBOL(amd_iommu_pc_get_max_counters); -int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr, u8 fxn, +static int iommu_pc_get_set_reg_val(struct amd_iommu *iommu, + u8 bank, u8 cntr, u8 fxn, u64 *value, bool is_write) { - struct amd_iommu *iommu; u32 offset; u32 max_offset_lim; - /* Make sure the IOMMU PC resource is available */ - if (!amd_iommu_pc_present) - return -ENODEV; - - /* Locate the iommu associated with the device ID */ - iommu = amd_iommu_rlookup_table[devid]; - /* Check for valid iommu and pc register indexing */ - if (WARN_ON((iommu == NULL) || (fxn > 0x28) || (fxn & 7))) + if (WARN_ON((fxn > 0x28) || (fxn & 7))) return -ENODEV; offset = (u32)(((0x40|bank) << 12) | (cntr << 8) | fxn); @@ -2351,3 +2348,16 @@ int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr, u8 fxn, return 0; } EXPORT_SYMBOL(amd_iommu_pc_get_set_reg_val); + +int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr, u8 fxn, + u64 *value, bool is_write) +{ + struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; + + /* Make sure the IOMMU PC resource is available */ + if (!amd_iommu_pc_present || iommu == NULL) + return -ENODEV; + + return iommu_pc_get_set_reg_val(iommu, bank, cntr, fxn, + value, is_write); +} From 00548ecff548108ebd7a72fd55d135c70f8417e7 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 29 Feb 2016 23:49:47 +0100 Subject: [PATCH 364/418] iommu/vt-d: Use BUS_NOTIFY_REMOVED_DEVICE in hotplug path commit e6a8c9b337eed56eb481e1b4dd2180c25a1e5310 upstream. In the PCI hotplug path of the Intel IOMMU driver, replace the usage of the BUS_NOTIFY_DEL_DEVICE notifier, which is executed before the driver is unbound from the device, with BUS_NOTIFY_REMOVED_DEVICE, which runs after that. This fixes a kernel BUG being triggered in the VT-d code when the device driver tries to unmap DMA buffers and the VT-d driver already destroyed all mappings. Reported-by: Stefani Seibold Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/dmar.c | 5 +++-- drivers/iommu/intel-iommu.c | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 55a19e49205b..3821c4786662 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -329,7 +329,8 @@ static int dmar_pci_bus_notifier(struct notifier_block *nb, /* Only care about add/remove events for physical functions */ if (pdev->is_virtfn) return NOTIFY_DONE; - if (action != BUS_NOTIFY_ADD_DEVICE && action != BUS_NOTIFY_DEL_DEVICE) + if (action != BUS_NOTIFY_ADD_DEVICE && + action != BUS_NOTIFY_REMOVED_DEVICE) return NOTIFY_DONE; info = dmar_alloc_pci_notify_info(pdev, action); @@ -339,7 +340,7 @@ static int dmar_pci_bus_notifier(struct notifier_block *nb, down_write(&dmar_global_lock); if (action == BUS_NOTIFY_ADD_DEVICE) dmar_pci_bus_add_dev(info); - else if (action == BUS_NOTIFY_DEL_DEVICE) + else if (action == BUS_NOTIFY_REMOVED_DEVICE) dmar_pci_bus_del_dev(info); up_write(&dmar_global_lock); diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 986a53e3eb96..a2e1b7f14df2 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -4367,7 +4367,7 @@ int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info) rmrru->devices_cnt); if(ret < 0) return ret; - } else if (info->event == BUS_NOTIFY_DEL_DEVICE) { + } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) { dmar_remove_dev_scope(info, rmrr->segment, rmrru->devices, rmrru->devices_cnt); } @@ -4387,7 +4387,7 @@ int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info) break; else if(ret < 0) return ret; - } else if (info->event == BUS_NOTIFY_DEL_DEVICE) { + } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) { if (dmar_remove_dev_scope(info, atsr->segment, atsru->devices, atsru->devices_cnt)) break; From 3028963a2869a7b8794685c8a349d30528bfdf5c Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 18 Jan 2016 14:09:27 -0600 Subject: [PATCH 365/418] target: Fix WRITE_SAME/DISCARD conversion to linux 512b sectors commit 8a9ebe717a133ba7bc90b06047f43cc6b8bcb8b3 upstream. In a couple places we are not converting to/from the Linux block layer 512 bytes sectors. 1. The request queue values and what we do are a mismatch of things: max_discard_sectors - This is in linux block layer 512 byte sectors. We are just copying this to max_unmap_lba_count. discard_granularity - This is in bytes. We are converting it to Linux block layer 512 byte sectors. discard_alignment - This is in bytes. We are just copying this over. The problem is that the core LIO code exports these values in spc_emulate_evpd_b0 and we use them to test request arguments in sbc_execute_unmap, but we never convert to the block size we export to the initiator. If we are not using 512 byte sectors then we are exporting the wrong values or are checks are off. And, for the discard_alignment/bytes case we are just plain messed up. 2. blkdev_issue_discard's start and number of sector arguments are supposed to be in linux block layer 512 byte sectors. We are currently passing in the values we get from the initiator which might be based on some other sector size. There is a similar problem in iblock_execute_write_same where the bio functions want values in 512 byte sectors but we are passing in what we got from the initiator. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger [ kamal: backport to 4.4-stable: no unmap_zeroes_data ] Signed-off-by: Kamal Mostafa Signed-off-by: Greg Kroah-Hartman --- drivers/target/target_core_device.c | 43 +++++++++++++++++++++ drivers/target/target_core_file.c | 29 +++++--------- drivers/target/target_core_iblock.c | 56 +++++++--------------------- include/target/target_core_backend.h | 3 ++ 4 files changed, 69 insertions(+), 62 deletions(-) diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 88ea4e4f124b..3436a83568ea 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -826,6 +826,49 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name) return dev; } +/* + * Check if the underlying struct block_device request_queue supports + * the QUEUE_FLAG_DISCARD bit for UNMAP/WRITE_SAME in SCSI + TRIM + * in ATA and we need to set TPE=1 + */ +bool target_configure_unmap_from_queue(struct se_dev_attrib *attrib, + struct request_queue *q, int block_size) +{ + if (!blk_queue_discard(q)) + return false; + + attrib->max_unmap_lba_count = (q->limits.max_discard_sectors << 9) / + block_size; + /* + * Currently hardcoded to 1 in Linux/SCSI code.. + */ + attrib->max_unmap_block_desc_count = 1; + attrib->unmap_granularity = q->limits.discard_granularity / block_size; + attrib->unmap_granularity_alignment = q->limits.discard_alignment / + block_size; + return true; +} +EXPORT_SYMBOL(target_configure_unmap_from_queue); + +/* + * Convert from blocksize advertised to the initiator to the 512 byte + * units unconditionally used by the Linux block layer. + */ +sector_t target_to_linux_sector(struct se_device *dev, sector_t lb) +{ + switch (dev->dev_attrib.block_size) { + case 4096: + return lb << 3; + case 2048: + return lb << 2; + case 1024: + return lb << 1; + default: + return lb; + } +} +EXPORT_SYMBOL(target_to_linux_sector); + int target_configure_device(struct se_device *dev) { struct se_hba *hba = dev->se_hba; diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index e3195700211a..75f0f08b2a34 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c @@ -160,25 +160,11 @@ static int fd_configure_device(struct se_device *dev) " block_device blocks: %llu logical_block_size: %d\n", dev_size, div_u64(dev_size, fd_dev->fd_block_size), fd_dev->fd_block_size); - /* - * Check if the underlying struct block_device request_queue supports - * the QUEUE_FLAG_DISCARD bit for UNMAP/WRITE_SAME in SCSI + TRIM - * in ATA and we need to set TPE=1 - */ - if (blk_queue_discard(q)) { - dev->dev_attrib.max_unmap_lba_count = - q->limits.max_discard_sectors; - /* - * Currently hardcoded to 1 in Linux/SCSI code.. - */ - dev->dev_attrib.max_unmap_block_desc_count = 1; - dev->dev_attrib.unmap_granularity = - q->limits.discard_granularity >> 9; - dev->dev_attrib.unmap_granularity_alignment = - q->limits.discard_alignment; + + if (target_configure_unmap_from_queue(&dev->dev_attrib, q, + fd_dev->fd_block_size)) pr_debug("IFILE: BLOCK Discard support available," - " disabled by default\n"); - } + " disabled by default\n"); /* * Enable write same emulation for IBLOCK and use 0xFFFF as * the smaller WRITE_SAME(10) only has a two-byte block count. @@ -490,9 +476,12 @@ fd_execute_unmap(struct se_cmd *cmd, sector_t lba, sector_t nolb) if (S_ISBLK(inode->i_mode)) { /* The backend is block device, use discard */ struct block_device *bdev = inode->i_bdev; + struct se_device *dev = cmd->se_dev; - ret = blkdev_issue_discard(bdev, lba, - nolb, GFP_KERNEL, 0); + ret = blkdev_issue_discard(bdev, + target_to_linux_sector(dev, lba), + target_to_linux_sector(dev, nolb), + GFP_KERNEL, 0); if (ret < 0) { pr_warn("FILEIO: blkdev_issue_discard() failed: %d\n", ret); diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index f29c69120054..2c53dcefff3e 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -121,27 +121,11 @@ static int iblock_configure_device(struct se_device *dev) dev->dev_attrib.hw_max_sectors = queue_max_hw_sectors(q); dev->dev_attrib.hw_queue_depth = q->nr_requests; - /* - * Check if the underlying struct block_device request_queue supports - * the QUEUE_FLAG_DISCARD bit for UNMAP/WRITE_SAME in SCSI + TRIM - * in ATA and we need to set TPE=1 - */ - if (blk_queue_discard(q)) { - dev->dev_attrib.max_unmap_lba_count = - q->limits.max_discard_sectors; - - /* - * Currently hardcoded to 1 in Linux/SCSI code.. - */ - dev->dev_attrib.max_unmap_block_desc_count = 1; - dev->dev_attrib.unmap_granularity = - q->limits.discard_granularity >> 9; - dev->dev_attrib.unmap_granularity_alignment = - q->limits.discard_alignment; - + if (target_configure_unmap_from_queue(&dev->dev_attrib, q, + dev->dev_attrib.hw_block_size)) pr_debug("IBLOCK: BLOCK Discard support available," - " disabled by default\n"); - } + " disabled by default\n"); + /* * Enable write same emulation for IBLOCK and use 0xFFFF as * the smaller WRITE_SAME(10) only has a two-byte block count. @@ -413,9 +397,13 @@ static sense_reason_t iblock_execute_unmap(struct se_cmd *cmd, sector_t lba, sector_t nolb) { struct block_device *bdev = IBLOCK_DEV(cmd->se_dev)->ibd_bd; + struct se_device *dev = cmd->se_dev; int ret; - ret = blkdev_issue_discard(bdev, lba, nolb, GFP_KERNEL, 0); + ret = blkdev_issue_discard(bdev, + target_to_linux_sector(dev, lba), + target_to_linux_sector(dev, nolb), + GFP_KERNEL, 0); if (ret < 0) { pr_err("blkdev_issue_discard() failed: %d\n", ret); return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; @@ -431,8 +419,10 @@ iblock_execute_write_same(struct se_cmd *cmd) struct scatterlist *sg; struct bio *bio; struct bio_list list; - sector_t block_lba = cmd->t_task_lba; - sector_t sectors = sbc_get_write_same_sectors(cmd); + struct se_device *dev = cmd->se_dev; + sector_t block_lba = target_to_linux_sector(dev, cmd->t_task_lba); + sector_t sectors = target_to_linux_sector(dev, + sbc_get_write_same_sectors(cmd)); if (cmd->prot_op) { pr_err("WRITE_SAME: Protection information with IBLOCK" @@ -646,12 +636,12 @@ iblock_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, enum dma_data_direction data_direction) { struct se_device *dev = cmd->se_dev; + sector_t block_lba = target_to_linux_sector(dev, cmd->t_task_lba); struct iblock_req *ibr; struct bio *bio, *bio_start; struct bio_list list; struct scatterlist *sg; u32 sg_num = sgl_nents; - sector_t block_lba; unsigned bio_cnt; int rw = 0; int i; @@ -677,24 +667,6 @@ iblock_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, rw = READ; } - /* - * Convert the blocksize advertised to the initiator to the 512 byte - * units unconditionally used by the Linux block layer. - */ - if (dev->dev_attrib.block_size == 4096) - block_lba = (cmd->t_task_lba << 3); - else if (dev->dev_attrib.block_size == 2048) - block_lba = (cmd->t_task_lba << 2); - else if (dev->dev_attrib.block_size == 1024) - block_lba = (cmd->t_task_lba << 1); - else if (dev->dev_attrib.block_size == 512) - block_lba = cmd->t_task_lba; - else { - pr_err("Unsupported SCSI -> BLOCK LBA conversion:" - " %u\n", dev->dev_attrib.block_size); - return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; - } - ibr = kzalloc(sizeof(struct iblock_req), GFP_KERNEL); if (!ibr) goto fail; diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h index 56cf8e485ef2..28ee5c2e6bcd 100644 --- a/include/target/target_core_backend.h +++ b/include/target/target_core_backend.h @@ -94,5 +94,8 @@ sense_reason_t passthrough_parse_cdb(struct se_cmd *cmd, sense_reason_t (*exec_cmd)(struct se_cmd *cmd)); bool target_sense_desc_format(struct se_device *dev); +sector_t target_to_linux_sector(struct se_device *dev, sector_t lb); +bool target_configure_unmap_from_queue(struct se_dev_attrib *attrib, + struct request_queue *q, int block_size); #endif /* TARGET_CORE_BACKEND_H */ From 598fc9b7d73e4b55c3aacfd6adf19e194308819e Mon Sep 17 00:00:00 2001 From: Timothy Pearson Date: Fri, 26 Feb 2016 15:29:32 -0600 Subject: [PATCH 366/418] drm/ast: Fix incorrect register check for DRAM width commit 2d02b8bdba322b527c5f5168ce1ca10c2d982a78 upstream. During DRAM initialization on certain ASpeed devices, an incorrect bit (bit 10) was checked in the "SDRAM Bus Width Status" register to determine DRAM width. Query bit 6 instead in accordance with the Aspeed AST2050 datasheet v1.05. Signed-off-by: Timothy Pearson Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/ast/ast_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c index 541a610667ad..e0b4586a26fd 100644 --- a/drivers/gpu/drm/ast/ast_main.c +++ b/drivers/gpu/drm/ast/ast_main.c @@ -227,7 +227,7 @@ static int ast_get_dram_info(struct drm_device *dev) } while (ast_read32(ast, 0x10000) != 0x01); data = ast_read32(ast, 0x10004); - if (data & 0x400) + if (data & 0x40) ast->dram_bus_width = 16; else ast->dram_bus_width = 32; From 80e726c700c3ceb3c61c484654f6f4e5479351c0 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 24 Feb 2016 17:38:38 -0500 Subject: [PATCH 367/418] drm/radeon/pm: update current crtc info after setting the powerstate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5e031d9fe8b0741f11d49667dfc3ebf5454121fd upstream. On CI, we need to see if the number of crtcs changes to determine whether or not we need to upload the mclk table again. In practice we don't currently upload the mclk table again after the initial load. The only reason you would would be to add new states, e.g., for arbitrary mclk setting which is not currently supported. Acked-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/radeon/radeon_pm.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index 2081a60d08fb..1fa81215cea1 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -1076,10 +1076,6 @@ force: /* update display watermarks based on new power state */ radeon_bandwidth_update(rdev); - rdev->pm.dpm.current_active_crtcs = rdev->pm.dpm.new_active_crtcs; - rdev->pm.dpm.current_active_crtc_count = rdev->pm.dpm.new_active_crtc_count; - rdev->pm.dpm.single_display = single_display; - /* wait for the rings to drain */ for (i = 0; i < RADEON_NUM_RINGS; i++) { struct radeon_ring *ring = &rdev->ring[i]; @@ -1098,6 +1094,10 @@ force: /* update displays */ radeon_dpm_display_configuration_changed(rdev); + rdev->pm.dpm.current_active_crtcs = rdev->pm.dpm.new_active_crtcs; + rdev->pm.dpm.current_active_crtc_count = rdev->pm.dpm.new_active_crtc_count; + rdev->pm.dpm.single_display = single_display; + if (rdev->asic->dpm.force_performance_level) { if (rdev->pm.dpm.thermal_active) { enum radeon_dpm_forced_level level = rdev->pm.dpm.forced_level; From 3b0809af59ffd2b225c21514c105ea715f9116d8 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 24 Feb 2016 17:18:25 -0500 Subject: [PATCH 368/418] drm/amdgpu/pm: update current crtc info after setting the powerstate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit eda1d1cf8d18383f19cd2b752f786120efa4768f upstream. On CI, we need to see if the number of crtcs changes to determine whether or not we need to upload the mclk table again. In practice we don't currently upload the mclk table again after the initial load. The only reason you would would be to add new states, e.g., for arbitrary mclk setting which is not currently supported. Acked-by: Jordan Lazare Acked-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 03fe25142b78..7ae15fad16ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -596,9 +596,6 @@ force: /* update display watermarks based on new power state */ amdgpu_display_bandwidth_update(adev); - adev->pm.dpm.current_active_crtcs = adev->pm.dpm.new_active_crtcs; - adev->pm.dpm.current_active_crtc_count = adev->pm.dpm.new_active_crtc_count; - /* wait for the rings to drain */ for (i = 0; i < AMDGPU_MAX_RINGS; i++) { struct amdgpu_ring *ring = adev->rings[i]; @@ -617,6 +614,9 @@ force: /* update displays */ amdgpu_dpm_display_configuration_changed(adev); + adev->pm.dpm.current_active_crtcs = adev->pm.dpm.new_active_crtcs; + adev->pm.dpm.current_active_crtc_count = adev->pm.dpm.new_active_crtc_count; + if (adev->pm.funcs->force_performance_level) { if (adev->pm.dpm.thermal_active) { enum amdgpu_dpm_forced_level level = adev->pm.dpm.forced_level; From a46a700abc520bfadf5fa1cc371ba07d6d7f9230 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 26 Feb 2016 16:18:15 +0100 Subject: [PATCH 369/418] drm/amdgpu: apply gfx_v8 fixes to gfx_v7 as well MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit feebe91aa9a9d99d9ec157612a614fadb79beb99 upstream. We never ported that back to CIK, so we could run into VM faults here. Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 72793f93e2fc..aa491540ba85 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -3628,6 +3628,19 @@ static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vm_id, uint64_t pd_addr) { int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX); + uint32_t seq = ring->fence_drv.sync_seq; + uint64_t addr = ring->fence_drv.gpu_addr; + + amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); + amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */ + WAIT_REG_MEM_FUNCTION(3) | /* equal */ + WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */ + amdgpu_ring_write(ring, addr & 0xfffffffc); + amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff); + amdgpu_ring_write(ring, seq); + amdgpu_ring_write(ring, 0xffffffff); + amdgpu_ring_write(ring, 4); /* poll interval */ + if (usepfp) { /* synce CE with ME to prevent CE fetch CEIB before context switch done */ amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); From 624a59a5d85f95c77fa99fd33ae7cea35bc062a5 Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Mon, 29 Feb 2016 14:12:38 +0800 Subject: [PATCH 370/418] drm/amdgpu/gfx8: specify which engine to wait before vm flush MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 9cac537332f5502c103415b25609548c276a09f8 upstream. Select between me and pfp properly. Signed-off-by: Chunming Zhou Reviewed-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 4cb45f4602aa..d1054034d14b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -4681,7 +4681,8 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */ - WAIT_REG_MEM_FUNCTION(3))); /* equal */ + WAIT_REG_MEM_FUNCTION(3) | /* equal */ + WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */ amdgpu_ring_write(ring, addr & 0xfffffffc); amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff); amdgpu_ring_write(ring, seq); From f17a7e403d87e5ffb6c3598c9c562650ce70b392 Mon Sep 17 00:00:00 2001 From: Arindam Nath Date: Wed, 2 Mar 2016 17:19:01 +0530 Subject: [PATCH 371/418] drm/amdgpu: return from atombios_dp_get_dpcd only when error commit 0b39c531cfa12dad54eac238c2e303b994df1ef7 upstream. In amdgpu_connector_hotplug(), we need to start DP link training only after we have received DPCD. The function amdgpu_atombios_dp_get_dpcd() returns non-zero value only when an error condition is met, otherwise returns zero. So in case the function encounters an error, we need to skip rest of the code and return from amdgpu_connector_hotplug() immediately. Only when we are successfull in reading DPCD pin, we should carry on with turning-on the monitor. Signed-off-by: Arindam Nath Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index 89c3dd62ba21..119cdc2c43e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -77,7 +77,7 @@ void amdgpu_connector_hotplug(struct drm_connector *connector) } else if (amdgpu_atombios_dp_needs_link_train(amdgpu_connector)) { /* Don't try to start link training before we * have the dpcd */ - if (!amdgpu_atombios_dp_get_dpcd(amdgpu_connector)) + if (amdgpu_atombios_dp_get_dpcd(amdgpu_connector)) return; /* set it to OFF so that drm_helper_connector_dpms() From b693f2ad0f7bf4b7080dc05da8d436eba8c47193 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 11 Feb 2016 14:16:27 +0100 Subject: [PATCH 372/418] libata: fix HDIO_GET_32BIT ioctl commit 287e6611ab1eac76c2c5ebf6e345e04c80ca9c61 upstream. As reported by Soohoon Lee, the HDIO_GET_32BIT ioctl does not work correctly in compat mode with libata. I have investigated the issue further and found multiple problems that all appeared with the same commit that originally introduced HDIO_GET_32BIT handling in libata back in linux-2.6.8 and presumably also linux-2.4, as the code uses "copy_to_user(arg, &val, 1)" to copy a 'long' variable containing either 0 or 1 to user space. The problems with this are: * On big-endian machines, this will always write a zero because it stores the wrong byte into user space. * In compat mode, the upper three bytes of the variable are updated by the compat_hdio_ioctl() function, but they now contain uninitialized stack data. * The hdparm tool calling this ioctl uses a 'static long' variable to store the result. This means at least the upper bytes are initialized to zero, but calling another ioctl like HDIO_GET_MULTCOUNT would fill them with data that remains stale when the low byte is overwritten. Fortunately libata doesn't implement any of the affected ioctl commands, so this would only happen when we query both an IDE and an ATA device in the same command such as "hdparm -N -c /dev/hda /dev/sda" * The libata code for unknown reasons started using ATA_IOC_GET_IO32 and ATA_IOC_SET_IO32 as aliases for HDIO_GET_32BIT and HDIO_SET_32BIT, while the ioctl commands that were added later use the normal HDIO_* names. This is harmless but rather confusing. This addresses all four issues by changing the code to use put_user() on an 'unsigned long' variable in HDIO_GET_32BIT, like the IDE subsystem does, and by clarifying the names of the ioctl commands. Signed-off-by: Arnd Bergmann Reported-by: Soohoon Lee Tested-by: Soohoon Lee Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-scsi.c | 11 +++++------ include/linux/ata.h | 4 ++-- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 7e959f90c020..e417e1a1d02c 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -675,19 +675,18 @@ static int ata_ioc32(struct ata_port *ap) int ata_sas_scsi_ioctl(struct ata_port *ap, struct scsi_device *scsidev, int cmd, void __user *arg) { - int val = -EINVAL, rc = -EINVAL; + unsigned long val; + int rc = -EINVAL; unsigned long flags; switch (cmd) { - case ATA_IOC_GET_IO32: + case HDIO_GET_32BIT: spin_lock_irqsave(ap->lock, flags); val = ata_ioc32(ap); spin_unlock_irqrestore(ap->lock, flags); - if (copy_to_user(arg, &val, 1)) - return -EFAULT; - return 0; + return put_user(val, (unsigned long __user *)arg); - case ATA_IOC_SET_IO32: + case HDIO_SET_32BIT: val = (unsigned long) arg; rc = 0; spin_lock_irqsave(ap->lock, flags); diff --git a/include/linux/ata.h b/include/linux/ata.h index d2992bfa1706..c1a2f345cbe6 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -487,8 +487,8 @@ enum ata_tf_protocols { }; enum ata_ioctls { - ATA_IOC_GET_IO32 = 0x309, - ATA_IOC_SET_IO32 = 0x324, + ATA_IOC_GET_IO32 = 0x309, /* HDIO_GET_32BIT */ + ATA_IOC_SET_IO32 = 0x324, /* HDIO_SET_32BIT */ }; /* core structures */ From cea2cbff57c5b65375adb6fe65612c10a7301327 Mon Sep 17 00:00:00 2001 From: Harvey Hunt Date: Wed, 24 Feb 2016 15:16:43 +0000 Subject: [PATCH 373/418] libata: Align ata_device's id on a cacheline commit 4ee34ea3a12396f35b26d90a094c75db95080baa upstream. The id buffer in ata_device is a DMA target, but it isn't explicitly cacheline aligned. Due to this, adjacent fields can be overwritten with stale data from memory on non coherent architectures. As a result, the kernel is sometimes unable to communicate with an ATA device. Fix this by ensuring that the id buffer is cacheline aligned. This issue is similar to that fixed by Commit 84bda12af31f ("libata: align ap->sector_buf"). Signed-off-by: Harvey Hunt Cc: linux-kernel@vger.kernel.org Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- include/linux/libata.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/libata.h b/include/linux/libata.h index 600c1e0626a5..b20a2752f934 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -718,7 +718,7 @@ struct ata_device { union { u16 id[ATA_ID_WORDS]; /* IDENTIFY xxx DEVICE data */ u32 gscr[SATA_PMP_GSCR_DWORDS]; /* PMP GSCR block */ - }; + } ____cacheline_aligned; /* DEVSLP Timing Variables from Identify Device Data Log */ u8 devslp_timing[ATA_LOG_DEVSLP_SIZE]; From 7adb5cc0f39be29c16fae035d9e30e332095bbbc Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 26 Feb 2016 23:40:50 +0800 Subject: [PATCH 374/418] block: bio: introduce helpers to get the 1st and last bvec commit 7bcd79ac50d9d83350a835bdb91c04ac9e098412 upstream. The bio passed to bio_will_gap() may be fast cloned from upper layer(dm, md, bcache, fs, ...), or from bio splitting in block core. Unfortunately bio_will_gap() just figures out the last bvec via 'bi_io_vec[prev->bi_vcnt - 1]' directly, and this way is obviously wrong. This patch introduces two helpers for getting the first and last bvec of one bio for fixing the issue. Reported-by: Sagi Grimberg Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Ming Lei Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- include/linux/bio.h | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/include/linux/bio.h b/include/linux/bio.h index b9b6e046b52e..79cfaeef1b0d 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -310,6 +310,43 @@ static inline void bio_clear_flag(struct bio *bio, unsigned int bit) bio->bi_flags &= ~(1U << bit); } +static inline void bio_get_first_bvec(struct bio *bio, struct bio_vec *bv) +{ + *bv = bio_iovec(bio); +} + +static inline void bio_get_last_bvec(struct bio *bio, struct bio_vec *bv) +{ + struct bvec_iter iter = bio->bi_iter; + int idx; + + if (!bio_flagged(bio, BIO_CLONED)) { + *bv = bio->bi_io_vec[bio->bi_vcnt - 1]; + return; + } + + if (unlikely(!bio_multiple_segments(bio))) { + *bv = bio_iovec(bio); + return; + } + + bio_advance_iter(bio, &iter, iter.bi_size); + + if (!iter.bi_bvec_done) + idx = iter.bi_idx - 1; + else /* in the middle of bvec */ + idx = iter.bi_idx; + + *bv = bio->bi_io_vec[idx]; + + /* + * iter.bi_bvec_done records actual length of the last bvec + * if this bio ends in the middle of one io vector + */ + if (iter.bi_bvec_done) + bv->bv_len = iter.bi_bvec_done; +} + enum bip_flags { BIP_BLOCK_INTEGRITY = 1 << 0, /* block layer owns integrity data */ BIP_MAPPED_INTEGRITY = 1 << 1, /* ref tag has been remapped */ From c5cbbec54fe71c4de2d34f8c0ec8fbfdd7f17339 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 29 Feb 2016 18:28:53 -0500 Subject: [PATCH 375/418] writeback: flush inode cgroup wb switches instead of pinning super_block commit a1a0e23e49037c23ea84bc8cc146a03584d13577 upstream. If cgroup writeback is in use, inodes can be scheduled for asynchronous wb switching. Before 5ff8eaac1636 ("writeback: keep superblock pinned during cgroup writeback association switches"), this could race with umount leading to super_block being destroyed while inodes are pinned for wb switching. 5ff8eaac1636 fixed it by bumping s_active while wb switches are in flight; however, this allowed in-flight wb switches to make umounts asynchronous when the userland expected synchronosity - e.g. fsck immediately following umount may fail because the device is still busy. This patch removes the problematic super_block pinning and instead makes generic_shutdown_super() flush in-flight wb switches. wb switches are now executed on a dedicated isw_wq so that they can be flushed and isw_nr_in_flight keeps track of the number of in-flight wb switches so that flushing can be avoided in most cases. v2: Move cgroup_writeback_umount() further below and add MS_ACTIVE check in inode_switch_wbs() as Jan an Al suggested. Signed-off-by: Tejun Heo Reported-by: Tahsin Erdogan Cc: Jan Kara Cc: Al Viro Link: http://lkml.kernel.org/g/CAAeU0aNCq7LGODvVGRU-oU_o-6enii5ey0p1c26D1ZzYwkDc5A@mail.gmail.com Fixes: 5ff8eaac1636 ("writeback: keep superblock pinned during cgroup writeback association switches") Reviewed-by: Jan Kara Tested-by: Tahsin Erdogan Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- fs/fs-writeback.c | 54 +++++++++++++++++++++++++++++---------- fs/super.c | 1 + include/linux/writeback.h | 5 ++++ 3 files changed, 47 insertions(+), 13 deletions(-) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index e5232bbcbe3d..7a8ea1351584 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -223,6 +223,9 @@ static void wb_wait_for_completion(struct backing_dev_info *bdi, #define WB_FRN_HIST_MAX_SLOTS (WB_FRN_HIST_THR_SLOTS / 2 + 1) /* one round can affect upto 5 slots */ +static atomic_t isw_nr_in_flight = ATOMIC_INIT(0); +static struct workqueue_struct *isw_wq; + void __inode_attach_wb(struct inode *inode, struct page *page) { struct backing_dev_info *bdi = inode_to_bdi(inode); @@ -317,7 +320,6 @@ static void inode_switch_wbs_work_fn(struct work_struct *work) struct inode_switch_wbs_context *isw = container_of(work, struct inode_switch_wbs_context, work); struct inode *inode = isw->inode; - struct super_block *sb = inode->i_sb; struct address_space *mapping = inode->i_mapping; struct bdi_writeback *old_wb = inode->i_wb; struct bdi_writeback *new_wb = isw->new_wb; @@ -424,8 +426,9 @@ skip_switch: wb_put(new_wb); iput(inode); - deactivate_super(sb); kfree(isw); + + atomic_dec(&isw_nr_in_flight); } static void inode_switch_wbs_rcu_fn(struct rcu_head *rcu_head) @@ -435,7 +438,7 @@ static void inode_switch_wbs_rcu_fn(struct rcu_head *rcu_head) /* needs to grab bh-unsafe locks, bounce to work item */ INIT_WORK(&isw->work, inode_switch_wbs_work_fn); - schedule_work(&isw->work); + queue_work(isw_wq, &isw->work); } /** @@ -471,20 +474,20 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id) /* while holding I_WB_SWITCH, no one else can update the association */ spin_lock(&inode->i_lock); - - if (inode->i_state & (I_WB_SWITCH | I_FREEING) || - inode_to_wb(inode) == isw->new_wb) - goto out_unlock; - - if (!atomic_inc_not_zero(&inode->i_sb->s_active)) - goto out_unlock; - + if (!(inode->i_sb->s_flags & MS_ACTIVE) || + inode->i_state & (I_WB_SWITCH | I_FREEING) || + inode_to_wb(inode) == isw->new_wb) { + spin_unlock(&inode->i_lock); + goto out_free; + } inode->i_state |= I_WB_SWITCH; spin_unlock(&inode->i_lock); ihold(inode); isw->inode = inode; + atomic_inc(&isw_nr_in_flight); + /* * In addition to synchronizing among switchers, I_WB_SWITCH tells * the RCU protected stat update paths to grab the mapping's @@ -494,8 +497,6 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id) call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn); return; -out_unlock: - spin_unlock(&inode->i_lock); out_free: if (isw->new_wb) wb_put(isw->new_wb); @@ -849,6 +850,33 @@ restart: wb_put(last_wb); } +/** + * cgroup_writeback_umount - flush inode wb switches for umount + * + * This function is called when a super_block is about to be destroyed and + * flushes in-flight inode wb switches. An inode wb switch goes through + * RCU and then workqueue, so the two need to be flushed in order to ensure + * that all previously scheduled switches are finished. As wb switches are + * rare occurrences and synchronize_rcu() can take a while, perform + * flushing iff wb switches are in flight. + */ +void cgroup_writeback_umount(void) +{ + if (atomic_read(&isw_nr_in_flight)) { + synchronize_rcu(); + flush_workqueue(isw_wq); + } +} + +static int __init cgroup_writeback_init(void) +{ + isw_wq = alloc_workqueue("inode_switch_wbs", 0, 0); + if (!isw_wq) + return -ENOMEM; + return 0; +} +fs_initcall(cgroup_writeback_init); + #else /* CONFIG_CGROUP_WRITEBACK */ static struct bdi_writeback * diff --git a/fs/super.c b/fs/super.c index 954aeb80e202..f5f4b328f860 100644 --- a/fs/super.c +++ b/fs/super.c @@ -415,6 +415,7 @@ void generic_shutdown_super(struct super_block *sb) sb->s_flags &= ~MS_ACTIVE; fsnotify_unmount_inodes(sb); + cgroup_writeback_umount(); evict_inodes(sb); diff --git a/include/linux/writeback.h b/include/linux/writeback.h index b333c945e571..d0b5ca5d4e08 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -198,6 +198,7 @@ void wbc_attach_and_unlock_inode(struct writeback_control *wbc, void wbc_detach_inode(struct writeback_control *wbc); void wbc_account_io(struct writeback_control *wbc, struct page *page, size_t bytes); +void cgroup_writeback_umount(void); /** * inode_attach_wb - associate an inode with its wb @@ -301,6 +302,10 @@ static inline void wbc_account_io(struct writeback_control *wbc, { } +static inline void cgroup_writeback_umount(void) +{ +} + #endif /* CONFIG_CGROUP_WRITEBACK */ /* From 419a2cd1f5ab18a14026fc15a626d49f2df80fdc Mon Sep 17 00:00:00 2001 From: Alexandra Yates Date: Wed, 17 Feb 2016 19:36:20 -0800 Subject: [PATCH 376/418] Adding Intel Lewisburg device IDs for SATA commit f5bdd66c705484b4bc77eb914be15c1b7881fae7 upstream. This patch complements the list of device IDs previously added for lewisburg sata. Signed-off-by: Alexandra Yates Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/ahci.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 99921aa0daca..60a15831c009 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -367,15 +367,21 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0xa107), board_ahci }, /* Sunrise Point-H RAID */ { PCI_VDEVICE(INTEL, 0xa10f), board_ahci }, /* Sunrise Point-H RAID */ { PCI_VDEVICE(INTEL, 0x2822), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0x2823), board_ahci }, /* Lewisburg AHCI*/ { PCI_VDEVICE(INTEL, 0x2826), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0x2827), board_ahci }, /* Lewisburg RAID*/ { PCI_VDEVICE(INTEL, 0xa182), board_ahci }, /* Lewisburg AHCI*/ { PCI_VDEVICE(INTEL, 0xa184), board_ahci }, /* Lewisburg RAID*/ { PCI_VDEVICE(INTEL, 0xa186), board_ahci }, /* Lewisburg RAID*/ { PCI_VDEVICE(INTEL, 0xa18e), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0xa1d2), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0xa1d6), board_ahci }, /* Lewisburg RAID*/ { PCI_VDEVICE(INTEL, 0xa202), board_ahci }, /* Lewisburg AHCI*/ { PCI_VDEVICE(INTEL, 0xa204), board_ahci }, /* Lewisburg RAID*/ { PCI_VDEVICE(INTEL, 0xa206), board_ahci }, /* Lewisburg RAID*/ { PCI_VDEVICE(INTEL, 0xa20e), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0xa252), board_ahci }, /* Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0xa256), board_ahci }, /* Lewisburg RAID*/ /* JMicron 360/1/3/5/6, match class to avoid IDE function */ { PCI_VENDOR_ID_JMICRON, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, From e75c4b65150f099789821cb5e3f0efc964c1db05 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 26 Feb 2016 17:57:13 +0100 Subject: [PATCH 377/418] arm64: vmemmap: use virtual projection of linear region commit dfd55ad85e4a7fbaa82df12467515ac3c81e8a3e upstream. Commit dd006da21646 ("arm64: mm: increase VA range of identity map") made some changes to the memory mapping code to allow physical memory to reside at an offset that exceeds the size of the virtual mapping. However, since the size of the vmemmap area is proportional to the size of the VA area, but it is populated relative to the physical space, we may end up with the struct page array being mapped outside of the vmemmap region. For instance, on my Seattle A0 box, I can see the following output in the dmesg log. vmemmap : 0xffffffbdc0000000 - 0xffffffbfc0000000 ( 8 GB maximum) 0xffffffbfc0000000 - 0xffffffbfd0000000 ( 256 MB actual) We can fix this by deciding that the vmemmap region is not a projection of the physical space, but of the virtual space above PAGE_OFFSET, i.e., the linear region. This way, we are guaranteed that the vmemmap region is of sufficient size, and we can even reduce the size by half. Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/pgtable.h | 7 ++++--- arch/arm64/mm/init.c | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 63f52b55defe..fc9f7ef2f4ab 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -34,13 +34,13 @@ /* * VMALLOC and SPARSEMEM_VMEMMAP ranges. * - * VMEMAP_SIZE: allows the whole VA space to be covered by a struct page array + * VMEMAP_SIZE: allows the whole linear region to be covered by a struct page array * (rounded up to PUD_SIZE). * VMALLOC_START: beginning of the kernel VA space * VMALLOC_END: extends to the available space below vmmemmap, PCI I/O space, * fixed mappings and modules */ -#define VMEMMAP_SIZE ALIGN((1UL << (VA_BITS - PAGE_SHIFT)) * sizeof(struct page), PUD_SIZE) +#define VMEMMAP_SIZE ALIGN((1UL << (VA_BITS - PAGE_SHIFT - 1)) * sizeof(struct page), PUD_SIZE) #ifndef CONFIG_KASAN #define VMALLOC_START (VA_START) @@ -51,7 +51,8 @@ #define VMALLOC_END (PAGE_OFFSET - PUD_SIZE - VMEMMAP_SIZE - SZ_64K) -#define vmemmap ((struct page *)(VMALLOC_END + SZ_64K)) +#define VMEMMAP_START (VMALLOC_END + SZ_64K) +#define vmemmap ((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT)) #define FIRST_USER_ADDRESS 0UL diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 17bf39ac83ba..4cb98aa8c27b 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -319,8 +319,8 @@ void __init mem_init(void) #endif MLG(VMALLOC_START, VMALLOC_END), #ifdef CONFIG_SPARSEMEM_VMEMMAP - MLG((unsigned long)vmemmap, - (unsigned long)vmemmap + VMEMMAP_SIZE), + MLG(VMEMMAP_START, + VMEMMAP_START + VMEMMAP_SIZE), MLM((unsigned long)virt_to_page(PAGE_OFFSET), (unsigned long)virt_to_page(high_memory)), #endif From af2d87aeac4fac50bcb5e0cf5a7d37edc72fed1f Mon Sep 17 00:00:00 2001 From: Todd E Brandt Date: Wed, 2 Mar 2016 16:05:29 -0800 Subject: [PATCH 378/418] PM / sleep / x86: Fix crash on graph trace through x86 suspend commit 92f9e179a702a6adbc11e2fedc76ecd6ffc9e3f7 upstream. Pause/unpause graph tracing around do_suspend_lowlevel as it has inconsistent call/return info after it jumps to the wakeup vector. The graph trace buffer will otherwise become misaligned and may eventually crash and hang on suspend. To reproduce the issue and test the fix: Run a function_graph trace over suspend/resume and set the graph function to suspend_devices_and_enter. This consistently hangs the system without this fix. Signed-off-by: Todd Brandt Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/acpi/sleep.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index d1daead5fcdd..adb3eaf8fe2a 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -16,6 +16,7 @@ #include #include +#include #include "../../realmode/rm/wakeup.h" #include "sleep.h" @@ -107,7 +108,13 @@ int x86_acpi_suspend_lowlevel(void) saved_magic = 0x123456789abcdef0L; #endif /* CONFIG_64BIT */ + /* + * Pause/unpause graph tracing around do_suspend_lowlevel as it has + * inconsistent call/return info after it jumps to the wakeup vector. + */ + pause_graph_tracing(); do_suspend_lowlevel(); + unpause_graph_tracing(); return 0; } From a55479ab637cda5ebbfdb9eb7c062bd99c13d5d9 Mon Sep 17 00:00:00 2001 From: Manuel Lauss Date: Sat, 27 Feb 2016 16:10:05 +0100 Subject: [PATCH 379/418] ata: ahci: don't mark HotPlugCapable Ports as external/removable commit dc8b4afc4a04fac8ee55a19b59f2356a25e7e778 upstream. The HPCP bit is set by bioses for on-board sata ports either because they think sata is hotplug capable in general or to allow Windows to display a "device eject" icon on ports which are routed to an external connector bracket. However in Redhat Bugzilla #1310682, users report that with kernel 4.4, where this bit test first appeared, a lot of partitions on sata drives are now mounted automatically. This patch should fix redhat and a lot of other distros which unconditionally automount all devices which have the "removable" bit set. Signed-off-by: Manuel Lauss Signed-off-by: Tejun Heo Fixes: 8a3e33cf92c7 ("ata: ahci: find eSATA ports and flag them as removable" changes userspace behavior) Link: http://lkml.kernel.org/g/56CF35FA.1070500@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libahci.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index 1f225cc1827f..998c6a85ad89 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -1142,8 +1142,7 @@ static void ahci_port_init(struct device *dev, struct ata_port *ap, /* mark esata ports */ tmp = readl(port_mmio + PORT_CMD); - if ((tmp & PORT_CMD_HPCP) || - ((tmp & PORT_CMD_ESP) && (hpriv->cap & HOST_CAP_SXS))) + if ((tmp & PORT_CMD_ESP) && (hpriv->cap & HOST_CAP_SXS)) ap->pflags |= ATA_PFLAG_EXTERNAL; } From 180c86a4f06231a09a00b8e6d1c07775341c94b8 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 3 Mar 2016 17:18:20 -0500 Subject: [PATCH 380/418] tracing: Do not have 'comm' filter override event 'comm' field commit e57cbaf0eb006eaa207395f3bfd7ce52c1b5539c upstream. Commit 9f61668073a8d "tracing: Allow triggers to filter for CPU ids and process names" added a 'comm' filter that will filter events based on the current tasks struct 'comm'. But this now hides the ability to filter events that have a 'comm' field too. For example, sched_migrate_task trace event. That has a 'comm' field of the task to be migrated. echo 'comm == "bash"' > events/sched_migrate_task/filter will now filter all sched_migrate_task events for tasks named "bash" that migrates other tasks (in interrupt context), instead of seeing when "bash" itself gets migrated. This fix requires a couple of changes. 1) Change the look up order for filter predicates to look at the events fields before looking at the generic filters. 2) Instead of basing the filter function off of the "comm" name, have the generic "comm" filter have its own filter_type (FILTER_COMM). Test against the type instead of the name to assign the filter function. 3) Add a new "COMM" filter that works just like "comm" but will filter based on the current task, even if the trace event contains a "comm" field. Do the same for "cpu" field, adding a FILTER_CPU and a filter "CPU". Fixes: 9f61668073a8d "tracing: Allow triggers to filter for CPU ids and process names" Reported-by: Matt Fleming Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman --- include/linux/trace_events.h | 2 ++ kernel/trace/trace_events.c | 18 ++++++++++-------- kernel/trace/trace_events_filter.c | 13 +++++++------ 3 files changed, 19 insertions(+), 14 deletions(-) diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 429fdfc3baf5..925730bc9fc1 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -568,6 +568,8 @@ enum { FILTER_DYN_STRING, FILTER_PTR_STRING, FILTER_TRACE_FN, + FILTER_COMM, + FILTER_CPU, }; extern int trace_event_raw_init(struct trace_event_call *call); diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index debf6e878076..d202d991edae 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -97,16 +97,16 @@ trace_find_event_field(struct trace_event_call *call, char *name) struct ftrace_event_field *field; struct list_head *head; + head = trace_get_fields(call); + field = __find_event_field(head, name); + if (field) + return field; + field = __find_event_field(&ftrace_generic_fields, name); if (field) return field; - field = __find_event_field(&ftrace_common_fields, name); - if (field) - return field; - - head = trace_get_fields(call); - return __find_event_field(head, name); + return __find_event_field(&ftrace_common_fields, name); } static int __trace_define_field(struct list_head *head, const char *type, @@ -171,8 +171,10 @@ static int trace_define_generic_fields(void) { int ret; - __generic_field(int, cpu, FILTER_OTHER); - __generic_field(char *, comm, FILTER_PTR_STRING); + __generic_field(int, CPU, FILTER_CPU); + __generic_field(int, cpu, FILTER_CPU); + __generic_field(char *, COMM, FILTER_COMM); + __generic_field(char *, comm, FILTER_COMM); return ret; } diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index f93a219b18da..6816302542b2 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1043,13 +1043,14 @@ static int init_pred(struct filter_parse_state *ps, return -EINVAL; } - if (is_string_field(field)) { + if (field->filter_type == FILTER_COMM) { + filter_build_regex(pred); + fn = filter_pred_comm; + pred->regex.field_len = TASK_COMM_LEN; + } else if (is_string_field(field)) { filter_build_regex(pred); - if (!strcmp(field->name, "comm")) { - fn = filter_pred_comm; - pred->regex.field_len = TASK_COMM_LEN; - } else if (field->filter_type == FILTER_STATIC_STRING) { + if (field->filter_type == FILTER_STATIC_STRING) { fn = filter_pred_string; pred->regex.field_len = field->size; } else if (field->filter_type == FILTER_DYN_STRING) @@ -1072,7 +1073,7 @@ static int init_pred(struct filter_parse_state *ps, } pred->val = val; - if (!strcmp(field->name, "cpu")) + if (field->filter_type == FILTER_CPU) fn = filter_pred_cpu; else fn = select_comparison_fn(pred->op, field->size, From 5667d50cc2acf4c67393602bc7d1f5f4018fb2d7 Mon Sep 17 00:00:00 2001 From: Gabor Juhos Date: Wed, 17 Feb 2016 12:58:20 +0100 Subject: [PATCH 381/418] pata-rb532-cf: get rid of the irq_to_gpio() call commit 018361767a21fb2d5ebd3ac182c04baf8a8b4e08 upstream. The RB532 platform specific irq_to_gpio() implementation has been removed with commit 832f5dacfa0b ("MIPS: Remove all the uses of custom gpio.h"). Now the platform uses the generic stub which causes the following error: pata-rb532-cf pata-rb532-cf: no GPIO found for irq149 pata-rb532-cf: probe of pata-rb532-cf failed with error -2 Drop the irq_to_gpio() call and get the GPIO number from platform data instead. After this change, the driver works again: scsi host0: pata-rb532-cf ata1: PATA max PIO4 irq 149 ata1.00: CFA: CF 1GB, 20080820, max MWDMA4 ata1.00: 1989792 sectors, multi 0: LBA ata1.00: configured for PIO4 scsi 0:0:0:0: Direct-Access ATA CF 1GB 0820 PQ: 0\ ANSI: 5 sd 0:0:0:0: [sda] 1989792 512-byte logical blocks: (1.01 GB/971 MiB) sd 0:0:0:0: [sda] Write Protect is off sd 0:0:0:0: [sda] Write cache: disabled, read cache: enabled, doesn't\ support DPO or FUA sda: sda1 sda2 sd 0:0:0:0: [sda] Attached SCSI disk Fixes: 832f5dacfa0b ("MIPS: Remove all the uses of custom gpio.h") Cc: Alban Bedel Cc: Ralf Baechle Cc: Arnd Bergmann Signed-off-by: Gabor Juhos Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman --- drivers/ata/pata_rb532_cf.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/ata/pata_rb532_cf.c b/drivers/ata/pata_rb532_cf.c index 12fe0f3bb7e9..c8b6a780a290 100644 --- a/drivers/ata/pata_rb532_cf.c +++ b/drivers/ata/pata_rb532_cf.c @@ -32,6 +32,8 @@ #include #include +#include + #define DRV_NAME "pata-rb532-cf" #define DRV_VERSION "0.1.0" #define DRV_DESC "PATA driver for RouterBOARD 532 Compact Flash" @@ -107,6 +109,7 @@ static int rb532_pata_driver_probe(struct platform_device *pdev) int gpio; struct resource *res; struct ata_host *ah; + struct cf_device *pdata; struct rb532_cf_info *info; int ret; @@ -122,7 +125,13 @@ static int rb532_pata_driver_probe(struct platform_device *pdev) return -ENOENT; } - gpio = irq_to_gpio(irq); + pdata = dev_get_platdata(&pdev->dev); + if (!pdata) { + dev_err(&pdev->dev, "no platform data specified\n"); + return -EINVAL; + } + + gpio = pdata->gpio_pin; if (gpio < 0) { dev_err(&pdev->dev, "no GPIO found for irq%d\n", irq); return -ENOENT; From c8421505b80438ca8dbc1a54d3fb1348b2560599 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 2 Mar 2016 15:49:38 +0000 Subject: [PATCH 382/418] Btrfs: fix loading of orphan roots leading to BUG_ON commit 909c3a22da3b8d2cfd3505ca5658f0176859d400 upstream. When looking for orphan roots during mount we can end up hitting a BUG_ON() (at root-item.c:btrfs_find_orphan_roots()) if a log tree is replayed and qgroups are enabled. This is because after a log tree is replayed, a transaction commit is made, which triggers qgroup extent accounting which in turn does backref walking which ends up reading and inserting all roots in the radix tree fs_info->fs_root_radix, including orphan roots (deleted snapshots). So after the log tree is replayed, when finding orphan roots we hit the BUG_ON with the following trace: [118209.182438] ------------[ cut here ]------------ [118209.183279] kernel BUG at fs/btrfs/root-tree.c:314! [118209.184074] invalid opcode: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC [118209.185123] Modules linked in: btrfs dm_flakey dm_mod crc32c_generic ppdev xor raid6_pq evdev sg parport_pc parport acpi_cpufreq tpm_tis tpm psmouse processor i2c_piix4 serio_raw pcspkr i2c_core button loop autofs4 ext4 crc16 mbcache jbd2 sd_mod sr_mod cdrom ata_generic virtio_scsi ata_piix libata virtio_pci virtio_ring virtio scsi_mod e1000 floppy [last unloaded: btrfs] [118209.186318] CPU: 14 PID: 28428 Comm: mount Tainted: G W 4.5.0-rc5-btrfs-next-24+ #1 [118209.186318] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS by qemu-project.org 04/01/2014 [118209.186318] task: ffff8801ec131040 ti: ffff8800af34c000 task.ti: ffff8800af34c000 [118209.186318] RIP: 0010:[] [] btrfs_find_orphan_roots+0x1fc/0x244 [btrfs] [118209.186318] RSP: 0018:ffff8800af34faa8 EFLAGS: 00010246 [118209.186318] RAX: 00000000ffffffef RBX: 00000000ffffffef RCX: 0000000000000001 [118209.186318] RDX: 0000000080000000 RSI: 0000000000000001 RDI: 00000000ffffffff [118209.186318] RBP: ffff8800af34fb08 R08: 0000000000000001 R09: 0000000000000000 [118209.186318] R10: ffff8800af34f9f0 R11: 6db6db6db6db6db7 R12: ffff880171b97000 [118209.186318] R13: ffff8801ca9d65e0 R14: ffff8800afa2e000 R15: 0000160000000000 [118209.186318] FS: 00007f5bcb914840(0000) GS:ffff88023edc0000(0000) knlGS:0000000000000000 [118209.186318] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [118209.186318] CR2: 00007f5bcaceb5d9 CR3: 00000000b49b5000 CR4: 00000000000006e0 [118209.186318] Stack: [118209.186318] fffffbffffffffff 010230ffffffffff 0101000000000000 ff84000000000000 [118209.186318] fbffffffffffffff 30ffffffffffffff 0000000000000101 ffff880082348000 [118209.186318] 0000000000000000 ffff8800afa2e000 ffff8800afa2e000 0000000000000000 [118209.186318] Call Trace: [118209.186318] [] open_ctree+0x1e37/0x21b9 [btrfs] [118209.186318] [] btrfs_mount+0x97e/0xaed [btrfs] [118209.186318] [] ? trace_hardirqs_on+0xd/0xf [118209.186318] [] mount_fs+0x67/0x131 [118209.186318] [] vfs_kern_mount+0x6c/0xde [118209.186318] [] btrfs_mount+0x1ac/0xaed [btrfs] [118209.186318] [] ? trace_hardirqs_on+0xd/0xf [118209.186318] [] ? lockdep_init_map+0xb9/0x1b3 [118209.186318] [] mount_fs+0x67/0x131 [118209.186318] [] vfs_kern_mount+0x6c/0xde [118209.186318] [] do_mount+0x8a6/0x9e8 [118209.186318] [] SyS_mount+0x77/0x9f [118209.186318] [] entry_SYSCALL_64_fastpath+0x12/0x6b [118209.186318] Code: 64 00 00 85 c0 89 c3 75 24 f0 41 80 4c 24 20 20 49 8b bc 24 f0 01 00 00 4c 89 e6 e8 e8 65 00 00 85 c0 89 c3 74 11 83 f8 ef 75 02 <0f> 0b 4c 89 e7 e8 da 72 00 00 eb 1c 41 83 bc 24 00 01 00 00 00 [118209.186318] RIP [] btrfs_find_orphan_roots+0x1fc/0x244 [btrfs] [118209.186318] RSP [118209.230735] ---[ end trace 83938f987d85d477 ]--- So fix this by not treating the error -EEXIST, returned when attempting to insert a root already inserted by the backref walking code, as an error. The following test case for xfstests reproduces the bug: seq=`basename $0` seqres=$RESULT_DIR/$seq echo "QA output created by $seq" tmp=/tmp/$$ status=1 # failure is the default! trap "_cleanup; exit \$status" 0 1 2 3 15 _cleanup() { _cleanup_flakey cd / rm -f $tmp.* } # get standard environment, filters and checks . ./common/rc . ./common/filter . ./common/dmflakey # real QA test starts here _supported_fs btrfs _supported_os Linux _require_scratch _require_dm_target flakey _require_metadata_journaling $SCRATCH_DEV rm -f $seqres.full _scratch_mkfs >>$seqres.full 2>&1 _init_flakey _mount_flakey _run_btrfs_util_prog quota enable $SCRATCH_MNT # Create 2 directories with one file in one of them. # We use these just to trigger a transaction commit later, moving the file from # directory a to directory b and doing an fsync against directory a. mkdir $SCRATCH_MNT/a mkdir $SCRATCH_MNT/b touch $SCRATCH_MNT/a/f sync # Create our test file with 2 4K extents. $XFS_IO_PROG -f -s -c "pwrite -S 0xaa 0 8K" $SCRATCH_MNT/foobar | _filter_xfs_io # Create a snapshot and delete it. This doesn't really delete the snapshot # immediately, just makes it inaccessible and invisible to user space, the # snapshot is deleted later by a dedicated kernel thread (cleaner kthread) # which is woke up at the next transaction commit. # A root orphan item is inserted into the tree of tree roots, so that if a # power failure happens before the dedicated kernel thread does the snapshot # deletion, the next time the filesystem is mounted it resumes the snapshot # deletion. _run_btrfs_util_prog subvolume snapshot $SCRATCH_MNT $SCRATCH_MNT/snap _run_btrfs_util_prog subvolume delete $SCRATCH_MNT/snap # Now overwrite half of the extents we wrote before. Because we made a snapshpot # before, which isn't really deleted yet (since no transaction commit happened # after we did the snapshot delete request), the non overwritten extents get # referenced twice, once by the default subvolume and once by the snapshot. $XFS_IO_PROG -c "pwrite -S 0xbb 4K 8K" $SCRATCH_MNT/foobar | _filter_xfs_io # Now move file f from directory a to directory b and fsync directory a. # The fsync on the directory a triggers a transaction commit (because a file # was moved from it to another directory) and the file fsync leaves a log tree # with file extent items to replay. mv $SCRATCH_MNT/a/f $SCRATCH_MNT/a/b $XFS_IO_PROG -c "fsync" $SCRATCH_MNT/a $XFS_IO_PROG -c "fsync" $SCRATCH_MNT/foobar echo "File digest before power failure:" md5sum $SCRATCH_MNT/foobar | _filter_scratch # Now simulate a power failure and mount the filesystem to replay the log tree. # After the log tree was replayed, we used to hit a BUG_ON() when processing # the root orphan item for the deleted snapshot. This is because when processing # an orphan root the code expected to be the first code inserting the root into # the fs_info->fs_root_radix radix tree, while in reallity it was the second # caller attempting to do it - the first caller was the transaction commit that # took place after replaying the log tree, when updating the qgroup counters. _flakey_drop_and_remount echo "File digest before after failure:" # Must match what he got before the power failure. md5sum $SCRATCH_MNT/foobar | _filter_scratch _unmount_flakey status=0 exit Fixes: 2d9e97761087 ("Btrfs: use btrfs_get_fs_root in resolve_indirect_ref") Signed-off-by: Filipe Manana Reviewed-by: Qu Wenruo Signed-off-by: Chris Mason Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/root-tree.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 7cf8509deda7..2c849b08a91b 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -310,8 +310,16 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state); err = btrfs_insert_fs_root(root->fs_info, root); + /* + * The root might have been inserted already, as before we look + * for orphan roots, log replay might have happened, which + * triggers a transaction commit and qgroup accounting, which + * in turn reads and inserts fs roots while doing backref + * walking. + */ + if (err == -EEXIST) + err = 0; if (err) { - BUG_ON(err == -EEXIST); btrfs_free_fs_root(root); break; } From 17f9501631948e130b81aa7ff7a0bd8ef107e07c Mon Sep 17 00:00:00 2001 From: Thomas Betker Date: Tue, 10 Nov 2015 22:18:15 +0100 Subject: [PATCH 383/418] Revert "jffs2: Fix lock acquisition order bug in jffs2_write_begin" commit 157078f64b8a9cd7011b6b900b2f2498df850748 upstream. This reverts commit 5ffd3412ae55 ("jffs2: Fix lock acquisition order bug in jffs2_write_begin"). The commit modified jffs2_write_begin() to remove a deadlock with jffs2_garbage_collect_live(), but this introduced new deadlocks found by multiple users. page_lock() actually has to be called before mutex_lock(&c->alloc_sem) or mutex_lock(&f->sem) because jffs2_write_end() and jffs2_readpage() are called with the page locked, and they acquire c->alloc_sem and f->sem, resp. In other words, the lock order in jffs2_write_begin() was correct, and it is the jffs2_garbage_collect_live() path that has to be changed. Revert the commit to get rid of the new deadlocks, and to clear the way for a better fix of the original deadlock. Reported-by: Deng Chao Reported-by: Ming Liu Reported-by: wangzaiwei Signed-off-by: Thomas Betker Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- fs/jffs2/file.c | 41 +++++++++++++++++++---------------------- 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c index f509f62e12f6..3361979d728c 100644 --- a/fs/jffs2/file.c +++ b/fs/jffs2/file.c @@ -137,39 +137,33 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, struct page *pg; struct inode *inode = mapping->host; struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); - struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb); - struct jffs2_raw_inode ri; - uint32_t alloc_len = 0; pgoff_t index = pos >> PAGE_CACHE_SHIFT; uint32_t pageofs = index << PAGE_CACHE_SHIFT; int ret = 0; + pg = grab_cache_page_write_begin(mapping, index, flags); + if (!pg) + return -ENOMEM; + *pagep = pg; + jffs2_dbg(1, "%s()\n", __func__); if (pageofs > inode->i_size) { - ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len, - ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); - if (ret) - return ret; - } - - mutex_lock(&f->sem); - pg = grab_cache_page_write_begin(mapping, index, flags); - if (!pg) { - if (alloc_len) - jffs2_complete_reservation(c); - mutex_unlock(&f->sem); - return -ENOMEM; - } - *pagep = pg; - - if (alloc_len) { /* Make new hole frag from old EOF to new page */ + struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb); + struct jffs2_raw_inode ri; struct jffs2_full_dnode *fn; + uint32_t alloc_len; jffs2_dbg(1, "Writing new hole frag 0x%x-0x%x between current EOF and new page\n", (unsigned int)inode->i_size, pageofs); + ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len, + ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); + if (ret) + goto out_page; + + mutex_lock(&f->sem); memset(&ri, 0, sizeof(ri)); ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK); @@ -196,6 +190,7 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, if (IS_ERR(fn)) { ret = PTR_ERR(fn); jffs2_complete_reservation(c); + mutex_unlock(&f->sem); goto out_page; } ret = jffs2_add_full_dnode_to_inode(c, f, fn); @@ -210,10 +205,12 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, jffs2_mark_node_obsolete(c, fn->raw); jffs2_free_full_dnode(fn); jffs2_complete_reservation(c); + mutex_unlock(&f->sem); goto out_page; } jffs2_complete_reservation(c); inode->i_size = pageofs; + mutex_unlock(&f->sem); } /* @@ -222,18 +219,18 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, * case of a short-copy. */ if (!PageUptodate(pg)) { + mutex_lock(&f->sem); ret = jffs2_do_readpage_nolock(inode, pg); + mutex_unlock(&f->sem); if (ret) goto out_page; } - mutex_unlock(&f->sem); jffs2_dbg(1, "end write_begin(). pg->flags %lx\n", pg->flags); return ret; out_page: unlock_page(pg); page_cache_release(pg); - mutex_unlock(&f->sem); return ret; } From 17d919c7379efbe5489243cd7ffa8a24fe09470b Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 1 Feb 2016 12:37:20 +0000 Subject: [PATCH 384/418] jffs2: Fix page lock / f->sem deadlock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 49e91e7079febe59a20ca885a87dd1c54240d0f1 upstream. With this fix, all code paths should now be obtaining the page lock before f->sem. Reported-by: Szabó Tamás Tested-by: Thomas Betker Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- fs/jffs2/README.Locking | 5 +---- fs/jffs2/gc.c | 17 ++++++++++------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/fs/jffs2/README.Locking b/fs/jffs2/README.Locking index 3ea36554107f..8918ac905a3b 100644 --- a/fs/jffs2/README.Locking +++ b/fs/jffs2/README.Locking @@ -2,10 +2,6 @@ JFFS2 LOCKING DOCUMENTATION --------------------------- -At least theoretically, JFFS2 does not require the Big Kernel Lock -(BKL), which was always helpfully obtained for it by Linux 2.4 VFS -code. It has its own locking, as described below. - This document attempts to describe the existing locking rules for JFFS2. It is not expected to remain perfectly up to date, but ought to be fairly close. @@ -69,6 +65,7 @@ Ordering constraints: any f->sem held. 2. Never attempt to lock two file mutexes in one thread. No ordering rules have been made for doing so. + 3. Never lock a page cache page with f->sem held. erase_completion_lock spinlock diff --git a/fs/jffs2/gc.c b/fs/jffs2/gc.c index 5a2dec2b064c..95d5880a63ee 100644 --- a/fs/jffs2/gc.c +++ b/fs/jffs2/gc.c @@ -1296,14 +1296,17 @@ static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_era BUG_ON(start > orig_start); } - /* First, use readpage() to read the appropriate page into the page cache */ - /* Q: What happens if we actually try to GC the _same_ page for which commit_write() - * triggered garbage collection in the first place? - * A: I _think_ it's OK. read_cache_page shouldn't deadlock, we'll write out the - * page OK. We'll actually write it out again in commit_write, which is a little - * suboptimal, but at least we're correct. - */ + /* The rules state that we must obtain the page lock *before* f->sem, so + * drop f->sem temporarily. Since we also hold c->alloc_sem, nothing's + * actually going to *change* so we're safe; we only allow reading. + * + * It is important to note that jffs2_write_begin() will ensure that its + * page is marked Uptodate before allocating space. That means that if we + * end up here trying to GC the *same* page that jffs2_write_begin() is + * trying to write out, read_cache_page() will not deadlock. */ + mutex_unlock(&f->sem); pg_ptr = jffs2_gc_fetch_page(c, f, start, &pg); + mutex_lock(&f->sem); if (IS_ERR(pg_ptr)) { pr_warn("read_cache_page() returned error: %ld\n", From 3ef98fdea9a9bb8d0ba653faadcf16525d6e0f9c Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 1 Feb 2016 14:04:46 +0000 Subject: [PATCH 385/418] Fix directory hardlinks from deleted directories MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit be629c62a603e5935f8177fd8a19e014100a259e upstream. When a directory is deleted, we don't take too much care about killing off all the dirents that belong to it — on the basis that on remount, the scan will conclude that the directory is dead anyway. This doesn't work though, when the deleted directory contained a child directory which was moved *out*. In the early stages of the fs build we can then end up with an apparent hard link, with the child directory appearing both in its true location, and as a child of the original directory which are this stage of the mount process we don't *yet* know is defunct. To resolve this, take out the early special-casing of the "directories shall not have hard links" rule in jffs2_build_inode_pass1(), and let the normal nlink processing happen for directories as well as other inodes. Then later in the build process we can set ic->pino_nlink to the parent inode#, as is required for directories during normal operaton, instead of the nlink. And complain only *then* about hard links which are still in evidence even after killing off all the unreachable paths. Reported-by: Liu Song Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- fs/jffs2/build.c | 75 ++++++++++++++++++++++++++++++++++----------- fs/jffs2/nodelist.h | 6 +++- 2 files changed, 62 insertions(+), 19 deletions(-) diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c index a3750f902adc..c1f04947d7dc 100644 --- a/fs/jffs2/build.c +++ b/fs/jffs2/build.c @@ -49,7 +49,8 @@ next_inode(int *i, struct jffs2_inode_cache *ic, struct jffs2_sb_info *c) static void jffs2_build_inode_pass1(struct jffs2_sb_info *c, - struct jffs2_inode_cache *ic) + struct jffs2_inode_cache *ic, + int *dir_hardlinks) { struct jffs2_full_dirent *fd; @@ -68,19 +69,21 @@ static void jffs2_build_inode_pass1(struct jffs2_sb_info *c, dbg_fsbuild("child \"%s\" (ino #%u) of dir ino #%u doesn't exist!\n", fd->name, fd->ino, ic->ino); jffs2_mark_node_obsolete(c, fd->raw); + /* Clear the ic/raw union so it doesn't cause problems later. */ + fd->ic = NULL; continue; } + /* From this point, fd->raw is no longer used so we can set fd->ic */ + fd->ic = child_ic; + child_ic->pino_nlink++; + /* If we appear (at this stage) to have hard-linked directories, + * set a flag to trigger a scan later */ if (fd->type == DT_DIR) { - if (child_ic->pino_nlink) { - JFFS2_ERROR("child dir \"%s\" (ino #%u) of dir ino #%u appears to be a hard link\n", - fd->name, fd->ino, ic->ino); - /* TODO: What do we do about it? */ - } else { - child_ic->pino_nlink = ic->ino; - } - } else - child_ic->pino_nlink++; + child_ic->flags |= INO_FLAGS_IS_DIR; + if (child_ic->pino_nlink > 1) + *dir_hardlinks = 1; + } dbg_fsbuild("increased nlink for child \"%s\" (ino #%u)\n", fd->name, fd->ino); /* Can't free scan_dents so far. We might need them in pass 2 */ @@ -94,8 +97,7 @@ static void jffs2_build_inode_pass1(struct jffs2_sb_info *c, */ static int jffs2_build_filesystem(struct jffs2_sb_info *c) { - int ret; - int i; + int ret, i, dir_hardlinks = 0; struct jffs2_inode_cache *ic; struct jffs2_full_dirent *fd; struct jffs2_full_dirent *dead_fds = NULL; @@ -119,7 +121,7 @@ static int jffs2_build_filesystem(struct jffs2_sb_info *c) /* Now scan the directory tree, increasing nlink according to every dirent found. */ for_each_inode(i, c, ic) { if (ic->scan_dents) { - jffs2_build_inode_pass1(c, ic); + jffs2_build_inode_pass1(c, ic, &dir_hardlinks); cond_resched(); } } @@ -155,6 +157,20 @@ static int jffs2_build_filesystem(struct jffs2_sb_info *c) } dbg_fsbuild("pass 2a complete\n"); + + if (dir_hardlinks) { + /* If we detected directory hardlinks earlier, *hopefully* + * they are gone now because some of the links were from + * dead directories which still had some old dirents lying + * around and not yet garbage-collected, but which have + * been discarded above. So clear the pino_nlink field + * in each directory, so that the final scan below can + * print appropriate warnings. */ + for_each_inode(i, c, ic) { + if (ic->flags & INO_FLAGS_IS_DIR) + ic->pino_nlink = 0; + } + } dbg_fsbuild("freeing temporary data structures\n"); /* Finally, we can scan again and free the dirent structs */ @@ -162,6 +178,33 @@ static int jffs2_build_filesystem(struct jffs2_sb_info *c) while(ic->scan_dents) { fd = ic->scan_dents; ic->scan_dents = fd->next; + /* We do use the pino_nlink field to count nlink of + * directories during fs build, so set it to the + * parent ino# now. Now that there's hopefully only + * one. */ + if (fd->type == DT_DIR) { + if (!fd->ic) { + /* We'll have complained about it and marked the coresponding + raw node obsolete already. Just skip it. */ + continue; + } + + /* We *have* to have set this in jffs2_build_inode_pass1() */ + BUG_ON(!(fd->ic->flags & INO_FLAGS_IS_DIR)); + + /* We clear ic->pino_nlink ∀ directories' ic *only* if dir_hardlinks + * is set. Otherwise, we know this should never trigger anyway, so + * we don't do the check. And ic->pino_nlink still contains the nlink + * value (which is 1). */ + if (dir_hardlinks && fd->ic->pino_nlink) { + JFFS2_ERROR("child dir \"%s\" (ino #%u) of dir ino #%u is also hard linked from dir ino #%u\n", + fd->name, fd->ino, ic->ino, fd->ic->pino_nlink); + /* Should we unlink it from its previous parent? */ + } + + /* For directories, ic->pino_nlink holds that parent inode # */ + fd->ic->pino_nlink = ic->ino; + } jffs2_free_full_dirent(fd); } ic->scan_dents = NULL; @@ -240,11 +283,7 @@ static void jffs2_build_remove_unlinked_inode(struct jffs2_sb_info *c, /* Reduce nlink of the child. If it's now zero, stick it on the dead_fds list to be cleaned up later. Else just free the fd */ - - if (fd->type == DT_DIR) - child_ic->pino_nlink = 0; - else - child_ic->pino_nlink--; + child_ic->pino_nlink--; if (!child_ic->pino_nlink) { dbg_fsbuild("inode #%u (\"%s\") now has no links; adding to dead_fds list.\n", diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h index fa35ff79ab35..0637271f3770 100644 --- a/fs/jffs2/nodelist.h +++ b/fs/jffs2/nodelist.h @@ -194,6 +194,7 @@ struct jffs2_inode_cache { #define INO_STATE_CLEARING 6 /* In clear_inode() */ #define INO_FLAGS_XATTR_CHECKED 0x01 /* has no duplicate xattr_ref */ +#define INO_FLAGS_IS_DIR 0x02 /* is a directory */ #define RAWNODE_CLASS_INODE_CACHE 0 #define RAWNODE_CLASS_XATTR_DATUM 1 @@ -249,7 +250,10 @@ struct jffs2_readinode_info struct jffs2_full_dirent { - struct jffs2_raw_node_ref *raw; + union { + struct jffs2_raw_node_ref *raw; + struct jffs2_inode_cache *ic; /* Just during part of build */ + }; struct jffs2_full_dirent *next; uint32_t version; uint32_t ino; /* == zero for unlink */ From 7abfb63f28e1fc56f39f0ef981035b6c9dbd9278 Mon Sep 17 00:00:00 2001 From: Robert Jarzmik Date: Tue, 16 Feb 2016 22:54:02 +0100 Subject: [PATCH 386/418] dmaengine: pxa_dma: fix cyclic transfers commit f16921275cc3c2442d0b95225785a601603b990f upstream. While testing audio with pxa2xx-ac97, underrun were happening while the user application was correctly feeding the music. Debug proved that the cyclic transfer is not cyclic, ie. the last descriptor did not loop on the first. Another issue is that the descriptor length was always set to 8192, because of an trivial operator issue. This was tested on a pxa27x platform. Fixes: a57e16cf0333 ("dmaengine: pxa: add pxa dmaengine driver") Reported-by: Vasily Khoruzhick Tested-by: Vasily Khoruzhick Signed-off-by: Robert Jarzmik Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/pxa_dma.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/dma/pxa_dma.c b/drivers/dma/pxa_dma.c index fc4156afa070..a59061e4221a 100644 --- a/drivers/dma/pxa_dma.c +++ b/drivers/dma/pxa_dma.c @@ -583,6 +583,8 @@ static void set_updater_desc(struct pxad_desc_sw *sw_desc, (PXA_DCMD_LENGTH & sizeof(u32)); if (flags & DMA_PREP_INTERRUPT) updater->dcmd |= PXA_DCMD_ENDIRQEN; + if (sw_desc->cyclic) + sw_desc->hw_desc[sw_desc->nb_desc - 2]->ddadr = sw_desc->first; } static bool is_desc_completed(struct virt_dma_desc *vd) @@ -673,6 +675,10 @@ static irqreturn_t pxad_chan_handler(int irq, void *dev_id) dev_dbg(&chan->vc.chan.dev->device, "%s(): checking txd %p[%x]: completed=%d\n", __func__, vd, vd->tx.cookie, is_desc_completed(vd)); + if (to_pxad_sw_desc(vd)->cyclic) { + vchan_cyclic_callback(vd); + break; + } if (is_desc_completed(vd)) { list_del(&vd->node); vchan_cookie_complete(vd); @@ -1080,7 +1086,7 @@ pxad_prep_dma_cyclic(struct dma_chan *dchan, return NULL; pxad_get_config(chan, dir, &dcmd, &dsadr, &dtadr); - dcmd |= PXA_DCMD_ENDIRQEN | (PXA_DCMD_LENGTH | period_len); + dcmd |= PXA_DCMD_ENDIRQEN | (PXA_DCMD_LENGTH & period_len); dev_dbg(&chan->vc.chan.dev->device, "%s(): buf_addr=0x%lx len=%zu period=%zu dir=%d flags=%lx\n", __func__, (unsigned long)buf_addr, len, period_len, dir, flags); From f7591f1a02b5b896ba2da9855a9af24634cae2b9 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 10 Feb 2016 08:09:10 -0200 Subject: [PATCH 387/418] adv7604: fix tx 5v detect regression commit 0ba4581c84cfb39fd527f6b3457f1c97f6356c04 upstream. The 5 volt detect functionality broke in 3.14: the code reads IO register 0x70 again after it has already been cleared. Instead it should use the cached irq_reg_0x70 value and the io_write to 0x71 to clear 0x70 can be dropped since this has already been done. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/i2c/adv7604.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/media/i2c/adv7604.c b/drivers/media/i2c/adv7604.c index 5631ec004eed..01adcdc52346 100644 --- a/drivers/media/i2c/adv7604.c +++ b/drivers/media/i2c/adv7604.c @@ -1960,10 +1960,9 @@ static int adv76xx_isr(struct v4l2_subdev *sd, u32 status, bool *handled) } /* tx 5v detect */ - tx_5v = io_read(sd, 0x70) & info->cable_det_mask; + tx_5v = irq_reg_0x70 & info->cable_det_mask; if (tx_5v) { v4l2_dbg(1, debug, sd, "%s: tx_5v: 0x%x\n", __func__, tx_5v); - io_write(sd, 0x71, tx_5v); adv76xx_s_detect_tx_5v_ctrl(sd); if (handled) *handled = true; From 020615e8192717e236a5bf6d162c2b35117bb4dc Mon Sep 17 00:00:00 2001 From: Dennis Kadioglu Date: Tue, 1 Mar 2016 14:23:29 +0100 Subject: [PATCH 388/418] ALSA: usb-audio: Add a quirk for Plantronics DA45 commit 17e2df4613be57d0fab68df749f6b8114e453152 upstream. Plantronics DA45 does not support reading the sample rate which leads to many lines of "cannot get freq at ep 0x4" and "cannot get freq at ep 0x84". This patch adds the USB ID of the DA45 to quirks.c and avoids those error messages. Signed-off-by: Dennis Kadioglu Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 4f6ce1cac8e2..c458d60d5030 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1124,6 +1124,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip) case USB_ID(0x045E, 0x076F): /* MS Lifecam HD-6000 */ case USB_ID(0x045E, 0x0772): /* MS Lifecam Studio */ case USB_ID(0x045E, 0x0779): /* MS Lifecam HD-3000 */ + case USB_ID(0x047F, 0xAA05): /* Plantronics DA45 */ case USB_ID(0x04D8, 0xFEEA): /* Benchmark DAC1 Pre */ case USB_ID(0x074D, 0x3553): /* Outlaw RR2150 (Micronas UAC3553B) */ case USB_ID(0x21B4, 0x0081): /* AudioQuest DragonFly */ From c223c645e9d843a5b80af8daaae6d84c512aa67e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sat, 27 Feb 2016 17:52:42 +0100 Subject: [PATCH 389/418] ALSA: ctl: Fix ioctls for X32 ABI commit 6236d8bb2afcfe71b88ecea554e0dc638090a45f upstream. The X32 ABI takes the same alignment like x86-64, and this may result in the incompatible struct size from ia32. Unfortunately, we hit this in some control ABI: struct snd_ctl_elem_value differs between them due to the position of 64bit variable array. This ends up with the unknown ioctl (ENOTTY) error. The fix is to add the compat entries for the new aligned struct. Reported-and-tested-by: Steven Newbury Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/control_compat.c | 90 ++++++++++++++++++++++++++++++------- 1 file changed, 74 insertions(+), 16 deletions(-) diff --git a/sound/core/control_compat.c b/sound/core/control_compat.c index b9c0910fb8c4..0608f216f359 100644 --- a/sound/core/control_compat.c +++ b/sound/core/control_compat.c @@ -170,6 +170,19 @@ struct snd_ctl_elem_value32 { unsigned char reserved[128]; }; +#ifdef CONFIG_X86_X32 +/* x32 has a different alignment for 64bit values from ia32 */ +struct snd_ctl_elem_value_x32 { + struct snd_ctl_elem_id id; + unsigned int indirect; /* bit-field causes misalignment */ + union { + s32 integer[128]; + unsigned char data[512]; + s64 integer64[64]; + } value; + unsigned char reserved[128]; +}; +#endif /* CONFIG_X86_X32 */ /* get the value type and count of the control */ static int get_ctl_type(struct snd_card *card, struct snd_ctl_elem_id *id, @@ -219,9 +232,11 @@ static int get_elem_size(int type, int count) static int copy_ctl_value_from_user(struct snd_card *card, struct snd_ctl_elem_value *data, - struct snd_ctl_elem_value32 __user *data32, + void __user *userdata, + void __user *valuep, int *typep, int *countp) { + struct snd_ctl_elem_value32 __user *data32 = userdata; int i, type, size; int uninitialized_var(count); unsigned int indirect; @@ -239,8 +254,9 @@ static int copy_ctl_value_from_user(struct snd_card *card, if (type == SNDRV_CTL_ELEM_TYPE_BOOLEAN || type == SNDRV_CTL_ELEM_TYPE_INTEGER) { for (i = 0; i < count; i++) { + s32 __user *intp = valuep; int val; - if (get_user(val, &data32->value.integer[i])) + if (get_user(val, &intp[i])) return -EFAULT; data->value.integer.value[i] = val; } @@ -250,8 +266,7 @@ static int copy_ctl_value_from_user(struct snd_card *card, dev_err(card->dev, "snd_ioctl32_ctl_elem_value: unknown type %d\n", type); return -EINVAL; } - if (copy_from_user(data->value.bytes.data, - data32->value.data, size)) + if (copy_from_user(data->value.bytes.data, valuep, size)) return -EFAULT; } @@ -261,7 +276,8 @@ static int copy_ctl_value_from_user(struct snd_card *card, } /* restore the value to 32bit */ -static int copy_ctl_value_to_user(struct snd_ctl_elem_value32 __user *data32, +static int copy_ctl_value_to_user(void __user *userdata, + void __user *valuep, struct snd_ctl_elem_value *data, int type, int count) { @@ -270,22 +286,22 @@ static int copy_ctl_value_to_user(struct snd_ctl_elem_value32 __user *data32, if (type == SNDRV_CTL_ELEM_TYPE_BOOLEAN || type == SNDRV_CTL_ELEM_TYPE_INTEGER) { for (i = 0; i < count; i++) { + s32 __user *intp = valuep; int val; val = data->value.integer.value[i]; - if (put_user(val, &data32->value.integer[i])) + if (put_user(val, &intp[i])) return -EFAULT; } } else { size = get_elem_size(type, count); - if (copy_to_user(data32->value.data, - data->value.bytes.data, size)) + if (copy_to_user(valuep, data->value.bytes.data, size)) return -EFAULT; } return 0; } -static int snd_ctl_elem_read_user_compat(struct snd_card *card, - struct snd_ctl_elem_value32 __user *data32) +static int ctl_elem_read_user(struct snd_card *card, + void __user *userdata, void __user *valuep) { struct snd_ctl_elem_value *data; int err, type, count; @@ -294,7 +310,9 @@ static int snd_ctl_elem_read_user_compat(struct snd_card *card, if (data == NULL) return -ENOMEM; - if ((err = copy_ctl_value_from_user(card, data, data32, &type, &count)) < 0) + err = copy_ctl_value_from_user(card, data, userdata, valuep, + &type, &count); + if (err < 0) goto error; snd_power_lock(card); @@ -303,14 +321,15 @@ static int snd_ctl_elem_read_user_compat(struct snd_card *card, err = snd_ctl_elem_read(card, data); snd_power_unlock(card); if (err >= 0) - err = copy_ctl_value_to_user(data32, data, type, count); + err = copy_ctl_value_to_user(userdata, valuep, data, + type, count); error: kfree(data); return err; } -static int snd_ctl_elem_write_user_compat(struct snd_ctl_file *file, - struct snd_ctl_elem_value32 __user *data32) +static int ctl_elem_write_user(struct snd_ctl_file *file, + void __user *userdata, void __user *valuep) { struct snd_ctl_elem_value *data; struct snd_card *card = file->card; @@ -320,7 +339,9 @@ static int snd_ctl_elem_write_user_compat(struct snd_ctl_file *file, if (data == NULL) return -ENOMEM; - if ((err = copy_ctl_value_from_user(card, data, data32, &type, &count)) < 0) + err = copy_ctl_value_from_user(card, data, userdata, valuep, + &type, &count); + if (err < 0) goto error; snd_power_lock(card); @@ -329,12 +350,39 @@ static int snd_ctl_elem_write_user_compat(struct snd_ctl_file *file, err = snd_ctl_elem_write(card, file, data); snd_power_unlock(card); if (err >= 0) - err = copy_ctl_value_to_user(data32, data, type, count); + err = copy_ctl_value_to_user(userdata, valuep, data, + type, count); error: kfree(data); return err; } +static int snd_ctl_elem_read_user_compat(struct snd_card *card, + struct snd_ctl_elem_value32 __user *data32) +{ + return ctl_elem_read_user(card, data32, &data32->value); +} + +static int snd_ctl_elem_write_user_compat(struct snd_ctl_file *file, + struct snd_ctl_elem_value32 __user *data32) +{ + return ctl_elem_write_user(file, data32, &data32->value); +} + +#ifdef CONFIG_X86_X32 +static int snd_ctl_elem_read_user_x32(struct snd_card *card, + struct snd_ctl_elem_value_x32 __user *data32) +{ + return ctl_elem_read_user(card, data32, &data32->value); +} + +static int snd_ctl_elem_write_user_x32(struct snd_ctl_file *file, + struct snd_ctl_elem_value_x32 __user *data32) +{ + return ctl_elem_write_user(file, data32, &data32->value); +} +#endif /* CONFIG_X86_X32 */ + /* add or replace a user control */ static int snd_ctl_elem_add_compat(struct snd_ctl_file *file, struct snd_ctl_elem_info32 __user *data32, @@ -393,6 +441,10 @@ enum { SNDRV_CTL_IOCTL_ELEM_WRITE32 = _IOWR('U', 0x13, struct snd_ctl_elem_value32), SNDRV_CTL_IOCTL_ELEM_ADD32 = _IOWR('U', 0x17, struct snd_ctl_elem_info32), SNDRV_CTL_IOCTL_ELEM_REPLACE32 = _IOWR('U', 0x18, struct snd_ctl_elem_info32), +#ifdef CONFIG_X86_X32 + SNDRV_CTL_IOCTL_ELEM_READ_X32 = _IOWR('U', 0x12, struct snd_ctl_elem_value_x32), + SNDRV_CTL_IOCTL_ELEM_WRITE_X32 = _IOWR('U', 0x13, struct snd_ctl_elem_value_x32), +#endif /* CONFIG_X86_X32 */ }; static inline long snd_ctl_ioctl_compat(struct file *file, unsigned int cmd, unsigned long arg) @@ -431,6 +483,12 @@ static inline long snd_ctl_ioctl_compat(struct file *file, unsigned int cmd, uns return snd_ctl_elem_add_compat(ctl, argp, 0); case SNDRV_CTL_IOCTL_ELEM_REPLACE32: return snd_ctl_elem_add_compat(ctl, argp, 1); +#ifdef CONFIG_X86_X32 + case SNDRV_CTL_IOCTL_ELEM_READ_X32: + return snd_ctl_elem_read_user_x32(ctl->card, argp); + case SNDRV_CTL_IOCTL_ELEM_WRITE_X32: + return snd_ctl_elem_write_user_x32(ctl, argp); +#endif /* CONFIG_X86_X32 */ } down_read(&snd_ioctl_rwsem); From b422823d27cd64a496aae0acb38eee1b50d796ba Mon Sep 17 00:00:00 2001 From: Simon South Date: Wed, 2 Mar 2016 23:10:44 -0500 Subject: [PATCH 390/418] ALSA: hda - Fix mic issues on Acer Aspire E1-472 commit 02322ac9dee9aff8d8862e8d6660ebe102f492ea upstream. This patch applies the microphone-related fix created for the Acer Aspire E1-572 to the E1-472 as well, as it uses the same Realtek ALC282 CODEC and demonstrates the same issues. This patch allows an external, headset microphone to be used and limits the gain on the (quite noisy) internal microphone. Signed-off-by: Simon South Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 72fa58dd7723..c2430b36e1ce 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5386,6 +5386,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x080d, "Acer Aspire V5-122P", ALC269_FIXUP_ASPIRE_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x0740, "Acer AO725", ALC271_FIXUP_HP_GATE_MIC_JACK), SND_PCI_QUIRK(0x1025, 0x0742, "Acer AO756", ALC271_FIXUP_HP_GATE_MIC_JACK), + SND_PCI_QUIRK(0x1025, 0x0762, "Acer Aspire E1-472", ALC271_FIXUP_HP_GATE_MIC_JACK_E1_572), SND_PCI_QUIRK(0x1025, 0x0775, "Acer Aspire E1-572", ALC271_FIXUP_HP_GATE_MIC_JACK_E1_572), SND_PCI_QUIRK(0x1025, 0x079b, "Acer Aspire V5-573G", ALC282_FIXUP_ASPIRE_V5_PINS), SND_PCI_QUIRK(0x1025, 0x106d, "Acer Cloudbook 14", ALC283_FIXUP_CHROME_BOOK), From 8a1faa9cd284d1d4f218099d8e478e05e5da142f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 28 Feb 2016 11:28:08 +0100 Subject: [PATCH 391/418] ALSA: rawmidi: Fix ioctls X32 ABI commit 2251fbbc1539f05b0b206b37a602d5776be37252 upstream. Like the previous fixes for ctl and PCM, we need a fix for incompatible X32 ABI regarding the rawmidi: namely, struct snd_rawmidi_status has the timespec, and the size and the alignment on X32 differ from IA32. This patch fixes the incompatible ioctl for X32. Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/rawmidi_compat.c | 53 +++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/sound/core/rawmidi_compat.c b/sound/core/rawmidi_compat.c index 5268c1f58c25..09a89094dcf7 100644 --- a/sound/core/rawmidi_compat.c +++ b/sound/core/rawmidi_compat.c @@ -94,9 +94,58 @@ static int snd_rawmidi_ioctl_status_compat(struct snd_rawmidi_file *rfile, return 0; } +#ifdef CONFIG_X86_X32 +/* X32 ABI has 64bit timespec and 64bit alignment */ +struct snd_rawmidi_status_x32 { + s32 stream; + u32 rsvd; /* alignment */ + struct timespec tstamp; + u32 avail; + u32 xruns; + unsigned char reserved[16]; +} __attribute__((packed)); + +#define put_timespec(src, dst) copy_to_user(dst, src, sizeof(*dst)) + +static int snd_rawmidi_ioctl_status_x32(struct snd_rawmidi_file *rfile, + struct snd_rawmidi_status_x32 __user *src) +{ + int err; + struct snd_rawmidi_status status; + + if (rfile->output == NULL) + return -EINVAL; + if (get_user(status.stream, &src->stream)) + return -EFAULT; + + switch (status.stream) { + case SNDRV_RAWMIDI_STREAM_OUTPUT: + err = snd_rawmidi_output_status(rfile->output, &status); + break; + case SNDRV_RAWMIDI_STREAM_INPUT: + err = snd_rawmidi_input_status(rfile->input, &status); + break; + default: + return -EINVAL; + } + if (err < 0) + return err; + + if (put_timespec(&status.tstamp, &src->tstamp) || + put_user(status.avail, &src->avail) || + put_user(status.xruns, &src->xruns)) + return -EFAULT; + + return 0; +} +#endif /* CONFIG_X86_X32 */ + enum { SNDRV_RAWMIDI_IOCTL_PARAMS32 = _IOWR('W', 0x10, struct snd_rawmidi_params32), SNDRV_RAWMIDI_IOCTL_STATUS32 = _IOWR('W', 0x20, struct snd_rawmidi_status32), +#ifdef CONFIG_X86_X32 + SNDRV_RAWMIDI_IOCTL_STATUS_X32 = _IOWR('W', 0x20, struct snd_rawmidi_status_x32), +#endif /* CONFIG_X86_X32 */ }; static long snd_rawmidi_ioctl_compat(struct file *file, unsigned int cmd, unsigned long arg) @@ -115,6 +164,10 @@ static long snd_rawmidi_ioctl_compat(struct file *file, unsigned int cmd, unsign return snd_rawmidi_ioctl_params_compat(rfile, argp); case SNDRV_RAWMIDI_IOCTL_STATUS32: return snd_rawmidi_ioctl_status_compat(rfile, argp); +#ifdef CONFIG_X86_X32 + case SNDRV_RAWMIDI_IOCTL_STATUS_X32: + return snd_rawmidi_ioctl_status_x32(rfile, argp); +#endif /* CONFIG_X86_X32 */ } return -ENOIOCTLCMD; } From 7c0fe28f45a58434011e8fe04fa09f595ed3a97f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 28 Feb 2016 11:41:47 +0100 Subject: [PATCH 392/418] ALSA: timer: Fix ioctls for X32 ABI commit b24e7ad1fdc22177eb3e51584e1cfcb45d818488 upstream. X32 ABI takes the 64bit timespec, thus the timer user status ioctl becomes incompatible with IA32. This results in NOTTY error when the ioctl is issued. Meanwhile, this struct in X32 is essentially identical with the one in X86-64, so we can just bypassing to the existing code for this specific compat ioctl. Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/timer_compat.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/sound/core/timer_compat.c b/sound/core/timer_compat.c index e05802ae6e1b..1314b732bff3 100644 --- a/sound/core/timer_compat.c +++ b/sound/core/timer_compat.c @@ -88,12 +88,21 @@ static int snd_timer_user_status_compat(struct file *file, return 0; } +#ifdef CONFIG_X86_X32 +/* X32 ABI has the same struct as x86-64 */ +#define snd_timer_user_status_x32(file, s) \ + snd_timer_user_status(file, s) +#endif /* CONFIG_X86_X32 */ + /* */ enum { SNDRV_TIMER_IOCTL_INFO32 = _IOR('T', 0x11, struct snd_timer_info32), SNDRV_TIMER_IOCTL_STATUS32 = _IOW('T', 0x14, struct snd_timer_status32), +#ifdef CONFIG_X86_X32 + SNDRV_TIMER_IOCTL_STATUS_X32 = _IOW('T', 0x14, struct snd_timer_status), +#endif /* CONFIG_X86_X32 */ }; static long snd_timer_user_ioctl_compat(struct file *file, unsigned int cmd, unsigned long arg) @@ -122,6 +131,10 @@ static long snd_timer_user_ioctl_compat(struct file *file, unsigned int cmd, uns return snd_timer_user_info_compat(file, argp); case SNDRV_TIMER_IOCTL_STATUS32: return snd_timer_user_status_compat(file, argp); +#ifdef CONFIG_X86_X32 + case SNDRV_TIMER_IOCTL_STATUS_X32: + return snd_timer_user_status_x32(file, argp); +#endif /* CONFIG_X86_X32 */ } return -ENOIOCTLCMD; } From f9ed7db13ebd5c7723731228b190fe19ef255ba7 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 28 Feb 2016 11:23:09 +0100 Subject: [PATCH 393/418] ALSA: pcm: Fix ioctls for X32 ABI commit 513ace79b657e2022a592e77f24074e088681ecc upstream. X32 ABI uses the 64bit timespec in addition to 64bit alignment of 64bit values. This leads to incompatibilities in some PCM ioctls involved with snd_pcm_channel_info, snd_pcm_status and snd_pcm_sync_ptr structs. Fix the PCM compat ABI for these ioctls like the previous commit for ctl API. Reported-by: Steven Newbury Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/pcm_compat.c | 177 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 176 insertions(+), 1 deletion(-) diff --git a/sound/core/pcm_compat.c b/sound/core/pcm_compat.c index 9630e9f72b7b..1f64ab0c2a95 100644 --- a/sound/core/pcm_compat.c +++ b/sound/core/pcm_compat.c @@ -183,6 +183,14 @@ static int snd_pcm_ioctl_channel_info_compat(struct snd_pcm_substream *substream return err; } +#ifdef CONFIG_X86_X32 +/* X32 ABI has the same struct as x86-64 for snd_pcm_channel_info */ +static int snd_pcm_channel_info_user(struct snd_pcm_substream *substream, + struct snd_pcm_channel_info __user *src); +#define snd_pcm_ioctl_channel_info_x32(s, p) \ + snd_pcm_channel_info_user(s, p) +#endif /* CONFIG_X86_X32 */ + struct snd_pcm_status32 { s32 state; struct compat_timespec trigger_tstamp; @@ -243,6 +251,71 @@ static int snd_pcm_status_user_compat(struct snd_pcm_substream *substream, return err; } +#ifdef CONFIG_X86_X32 +/* X32 ABI has 64bit timespec and 64bit alignment */ +struct snd_pcm_status_x32 { + s32 state; + u32 rsvd; /* alignment */ + struct timespec trigger_tstamp; + struct timespec tstamp; + u32 appl_ptr; + u32 hw_ptr; + s32 delay; + u32 avail; + u32 avail_max; + u32 overrange; + s32 suspended_state; + u32 audio_tstamp_data; + struct timespec audio_tstamp; + struct timespec driver_tstamp; + u32 audio_tstamp_accuracy; + unsigned char reserved[52-2*sizeof(struct timespec)]; +} __packed; + +#define put_timespec(src, dst) copy_to_user(dst, src, sizeof(*dst)) + +static int snd_pcm_status_user_x32(struct snd_pcm_substream *substream, + struct snd_pcm_status_x32 __user *src, + bool ext) +{ + struct snd_pcm_status status; + int err; + + memset(&status, 0, sizeof(status)); + /* + * with extension, parameters are read/write, + * get audio_tstamp_data from user, + * ignore rest of status structure + */ + if (ext && get_user(status.audio_tstamp_data, + (u32 __user *)(&src->audio_tstamp_data))) + return -EFAULT; + err = snd_pcm_status(substream, &status); + if (err < 0) + return err; + + if (clear_user(src, sizeof(*src))) + return -EFAULT; + if (put_user(status.state, &src->state) || + put_timespec(&status.trigger_tstamp, &src->trigger_tstamp) || + put_timespec(&status.tstamp, &src->tstamp) || + put_user(status.appl_ptr, &src->appl_ptr) || + put_user(status.hw_ptr, &src->hw_ptr) || + put_user(status.delay, &src->delay) || + put_user(status.avail, &src->avail) || + put_user(status.avail_max, &src->avail_max) || + put_user(status.overrange, &src->overrange) || + put_user(status.suspended_state, &src->suspended_state) || + put_user(status.audio_tstamp_data, &src->audio_tstamp_data) || + put_timespec(&status.audio_tstamp, &src->audio_tstamp) || + put_timespec(&status.driver_tstamp, &src->driver_tstamp) || + put_user(status.audio_tstamp_accuracy, &src->audio_tstamp_accuracy)) + return -EFAULT; + + return err; +} +#endif /* CONFIG_X86_X32 */ + /* both for HW_PARAMS and HW_REFINE */ static int snd_pcm_ioctl_hw_params_compat(struct snd_pcm_substream *substream, int refine, @@ -469,6 +542,93 @@ static int snd_pcm_ioctl_sync_ptr_compat(struct snd_pcm_substream *substream, return 0; } +#ifdef CONFIG_X86_X32 +/* X32 ABI has 64bit timespec and 64bit alignment */ +struct snd_pcm_mmap_status_x32 { + s32 state; + s32 pad1; + u32 hw_ptr; + u32 pad2; /* alignment */ + struct timespec tstamp; + s32 suspended_state; + struct timespec audio_tstamp; +} __packed; + +struct snd_pcm_mmap_control_x32 { + u32 appl_ptr; + u32 avail_min; +}; + +struct snd_pcm_sync_ptr_x32 { + u32 flags; + u32 rsvd; /* alignment */ + union { + struct snd_pcm_mmap_status_x32 status; + unsigned char reserved[64]; + } s; + union { + struct snd_pcm_mmap_control_x32 control; + unsigned char reserved[64]; + } c; +} __packed; + +static int snd_pcm_ioctl_sync_ptr_x32(struct snd_pcm_substream *substream, + struct snd_pcm_sync_ptr_x32 __user *src) +{ + struct snd_pcm_runtime *runtime = substream->runtime; + volatile struct snd_pcm_mmap_status *status; + volatile struct snd_pcm_mmap_control *control; + u32 sflags; + struct snd_pcm_mmap_control scontrol; + struct snd_pcm_mmap_status sstatus; + snd_pcm_uframes_t boundary; + int err; + + if (snd_BUG_ON(!runtime)) + return -EINVAL; + + if (get_user(sflags, &src->flags) || + get_user(scontrol.appl_ptr, &src->c.control.appl_ptr) || + get_user(scontrol.avail_min, &src->c.control.avail_min)) + return -EFAULT; + if (sflags & SNDRV_PCM_SYNC_PTR_HWSYNC) { + err = snd_pcm_hwsync(substream); + if (err < 0) + return err; + } + status = runtime->status; + control = runtime->control; + boundary = recalculate_boundary(runtime); + if (!boundary) + boundary = 0x7fffffff; + snd_pcm_stream_lock_irq(substream); + /* FIXME: we should consider the boundary for the sync from app */ + if (!(sflags & SNDRV_PCM_SYNC_PTR_APPL)) + control->appl_ptr = scontrol.appl_ptr; + else + scontrol.appl_ptr = control->appl_ptr % boundary; + if (!(sflags & SNDRV_PCM_SYNC_PTR_AVAIL_MIN)) + control->avail_min = scontrol.avail_min; + else + scontrol.avail_min = control->avail_min; + sstatus.state = status->state; + sstatus.hw_ptr = status->hw_ptr % boundary; + sstatus.tstamp = status->tstamp; + sstatus.suspended_state = status->suspended_state; + sstatus.audio_tstamp = status->audio_tstamp; + snd_pcm_stream_unlock_irq(substream); + if (put_user(sstatus.state, &src->s.status.state) || + put_user(sstatus.hw_ptr, &src->s.status.hw_ptr) || + put_timespec(&sstatus.tstamp, &src->s.status.tstamp) || + put_user(sstatus.suspended_state, &src->s.status.suspended_state) || + put_timespec(&sstatus.audio_tstamp, &src->s.status.audio_tstamp) || + put_user(scontrol.appl_ptr, &src->c.control.appl_ptr) || + put_user(scontrol.avail_min, &src->c.control.avail_min)) + return -EFAULT; + + return 0; +} +#endif /* CONFIG_X86_X32 */ /* */ @@ -487,7 +647,12 @@ enum { SNDRV_PCM_IOCTL_WRITEN_FRAMES32 = _IOW('A', 0x52, struct snd_xfern32), SNDRV_PCM_IOCTL_READN_FRAMES32 = _IOR('A', 0x53, struct snd_xfern32), SNDRV_PCM_IOCTL_SYNC_PTR32 = _IOWR('A', 0x23, struct snd_pcm_sync_ptr32), - +#ifdef CONFIG_X86_X32 + SNDRV_PCM_IOCTL_CHANNEL_INFO_X32 = _IOR('A', 0x32, struct snd_pcm_channel_info), + SNDRV_PCM_IOCTL_STATUS_X32 = _IOR('A', 0x20, struct snd_pcm_status_x32), + SNDRV_PCM_IOCTL_STATUS_EXT_X32 = _IOWR('A', 0x24, struct snd_pcm_status_x32), + SNDRV_PCM_IOCTL_SYNC_PTR_X32 = _IOWR('A', 0x23, struct snd_pcm_sync_ptr_x32), +#endif /* CONFIG_X86_X32 */ }; static long snd_pcm_ioctl_compat(struct file *file, unsigned int cmd, unsigned long arg) @@ -559,6 +724,16 @@ static long snd_pcm_ioctl_compat(struct file *file, unsigned int cmd, unsigned l return snd_pcm_ioctl_rewind_compat(substream, argp); case SNDRV_PCM_IOCTL_FORWARD32: return snd_pcm_ioctl_forward_compat(substream, argp); +#ifdef CONFIG_X86_X32 + case SNDRV_PCM_IOCTL_STATUS_X32: + return snd_pcm_status_user_x32(substream, argp, false); + case SNDRV_PCM_IOCTL_STATUS_EXT_X32: + return snd_pcm_status_user_x32(substream, argp, true); + case SNDRV_PCM_IOCTL_SYNC_PTR_X32: + return snd_pcm_ioctl_sync_ptr_x32(substream, argp); + case SNDRV_PCM_IOCTL_CHANNEL_INFO_X32: + return snd_pcm_ioctl_channel_info_x32(substream, argp); +#endif /* CONFIG_X86_X32 */ } return -ENOIOCTLCMD; From 93e93ec18306c6a6be47196030c0be92c5865d1b Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 1 Mar 2016 18:30:18 +0100 Subject: [PATCH 394/418] ALSA: seq: oss: Don't drain at closing a client commit 197b958c1e76a575d77038cc98b4bebc2134279f upstream. The OSS sequencer client tries to drain the pending events at releasing. Unfortunately, as spotted by syzkaller fuzzer, this may lead to an unkillable process state when the event has been queued at the far future. Since the process being released can't be signaled any longer, it remains and waits for the echo-back event in that far future. Back to history, the draining feature was implemented at the time we misinterpreted POSIX definition for blocking file operation. Actually, such a behavior is superfluous at release, and we should just release the device as is instead of keeping it up forever. This patch just removes the draining call that may block the release for too long time unexpectedly. BugLink: http://lkml.kernel.org/r/CACT4Y+Y4kD-aBGj37rf-xBw9bH3GMU6P+MYg4W1e-s-paVD2pg@mail.gmail.com Reported-by: Dmitry Vyukov Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/seq/oss/seq_oss.c | 2 -- sound/core/seq/oss/seq_oss_device.h | 1 - sound/core/seq/oss/seq_oss_init.c | 16 ---------------- 3 files changed, 19 deletions(-) diff --git a/sound/core/seq/oss/seq_oss.c b/sound/core/seq/oss/seq_oss.c index 7354b8bed860..cb23899100ee 100644 --- a/sound/core/seq/oss/seq_oss.c +++ b/sound/core/seq/oss/seq_oss.c @@ -148,8 +148,6 @@ odev_release(struct inode *inode, struct file *file) if ((dp = file->private_data) == NULL) return 0; - snd_seq_oss_drain_write(dp); - mutex_lock(®ister_mutex); snd_seq_oss_release(dp); mutex_unlock(®ister_mutex); diff --git a/sound/core/seq/oss/seq_oss_device.h b/sound/core/seq/oss/seq_oss_device.h index b43924325249..d7b4d016b547 100644 --- a/sound/core/seq/oss/seq_oss_device.h +++ b/sound/core/seq/oss/seq_oss_device.h @@ -127,7 +127,6 @@ int snd_seq_oss_write(struct seq_oss_devinfo *dp, const char __user *buf, int co unsigned int snd_seq_oss_poll(struct seq_oss_devinfo *dp, struct file *file, poll_table * wait); void snd_seq_oss_reset(struct seq_oss_devinfo *dp); -void snd_seq_oss_drain_write(struct seq_oss_devinfo *dp); /* */ void snd_seq_oss_process_queue(struct seq_oss_devinfo *dp, abstime_t time); diff --git a/sound/core/seq/oss/seq_oss_init.c b/sound/core/seq/oss/seq_oss_init.c index 6779e82b46dd..92c96a95a903 100644 --- a/sound/core/seq/oss/seq_oss_init.c +++ b/sound/core/seq/oss/seq_oss_init.c @@ -435,22 +435,6 @@ snd_seq_oss_release(struct seq_oss_devinfo *dp) } -/* - * Wait until the queue is empty (if we don't have nonblock) - */ -void -snd_seq_oss_drain_write(struct seq_oss_devinfo *dp) -{ - if (! dp->timer->running) - return; - if (is_write_mode(dp->file_mode) && !is_nonblock_mode(dp->file_mode) && - dp->writeq) { - while (snd_seq_oss_writeq_sync(dp->writeq)) - ; - } -} - - /* * reset sequencer devices */ From 7483d67fba5928e855eb3c42d59fa040a92363e8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 29 Feb 2016 14:25:16 +0100 Subject: [PATCH 395/418] ALSA: hdspm: Fix wrong boolean ctl value accesses commit 537e48136295c5860a92138c5ea3959b9542868b upstream. snd-hdspm driver accesses enum item values (int) instead of boolean values (long) wrongly for some ctl elements. This patch fixes them. Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/rme9652/hdspm.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/pci/rme9652/hdspm.c b/sound/pci/rme9652/hdspm.c index 8bc8016c173d..b047b1ba48fd 100644 --- a/sound/pci/rme9652/hdspm.c +++ b/sound/pci/rme9652/hdspm.c @@ -2261,7 +2261,7 @@ static int snd_hdspm_put_system_sample_rate(struct snd_kcontrol *kcontrol, { struct hdspm *hdspm = snd_kcontrol_chip(kcontrol); - hdspm_set_dds_value(hdspm, ucontrol->value.enumerated.item[0]); + hdspm_set_dds_value(hdspm, ucontrol->value.integer.value[0]); return 0; } @@ -4449,7 +4449,7 @@ static int snd_hdspm_get_tco_word_term(struct snd_kcontrol *kcontrol, { struct hdspm *hdspm = snd_kcontrol_chip(kcontrol); - ucontrol->value.enumerated.item[0] = hdspm->tco->term; + ucontrol->value.integer.value[0] = hdspm->tco->term; return 0; } @@ -4460,8 +4460,8 @@ static int snd_hdspm_put_tco_word_term(struct snd_kcontrol *kcontrol, { struct hdspm *hdspm = snd_kcontrol_chip(kcontrol); - if (hdspm->tco->term != ucontrol->value.enumerated.item[0]) { - hdspm->tco->term = ucontrol->value.enumerated.item[0]; + if (hdspm->tco->term != ucontrol->value.integer.value[0]) { + hdspm->tco->term = ucontrol->value.integer.value[0]; hdspm_tco_write(hdspm); From eb390c30581b9a60dd30ae4c90842f43d3e1b095 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 29 Feb 2016 14:26:43 +0100 Subject: [PATCH 396/418] ALSA: hdsp: Fix wrong boolean ctl value accesses commit eab3c4db193f5fcccf70e884de9a922ca2c63d80 upstream. snd-hdsp driver accesses enum item values (int) instead of boolean values (long) wrongly for some ctl elements. This patch fixes them. Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/rme9652/hdsp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/pci/rme9652/hdsp.c b/sound/pci/rme9652/hdsp.c index 2875b4f6d8c9..7c8941b8b2de 100644 --- a/sound/pci/rme9652/hdsp.c +++ b/sound/pci/rme9652/hdsp.c @@ -2879,7 +2879,7 @@ static int snd_hdsp_get_dds_offset(struct snd_kcontrol *kcontrol, struct snd_ctl { struct hdsp *hdsp = snd_kcontrol_chip(kcontrol); - ucontrol->value.enumerated.item[0] = hdsp_dds_offset(hdsp); + ucontrol->value.integer.value[0] = hdsp_dds_offset(hdsp); return 0; } @@ -2891,7 +2891,7 @@ static int snd_hdsp_put_dds_offset(struct snd_kcontrol *kcontrol, struct snd_ctl if (!snd_hdsp_use_is_exclusive(hdsp)) return -EBUSY; - val = ucontrol->value.enumerated.item[0]; + val = ucontrol->value.integer.value[0]; spin_lock_irq(&hdsp->lock); if (val != hdsp_dds_offset(hdsp)) change = (hdsp_set_dds_offset(hdsp, val) == 0) ? 1 : 0; From 8a358d00c8732a110657904a108d022bd073f35d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 29 Feb 2016 14:32:42 +0100 Subject: [PATCH 397/418] ALSA: hdspm: Fix zero-division commit c1099c3294c2344110085a38c50e478a5992b368 upstream. HDSPM driver contains a code issuing zero-division potentially in system sample rate ctl code. This patch fixes it by not processing a zero or invalid rate value as a divisor, as well as excluding the invalid value to be passed via the given ctl element. Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/rme9652/hdspm.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/pci/rme9652/hdspm.c b/sound/pci/rme9652/hdspm.c index b047b1ba48fd..a4a999a0317e 100644 --- a/sound/pci/rme9652/hdspm.c +++ b/sound/pci/rme9652/hdspm.c @@ -1601,6 +1601,9 @@ static void hdspm_set_dds_value(struct hdspm *hdspm, int rate) { u64 n; + if (snd_BUG_ON(rate <= 0)) + return; + if (rate >= 112000) rate /= 4; else if (rate >= 56000) @@ -2215,6 +2218,8 @@ static int hdspm_get_system_sample_rate(struct hdspm *hdspm) } else { /* slave mode, return external sample rate */ rate = hdspm_external_sample_rate(hdspm); + if (!rate) + rate = hdspm->system_sample_rate; } } @@ -2260,7 +2265,10 @@ static int snd_hdspm_put_system_sample_rate(struct snd_kcontrol *kcontrol, ucontrol) { struct hdspm *hdspm = snd_kcontrol_chip(kcontrol); + int rate = ucontrol->value.integer.value[0]; + if (rate < 27000 || rate > 207000) + return -EINVAL; hdspm_set_dds_value(hdspm, ucontrol->value.integer.value[0]); return 0; } From 48ad515e8d652e8f03a35bf963eeb5360cf5e38f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 28 Feb 2016 11:36:14 +0100 Subject: [PATCH 398/418] ALSA: timer: Fix broken compat timer user status ioctl commit 3a72494ac2a3bd229db941d51e7efe2f6ccd947b upstream. The timer user status compat ioctl returned the bogus struct used for 64bit architectures instead of the 32bit one. This patch addresses it to return the proper struct. Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/timer_compat.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sound/core/timer_compat.c b/sound/core/timer_compat.c index 1314b732bff3..2e908225d754 100644 --- a/sound/core/timer_compat.c +++ b/sound/core/timer_compat.c @@ -70,13 +70,14 @@ static int snd_timer_user_status_compat(struct file *file, struct snd_timer_status32 __user *_status) { struct snd_timer_user *tu; - struct snd_timer_status status; + struct snd_timer_status32 status; tu = file->private_data; if (snd_BUG_ON(!tu->timeri)) return -ENXIO; memset(&status, 0, sizeof(status)); - status.tstamp = tu->tstamp; + status.tstamp.tv_sec = tu->tstamp.tv_sec; + status.tstamp.tv_nsec = tu->tstamp.tv_nsec; status.resolution = snd_timer_resolution(tu->timeri); status.lost = tu->timeri->lost; status.overrun = tu->overrun; From 0ba460a5fd15673ad895a8fc51d378eb30f1922b Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Wed, 24 Feb 2016 11:05:25 +0800 Subject: [PATCH 399/418] usb: chipidea: otg: change workqueue ci_otg as freezable commit d144dfea8af7108f613139623e63952ed7e69c0c upstream. If we use USB ID pin as wakeup source, and there is a USB block device on this USB OTG (ID) cable, the system will be deadlock after system resume. The root cause for this problem is: the workqueue ci_otg may try to remove hcd before the driver resume has finished, and hcd will disconnect the device on it, then, it will call device_release_driver, and holds the device lock "dev->mutex", but it is never unlocked since it waits workqueue writeback to run to flush the block information, but the workqueue writeback is freezable, it is not thawed before driver resume has finished. When the driver (device: sd 0:0:0:0:) resume goes to dpm_complete, it tries to get its device lock "dev->mutex", but it can't get it forever, then the deadlock occurs. Below call stacks show the situation. So, in order to fix this problem, we need to change workqueue ci_otg as freezable, then the work item in this workqueue will be run after driver's resume, this workqueue will not be blocked forever like above case since the workqueue writeback has been thawed too. Tested at: i.mx6qdl-sabresd and i.mx6sx-sdb. [ 555.178869] kworker/u2:13 D c07de74c 0 826 2 0x00000000 [ 555.185310] Workqueue: ci_otg ci_otg_work [ 555.189353] Backtrace: [ 555.191849] [] (__schedule) from [] (schedule+0x48/0xa0) [ 555.198912] r10:ee471ba0 r9:00000000 r8:00000000 r7:00000002 r6:ee470000 r5:ee471ba4 [ 555.206867] r4:ee470000 [ 555.209453] [] (schedule) from [] (schedule_timeout+0x15c/0x1e0) [ 555.217212] r4:7fffffff r3:edc2b000 [ 555.220862] [] (schedule_timeout) from [] (wait_for_common+0x94/0x144) [ 555.229140] r8:00000000 r7:00000002 r6:ee470000 r5:ee471ba4 r4:7fffffff [ 555.235980] [] (wait_for_common) from [] (wait_for_completion+0x18/0x1c) [ 555.244430] r10:00000001 r9:c0b5563c r8:c0042e48 r7:ef086000 r6:eea4372c r5:ef131b00 [ 555.252383] r4:00000000 [ 555.254970] [] (wait_for_completion) from [] (flush_work+0x19c/0x234) [ 555.263177] [] (flush_work) from [] (flush_delayed_work+0x48/0x4c) [ 555.271106] r8:ed5b5000 r7:c0b38a3c r6:eea439cc r5:eea4372c r4:eea4372c [ 555.277958] [] (flush_delayed_work) from [] (bdi_unregister+0x84/0xec) [ 555.286236] r4:eea43520 r3:20000153 [ 555.289885] [] (bdi_unregister) from [] (blk_cleanup_queue+0x180/0x29c) [ 555.298250] r5:eea43808 r4:eea43400 [ 555.301909] [] (blk_cleanup_queue) from [] (__scsi_remove_device+0x48/0xb8) [ 555.310623] r7:00000000 r6:20000153 r5:ededa950 r4:ededa800 [ 555.316403] [] (__scsi_remove_device) from [] (scsi_forget_host+0x64/0x68) [ 555.325028] r5:ededa800 r4:ed5b5000 [ 555.328689] [] (scsi_forget_host) from [] (scsi_remove_host+0x78/0x104) [ 555.337054] r5:ed5b5068 r4:ed5b5000 [ 555.340709] [] (scsi_remove_host) from [] (usb_stor_disconnect+0x50/0xb4) [ 555.349247] r6:ed5b56e4 r5:ed5b5818 r4:ed5b5690 r3:00000008 [ 555.355025] [] (usb_stor_disconnect) from [] (usb_unbind_interface+0x78/0x25c) [ 555.363997] r8:c13919b4 r7:edd3c000 r6:edd3c020 r5:ee551c68 r4:ee551c00 r3:c04cdf7c [ 555.371892] [] (usb_unbind_interface) from [] (__device_release_driver+0x8c/0x118) [ 555.381213] r10:00000001 r9:edd90c00 r8:c13919b4 r7:ee551c68 r6:c0b546e0 r5:c0b5563c [ 555.389167] r4:edd3c020 [ 555.391752] [] (__device_release_driver) from [] (device_release_driver+0x28/0x34) [ 555.401071] r5:edd3c020 r4:edd3c054 [ 555.404721] [] (device_release_driver) from [] (bus_remove_device+0xe0/0x110) [ 555.413607] r5:edd3c020 r4:ef17f04c [ 555.417253] [] (bus_remove_device) from [] (device_del+0x114/0x21c) [ 555.425270] r6:edd3c028 r5:edd3c020 r4:ee551c00 r3:00000000 [ 555.431045] [] (device_del) from [] (usb_disable_device+0xa4/0x1e8) [ 555.439061] r8:edd3c000 r7:eded8000 r6:00000000 r5:00000001 r4:ee551c00 [ 555.445906] [] (usb_disable_device) from [] (usb_disconnect+0x74/0x224) [ 555.454271] r9:edd90c00 r8:ee551000 r7:ee551c68 r6:ee551c9c r5:ee551c00 r4:00000001 [ 555.462156] [] (usb_disconnect) from [] (usb_disconnect+0x1d8/0x224) [ 555.470259] r10:00000001 r9:edd90000 r8:ee471e2c r7:ee551468 r6:ee55149c r5:ee551400 [ 555.478213] r4:00000001 [ 555.480797] [] (usb_disconnect) from [] (usb_remove_hcd+0xa0/0x1ac) [ 555.488813] r10:00000001 r9:ee471eb0 r8:00000000 r7:ef3d9500 r6:eded810c r5:eded80b0 [ 555.496765] r4:eded8000 [ 555.499351] [] (usb_remove_hcd) from [] (host_stop+0x28/0x64) [ 555.506847] r6:eeb50010 r5:eded8000 r4:eeb51010 [ 555.511563] [] (host_stop) from [] (ci_otg_work+0xc4/0x124) [ 555.518885] r6:00000001 r5:eeb50010 r4:eeb502a0 r3:c04d4130 [ 555.524665] [] (ci_otg_work) from [] (process_one_work+0x194/0x420) [ 555.532682] r6:ef086000 r5:eeb502a0 r4:edc44480 [ 555.537393] [] (process_one_work) from [] (worker_thread+0x34/0x514) [ 555.545496] r10:edc44480 r9:ef086000 r8:c0b1a100 r7:ef086034 r6:00000088 r5:edc44498 [ 555.553450] r4:ef086000 [ 555.556032] [] (worker_thread) from [] (kthread+0xdc/0xf8) [ 555.563268] r10:00000000 r9:00000000 r8:00000000 r7:c004577c r6:edc44480 r5:eddc15c0 [ 555.571221] r4:00000000 [ 555.573804] [] (kthread) from [] (ret_from_fork+0x14/0x24) [ 555.581040] r7:00000000 r6:00000000 r5:c004b9d8 r4:eddc15c0 [ 553.429383] sh D c07de74c 0 694 691 0x00000000 [ 553.435801] Backtrace: [ 553.438295] [] (__schedule) from [] (schedule+0x48/0xa0) [ 553.445358] r10:edd3c054 r9:edd3c078 r8:edddbd50 r7:edcbbc00 r6:c1377c34 r5:60000153 [ 553.453313] r4:eddda000 [ 553.455896] [] (schedule) from [] (schedule_preempt_disabled+0x10/0x14) [ 553.464261] r4:edd3c058 r3:0000000a [ 553.467910] [] (schedule_preempt_disabled) from [] (mutex_lock_nested+0x1a0/0x3e8) [ 553.477254] [] (mutex_lock_nested) from [] (dpm_complete+0xc0/0x1b0) [ 553.485358] r10:00561408 r9:edd3c054 r8:c0b4863c r7:edddbd90 r6:c0b485d8 r5:edd3c020 [ 553.493313] r4:edd3c0d0 [ 553.495896] [] (dpm_complete) from [] (dpm_resume_end+0x1c/0x20) [ 553.503652] r9:00000000 r8:c0b1a9d0 r7:c1334ec0 r6:c1334edc r5:00000003 r4:00000010 [ 553.511544] [] (dpm_resume_end) from [] (suspend_devices_and_enter+0x158/0x504) [ 553.520604] r4:00000000 r3:c1334efc [ 553.524250] [] (suspend_devices_and_enter) from [] (pm_suspend+0x234/0x2cc) [ 553.532961] r10:00561408 r9:ed6b7300 r8:00000004 r7:c1334eec r6:00000000 r5:c1334ee8 [ 553.540914] r4:00000003 [ 553.543493] [] (pm_suspend) from [] (state_store+0x6c/0xc0) [ 555.703684] 7 locks held by kworker/u2:13/826: [ 555.708140] #0: ("%s""ci_otg"){++++.+}, at: [] process_one_work+0x128/0x420 [ 555.716277] #1: ((&ci->work)){+.+.+.}, at: [] process_one_work+0x128/0x420 [ 555.724317] #2: (usb_bus_list_lock){+.+.+.}, at: [] usb_remove_hcd+0x98/0x1ac [ 555.732626] #3: (&dev->mutex){......}, at: [] usb_disconnect+0x48/0x224 [ 555.740403] #4: (&dev->mutex){......}, at: [] usb_disconnect+0x48/0x224 [ 555.748179] #5: (&dev->mutex){......}, at: [] device_release_driver+0x20/0x34 [ 555.756487] #6: (&shost->scan_mutex){+.+.+.}, at: [] scsi_remove_host+0x20/0x104 Cc: Jun Li Signed-off-by: Peter Chen Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/otg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/chipidea/otg.c b/drivers/usb/chipidea/otg.c index 45f86da1d6d3..03b6743461d1 100644 --- a/drivers/usb/chipidea/otg.c +++ b/drivers/usb/chipidea/otg.c @@ -158,7 +158,7 @@ static void ci_otg_work(struct work_struct *work) int ci_hdrc_otg_init(struct ci_hdrc *ci) { INIT_WORK(&ci->work, ci_otg_work); - ci->wq = create_singlethread_workqueue("ci_otg"); + ci->wq = create_freezable_workqueue("ci_otg"); if (!ci->wq) { dev_err(ci->dev, "can't create workqueue\n"); return -ENODEV; From 1f249cfb339c0e5fb9080d0e4c0c336f99f17f46 Mon Sep 17 00:00:00 2001 From: Vittorio Alfieri Date: Sun, 28 Feb 2016 14:40:24 +0100 Subject: [PATCH 400/418] USB: cp210x: Add ID for Parrot NMEA GPS Flight Recorder commit 3c4c615d70c8cbdc8ba8c79ed702640930652a79 upstream. The Parrot NMEA GPS Flight Recorder is a USB composite device consisting of hub, flash storage, and cp210x usb to serial chip. It is an accessory to the mass-produced Parrot AR Drone 2. The device emits standard NMEA messages which make the it compatible with NMEA compatible software. It was tested using gpsd version 3.11-3 as an NMEA interpreter and using the official Parrot Flight Recorder. Signed-off-by: Vittorio Alfieri Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/cp210x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index a7caf53d8b5e..7a76fe4c2f9e 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -164,6 +164,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x18EF, 0xE025) }, /* ELV Marble Sound Board 1 */ { USB_DEVICE(0x1901, 0x0190) }, /* GE B850 CP2105 Recorder interface */ { USB_DEVICE(0x1901, 0x0193) }, /* GE B650 CP2104 PMC interface */ + { USB_DEVICE(0x19CF, 0x3000) }, /* Parrot NMEA GPS Flight Recorder */ { USB_DEVICE(0x1ADB, 0x0001) }, /* Schweitzer Engineering C662 Cable */ { USB_DEVICE(0x1B1C, 0x1C00) }, /* Corsair USB Dongle */ { USB_DEVICE(0x1BA4, 0x0002) }, /* Silicon Labs 358x factory default */ From 505fefa9ce9c29ea936460f905c7b811bc7c4c67 Mon Sep 17 00:00:00 2001 From: Patrik Halfar Date: Sat, 20 Feb 2016 18:49:56 +0100 Subject: [PATCH 401/418] USB: qcserial: add Dell Wireless 5809e Gobi 4G HSPA+ (rev3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 013dd239d6220a4e0dfdf0d45a82c34f1fd73deb upstream. New revision of Dell Wireless 5809e Gobi 4G HSPA+ Mobile Broadband Card has new idProduct. Bus 002 Device 006: ID 413c:81b3 Dell Computer Corp. Device Descriptor: bLength 18 bDescriptorType 1 bcdUSB 2.00 bDeviceClass 0 bDeviceSubClass 0 bDeviceProtocol 0 bMaxPacketSize0 64 idVendor 0x413c Dell Computer Corp. idProduct 0x81b3 bcdDevice 0.06 iManufacturer 1 Sierra Wireless, Incorporated iProduct 2 Dell Wireless 5809e Gobi™ 4G HSPA+ Mobile Broadband Card iSerial 3 bNumConfigurations 2 Signed-off-by: Patrik Halfar Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/qcserial.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index 9919d2a9faf2..9c09d5feb50d 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -165,6 +165,7 @@ static const struct usb_device_id id_table[] = { {DEVICE_SWI(0x413c, 0x81a8)}, /* Dell Wireless 5808 Gobi(TM) 4G LTE Mobile Broadband Card */ {DEVICE_SWI(0x413c, 0x81a9)}, /* Dell Wireless 5808e Gobi(TM) 4G LTE Mobile Broadband Card */ {DEVICE_SWI(0x413c, 0x81b1)}, /* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card */ + {DEVICE_SWI(0x413c, 0x81b3)}, /* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card (rev3) */ /* Huawei devices */ {DEVICE_HWI(0x03f0, 0x581d)}, /* HP lt4112 LTE/HSPA+ Gobi 4G Modem (Huawei me906e) */ From 48a485211fb8f1989736f4a272741a9143134d88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Tue, 1 Mar 2016 14:36:32 +0100 Subject: [PATCH 402/418] USB: qcserial: add Sierra Wireless EM74xx device ID MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 04fdbc825ffc02fb098964b92de802fff44e73fd upstream. The MC74xx and EM74xx modules use different IDs by default, according to the Lenovo EM7455 driver for Windows. Signed-off-by: Bjørn Mork Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/qcserial.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index 9c09d5feb50d..1bc6089b9008 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -157,8 +157,10 @@ static const struct usb_device_id id_table[] = { {DEVICE_SWI(0x1199, 0x9056)}, /* Sierra Wireless Modem */ {DEVICE_SWI(0x1199, 0x9060)}, /* Sierra Wireless Modem */ {DEVICE_SWI(0x1199, 0x9061)}, /* Sierra Wireless Modem */ - {DEVICE_SWI(0x1199, 0x9070)}, /* Sierra Wireless MC74xx/EM74xx */ - {DEVICE_SWI(0x1199, 0x9071)}, /* Sierra Wireless MC74xx/EM74xx */ + {DEVICE_SWI(0x1199, 0x9070)}, /* Sierra Wireless MC74xx */ + {DEVICE_SWI(0x1199, 0x9071)}, /* Sierra Wireless MC74xx */ + {DEVICE_SWI(0x1199, 0x9078)}, /* Sierra Wireless EM74xx */ + {DEVICE_SWI(0x1199, 0x9079)}, /* Sierra Wireless EM74xx */ {DEVICE_SWI(0x413c, 0x81a2)}, /* Dell Wireless 5806 Gobi(TM) 4G LTE Mobile Broadband Card */ {DEVICE_SWI(0x413c, 0x81a3)}, /* Dell Wireless 5570 HSPA+ (42Mbps) Mobile Broadband Card */ {DEVICE_SWI(0x413c, 0x81a4)}, /* Dell Wireless 5570e HSPA+ (42Mbps) Mobile Broadband Card */ From b642669e51cecdea70e5b8b5c55a93987556a5d4 Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Mon, 29 Feb 2016 15:36:11 +0100 Subject: [PATCH 403/418] USB: serial: option: add support for Telit LE922 PID 0x1045 commit 5deef5551c77e488922cc4bf4bc76df63be650d0 upstream. This patch adds support for 0x1045 PID of Telit LE922. Signed-off-by: Daniele Palmas Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 8849439a8f18..8cea03fbdbf0 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -270,6 +270,7 @@ static void option_instat_callback(struct urb *urb); #define TELIT_PRODUCT_UE910_V2 0x1012 #define TELIT_PRODUCT_LE922_USBCFG0 0x1042 #define TELIT_PRODUCT_LE922_USBCFG3 0x1043 +#define TELIT_PRODUCT_LE922_USBCFG5 0x1045 #define TELIT_PRODUCT_LE920 0x1200 #define TELIT_PRODUCT_LE910 0x1201 @@ -1183,6 +1184,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg0 }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG3), .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg3 }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG5, 0xff), + .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg0 }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910), .driver_info = (kernel_ulong_t)&telit_le910_blacklist }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920), From 760b02ed4e71d2f0b48de6c1e7edc1a708f6cbbc Mon Sep 17 00:00:00 2001 From: Yegor Yefremov Date: Mon, 29 Feb 2016 16:39:57 +0100 Subject: [PATCH 404/418] USB: serial: option: add support for Quectel UC20 commit c0992d0f54847d0d1d85c60fcaa054f175ab1ccd upstream. Add support for Quectel UC20 and blacklist the QMI interface. Signed-off-by: Yegor Yefremov [johan: amend commit message ] Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 8cea03fbdbf0..348e19834b83 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1133,6 +1133,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x6613)}, /* Onda H600/ZTE MF330 */ { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x0023)}, /* ONYX 3G device */ { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x9000)}, /* SIMCom SIM5218 */ + { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x9003), /* Quectel UC20 */ + .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_300) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6003), From 75d46a71e0b86770c9f0f81d9745e16e04a27239 Mon Sep 17 00:00:00 2001 From: Govindraj Raja Date: Mon, 29 Feb 2016 11:41:20 +0000 Subject: [PATCH 405/418] MIPS: scache: Fix scache init with invalid line size. commit 56fa81fc9a5445938f3aa2e63d15ab63dc938ad6 upstream. In current scache init cache line_size is determined from cpu config register, however if there there no scache then mips_sc_probe_cm3 function populates a invalid line_size of 2. The invalid line_size can cause a NULL pointer deference during r4k_dma_cache_inv as r4k_blast_scache is populated based on line_size. Scache line_size of 2 is invalid option in r4k_blast_scache_setup. This issue was faced during a MIPS I6400 based virtual platform bring up where scache was not available in virtual platform model. Signed-off-by: Govindraj Raja Fixes: 7d53e9c4cd21("MIPS: CM3: Add support for CM3 L2 cache.") Cc: Paul Burton Cc: James Hogan Cc: Ralf Baechle Cc: James Hartley Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/12710/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/mm/sc-mips.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/arch/mips/mm/sc-mips.c b/arch/mips/mm/sc-mips.c index 3bd0597d9c3d..ddb8154610cc 100644 --- a/arch/mips/mm/sc-mips.c +++ b/arch/mips/mm/sc-mips.c @@ -164,11 +164,13 @@ static int __init mips_sc_probe_cm3(void) sets = cfg & CM_GCR_L2_CONFIG_SET_SIZE_MSK; sets >>= CM_GCR_L2_CONFIG_SET_SIZE_SHF; - c->scache.sets = 64 << sets; + if (sets) + c->scache.sets = 64 << sets; line_sz = cfg & CM_GCR_L2_CONFIG_LINE_SIZE_MSK; line_sz >>= CM_GCR_L2_CONFIG_LINE_SIZE_SHF; - c->scache.linesz = 2 << line_sz; + if (line_sz) + c->scache.linesz = 2 << line_sz; assoc = cfg & CM_GCR_L2_CONFIG_ASSOC_MSK; assoc >>= CM_GCR_L2_CONFIG_ASSOC_SHF; @@ -176,9 +178,12 @@ static int __init mips_sc_probe_cm3(void) c->scache.waysize = c->scache.sets * c->scache.linesz; c->scache.waybit = __ffs(c->scache.waysize); - c->scache.flags &= ~MIPS_CACHE_NOT_PRESENT; + if (c->scache.linesz) { + c->scache.flags &= ~MIPS_CACHE_NOT_PRESENT; + return 1; + } - return 1; + return 0; } void __weak platform_early_l2_init(void) From 8eb8cd6c307984bdc568308f5853c4b4485156dd Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Fri, 4 Mar 2016 01:42:49 +0000 Subject: [PATCH 406/418] MIPS: traps: Fix SIGFPE information leak from `do_ov' and `do_trap_or_bp' commit e723e3f7f9591b79e8c56b3d7c5a204a9c571b55 upstream. Avoid sending a partially initialised `siginfo_t' structure along SIGFPE signals issued from `do_ov' and `do_trap_or_bp', leading to information leaking from the kernel stack. Signed-off-by: Maciej W. Rozycki Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/traps.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 886cb1976e90..ca9a81007489 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -690,15 +690,15 @@ static int simulate_sync(struct pt_regs *regs, unsigned int opcode) asmlinkage void do_ov(struct pt_regs *regs) { enum ctx_state prev_state; - siginfo_t info; + siginfo_t info = { + .si_signo = SIGFPE, + .si_code = FPE_INTOVF, + .si_addr = (void __user *)regs->cp0_epc, + }; prev_state = exception_enter(); die_if_kernel("Integer overflow", regs); - info.si_code = FPE_INTOVF; - info.si_signo = SIGFPE; - info.si_errno = 0; - info.si_addr = (void __user *) regs->cp0_epc; force_sig_info(SIGFPE, &info, current); exception_exit(prev_state); } @@ -874,7 +874,7 @@ out: void do_trap_or_bp(struct pt_regs *regs, unsigned int code, const char *str) { - siginfo_t info; + siginfo_t info = { 0 }; char b[40]; #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP @@ -903,7 +903,6 @@ void do_trap_or_bp(struct pt_regs *regs, unsigned int code, else info.si_code = FPE_INTOVF; info.si_signo = SIGFPE; - info.si_errno = 0; info.si_addr = (void __user *) regs->cp0_epc; force_sig_info(SIGFPE, &info, current); break; From 816e8b31fceb50ebf0f4a9dea92fc64fc75b5999 Mon Sep 17 00:00:00 2001 From: Frederic Barrat Date: Wed, 24 Feb 2016 18:27:51 +0100 Subject: [PATCH 407/418] cxl: Fix PSL timebase synchronization detection commit 923adb1646d5ba739d2a1e63ee20d60574d9da8e upstream. The PSL timebase synchronization is seemingly failing for configuration not including VIRT_CPU_ACCOUNTING_NATIVE. The driver shows the following trace in dmesg: PSL: Timebase sync: giving up! The PSL timebase register is actually syncing correctly, but the cxl driver is not detecting it. Fix is to use the proper timebase-to-time conversion. Signed-off-by: Frederic Barrat Acked-by: Michael Neuling Reviewed-by: Matthew R. Ochs Acked-by: Ian Munsie Reviewed-by: Andrew Donnellan Reviewed-by: Vaibhav Jain Signed-off-by: Michael Ellerman Signed-off-by: Greg Kroah-Hartman --- drivers/misc/cxl/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index 85761d7eb333..be2c8e248e2e 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -414,7 +414,7 @@ static int cxl_setup_psl_timebase(struct cxl *adapter, struct pci_dev *dev) delta = mftb() - psl_tb; if (delta < 0) delta = -delta; - } while (cputime_to_usecs(delta) > 16); + } while (tb_to_ns(delta) > 16000); return 0; } From c5c2ce3560b6b28666ef337fd33653165eb97b78 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Sun, 21 Feb 2016 10:53:03 +0100 Subject: [PATCH 408/418] ubi: Fix out of bounds write in volume update code commit e4f6daac20332448529b11f09388f1d55ef2084c upstream. ubi_start_leb_change() allocates too few bytes. ubi_more_leb_change_data() will write up to req->upd_bytes + ubi->min_io_size bytes. Signed-off-by: Richard Weinberger Reviewed-by: Boris Brezillon Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/ubi/upd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/ubi/upd.c b/drivers/mtd/ubi/upd.c index 2a1b6e037e1a..0134ba32a057 100644 --- a/drivers/mtd/ubi/upd.c +++ b/drivers/mtd/ubi/upd.c @@ -193,7 +193,7 @@ int ubi_start_leb_change(struct ubi_device *ubi, struct ubi_volume *vol, vol->changing_leb = 1; vol->ch_lnum = req->lnum; - vol->upd_buf = vmalloc(req->bytes); + vol->upd_buf = vmalloc(ALIGN((int)req->bytes, ubi->min_io_size)); if (!vol->upd_buf) return -ENOMEM; From b252f82aebb00e629203c4c685e41147c32f1226 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sun, 21 Feb 2016 15:16:48 +0100 Subject: [PATCH 409/418] i2c: brcmstb: allocate correct amount of memory for regmap commit 7314d22a2f5bd40468d57768be368c3d9b4bd726 upstream. We want the size of the struct, not of a pointer to it. To be future proof, just dereference the pointer to get the desired type. Fixes: dd1aa2524bc5 ("i2c: brcmstb: Add Broadcom settop SoC i2c controller driver") Acked-by: Gregory Fong Acked-by: Florian Fainelli Reviewed-by: Kamal Dasu Signed-off-by: Wolfram Sang Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-brcmstb.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-brcmstb.c b/drivers/i2c/busses/i2c-brcmstb.c index 8e9637eea512..81115abf3c1f 100644 --- a/drivers/i2c/busses/i2c-brcmstb.c +++ b/drivers/i2c/busses/i2c-brcmstb.c @@ -562,8 +562,7 @@ static int brcmstb_i2c_probe(struct platform_device *pdev) if (!dev) return -ENOMEM; - dev->bsc_regmap = devm_kzalloc(&pdev->dev, sizeof(struct bsc_regs *), - GFP_KERNEL); + dev->bsc_regmap = devm_kzalloc(&pdev->dev, sizeof(*dev->bsc_regmap), GFP_KERNEL); if (!dev->bsc_regmap) return -ENOMEM; From e6f54e7f5770f91d3058902967ab995e802ba1c2 Mon Sep 17 00:00:00 2001 From: Javi Merino Date: Thu, 11 Feb 2016 12:00:51 +0000 Subject: [PATCH 410/418] thermal: cpu_cooling: fix out of bounds access in time_in_idle commit a53b8394ec3c67255928df6ee9cc99dd1cd452e3 upstream. In __cpufreq_cooling_register() we allocate the arrays for time_in_idle and time_in_idle_timestamp to be as big as the number of cpus in this cpufreq device. However, in get_load() we access this array using the cpu number as index, which can result in an out of bound access. Index time_in_idle{,_timestamp} using the index in the cpufreq_device's allowed_cpus mask, as we do for the load_cpu array in cpufreq_get_requested_power() Reported-by: Nicolas Boichat Cc: Amit Daniel Kachhap Cc: Zhang Rui Cc: Eduardo Valentin Tested-by: Nicolas Boichat Acked-by: Viresh Kumar Signed-off-by: Javi Merino Signed-off-by: Eduardo Valentin Signed-off-by: Greg Kroah-Hartman --- drivers/thermal/cpu_cooling.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index e3fbc5a5d88f..6ceac4f2d4b2 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -377,26 +377,28 @@ static u32 cpu_power_to_freq(struct cpufreq_cooling_device *cpufreq_device, * get_load() - get load for a cpu since last updated * @cpufreq_device: &struct cpufreq_cooling_device for this cpu * @cpu: cpu number + * @cpu_idx: index of the cpu in cpufreq_device->allowed_cpus * * Return: The average load of cpu @cpu in percentage since this * function was last called. */ -static u32 get_load(struct cpufreq_cooling_device *cpufreq_device, int cpu) +static u32 get_load(struct cpufreq_cooling_device *cpufreq_device, int cpu, + int cpu_idx) { u32 load; u64 now, now_idle, delta_time, delta_idle; now_idle = get_cpu_idle_time(cpu, &now, 0); - delta_idle = now_idle - cpufreq_device->time_in_idle[cpu]; - delta_time = now - cpufreq_device->time_in_idle_timestamp[cpu]; + delta_idle = now_idle - cpufreq_device->time_in_idle[cpu_idx]; + delta_time = now - cpufreq_device->time_in_idle_timestamp[cpu_idx]; if (delta_time <= delta_idle) load = 0; else load = div64_u64(100 * (delta_time - delta_idle), delta_time); - cpufreq_device->time_in_idle[cpu] = now_idle; - cpufreq_device->time_in_idle_timestamp[cpu] = now; + cpufreq_device->time_in_idle[cpu_idx] = now_idle; + cpufreq_device->time_in_idle_timestamp[cpu_idx] = now; return load; } @@ -598,7 +600,7 @@ static int cpufreq_get_requested_power(struct thermal_cooling_device *cdev, u32 load; if (cpu_online(cpu)) - load = get_load(cpufreq_device, cpu); + load = get_load(cpufreq_device, cpu, i); else load = 0; From 7f74146b5f52660da8119a8fa47c3874363cc037 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Tue, 19 Jan 2016 17:59:46 +0900 Subject: [PATCH 411/418] drm/amdgpu: Use drm_calloc_large for VM page_tables array MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 9571e1d84042f5670df9fabdcbe7dd5da3abe43e upstream. It can be big, depending on the VM address space size, which is tunable via the vm_size module parameter. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=93721 Reviewed-by: Christian König Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 39adbb6470d1..8c5ec151ddac 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1248,7 +1248,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) { const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE, AMDGPU_VM_PTE_COUNT * 8); - unsigned pd_size, pd_entries, pts_size; + unsigned pd_size, pd_entries; int i, r; for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { @@ -1266,8 +1266,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) pd_entries = amdgpu_vm_num_pdes(adev); /* allocate page table array */ - pts_size = pd_entries * sizeof(struct amdgpu_vm_pt); - vm->page_tables = kzalloc(pts_size, GFP_KERNEL); + vm->page_tables = drm_calloc_large(pd_entries, sizeof(struct amdgpu_vm_pt)); if (vm->page_tables == NULL) { DRM_ERROR("Cannot allocate memory for page table array\n"); return -ENOMEM; @@ -1327,7 +1326,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) for (i = 0; i < amdgpu_vm_num_pdes(adev); i++) amdgpu_bo_unref(&vm->page_tables[i].bo); - kfree(vm->page_tables); + drm_free_large(vm->page_tables); amdgpu_bo_unref(&vm->page_directory); fence_put(vm->page_directory_fence); From 0528bdbc4448afd76ca54eb7b4f81dafdf3ba68a Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 26 Feb 2016 23:40:51 +0800 Subject: [PATCH 412/418] block: check virt boundary in bio_will_gap() commit e0af29171aa8912e1ca95023b75ef336cd70d661 upstream. In the following patch, the way for figuring out the last bvec will be changed with a bit cost introduced, so return immediately if the queue doesn't have virt boundary limit. Actually most of devices have not this limit. Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Ming Lei Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- include/linux/blkdev.h | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c70e3588a48c..7f2f0f61d156 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1367,6 +1367,13 @@ static inline void put_dev_sector(Sector p) page_cache_release(p.v); } +static inline bool __bvec_gap_to_prev(struct request_queue *q, + struct bio_vec *bprv, unsigned int offset) +{ + return offset || + ((bprv->bv_offset + bprv->bv_len) & queue_virt_boundary(q)); +} + /* * Check if adding a bio_vec after bprv with offset would create a gap in * the SG list. Most drivers don't care about this, but some do. @@ -1376,18 +1383,17 @@ static inline bool bvec_gap_to_prev(struct request_queue *q, { if (!queue_virt_boundary(q)) return false; - return offset || - ((bprv->bv_offset + bprv->bv_len) & queue_virt_boundary(q)); + return __bvec_gap_to_prev(q, bprv, offset); } static inline bool bio_will_gap(struct request_queue *q, struct bio *prev, struct bio *next) { - if (!bio_has_data(prev)) + if (!bio_has_data(prev) || !queue_virt_boundary(q)) return false; - return bvec_gap_to_prev(q, &prev->bi_io_vec[prev->bi_vcnt - 1], - next->bi_io_vec[0].bv_offset); + return __bvec_gap_to_prev(q, &prev->bi_io_vec[prev->bi_vcnt - 1], + next->bi_io_vec[0].bv_offset); } static inline bool req_gap_back_merge(struct request *req, struct bio *bio) From f4f0cca3c121f80706aa05b08f17f661df6396fc Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 26 Feb 2016 23:40:52 +0800 Subject: [PATCH 413/418] block: get the 1st and last bvec via helpers commit 25e71a99f10e444cd00bb2ebccb11e1c9fb672b1 upstream. This patch applies the two introduced helpers to figure out the 1st and last bvec, and fixes the original way after bio splitting. Reported-by: Sagi Grimberg Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Ming Lei Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- include/linux/blkdev.h | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7f2f0f61d156..168755791ec8 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1389,11 +1389,16 @@ static inline bool bvec_gap_to_prev(struct request_queue *q, static inline bool bio_will_gap(struct request_queue *q, struct bio *prev, struct bio *next) { - if (!bio_has_data(prev) || !queue_virt_boundary(q)) - return false; + if (bio_has_data(prev) && queue_virt_boundary(q)) { + struct bio_vec pb, nb; - return __bvec_gap_to_prev(q, &prev->bi_io_vec[prev->bi_vcnt - 1], - next->bi_io_vec[0].bv_offset); + bio_get_last_bvec(prev, &pb); + bio_get_first_bvec(next, &nb); + + return __bvec_gap_to_prev(q, &pb, nb.bv_offset); + } + + return false; } static inline bool req_gap_back_merge(struct request *req, struct bio *bio) From bdfa7f6e5338729b810b6a073d12addf4d85d7c4 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Thu, 26 Nov 2015 12:03:51 +0100 Subject: [PATCH 414/418] drm/i915: more virtual south bridge detection commit 39bfcd5235e07e95ad3e70eab8e0b85db181de9e upstream. Commit "30c964a drm/i915: Detect virtual south bridge" detects and handles the southbridge emulated by vmware esx. Add the ich9 south bridge emulated by 'qemu -M q35'. Signed-off-by: Gerd Hoffmann Signed-off-by: Daniel Vetter Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_drv.c | 3 ++- drivers/gpu/drm/i915/i915_drv.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 760e0ce4aa26..a3c482e59930 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -531,7 +531,8 @@ void intel_detect_pch(struct drm_device *dev) dev_priv->pch_type = PCH_SPT; DRM_DEBUG_KMS("Found SunrisePoint LP PCH\n"); WARN_ON(!IS_SKYLAKE(dev)); - } else if (id == INTEL_PCH_P2X_DEVICE_ID_TYPE) { + } else if ((id == INTEL_PCH_P2X_DEVICE_ID_TYPE) || + (id == INTEL_PCH_QEMU_DEVICE_ID_TYPE)) { dev_priv->pch_type = intel_virt_detect_pch(dev); } else continue; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index f4af19a0d569..d3ce4da6a6ad 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2614,6 +2614,7 @@ struct drm_i915_cmd_table { #define INTEL_PCH_SPT_DEVICE_ID_TYPE 0xA100 #define INTEL_PCH_SPT_LP_DEVICE_ID_TYPE 0x9D00 #define INTEL_PCH_P2X_DEVICE_ID_TYPE 0x7100 +#define INTEL_PCH_QEMU_DEVICE_ID_TYPE 0x2900 /* qemu q35 has 2918 */ #define INTEL_PCH_TYPE(dev) (__I915__(dev)->pch_type) #define HAS_PCH_SPT(dev) (INTEL_PCH_TYPE(dev) == PCH_SPT) From 74b2c72122a8145554afaeb6e98dda23b077387e Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Mon, 25 Jan 2016 12:02:28 +0100 Subject: [PATCH 415/418] drm/i915: refine qemu south bridge detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f2e305108faba0c85eb4ba4066599decb675117e upstream. The test for the qemu q35 south bridge added by commit "39bfcd52 drm/i915: more virtual south bridge detection" also matches on real hardware. Having the check for virtual systems last in the list is not enough to avoid that ... Refine the check by additionally verifying the pci subsystem id to see whenever it *really* is qemu. [ v2: fix subvendor tyops ] Reported-and-tested-by: Bjørn Mork Signed-off-by: Gerd Hoffmann Tested-by: Bruno Wolff III Cc: drm-intel-fixes@lists.freedesktop.org Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1453719748-10944-1-git-send-email-kraxel@redhat.com (cherry picked from commit 1e859111c128265f8d62b39ff322e42b1ddb5a20) Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_drv.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index a3c482e59930..a6ad938f44a6 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -532,7 +532,9 @@ void intel_detect_pch(struct drm_device *dev) DRM_DEBUG_KMS("Found SunrisePoint LP PCH\n"); WARN_ON(!IS_SKYLAKE(dev)); } else if ((id == INTEL_PCH_P2X_DEVICE_ID_TYPE) || - (id == INTEL_PCH_QEMU_DEVICE_ID_TYPE)) { + ((id == INTEL_PCH_QEMU_DEVICE_ID_TYPE) && + pch->subsystem_vendor == 0x1af4 && + pch->subsystem_device == 0x1100)) { dev_priv->pch_type = intel_virt_detect_pch(dev); } else continue; From 610dde5afb2dbf7182eb0e6d56daa7e8700d52c4 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 3 Feb 2016 16:55:26 +1030 Subject: [PATCH 416/418] modules: fix longstanding /proc/kallsyms vs module insertion race. commit 8244062ef1e54502ef55f54cced659913f244c3e upstream. For CONFIG_KALLSYMS, we keep two symbol tables and two string tables. There's one full copy, marked SHF_ALLOC and laid out at the end of the module's init section. There's also a cut-down version that only contains core symbols and strings, and lives in the module's core section. After module init (and before we free the module memory), we switch the mod->symtab, mod->num_symtab and mod->strtab to point to the core versions. We do this under the module_mutex. However, kallsyms doesn't take the module_mutex: it uses preempt_disable() and rcu tricks to walk through the modules, because it's used in the oops path. It's also used in /proc/kallsyms. There's nothing atomic about the change of these variables, so we can get the old (larger!) num_symtab and the new symtab pointer; in fact this is what I saw when trying to reproduce. By grouping these variables together, we can use a carefully-dereferenced pointer to ensure we always get one or the other (the free of the module init section is already done in an RCU callback, so that's safe). We allocate the init one at the end of the module init section, and keep the core one inside the struct module itself (it could also have been allocated at the end of the module core, but that's probably overkill). [ Rebased for 4.4-stable and older, because the following changes aren't in the older trees: - e0224418516b4d8a6c2160574bac18447c354ef0: adds arg to is_core_symbol - 7523e4dc5057e157212b4741abd6256e03404cf1: module_init/module_core/init_size/core_size become init_layout.base/core_layout.base/init_layout.size/core_layout.size. ] Reported-by: Weilong Chen Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=111541 Cc: stable@kernel.org Signed-off-by: Rusty Russell Signed-off-by: Greg Kroah-Hartman --- include/linux/module.h | 17 ++++--- kernel/module.c | 110 +++++++++++++++++++++++++---------------- 2 files changed, 77 insertions(+), 50 deletions(-) diff --git a/include/linux/module.h b/include/linux/module.h index 3a19c79918e0..b229a9961d02 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -302,6 +302,12 @@ struct mod_tree_node { struct latch_tree_node node; }; +struct mod_kallsyms { + Elf_Sym *symtab; + unsigned int num_symtab; + char *strtab; +}; + struct module { enum module_state state; @@ -411,14 +417,9 @@ struct module { #endif #ifdef CONFIG_KALLSYMS - /* - * We keep the symbol and string tables for kallsyms. - * The core_* fields below are temporary, loader-only (they - * could really be discarded after module init). - */ - Elf_Sym *symtab, *core_symtab; - unsigned int num_symtab, core_num_syms; - char *strtab, *core_strtab; + /* Protected by RCU and/or module_mutex: use rcu_dereference() */ + struct mod_kallsyms *kallsyms; + struct mod_kallsyms core_kallsyms; /* Section attributes */ struct module_sect_attrs *sect_attrs; diff --git a/kernel/module.c b/kernel/module.c index 14833e6d5e37..0e5c71195f18 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -327,6 +327,9 @@ struct load_info { struct _ddebug *debug; unsigned int num_debug; bool sig_ok; +#ifdef CONFIG_KALLSYMS + unsigned long mod_kallsyms_init_off; +#endif struct { unsigned int sym, str, mod, vers, info, pcpu; } index; @@ -2492,10 +2495,21 @@ static void layout_symtab(struct module *mod, struct load_info *info) strsect->sh_flags |= SHF_ALLOC; strsect->sh_entsize = get_offset(mod, &mod->init_size, strsect, info->index.str) | INIT_OFFSET_MASK; - mod->init_size = debug_align(mod->init_size); pr_debug("\t%s\n", info->secstrings + strsect->sh_name); + + /* We'll tack temporary mod_kallsyms on the end. */ + mod->init_size = ALIGN(mod->init_size, + __alignof__(struct mod_kallsyms)); + info->mod_kallsyms_init_off = mod->init_size; + mod->init_size += sizeof(struct mod_kallsyms); + mod->init_size = debug_align(mod->init_size); } +/* + * We use the full symtab and strtab which layout_symtab arranged to + * be appended to the init section. Later we switch to the cut-down + * core-only ones. + */ static void add_kallsyms(struct module *mod, const struct load_info *info) { unsigned int i, ndst; @@ -2504,28 +2518,33 @@ static void add_kallsyms(struct module *mod, const struct load_info *info) char *s; Elf_Shdr *symsec = &info->sechdrs[info->index.sym]; - mod->symtab = (void *)symsec->sh_addr; - mod->num_symtab = symsec->sh_size / sizeof(Elf_Sym); + /* Set up to point into init section. */ + mod->kallsyms = mod->module_init + info->mod_kallsyms_init_off; + + mod->kallsyms->symtab = (void *)symsec->sh_addr; + mod->kallsyms->num_symtab = symsec->sh_size / sizeof(Elf_Sym); /* Make sure we get permanent strtab: don't use info->strtab. */ - mod->strtab = (void *)info->sechdrs[info->index.str].sh_addr; + mod->kallsyms->strtab = (void *)info->sechdrs[info->index.str].sh_addr; /* Set types up while we still have access to sections. */ - for (i = 0; i < mod->num_symtab; i++) - mod->symtab[i].st_info = elf_type(&mod->symtab[i], info); + for (i = 0; i < mod->kallsyms->num_symtab; i++) + mod->kallsyms->symtab[i].st_info + = elf_type(&mod->kallsyms->symtab[i], info); - mod->core_symtab = dst = mod->module_core + info->symoffs; - mod->core_strtab = s = mod->module_core + info->stroffs; - src = mod->symtab; - for (ndst = i = 0; i < mod->num_symtab; i++) { + /* Now populate the cut down core kallsyms for after init. */ + mod->core_kallsyms.symtab = dst = mod->module_core + info->symoffs; + mod->core_kallsyms.strtab = s = mod->module_core + info->stroffs; + src = mod->kallsyms->symtab; + for (ndst = i = 0; i < mod->kallsyms->num_symtab; i++) { if (i == 0 || is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum)) { dst[ndst] = src[i]; - dst[ndst++].st_name = s - mod->core_strtab; - s += strlcpy(s, &mod->strtab[src[i].st_name], + dst[ndst++].st_name = s - mod->core_kallsyms.strtab; + s += strlcpy(s, &mod->kallsyms->strtab[src[i].st_name], KSYM_NAME_LEN) + 1; } } - mod->core_num_syms = ndst; + mod->core_kallsyms.num_symtab = ndst; } #else static inline void layout_symtab(struct module *mod, struct load_info *info) @@ -3274,9 +3293,8 @@ static noinline int do_init_module(struct module *mod) module_put(mod); trim_init_extable(mod); #ifdef CONFIG_KALLSYMS - mod->num_symtab = mod->core_num_syms; - mod->symtab = mod->core_symtab; - mod->strtab = mod->core_strtab; + /* Switch to core kallsyms now init is done: kallsyms may be walking! */ + rcu_assign_pointer(mod->kallsyms, &mod->core_kallsyms); #endif mod_tree_remove_init(mod); unset_module_init_ro_nx(mod); @@ -3646,9 +3664,9 @@ static inline int is_arm_mapping_symbol(const char *str) && (str[2] == '\0' || str[2] == '.'); } -static const char *symname(struct module *mod, unsigned int symnum) +static const char *symname(struct mod_kallsyms *kallsyms, unsigned int symnum) { - return mod->strtab + mod->symtab[symnum].st_name; + return kallsyms->strtab + kallsyms->symtab[symnum].st_name; } static const char *get_ksymbol(struct module *mod, @@ -3658,6 +3676,7 @@ static const char *get_ksymbol(struct module *mod, { unsigned int i, best = 0; unsigned long nextval; + struct mod_kallsyms *kallsyms = rcu_dereference_sched(mod->kallsyms); /* At worse, next value is at end of module */ if (within_module_init(addr, mod)) @@ -3667,32 +3686,32 @@ static const char *get_ksymbol(struct module *mod, /* Scan for closest preceding symbol, and next symbol. (ELF starts real symbols at 1). */ - for (i = 1; i < mod->num_symtab; i++) { - if (mod->symtab[i].st_shndx == SHN_UNDEF) + for (i = 1; i < kallsyms->num_symtab; i++) { + if (kallsyms->symtab[i].st_shndx == SHN_UNDEF) continue; /* We ignore unnamed symbols: they're uninformative * and inserted at a whim. */ - if (*symname(mod, i) == '\0' - || is_arm_mapping_symbol(symname(mod, i))) + if (*symname(kallsyms, i) == '\0' + || is_arm_mapping_symbol(symname(kallsyms, i))) continue; - if (mod->symtab[i].st_value <= addr - && mod->symtab[i].st_value > mod->symtab[best].st_value) + if (kallsyms->symtab[i].st_value <= addr + && kallsyms->symtab[i].st_value > kallsyms->symtab[best].st_value) best = i; - if (mod->symtab[i].st_value > addr - && mod->symtab[i].st_value < nextval) - nextval = mod->symtab[i].st_value; + if (kallsyms->symtab[i].st_value > addr + && kallsyms->symtab[i].st_value < nextval) + nextval = kallsyms->symtab[i].st_value; } if (!best) return NULL; if (size) - *size = nextval - mod->symtab[best].st_value; + *size = nextval - kallsyms->symtab[best].st_value; if (offset) - *offset = addr - mod->symtab[best].st_value; - return symname(mod, best); + *offset = addr - kallsyms->symtab[best].st_value; + return symname(kallsyms, best); } /* For kallsyms to ask for address resolution. NULL means not found. Careful @@ -3782,18 +3801,21 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, preempt_disable(); list_for_each_entry_rcu(mod, &modules, list) { + struct mod_kallsyms *kallsyms; + if (mod->state == MODULE_STATE_UNFORMED) continue; - if (symnum < mod->num_symtab) { - *value = mod->symtab[symnum].st_value; - *type = mod->symtab[symnum].st_info; - strlcpy(name, symname(mod, symnum), KSYM_NAME_LEN); + kallsyms = rcu_dereference_sched(mod->kallsyms); + if (symnum < kallsyms->num_symtab) { + *value = kallsyms->symtab[symnum].st_value; + *type = kallsyms->symtab[symnum].st_info; + strlcpy(name, symname(kallsyms, symnum), KSYM_NAME_LEN); strlcpy(module_name, mod->name, MODULE_NAME_LEN); *exported = is_exported(name, *value, mod); preempt_enable(); return 0; } - symnum -= mod->num_symtab; + symnum -= kallsyms->num_symtab; } preempt_enable(); return -ERANGE; @@ -3802,11 +3824,12 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, static unsigned long mod_find_symname(struct module *mod, const char *name) { unsigned int i; + struct mod_kallsyms *kallsyms = rcu_dereference_sched(mod->kallsyms); - for (i = 0; i < mod->num_symtab; i++) - if (strcmp(name, symname(mod, i)) == 0 && - mod->symtab[i].st_info != 'U') - return mod->symtab[i].st_value; + for (i = 0; i < kallsyms->num_symtab; i++) + if (strcmp(name, symname(kallsyms, i)) == 0 && + kallsyms->symtab[i].st_info != 'U') + return kallsyms->symtab[i].st_value; return 0; } @@ -3845,11 +3868,14 @@ int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *, module_assert_mutex(); list_for_each_entry(mod, &modules, list) { + /* We hold module_mutex: no need for rcu_dereference_sched */ + struct mod_kallsyms *kallsyms = mod->kallsyms; + if (mod->state == MODULE_STATE_UNFORMED) continue; - for (i = 0; i < mod->num_symtab; i++) { - ret = fn(data, symname(mod, i), - mod, mod->symtab[i].st_value); + for (i = 0; i < kallsyms->num_symtab; i++) { + ret = fn(data, symname(kallsyms, i), + mod, kallsyms->symtab[i].st_value); if (ret != 0) return ret; } From 53e609099daa023ad7771ec8351202f2a7bee1c1 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 7 Mar 2016 18:40:45 -0500 Subject: [PATCH 417/418] drm/amdgpu: fix topaz/tonga gmc assignment in 4.4 stable When upstream commit 429c45deae6e57f1bb91bfb05b671063fb0cef60 was applied to 4.4 as d60703ca942e8d044d61360bc9792fcab54b95d0 it applied incorrectly to the tonga_ip_blocks array rather than the topaz_ip_blocks array. Fix that up here. Bug: https://bugzilla.kernel.org/show_bug.cgi?id=113951 Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/vi.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 7628eb44cce2..3e9cbe398151 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -1082,10 +1082,10 @@ static const struct amdgpu_ip_block_version topaz_ip_blocks[] = }, { .type = AMD_IP_BLOCK_TYPE_GMC, - .major = 8, - .minor = 0, + .major = 7, + .minor = 4, .rev = 0, - .funcs = &gmc_v8_0_ip_funcs, + .funcs = &gmc_v7_0_ip_funcs, }, { .type = AMD_IP_BLOCK_TYPE_IH, @@ -1129,10 +1129,10 @@ static const struct amdgpu_ip_block_version tonga_ip_blocks[] = }, { .type = AMD_IP_BLOCK_TYPE_GMC, - .major = 7, - .minor = 4, + .major = 8, + .minor = 0, .rev = 0, - .funcs = &gmc_v7_0_ip_funcs, + .funcs = &gmc_v8_0_ip_funcs, }, { .type = AMD_IP_BLOCK_TYPE_IH, From 62e21959dc6f25c5fce0c1a0934e4a9d982bf99b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 9 Mar 2016 15:35:58 -0800 Subject: [PATCH 418/418] Linux 4.4.5 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 344bc6f27ea1..d13322ade3a0 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 4 +SUBLEVEL = 5 EXTRAVERSION = NAME = Blurry Fish Butt