From 7ff40996bb2fff86c334f61bb7e154a55d56f901 Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oneukum@suse.com>
Date: Thu, 7 Nov 2019 09:48:01 +0100
Subject: [PATCH 01/81] CDC-NCM: handle incomplete transfer of MTU

[ Upstream commit 332f989a3b0041b810836c5c3747e59aad7e9d0b ]

A malicious device may give half an answer when asked
for its MTU. The driver will proceed after this with
a garbage MTU. Anything but a complete answer must be treated
as an error.

V2: used sizeof as request by Alexander

Reported-and-tested-by: syzbot+0631d878823ce2411636@syzkaller.appspotmail.com
Signed-off-by: Oliver Neukum <oneukum@suse.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/usb/cdc_ncm.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index 442efbccd005..71ef895b4dca 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -533,8 +533,8 @@ static void cdc_ncm_set_dgram_size(struct usbnet *dev, int new_size)
 	/* read current mtu value from device */
 	err = usbnet_read_cmd(dev, USB_CDC_GET_MAX_DATAGRAM_SIZE,
 			      USB_TYPE_CLASS | USB_DIR_IN | USB_RECIP_INTERFACE,
-			      0, iface_no, &max_datagram_size, 2);
-	if (err < 0) {
+			      0, iface_no, &max_datagram_size, sizeof(max_datagram_size));
+	if (err < sizeof(max_datagram_size)) {
 		dev_dbg(&dev->intf->dev, "GET_MAX_DATAGRAM_SIZE failed\n");
 		goto out;
 	}
@@ -545,7 +545,7 @@ static void cdc_ncm_set_dgram_size(struct usbnet *dev, int new_size)
 	max_datagram_size = cpu_to_le16(ctx->max_datagram_size);
 	err = usbnet_write_cmd(dev, USB_CDC_SET_MAX_DATAGRAM_SIZE,
 			       USB_TYPE_CLASS | USB_DIR_OUT | USB_RECIP_INTERFACE,
-			       0, iface_no, &max_datagram_size, 2);
+			       0, iface_no, &max_datagram_size, sizeof(max_datagram_size));
 	if (err < 0)
 		dev_dbg(&dev->intf->dev, "SET_MAX_DATAGRAM_SIZE failed\n");
 

From 817e5140ed12cfb67f8d33de75dccce03895230d Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 7 Nov 2019 20:08:19 -0800
Subject: [PATCH 02/81] net: fix data-race in neigh_event_send()

[ Upstream commit 1b53d64435d56902fc234ff2507142d971a09687 ]

KCSAN reported the following data-race [1]

The fix will also prevent the compiler from optimizing out
the condition.

[1]

BUG: KCSAN: data-race in neigh_resolve_output / neigh_resolve_output

write to 0xffff8880a41dba78 of 8 bytes by interrupt on cpu 1:
 neigh_event_send include/net/neighbour.h:443 [inline]
 neigh_resolve_output+0x78/0x480 net/core/neighbour.c:1474
 neigh_output include/net/neighbour.h:511 [inline]
 ip_finish_output2+0x4af/0xe40 net/ipv4/ip_output.c:228
 __ip_finish_output net/ipv4/ip_output.c:308 [inline]
 __ip_finish_output+0x23a/0x490 net/ipv4/ip_output.c:290
 ip_finish_output+0x41/0x160 net/ipv4/ip_output.c:318
 NF_HOOK_COND include/linux/netfilter.h:294 [inline]
 ip_output+0xdf/0x210 net/ipv4/ip_output.c:432
 dst_output include/net/dst.h:436 [inline]
 ip_local_out+0x74/0x90 net/ipv4/ip_output.c:125
 __ip_queue_xmit+0x3a8/0xa40 net/ipv4/ip_output.c:532
 ip_queue_xmit+0x45/0x60 include/net/ip.h:237
 __tcp_transmit_skb+0xe81/0x1d60 net/ipv4/tcp_output.c:1169
 tcp_transmit_skb net/ipv4/tcp_output.c:1185 [inline]
 __tcp_retransmit_skb+0x4bd/0x15f0 net/ipv4/tcp_output.c:2976
 tcp_retransmit_skb+0x36/0x1a0 net/ipv4/tcp_output.c:2999
 tcp_retransmit_timer+0x719/0x16d0 net/ipv4/tcp_timer.c:515
 tcp_write_timer_handler+0x42d/0x510 net/ipv4/tcp_timer.c:598
 tcp_write_timer+0xd1/0xf0 net/ipv4/tcp_timer.c:618

read to 0xffff8880a41dba78 of 8 bytes by interrupt on cpu 0:
 neigh_event_send include/net/neighbour.h:442 [inline]
 neigh_resolve_output+0x57/0x480 net/core/neighbour.c:1474
 neigh_output include/net/neighbour.h:511 [inline]
 ip_finish_output2+0x4af/0xe40 net/ipv4/ip_output.c:228
 __ip_finish_output net/ipv4/ip_output.c:308 [inline]
 __ip_finish_output+0x23a/0x490 net/ipv4/ip_output.c:290
 ip_finish_output+0x41/0x160 net/ipv4/ip_output.c:318
 NF_HOOK_COND include/linux/netfilter.h:294 [inline]
 ip_output+0xdf/0x210 net/ipv4/ip_output.c:432
 dst_output include/net/dst.h:436 [inline]
 ip_local_out+0x74/0x90 net/ipv4/ip_output.c:125
 __ip_queue_xmit+0x3a8/0xa40 net/ipv4/ip_output.c:532
 ip_queue_xmit+0x45/0x60 include/net/ip.h:237
 __tcp_transmit_skb+0xe81/0x1d60 net/ipv4/tcp_output.c:1169
 tcp_transmit_skb net/ipv4/tcp_output.c:1185 [inline]
 __tcp_retransmit_skb+0x4bd/0x15f0 net/ipv4/tcp_output.c:2976
 tcp_retransmit_skb+0x36/0x1a0 net/ipv4/tcp_output.c:2999
 tcp_retransmit_timer+0x719/0x16d0 net/ipv4/tcp_timer.c:515
 tcp_write_timer_handler+0x42d/0x510 net/ipv4/tcp_timer.c:598

Reported by Kernel Concurrency Sanitizer on:
CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.4.0-rc3+ #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/neighbour.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index f6017ddc4ded..1c0d07376125 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -425,8 +425,8 @@ static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
 {
 	unsigned long now = jiffies;
 	
-	if (neigh->used != now)
-		neigh->used = now;
+	if (READ_ONCE(neigh->used) != now)
+		WRITE_ONCE(neigh->used, now);
 	if (!(neigh->nud_state&(NUD_CONNECTED|NUD_DELAY|NUD_PROBE)))
 		return __neigh_event_send(neigh, skb);
 	return 0;

From a5f9f1aca5d55a986e3faf648c7067d9ae24574f Mon Sep 17 00:00:00 2001
From: Pan Bian <bianpan2016@163.com>
Date: Tue, 5 Nov 2019 16:34:07 +0800
Subject: [PATCH 03/81] NFC: fdp: fix incorrect free object

[ Upstream commit 517ce4e93368938b204451285e53014549804868 ]

The address of fw_vsc_cfg is on stack. Releasing it with devm_kfree() is
incorrect, which may result in a system crash or other security impacts.
The expected object to free is *fw_vsc_cfg.

Signed-off-by: Pan Bian <bianpan2016@163.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/nfc/fdp/i2c.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nfc/fdp/i2c.c b/drivers/nfc/fdp/i2c.c
index a5d7332dfce5..a54e67cf0493 100644
--- a/drivers/nfc/fdp/i2c.c
+++ b/drivers/nfc/fdp/i2c.c
@@ -268,7 +268,7 @@ static void fdp_nci_i2c_read_device_properties(struct device *dev,
 						  *fw_vsc_cfg, len);
 
 		if (r) {
-			devm_kfree(dev, fw_vsc_cfg);
+			devm_kfree(dev, *fw_vsc_cfg);
 			goto vsc_read_err;
 		}
 	} else {

From 0c5f54da33876d187bd6baca050c7f980144248f Mon Sep 17 00:00:00 2001
From: Pan Bian <bianpan2016@163.com>
Date: Thu, 7 Nov 2019 09:33:20 +0800
Subject: [PATCH 04/81] NFC: st21nfca: fix double free

[ Upstream commit 99a8efbb6e30b72ac98cecf81103f847abffb1e5 ]

The variable nfcid_skb is not changed in the callee nfc_hci_get_param()
if error occurs. Consequently, the freed variable nfcid_skb will be
freed again, resulting in a double free bug. Set nfcid_skb to NULL after
releasing it to fix the bug.

Signed-off-by: Pan Bian <bianpan2016@163.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/nfc/st21nfca/core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/nfc/st21nfca/core.c b/drivers/nfc/st21nfca/core.c
index dd8b150fbffa..1826cd330468 100644
--- a/drivers/nfc/st21nfca/core.c
+++ b/drivers/nfc/st21nfca/core.c
@@ -726,6 +726,7 @@ static int st21nfca_hci_complete_target_discovered(struct nfc_hci_dev *hdev,
 							NFC_PROTO_FELICA_MASK;
 		} else {
 			kfree_skb(nfcid_skb);
+			nfcid_skb = NULL;
 			/* P2P in type A */
 			r = nfc_hci_get_param(hdev, ST21NFCA_RF_READER_F_GATE,
 					ST21NFCA_RF_READER_F_NFCID1,

From 9025d23cfc30c1c75f912533147ee85e826f7001 Mon Sep 17 00:00:00 2001
From: Manish Chopra <manishc@marvell.com>
Date: Fri, 8 Nov 2019 02:42:30 -0800
Subject: [PATCH 05/81] qede: fix NULL pointer deref in __qede_remove()

[ Upstream commit deabc87111c690097c03765ea017cd500f7376fc ]

While rebooting the system with SR-IOV vfs enabled leads
to below crash due to recurrence of __qede_remove() on the VF
devices (first from .shutdown() flow of the VF itself and
another from PF's .shutdown() flow executing pci_disable_sriov())

This patch adds a safeguard in __qede_remove() flow to fix this,
so that driver doesn't attempt to remove "already removed" devices.

[  194.360134] BUG: unable to handle kernel NULL pointer dereference at 00000000000008dc
[  194.360227] IP: [<ffffffffc03553c4>] __qede_remove+0x24/0x130 [qede]
[  194.360304] PGD 0
[  194.360325] Oops: 0000 [#1] SMP
[  194.360360] Modules linked in: tcp_lp fuse tun bridge stp llc devlink bonding ip_set nfnetlink ib_isert iscsi_target_mod ib_srpt target_core_mod ib_srp scsi_transport_srp scsi_tgt ib_ipoib ib_umad rpcrdma sunrpc rdma_ucm ib_uverbs ib_iser rdma_cm iw_cm ib_cm libiscsi scsi_transport_iscsi dell_smbios iTCO_wdt iTCO_vendor_support dell_wmi_descriptor dcdbas vfat fat pcc_cpufreq skx_edac intel_powerclamp coretemp intel_rapl iosf_mbi kvm_intel kvm irqbypass crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd qedr ib_core pcspkr ses enclosure joydev ipmi_ssif sg i2c_i801 lpc_ich mei_me mei wmi ipmi_si ipmi_devintf ipmi_msghandler tpm_crb acpi_pad acpi_power_meter xfs libcrc32c sd_mod crc_t10dif crct10dif_generic crct10dif_pclmul crct10dif_common crc32c_intel mgag200
[  194.361044]  qede i2c_algo_bit drm_kms_helper qed syscopyarea sysfillrect nvme sysimgblt fb_sys_fops ttm nvme_core mpt3sas crc8 ptp drm pps_core ahci raid_class scsi_transport_sas libahci libata drm_panel_orientation_quirks nfit libnvdimm dm_mirror dm_region_hash dm_log dm_mod [last unloaded: ip_tables]
[  194.361297] CPU: 51 PID: 7996 Comm: reboot Kdump: loaded Not tainted 3.10.0-1062.el7.x86_64 #1
[  194.361359] Hardware name: Dell Inc. PowerEdge MX840c/0740HW, BIOS 2.4.6 10/15/2019
[  194.361412] task: ffff9cea9b360000 ti: ffff9ceabebdc000 task.ti: ffff9ceabebdc000
[  194.361463] RIP: 0010:[<ffffffffc03553c4>]  [<ffffffffc03553c4>] __qede_remove+0x24/0x130 [qede]
[  194.361534] RSP: 0018:ffff9ceabebdfac0  EFLAGS: 00010282
[  194.361570] RAX: 0000000000000000 RBX: ffff9cd013846098 RCX: 0000000000000000
[  194.361621] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff9cd013846098
[  194.361668] RBP: ffff9ceabebdfae8 R08: 0000000000000000 R09: 0000000000000000
[  194.361715] R10: 00000000bfe14201 R11: ffff9ceabfe141e0 R12: 0000000000000000
[  194.361762] R13: ffff9cd013846098 R14: 0000000000000000 R15: ffff9ceab5e48000
[  194.361810] FS:  00007f799c02d880(0000) GS:ffff9ceacb0c0000(0000) knlGS:0000000000000000
[  194.361865] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  194.361903] CR2: 00000000000008dc CR3: 0000001bdac76000 CR4: 00000000007607e0
[  194.361953] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  194.362002] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[  194.362051] PKRU: 55555554
[  194.362073] Call Trace:
[  194.362109]  [<ffffffffc0355500>] qede_remove+0x10/0x20 [qede]
[  194.362180]  [<ffffffffb97d0f3e>] pci_device_remove+0x3e/0xc0
[  194.362240]  [<ffffffffb98b3c52>] __device_release_driver+0x82/0xf0
[  194.362285]  [<ffffffffb98b3ce3>] device_release_driver+0x23/0x30
[  194.362343]  [<ffffffffb97c86d4>] pci_stop_bus_device+0x84/0xa0
[  194.362388]  [<ffffffffb97c87e2>] pci_stop_and_remove_bus_device+0x12/0x20
[  194.362450]  [<ffffffffb97f153f>] pci_iov_remove_virtfn+0xaf/0x160
[  194.362496]  [<ffffffffb97f1aec>] sriov_disable+0x3c/0xf0
[  194.362534]  [<ffffffffb97f1bc3>] pci_disable_sriov+0x23/0x30
[  194.362599]  [<ffffffffc02f83c3>] qed_sriov_disable+0x5e3/0x650 [qed]
[  194.362658]  [<ffffffffb9622df6>] ? kfree+0x106/0x140
[  194.362709]  [<ffffffffc02cc0c0>] ? qed_free_stream_mem+0x70/0x90 [qed]
[  194.362754]  [<ffffffffb9622df6>] ? kfree+0x106/0x140
[  194.362803]  [<ffffffffc02cd659>] qed_slowpath_stop+0x1a9/0x1d0 [qed]
[  194.362854]  [<ffffffffc035544e>] __qede_remove+0xae/0x130 [qede]
[  194.362904]  [<ffffffffc03554e0>] qede_shutdown+0x10/0x20 [qede]
[  194.362956]  [<ffffffffb97cf90a>] pci_device_shutdown+0x3a/0x60
[  194.363010]  [<ffffffffb98b180b>] device_shutdown+0xfb/0x1f0
[  194.363066]  [<ffffffffb94b66c6>] kernel_restart_prepare+0x36/0x40
[  194.363107]  [<ffffffffb94b66e2>] kernel_restart+0x12/0x60
[  194.363146]  [<ffffffffb94b6959>] SYSC_reboot+0x229/0x260
[  194.363196]  [<ffffffffb95f200d>] ? handle_mm_fault+0x39d/0x9b0
[  194.363253]  [<ffffffffb942b621>] ? __switch_to+0x151/0x580
[  194.363304]  [<ffffffffb9b7ec28>] ? __schedule+0x448/0x9c0
[  194.363343]  [<ffffffffb94b69fe>] SyS_reboot+0xe/0x10
[  194.363387]  [<ffffffffb9b8bede>] system_call_fastpath+0x25/0x2a
[  194.363430] Code: f9 e9 37 ff ff ff 90 0f 1f 44 00 00 55 48 89 e5 41 57 41 56 41 55 4c 8d af 98 00 00 00 41 54 4c 89 ef 41 89 f4 53 e8 4c e4 55 f9 <80> b8 dc 08 00 00 01 48 89 c3 4c 8d b8 c0 08 00 00 4c 8b b0 c0
[  194.363712] RIP  [<ffffffffc03553c4>] __qede_remove+0x24/0x130 [qede]
[  194.363764]  RSP <ffff9ceabebdfac0>
[  194.363791] CR2: 00000000000008dc

Signed-off-by: Manish Chopra <manishc@marvell.com>
Signed-off-by: Ariel Elior <aelior@marvell.com>
Signed-off-by: Sudarsana Kalluru <skalluru@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/qlogic/qede/qede_main.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index f4657a2e730a..8b63c9d183a2 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -1465,8 +1465,16 @@ enum qede_remove_mode {
 static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode)
 {
 	struct net_device *ndev = pci_get_drvdata(pdev);
-	struct qede_dev *edev = netdev_priv(ndev);
-	struct qed_dev *cdev = edev->cdev;
+	struct qede_dev *edev;
+	struct qed_dev *cdev;
+
+	if (!ndev) {
+		dev_info(&pdev->dev, "Device has already been removed\n");
+		return;
+	}
+
+	edev = netdev_priv(ndev);
+	cdev = edev->cdev;
 
 	DP_INFO(edev, "Starting qede_remove\n");
 

From 73463008aa79f22edafaf922ec6f8100b0e66bdc Mon Sep 17 00:00:00 2001
From: Pan Bian <bianpan2016@163.com>
Date: Thu, 7 Nov 2019 14:29:50 +0800
Subject: [PATCH 06/81] nfc: netlink: fix double device reference drop

[ Upstream commit 025ec40b81d785a98f76b8bdb509ac10773b4f12 ]

The function nfc_put_device(dev) is called twice to drop the reference
to dev when there is no associated local llcp. Remove one of them to fix
the bug.

Fixes: 52feb444a903 ("NFC: Extend netlink interface for LTO, RW, and MIUX parameters support")
Fixes: d9b8d8e19b07 ("NFC: llcp: Service Name Lookup netlink interface")
Signed-off-by: Pan Bian <bianpan2016@163.com>
Reviewed-by: Johan Hovold <johan@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/nfc/netlink.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index 80def98c9dba..04d4c388a7a8 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -1066,7 +1066,6 @@ static int nfc_genl_llc_set_params(struct sk_buff *skb, struct genl_info *info)
 
 	local = nfc_llcp_find_local(dev);
 	if (!local) {
-		nfc_put_device(dev);
 		rc = -ENODEV;
 		goto exit;
 	}
@@ -1126,7 +1125,6 @@ static int nfc_genl_llc_sdreq(struct sk_buff *skb, struct genl_info *info)
 
 	local = nfc_llcp_find_local(dev);
 	if (!local) {
-		nfc_put_device(dev);
 		rc = -ENODEV;
 		goto exit;
 	}

From 70f79945ade28f6045071c0ddc0b8b1e0899f84e Mon Sep 17 00:00:00 2001
From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Date: Sun, 3 Nov 2019 00:09:20 +0900
Subject: [PATCH 07/81] ALSA: bebob: fix to detect configured source of
 sampling clock for Focusrite Saffire Pro i/o series

commit 706ad6746a66546daf96d4e4a95e46faf6cf689a upstream.

For Focusrite Saffire Pro i/o, the lowest 8 bits of register represents
configured source of sampling clock. The next lowest 8 bits represents
whether the configured source is actually detected or not just after
the register is changed for the source.

Current implementation evaluates whole the register to detect configured
source. This results in failure due to the next lowest 8 bits when the
source is connected in advance.

This commit fixes the bug.

Fixes: 25784ec2d034 ("ALSA: bebob: Add support for Focusrite Saffire/SaffirePro series")
Cc: <stable@vger.kernel.org> # v3.16+
Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Link: https://lore.kernel.org/r/20191102150920.20367-1-o-takashi@sakamocchi.jp
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/firewire/bebob/bebob_focusrite.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/sound/firewire/bebob/bebob_focusrite.c b/sound/firewire/bebob/bebob_focusrite.c
index f11090057949..d0a8736613a1 100644
--- a/sound/firewire/bebob/bebob_focusrite.c
+++ b/sound/firewire/bebob/bebob_focusrite.c
@@ -28,6 +28,8 @@
 #define SAFFIRE_CLOCK_SOURCE_SPDIF		1
 
 /* clock sources as returned from register of Saffire Pro 10 and 26 */
+#define SAFFIREPRO_CLOCK_SOURCE_SELECT_MASK	0x000000ff
+#define SAFFIREPRO_CLOCK_SOURCE_DETECT_MASK	0x0000ff00
 #define SAFFIREPRO_CLOCK_SOURCE_INTERNAL	0
 #define SAFFIREPRO_CLOCK_SOURCE_SKIP		1 /* never used on hardware */
 #define SAFFIREPRO_CLOCK_SOURCE_SPDIF		2
@@ -190,6 +192,7 @@ saffirepro_both_clk_src_get(struct snd_bebob *bebob, unsigned int *id)
 		map = saffirepro_clk_maps[1];
 
 	/* In a case that this driver cannot handle the value of register. */
+	value &= SAFFIREPRO_CLOCK_SOURCE_SELECT_MASK;
 	if (value >= SAFFIREPRO_CLOCK_SOURCE_COUNT || map[value] < 0) {
 		err = -EIO;
 		goto end;

From 72f99285e2c1be4ecaff1149eeee635df1388cf7 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 5 Nov 2019 14:43:16 +0100
Subject: [PATCH 08/81] ALSA: hda/ca0132 - Fix possible workqueue stall

commit 15c2b3cc09a31620914955cb2a89c277c18ee999 upstream.

The unsolicited event handler for the headphone jack on CA0132 codec
driver tries to reschedule the another delayed work with
cancel_delayed_work_sync().  It's no good idea, unfortunately,
especially after we changed the work queue to the standard global
one; this may lead to a stall because both works are using the same
global queue.

Fix it by dropping the _sync but does call cancel_delayed_work()
instead.

Fixes: 993884f6a26c ("ALSA: hda/ca0132 - Delay HP amp turnon.")
BugLink: https://bugzilla.suse.com/show_bug.cgi?id=1155836
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20191105134316.19294-1-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/hda/patch_ca0132.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c
index c55c0131be0a..c0742ee11519 100644
--- a/sound/pci/hda/patch_ca0132.c
+++ b/sound/pci/hda/patch_ca0132.c
@@ -4440,7 +4440,7 @@ static void hp_callback(struct hda_codec *codec, struct hda_jack_callback *cb)
 	/* Delay enabling the HP amp, to let the mic-detection
 	 * state machine run.
 	 */
-	cancel_delayed_work_sync(&spec->unsol_hp_work);
+	cancel_delayed_work(&spec->unsol_hp_work);
 	schedule_delayed_work(&spec->unsol_hp_work, msecs_to_jiffies(500));
 	tbl = snd_hda_jack_tbl_get(codec, cb->nid);
 	if (tbl)

From a6d17aa291cad1d36331ba17cbcacd045381a3f1 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Tue, 5 Nov 2019 21:16:40 -0800
Subject: [PATCH 09/81] mm, vmstat: hide /proc/pagetypeinfo from normal users

commit abaed0112c1db08be15a784a2c5c8a8b3063cdd3 upstream.

/proc/pagetypeinfo is a debugging tool to examine internal page
allocator state wrt to fragmentation.  It is not very useful for any
other use so normal users really do not need to read this file.

Waiman Long has noticed that reading this file can have negative side
effects because zone->lock is necessary for gathering data and that a)
interferes with the page allocator and its users and b) can lead to hard
lockups on large machines which have very long free_list.

Reduce both issues by simply not exporting the file to regular users.

Link: http://lkml.kernel.org/r/20191025072610.18526-2-mhocko@kernel.org
Fixes: 467c996c1e19 ("Print out statistics in relation to fragmentation avoidance to /proc/pagetypeinfo")
Signed-off-by: Michal Hocko <mhocko@suse.com>
Reported-by: Waiman Long <longman@redhat.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Waiman Long <longman@redhat.com>
Acked-by: Rafael Aquini <aquini@redhat.com>
Acked-by: David Rientjes <rientjes@google.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Roman Gushchin <guro@fb.com>
Cc: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Cc: Jann Horn <jannh@google.com>
Cc: Song Liu <songliubraving@fb.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/vmstat.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/vmstat.c b/mm/vmstat.c
index 59e131e82b81..9d8936c7b40d 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1589,7 +1589,7 @@ static int __init setup_vmstat(void)
 #endif
 #ifdef CONFIG_PROC_FS
 	proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations);
-	proc_create("pagetypeinfo", S_IRUGO, NULL, &pagetypeinfo_file_ops);
+	proc_create("pagetypeinfo", 0400, NULL, &pagetypeinfo_file_ops);
 	proc_create("vmstat", S_IRUGO, NULL, &proc_vmstat_file_operations);
 	proc_create("zoneinfo", S_IRUGO, NULL, &proc_zoneinfo_file_operations);
 #endif

From b9b3132491724147e7c13503b9bd59cafb9bd506 Mon Sep 17 00:00:00 2001
From: Kevin Hao <haokexin@gmail.com>
Date: Tue, 5 Nov 2019 21:16:57 -0800
Subject: [PATCH 10/81] dump_stack: avoid the livelock of the dump_lock

commit 5cbf2fff3bba8d3c6a4d47c1754de1cf57e2b01f upstream.

In the current code, we use the atomic_cmpxchg() to serialize the output
of the dump_stack(), but this implementation suffers the thundering herd
problem.  We have observed such kind of livelock on a Marvell cn96xx
board(24 cpus) when heavily using the dump_stack() in a kprobe handler.
Actually we can let the competitors to wait for the releasing of the
lock before jumping to atomic_cmpxchg().  This will definitely mitigate
the thundering herd problem.  Thanks Linus for the suggestion.

[akpm@linux-foundation.org: fix comment]
Link: http://lkml.kernel.org/r/20191030031637.6025-1-haokexin@gmail.com
Fixes: b58d977432c8 ("dump_stack: serialize the output from dump_stack()")
Signed-off-by: Kevin Hao <haokexin@gmail.com>
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 lib/dump_stack.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/lib/dump_stack.c b/lib/dump_stack.c
index c30d07e99dba..72de6444934d 100644
--- a/lib/dump_stack.c
+++ b/lib/dump_stack.c
@@ -44,7 +44,12 @@ retry:
 		was_locked = 1;
 	} else {
 		local_irq_restore(flags);
-		cpu_relax();
+		/*
+		 * Wait for the lock to release before jumping to
+		 * atomic_cmpxchg() in order to mitigate the thundering herd
+		 * problem.
+		 */
+		do { cpu_relax(); } while (atomic_read(&dump_lock) != -1);
 		goto retry;
 	}
 

From 9aeedfc71ce8427c0c770f38df34f47e73f27cfa Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Tue, 5 Nov 2019 00:27:11 +0100
Subject: [PATCH 11/81] perf tools: Fix time sorting

commit 722ddfde366fd46205456a9c5ff9b3359dc9a75e upstream.

The final sort might get confused when the comparison is done over
bigger numbers than int like for -s time.

Check the following report for longer workloads:

  $ perf report -s time -F time,overhead --stdio

Fix hist_entry__sort() to properly return int64_t and not possible cut
int.

Fixes: 043ca389a318 ("perf tools: Use hpp formats to sort final output")
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: stable@vger.kernel.org # v3.16+
Link: http://lore.kernel.org/lkml/20191104232711.16055-1-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/perf/util/hist.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index f6720afa9f34..97ebd1d3646d 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -1080,7 +1080,7 @@ void hists__collapse_resort(struct hists *hists, struct ui_progress *prog)
 	}
 }
 
-static int hist_entry__sort(struct hist_entry *a, struct hist_entry *b)
+static int64_t hist_entry__sort(struct hist_entry *a, struct hist_entry *b)
 {
 	struct perf_hpp_fmt *fmt;
 	int64_t cmp = 0;

From b7ac794ec5af527003a5120ddff299192c061592 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Wed, 30 Oct 2019 10:21:28 -0400
Subject: [PATCH 12/81] drm/radeon: fix si_enable_smc_cac() failed issue

commit 2c409ba81be25516afe05ae27a4a15da01740b01 upstream.

Need to set the dte flag on this asic.

Port the fix from amdgpu:
5cb818b861be114 ("drm/amd/amdgpu: fix si_enable_smc_cac() failed issue")

Reviewed-by: Yong Zhao <yong.zhao@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/radeon/si_dpm.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c
index 892d0a71d766..57724a3afe78 100644
--- a/drivers/gpu/drm/radeon/si_dpm.c
+++ b/drivers/gpu/drm/radeon/si_dpm.c
@@ -1956,6 +1956,7 @@ static void si_initialize_powertune_defaults(struct radeon_device *rdev)
 		case 0x682C:
 			si_pi->cac_weights = cac_weights_cape_verde_pro;
 			si_pi->dte_data = dte_data_sun_xt;
+			update_dte_from_pl2 = true;
 			break;
 		case 0x6825:
 		case 0x6827:

From 2c5e0e605073445ea22bb3bd55eac9df9803a08c Mon Sep 17 00:00:00 2001
From: Luis Henriques <lhenriques@suse.com>
Date: Fri, 25 Oct 2019 14:05:24 +0100
Subject: [PATCH 13/81] ceph: fix use-after-free in __ceph_remove_cap()

commit ea60ed6fcf29eebc78f2ce91491e6309ee005a01 upstream.

KASAN reports a use-after-free when running xfstest generic/531, with the
following trace:

[  293.903362]  kasan_report+0xe/0x20
[  293.903365]  rb_erase+0x1f/0x790
[  293.903370]  __ceph_remove_cap+0x201/0x370
[  293.903375]  __ceph_remove_caps+0x4b/0x70
[  293.903380]  ceph_evict_inode+0x4e/0x360
[  293.903386]  evict+0x169/0x290
[  293.903390]  __dentry_kill+0x16f/0x250
[  293.903394]  dput+0x1c6/0x440
[  293.903398]  __fput+0x184/0x330
[  293.903404]  task_work_run+0xb9/0xe0
[  293.903410]  exit_to_usermode_loop+0xd3/0xe0
[  293.903413]  do_syscall_64+0x1a0/0x1c0
[  293.903417]  entry_SYSCALL_64_after_hwframe+0x44/0xa9

This happens because __ceph_remove_cap() may queue a cap release
(__ceph_queue_cap_release) which can be scheduled before that cap is
removed from the inode list with

	rb_erase(&cap->ci_node, &ci->i_caps);

And, when this finally happens, the use-after-free will occur.

This can be fixed by removing the cap from the inode list before being
removed from the session list, and thus eliminating the risk of an UAF.

Cc: stable@vger.kernel.org
Signed-off-by: Luis Henriques <lhenriques@suse.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ceph/caps.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index e137ff6cd9da..aa4df4a02252 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -926,6 +926,11 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
 
 	dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode);
 
+	/* remove from inode's cap rbtree, and clear auth cap */
+	rb_erase(&cap->ci_node, &ci->i_caps);
+	if (ci->i_auth_cap == cap)
+		ci->i_auth_cap = NULL;
+
 	/* remove from session list */
 	spin_lock(&session->s_cap_lock);
 	if (session->s_cap_iterator == cap) {
@@ -961,11 +966,6 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
 
 	spin_unlock(&session->s_cap_lock);
 
-	/* remove from inode list */
-	rb_erase(&cap->ci_node, &ci->i_caps);
-	if (ci->i_auth_cap == cap)
-		ci->i_auth_cap = NULL;
-
 	if (removed)
 		ceph_put_cap(mdsc, cap);
 

From c1bbd9808c4072302b1bf65a46f2953a3d7ed3b6 Mon Sep 17 00:00:00 2001
From: Alexandru Ardelean <alexandru.ardelean@analog.com>
Date: Tue, 8 Oct 2019 17:15:37 +0300
Subject: [PATCH 14/81] iio: imu: adis16480: make sure provided frequency is
 positive

commit 24e1eb5c0d78cfb9750b690bbe997d4d59170258 upstream.

It could happen that either `val` or `val2` [provided from userspace] is
negative. In that case the computed frequency could get a weird value.

Fix this by checking that neither of the 2 variables is negative, and check
that the computed result is not-zero.

Fixes: e4f959390178 ("iio: imu: adis16480 switch sampling frequency attr to core support")
Signed-off-by: Alexandru Ardelean <alexandru.ardelean@analog.com>
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/iio/imu/adis16480.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/iio/imu/adis16480.c b/drivers/iio/imu/adis16480.c
index 1880105cc8c4..778a46247f8d 100644
--- a/drivers/iio/imu/adis16480.c
+++ b/drivers/iio/imu/adis16480.c
@@ -266,8 +266,11 @@ static int adis16480_set_freq(struct iio_dev *indio_dev, int val, int val2)
 	struct adis16480 *st = iio_priv(indio_dev);
 	unsigned int t;
 
+	if (val < 0 || val2 < 0)
+		return -EINVAL;
+
 	t =  val * 1000 + val2 / 1000;
-	if (t <= 0)
+	if (t == 0)
 		return -EINVAL;
 
 	t = 2460000 / t;

From b10f57eb8982c4601d7e830c5490d98ebe3c775f Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Thu, 31 Oct 2019 11:06:24 +0100
Subject: [PATCH 15/81] netfilter: nf_tables: Align nft_expr private data to
 64-bit

commit 250367c59e6ba0d79d702a059712d66edacd4a1a upstream.

Invoking the following commands on a 32-bit architecture with strict
alignment requirements (such as an ARMv7-based Raspberry Pi) results
in an alignment exception:

 # nft add table ip test-ip4
 # nft add chain ip test-ip4 output { type filter hook output priority 0; }
 # nft add rule  ip test-ip4 output quota 1025 bytes

Alignment trap: not handling instruction e1b26f9f at [<7f4473f8>]
Unhandled fault: alignment exception (0x001) at 0xb832e824
Internal error: : 1 [#1] PREEMPT SMP ARM
Hardware name: BCM2835
[<7f4473fc>] (nft_quota_do_init [nft_quota])
[<7f447448>] (nft_quota_init [nft_quota])
[<7f4260d0>] (nf_tables_newrule [nf_tables])
[<7f4168dc>] (nfnetlink_rcv_batch [nfnetlink])
[<7f416bd0>] (nfnetlink_rcv [nfnetlink])
[<8078b334>] (netlink_unicast)
[<8078b664>] (netlink_sendmsg)
[<8071b47c>] (sock_sendmsg)
[<8071bd18>] (___sys_sendmsg)
[<8071ce3c>] (__sys_sendmsg)
[<8071ce94>] (sys_sendmsg)

The reason is that nft_quota_do_init() calls atomic64_set() on an
atomic64_t which is only aligned to 32-bit, not 64-bit, because it
succeeds struct nft_expr in memory which only contains a 32-bit pointer.
Fix by aligning the nft_expr private data to 64-bit.

Fixes: 96518518cc41 ("netfilter: add nftables")
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Cc: stable@vger.kernel.org # v3.13+
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/netfilter/nf_tables.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 4bd7508bedc9..b96df7499600 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -648,7 +648,8 @@ struct nft_expr_ops {
  */
 struct nft_expr {
 	const struct nft_expr_ops	*ops;
-	unsigned char			data[];
+	unsigned char			data[]
+		__attribute__((aligned(__alignof__(u64))));
 };
 
 static inline void *nft_expr_priv(const struct nft_expr *expr)

From 119f371c0936b1182ef75978be09a145f9d36b56 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Sat, 24 Aug 2019 17:49:55 +0300
Subject: [PATCH 16/81] netfilter: ipset: Fix an error code in
 ip_set_sockfn_get()

commit 30b7244d79651460ff114ba8f7987ed94c86b99a upstream.

The copy_to_user() function returns the number of bytes remaining to be
copied.  In this code, that positive return is checked at the end of the
function and we return zero/success.  What we should do instead is
return -EFAULT.

Fixes: a7b4f989a629 ("netfilter: ipset: IP set core support")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Jozsef Kadlecsik <kadlec@netfilter.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/netfilter/ipset/ip_set_core.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 54f3d7cb23e6..caa26184f7e3 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -1930,8 +1930,9 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
 		}
 
 		req_version->version = IPSET_PROTOCOL;
-		ret = copy_to_user(user, req_version,
-				   sizeof(struct ip_set_req_version));
+		if (copy_to_user(user, req_version,
+				 sizeof(struct ip_set_req_version)))
+			ret = -EFAULT;
 		goto done;
 	}
 	case IP_SET_OP_GET_BYNAME: {
@@ -1988,7 +1989,8 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
 	}	/* end of switch(op) */
 
 copy:
-	ret = copy_to_user(user, data, copylen);
+	if (copy_to_user(user, data, copylen))
+		ret = -EFAULT;
 
 done:
 	vfree(data);

From 4096b1402ea49cc1fa825eb98cb7ceaa718c1679 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Tue, 1 Oct 2019 12:29:14 +0200
Subject: [PATCH 17/81] can: usb_8dev: fix use-after-free on disconnect

commit 3759739426186a924675651b388d1c3963c5710e upstream.

The driver was accessing its driver data after having freed it.

Fixes: 0024d8ad1639 ("can: usb_8dev: Add support for USB2CAN interface from 8 devices")
Cc: stable <stable@vger.kernel.org>     # 3.9
Cc: Bernd Krumboeck <b.krumboeck@gmail.com>
Cc: Wolfgang Grandegger <wg@grandegger.com>
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/can/usb/usb_8dev.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/can/usb/usb_8dev.c b/drivers/net/can/usb/usb_8dev.c
index 522286cc0f9c..50d9b945089e 100644
--- a/drivers/net/can/usb/usb_8dev.c
+++ b/drivers/net/can/usb/usb_8dev.c
@@ -1010,9 +1010,8 @@ static void usb_8dev_disconnect(struct usb_interface *intf)
 		netdev_info(priv->netdev, "device disconnected\n");
 
 		unregister_netdev(priv->netdev);
-		free_candev(priv->netdev);
-
 		unlink_all_urbs(priv);
+		free_candev(priv->netdev);
 	}
 
 }

From def6b03f667bce4b1465644a1d1b325336fddd63 Mon Sep 17 00:00:00 2001
From: Kurt Van Dijck <dev.kurt@vandijck-laurijssen.be>
Date: Tue, 1 Oct 2019 09:40:36 +0200
Subject: [PATCH 18/81] can: c_can: c_can_poll(): only read status register
 after status IRQ

commit 3cb3eaac52c0f145d895f4b6c22834d5f02b8569 upstream.

When the status register is read without the status IRQ pending, the
chip may not raise the interrupt line for an upcoming status interrupt
and the driver may miss a status interrupt.

It is critical that the BUSOFF status interrupt is forwarded to the
higher layers, since no more interrupts will follow without
intervention.

Thanks to Wolfgang and Joe for bringing up the first idea.

Signed-off-by: Kurt Van Dijck <dev.kurt@vandijck-laurijssen.be>
Cc: Wolfgang Grandegger <wg@grandegger.com>
Cc: Joe Burmeister <joe.burmeister@devtank.co.uk>
Fixes: fa39b54ccf28 ("can: c_can: Get rid of pointless interrupts")
Cc: linux-stable <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/can/c_can/c_can.c | 25 ++++++++++++++++++++-----
 drivers/net/can/c_can/c_can.h |  1 +
 2 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c
index e3dccd3200d5..7d35f6737499 100644
--- a/drivers/net/can/c_can/c_can.c
+++ b/drivers/net/can/c_can/c_can.c
@@ -97,6 +97,9 @@
 #define BTR_TSEG2_SHIFT		12
 #define BTR_TSEG2_MASK		(0x7 << BTR_TSEG2_SHIFT)
 
+/* interrupt register */
+#define INT_STS_PENDING		0x8000
+
 /* brp extension register */
 #define BRP_EXT_BRPE_MASK	0x0f
 #define BRP_EXT_BRPE_SHIFT	0
@@ -1029,10 +1032,16 @@ static int c_can_poll(struct napi_struct *napi, int quota)
 	u16 curr, last = priv->last_status;
 	int work_done = 0;
 
-	priv->last_status = curr = priv->read_reg(priv, C_CAN_STS_REG);
-	/* Ack status on C_CAN. D_CAN is self clearing */
-	if (priv->type != BOSCH_D_CAN)
-		priv->write_reg(priv, C_CAN_STS_REG, LEC_UNUSED);
+	/* Only read the status register if a status interrupt was pending */
+	if (atomic_xchg(&priv->sie_pending, 0)) {
+		priv->last_status = curr = priv->read_reg(priv, C_CAN_STS_REG);
+		/* Ack status on C_CAN. D_CAN is self clearing */
+		if (priv->type != BOSCH_D_CAN)
+			priv->write_reg(priv, C_CAN_STS_REG, LEC_UNUSED);
+	} else {
+		/* no change detected ... */
+		curr = last;
+	}
 
 	/* handle state changes */
 	if ((curr & STATUS_EWARN) && (!(last & STATUS_EWARN))) {
@@ -1083,10 +1092,16 @@ static irqreturn_t c_can_isr(int irq, void *dev_id)
 {
 	struct net_device *dev = (struct net_device *)dev_id;
 	struct c_can_priv *priv = netdev_priv(dev);
+	int reg_int;
 
-	if (!priv->read_reg(priv, C_CAN_INT_REG))
+	reg_int = priv->read_reg(priv, C_CAN_INT_REG);
+	if (!reg_int)
 		return IRQ_NONE;
 
+	/* save for later use */
+	if (reg_int & INT_STS_PENDING)
+		atomic_set(&priv->sie_pending, 1);
+
 	/* disable all interrupts and schedule the NAPI */
 	c_can_irq_control(priv, false);
 	napi_schedule(&priv->napi);
diff --git a/drivers/net/can/c_can/c_can.h b/drivers/net/can/c_can/c_can.h
index 8acdc7fa4792..d5567a7c1c6d 100644
--- a/drivers/net/can/c_can/c_can.h
+++ b/drivers/net/can/c_can/c_can.h
@@ -198,6 +198,7 @@ struct c_can_priv {
 	struct net_device *dev;
 	struct device *device;
 	atomic_t tx_active;
+	atomic_t sie_pending;
 	unsigned long tx_dir;
 	int last_status;
 	u16 (*read_reg) (const struct c_can_priv *priv, enum reg index);

From c41a025d641156a5694a7669af9efaf4fe7dea2a Mon Sep 17 00:00:00 2001
From: Stephane Grosjean <s.grosjean@peak-system.com>
Date: Tue, 8 Oct 2019 10:35:44 +0200
Subject: [PATCH 19/81] can: peak_usb: fix a potential out-of-sync while
 decoding packets

commit de280f403f2996679e2607384980703710576fed upstream.

When decoding a buffer received from PCAN-USB, the first timestamp read in
a packet is a 16-bit coded time base, and the next ones are an 8-bit
offset to this base, regardless of the type of packet read.

This patch corrects a potential loss of synchronization by using a
timestamp index read from the buffer, rather than an index of received
data packets, to determine on the sizeof the timestamp to be read from the
packet being decoded.

Signed-off-by: Stephane Grosjean <s.grosjean@peak-system.com>
Fixes: 46be265d3388 ("can: usb: PEAK-System Technik PCAN-USB specific part")
Cc: linux-stable <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/can/usb/peak_usb/pcan_usb.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/drivers/net/can/usb/peak_usb/pcan_usb.c b/drivers/net/can/usb/peak_usb/pcan_usb.c
index 838545ce468d..e626c2afbbb1 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb.c
@@ -108,7 +108,7 @@ struct pcan_usb_msg_context {
 	u8 *end;
 	u8 rec_cnt;
 	u8 rec_idx;
-	u8 rec_data_idx;
+	u8 rec_ts_idx;
 	struct net_device *netdev;
 	struct pcan_usb *pdev;
 };
@@ -552,10 +552,15 @@ static int pcan_usb_decode_status(struct pcan_usb_msg_context *mc,
 	mc->ptr += PCAN_USB_CMD_ARGS;
 
 	if (status_len & PCAN_USB_STATUSLEN_TIMESTAMP) {
-		int err = pcan_usb_decode_ts(mc, !mc->rec_idx);
+		int err = pcan_usb_decode_ts(mc, !mc->rec_ts_idx);
 
 		if (err)
 			return err;
+
+		/* Next packet in the buffer will have a timestamp on a single
+		 * byte
+		 */
+		mc->rec_ts_idx++;
 	}
 
 	switch (f) {
@@ -638,10 +643,13 @@ static int pcan_usb_decode_data(struct pcan_usb_msg_context *mc, u8 status_len)
 
 	cf->can_dlc = get_can_dlc(rec_len);
 
-	/* first data packet timestamp is a word */
-	if (pcan_usb_decode_ts(mc, !mc->rec_data_idx))
+	/* Only first packet timestamp is a word */
+	if (pcan_usb_decode_ts(mc, !mc->rec_ts_idx))
 		goto decode_failed;
 
+	/* Next packet in the buffer will have a timestamp on a single byte */
+	mc->rec_ts_idx++;
+
 	/* read data */
 	memset(cf->data, 0x0, sizeof(cf->data));
 	if (status_len & PCAN_USB_STATUSLEN_RTR) {
@@ -695,7 +703,6 @@ static int pcan_usb_decode_msg(struct peak_usb_device *dev, u8 *ibuf, u32 lbuf)
 		/* handle normal can frames here */
 		} else {
 			err = pcan_usb_decode_data(&mc, sl);
-			mc.rec_data_idx++;
 		}
 	}
 

From 7f18860337d74fdf79e7152bee7117d945ff6945 Mon Sep 17 00:00:00 2001
From: Navid Emamdoost <navid.emamdoost@gmail.com>
Date: Thu, 19 Sep 2019 21:44:38 -0500
Subject: [PATCH 20/81] can: gs_usb: gs_can_open(): prevent memory leak

commit fb5be6a7b4863ecc44963bb80ca614584b6c7817 upstream.

In gs_can_open() if usb_submit_urb() fails the allocated urb should be
released.

Fixes: d08e973a77d1 ("can: gs_usb: Added support for the GS_USB CAN devices")
Cc: linux-stable <stable@vger.kernel.org>
Signed-off-by: Navid Emamdoost <navid.emamdoost@gmail.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/can/usb/gs_usb.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c
index b227f81e4a7e..6982ab8777b7 100644
--- a/drivers/net/can/usb/gs_usb.c
+++ b/drivers/net/can/usb/gs_usb.c
@@ -617,6 +617,7 @@ static int gs_can_open(struct net_device *netdev)
 					   rc);
 
 				usb_unanchor_urb(urb);
+				usb_free_urb(urb);
 				break;
 			}
 

From 1afef2b7a5d8d97cee332aee1c4d5a96597c223d Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Wed, 23 Oct 2019 10:27:05 +0200
Subject: [PATCH 21/81] can: peak_usb: fix slab info leak

commit f7a1337f0d29b98733c8824e165fca3371d7d4fd upstream.

Fix a small slab info leak due to a failure to clear the command buffer
at allocation.

The first 16 bytes of the command buffer are always sent to the device
in pcan_usb_send_cmd() even though only the first two may have been
initialised in case no argument payload is provided (e.g. when waiting
for a response).

Fixes: bb4785551f64 ("can: usb: PEAK-System Technik USB adapters driver core")
Cc: stable <stable@vger.kernel.org>     # 3.4
Reported-by: syzbot+863724e7128e14b26732@syzkaller.appspotmail.com
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/can/usb/peak_usb/pcan_usb_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.c b/drivers/net/can/usb/peak_usb/pcan_usb_core.c
index b1d68f49b398..8c47cc8dc896 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_core.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.c
@@ -776,7 +776,7 @@ static int peak_usb_create_dev(const struct peak_usb_adapter *peak_usb_adapter,
 	dev = netdev_priv(netdev);
 
 	/* allocate a buffer large enough to send commands */
-	dev->cmd_buf = kmalloc(PCAN_USB_MAX_CMD_LEN, GFP_KERNEL);
+	dev->cmd_buf = kzalloc(PCAN_USB_MAX_CMD_LEN, GFP_KERNEL);
 	if (!dev->cmd_buf) {
 		err = -ENOMEM;
 		goto lbl_free_candev;

From d1d4c4364724bf33d5645da2d6dde3005f40aeb2 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <garsilva@embeddedor.com>
Date: Thu, 9 Feb 2017 01:49:56 -0600
Subject: [PATCH 22/81] drivers: usb: usbip: Add missing break statement to
 switch

commit 7c92e5fbf4dac0dd4dd41a0383adc54f16f403e2 upstream.

Add missing break statement to prevent the code for case
USB_PORT_FEAT_C_RESET falling through to the default case.

Addresses-Coverity-ID: 143155
Signed-off-by: Gustavo A. R. Silva <garsilva@embeddedor.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/usbip/vhci_hcd.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/usbip/vhci_hcd.c b/drivers/usb/usbip/vhci_hcd.c
index 4d68a1e9e878..3476d02967f7 100644
--- a/drivers/usb/usbip/vhci_hcd.c
+++ b/drivers/usb/usbip/vhci_hcd.c
@@ -303,6 +303,7 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
 			default:
 				break;
 			}
+			break;
 		default:
 			usbip_dbg_vhci_rh(" ClearPortFeature: default %x\n",
 					  wValue);

From 0120fc18b3472c30a2ab943819489a83139ebcf4 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 3 Aug 2019 11:51:18 -0400
Subject: [PATCH 23/81] configfs: fix a deadlock in configfs_symlink()

commit 351e5d869e5ac10cb40c78b5f2d7dfc816ad4587 upstream.

Configfs abuses symlink(2).  Unlike the normal filesystems, it
wants the target resolved at symlink(2) time, like link(2) would've
done.  The problem is that ->symlink() is called with the parent
directory locked exclusive, so resolving the target inside the
->symlink() is easily deadlocked.

Short of really ugly games in sys_symlink() itself, all we can
do is to unlock the parent before resolving the target and
relock it after.  However, that invalidates the checks done
by the caller of ->symlink(), so we have to
	* check that dentry is still where it used to be
(it couldn't have been moved, but it could've been unhashed)
	* recheck that it's still negative (somebody else
might've successfully created a symlink with the same name
while we were looking the target up)
	* recheck the permissions on the parent directory.

Cc: stable@vger.kernel.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/configfs/symlink.c | 33 ++++++++++++++++++++++++++++++++-
 1 file changed, 32 insertions(+), 1 deletion(-)

diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index 66e8c5d58b21..3af565e8fd51 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -157,11 +157,42 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna
 	    !type->ct_item_ops->allow_link)
 		goto out_put;
 
+	/*
+	 * This is really sick.  What they wanted was a hybrid of
+	 * link(2) and symlink(2) - they wanted the target resolved
+	 * at syscall time (as link(2) would've done), be a directory
+	 * (which link(2) would've refused to do) *AND* be a deep
+	 * fucking magic, making the target busy from rmdir POV.
+	 * symlink(2) is nothing of that sort, and the locking it
+	 * gets matches the normal symlink(2) semantics.  Without
+	 * attempts to resolve the target (which might very well
+	 * not even exist yet) done prior to locking the parent
+	 * directory.  This perversion, OTOH, needs to resolve
+	 * the target, which would lead to obvious deadlocks if
+	 * attempted with any directories locked.
+	 *
+	 * Unfortunately, that garbage is userland ABI and we should've
+	 * said "no" back in 2005.  Too late now, so we get to
+	 * play very ugly games with locking.
+	 *
+	 * Try *ANYTHING* of that sort in new code, and you will
+	 * really regret it.  Just ask yourself - what could a BOFH
+	 * do to me and do I want to find it out first-hand?
+	 *
+	 *  AV, a thoroughly annoyed bastard.
+	 */
+	inode_unlock(dir);
 	ret = get_target(symname, &path, &target_item, dentry->d_sb);
+	inode_lock(dir);
 	if (ret)
 		goto out_put;
 
-	ret = type->ct_item_ops->allow_link(parent_item, target_item);
+	if (dentry->d_inode || d_unhashed(dentry))
+		ret = -EEXIST;
+	else
+		ret = inode_permission(dir, MAY_WRITE | MAY_EXEC);
+	if (!ret)
+		ret = type->ct_item_ops->allow_link(parent_item, target_item);
 	if (!ret) {
 		mutex_lock(&configfs_symlink_mutex);
 		ret = create_link(parent_item, target_item, dentry);

From dd47fd5ba80a9bc37b80d4010fe0ecd1e4aef6b1 Mon Sep 17 00:00:00 2001
From: Vidya Sagar <vidyas@nvidia.com>
Date: Thu, 4 Jul 2019 20:34:28 +0530
Subject: [PATCH 24/81] PCI: tegra: Enable Relaxed Ordering only for Tegra20 &
 Tegra30

commit 7be142caabc4780b13a522c485abc806de5c4114 upstream.

The PCI Tegra controller conversion to a device tree configurable
driver in commit d1523b52bff3 ("PCI: tegra: Move PCIe driver
to drivers/pci/host") implied that code for the driver can be
compiled in for a kernel supporting multiple platforms.

Unfortunately, a blind move of the code did not check that some of the
quirks that were applied in arch/arm (eg enabling Relaxed Ordering on
all PCI devices - since the quirk hook erroneously matches PCI_ANY_ID
for both Vendor-ID and Device-ID) are now applied in all kernels that
compile the PCI Tegra controlled driver, DT and ACPI alike.

This is completely wrong, in that enablement of Relaxed Ordering is only
required by default in Tegra20 platforms as described in the Tegra20
Technical Reference Manual (available at
https://developer.nvidia.com/embedded/downloads#?search=tegra%202 in
Section 34.1, where it is mentioned that Relaxed Ordering bit needs to
be enabled in its root ports to avoid deadlock in hardware) and in the
Tegra30 platforms for the same reasons (unfortunately not documented
in the TRM).

There is no other strict requirement on PCI devices Relaxed Ordering
enablement on any other Tegra platforms or PCI host bridge driver.

Fix this quite upsetting situation by limiting the vendor and device IDs
to which the Relaxed Ordering quirk applies to the root ports in
question, reported above.

Signed-off-by: Vidya Sagar <vidyas@nvidia.com>
[lorenzo.pieralisi@arm.com: completely rewrote the commit log/fixes tag]
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Acked-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/pci/host/pci-tegra.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/pci/host/pci-tegra.c b/drivers/pci/host/pci-tegra.c
index 30323114c53c..9865793b538a 100644
--- a/drivers/pci/host/pci-tegra.c
+++ b/drivers/pci/host/pci-tegra.c
@@ -586,12 +586,15 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_NVIDIA, 0x0bf1, tegra_pcie_fixup_class);
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_NVIDIA, 0x0e1c, tegra_pcie_fixup_class);
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_NVIDIA, 0x0e1d, tegra_pcie_fixup_class);
 
-/* Tegra PCIE requires relaxed ordering */
+/* Tegra20 and Tegra30 PCIE requires relaxed ordering */
 static void tegra_pcie_relax_enable(struct pci_dev *dev)
 {
 	pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_RELAX_EN);
 }
-DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, tegra_pcie_relax_enable);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, 0x0bf0, tegra_pcie_relax_enable);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, 0x0bf1, tegra_pcie_relax_enable);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, 0x0e1c, tegra_pcie_relax_enable);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, 0x0e1d, tegra_pcie_relax_enable);
 
 static int tegra_pcie_setup(int nr, struct pci_sys_data *sys)
 {

From 9f21002d3b8aad007427820351eac2c531d34a55 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.com>
Date: Fri, 18 Oct 2019 16:04:58 +0200
Subject: [PATCH 25/81] scsi: qla2xxx: fixup incorrect usage of host_byte

[ Upstream commit 66cf50e65b183c863825f5c28a818e3f47a72e40 ]

DRIVER_ERROR is a a driver byte setting, not a host byte.  The qla2xxx
driver should rather return DID_ERROR here to be in line with the other
drivers.

Link: https://lore.kernel.org/r/20191018140458.108278-1-hare@suse.de
Signed-off-by: Hannes Reinecke <hare@suse.com>
Acked-by: Himanshu Madhani <hmadhani@marvell.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/scsi/qla2xxx/qla_bsg.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/scsi/qla2xxx/qla_bsg.c b/drivers/scsi/qla2xxx/qla_bsg.c
index c26acde797f0..2d5375d67736 100644
--- a/drivers/scsi/qla2xxx/qla_bsg.c
+++ b/drivers/scsi/qla2xxx/qla_bsg.c
@@ -252,7 +252,7 @@ qla2x00_process_els(struct fc_bsg_job *bsg_job)
 	srb_t *sp;
 	const char *type;
 	int req_sg_cnt, rsp_sg_cnt;
-	int rval =  (DRIVER_ERROR << 16);
+	int rval =  (DID_ERROR << 16);
 	uint16_t nextlid = 0;
 
 	if (bsg_job->request->msgcode == FC_BSG_RPT_ELS) {
@@ -426,7 +426,7 @@ qla2x00_process_ct(struct fc_bsg_job *bsg_job)
 	struct Scsi_Host *host = bsg_job->shost;
 	scsi_qla_host_t *vha = shost_priv(host);
 	struct qla_hw_data *ha = vha->hw;
-	int rval = (DRIVER_ERROR << 16);
+	int rval = (DID_ERROR << 16);
 	int req_sg_cnt, rsp_sg_cnt;
 	uint16_t loop_id;
 	struct fc_port *fcport;
@@ -1910,7 +1910,7 @@ qlafx00_mgmt_cmd(struct fc_bsg_job *bsg_job)
 	struct Scsi_Host *host = bsg_job->shost;
 	scsi_qla_host_t *vha = shost_priv(host);
 	struct qla_hw_data *ha = vha->hw;
-	int rval = (DRIVER_ERROR << 16);
+	int rval = (DID_ERROR << 16);
 	struct qla_mt_iocb_rqst_fx00 *piocb_rqst;
 	srb_t *sp;
 	int req_sg_cnt = 0, rsp_sg_cnt = 0;

From 43165d64b74ae355dadbcabbabfc21ec93181443 Mon Sep 17 00:00:00 2001
From: Daniel Wagner <dwagner@suse.de>
Date: Tue, 22 Oct 2019 09:21:12 +0200
Subject: [PATCH 26/81] scsi: lpfc: Honor module parameter lpfc_use_adisc

[ Upstream commit 0fd103ccfe6a06e40e2d9d8c91d96332cc9e1239 ]

The initial lpfc_desc_set_adisc implementation in commit
dea3101e0a5c ("lpfc: add Emulex FC driver version 8.0.28") enabled ADISC if

	cfg_use_adisc && RSCN_MODE && FCP_2_DEVICE

In commit 92d7f7b0cde3 ("[SCSI] lpfc: NPIV: add NPIV support on top of
SLI-3") this changed to

	(cfg_use_adisc && RSC_MODE) || FCP_2_DEVICE

and later in commit ffc954936b13 ("[SCSI] lpfc 8.3.13: FC Discovery Fixes
and enhancements.") to

	(cfg_use_adisc && RSC_MODE) || (FCP_2_DEVICE && FCP_TARGET)

A customer reports that after a devloss, an ADISC failure is logged. It
turns out the ADISC flag is set even the user explicitly set lpfc_use_adisc
= 0.

[Sat Dec 22 22:55:58 2018] lpfc 0000:82:00.0: 2:(0):0203 Devloss timeout on WWPN 50:01:43:80:12:8e:40:20 NPort x05df00 Data: x82000000 x8 xa
[Sat Dec 22 23:08:20 2018] lpfc 0000:82:00.0: 2:(0):2755 ADISC failure DID:05DF00 Status:x9/x70000

[mkp: fixed Hannes' email]

Fixes: 92d7f7b0cde3 ("[SCSI] lpfc: NPIV: add NPIV support on top of SLI-3")
Cc: Dick Kennedy <dick.kennedy@broadcom.com>
Cc: James Smart <james.smart@broadcom.com>
Link: https://lore.kernel.org/r/20191022072112.132268-1-dwagner@suse.de
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Daniel Wagner <dwagner@suse.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/scsi/lpfc/lpfc_nportdisc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c
index 193733e8c823..3a4613f9fb9f 100644
--- a/drivers/scsi/lpfc/lpfc_nportdisc.c
+++ b/drivers/scsi/lpfc/lpfc_nportdisc.c
@@ -759,9 +759,9 @@ lpfc_disc_set_adisc(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
 
 	if (!(vport->fc_flag & FC_PT2PT)) {
 		/* Check config parameter use-adisc or FCP-2 */
-		if ((vport->cfg_use_adisc && (vport->fc_flag & FC_RSCN_MODE)) ||
+		if (vport->cfg_use_adisc && ((vport->fc_flag & FC_RSCN_MODE) ||
 		    ((ndlp->nlp_fcp_info & NLP_FCP_2_DEVICE) &&
-		     (ndlp->nlp_type & NLP_FCP_TARGET))) {
+		     (ndlp->nlp_type & NLP_FCP_TARGET)))) {
 			spin_lock_irq(shost->host_lock);
 			ndlp->nlp_flag |= NLP_NPR_ADISC;
 			spin_unlock_irq(shost->host_lock);

From f00565130bde6f2e2906b6a961f095b5589d7393 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 23 Oct 2019 09:53:03 -0700
Subject: [PATCH 27/81] ipvs: move old_secure_tcp into struct netns_ipvs

[ Upstream commit c24b75e0f9239e78105f81c5f03a751641eb07ef ]

syzbot reported the following issue :

BUG: KCSAN: data-race in update_defense_level / update_defense_level

read to 0xffffffff861a6260 of 4 bytes by task 3006 on cpu 1:
 update_defense_level+0x621/0xb30 net/netfilter/ipvs/ip_vs_ctl.c:177
 defense_work_handler+0x3d/0xd0 net/netfilter/ipvs/ip_vs_ctl.c:225
 process_one_work+0x3d4/0x890 kernel/workqueue.c:2269
 worker_thread+0xa0/0x800 kernel/workqueue.c:2415
 kthread+0x1d4/0x200 drivers/block/aoe/aoecmd.c:1253
 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:352

write to 0xffffffff861a6260 of 4 bytes by task 7333 on cpu 0:
 update_defense_level+0xa62/0xb30 net/netfilter/ipvs/ip_vs_ctl.c:205
 defense_work_handler+0x3d/0xd0 net/netfilter/ipvs/ip_vs_ctl.c:225
 process_one_work+0x3d4/0x890 kernel/workqueue.c:2269
 worker_thread+0xa0/0x800 kernel/workqueue.c:2415
 kthread+0x1d4/0x200 drivers/block/aoe/aoecmd.c:1253
 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:352

Reported by Kernel Concurrency Sanitizer on:
CPU: 0 PID: 7333 Comm: kworker/0:5 Not tainted 5.4.0-rc3+ #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Workqueue: events defense_work_handler

Indeed, old_secure_tcp is currently a static variable, while it
needs to be a per netns variable.

Fixes: a0840e2e165a ("IPVS: netns, ip_vs_ctl local vars moved to ipvs struct.")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 include/net/ip_vs.h            |  1 +
 net/netfilter/ipvs/ip_vs_ctl.c | 15 +++++++--------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index a6cc576fd467..b0156f8a9ab7 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -880,6 +880,7 @@ struct netns_ipvs {
 	struct delayed_work	defense_work;   /* Work handler */
 	int			drop_rate;
 	int			drop_counter;
+	int			old_secure_tcp;
 	atomic_t		dropentry;
 	/* locks in ctl.c */
 	spinlock_t		dropentry_lock;  /* drop entry handling */
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 56c62b65923f..b176f76dfaa1 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -97,7 +97,6 @@ static bool __ip_vs_addr_is_local_v6(struct net *net,
 static void update_defense_level(struct netns_ipvs *ipvs)
 {
 	struct sysinfo i;
-	static int old_secure_tcp = 0;
 	int availmem;
 	int nomem;
 	int to_change = -1;
@@ -178,35 +177,35 @@ static void update_defense_level(struct netns_ipvs *ipvs)
 	spin_lock(&ipvs->securetcp_lock);
 	switch (ipvs->sysctl_secure_tcp) {
 	case 0:
-		if (old_secure_tcp >= 2)
+		if (ipvs->old_secure_tcp >= 2)
 			to_change = 0;
 		break;
 	case 1:
 		if (nomem) {
-			if (old_secure_tcp < 2)
+			if (ipvs->old_secure_tcp < 2)
 				to_change = 1;
 			ipvs->sysctl_secure_tcp = 2;
 		} else {
-			if (old_secure_tcp >= 2)
+			if (ipvs->old_secure_tcp >= 2)
 				to_change = 0;
 		}
 		break;
 	case 2:
 		if (nomem) {
-			if (old_secure_tcp < 2)
+			if (ipvs->old_secure_tcp < 2)
 				to_change = 1;
 		} else {
-			if (old_secure_tcp >= 2)
+			if (ipvs->old_secure_tcp >= 2)
 				to_change = 0;
 			ipvs->sysctl_secure_tcp = 1;
 		}
 		break;
 	case 3:
-		if (old_secure_tcp < 2)
+		if (ipvs->old_secure_tcp < 2)
 			to_change = 1;
 		break;
 	}
-	old_secure_tcp = ipvs->sysctl_secure_tcp;
+	ipvs->old_secure_tcp = ipvs->sysctl_secure_tcp;
 	if (to_change >= 0)
 		ip_vs_protocol_timeout_change(ipvs,
 					      ipvs->sysctl_secure_tcp > 1);

From 6c67be51113eb346a37be408776c0d34a2931b3f Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Mon, 21 Oct 2019 18:47:52 +0000
Subject: [PATCH 28/81] bonding: fix unexpected IFF_BONDING bit unset

[ Upstream commit 65de65d9033750d2cf1b336c9d6e9da3a8b5cc6e ]

The IFF_BONDING means bonding master or bonding slave device.
->ndo_add_slave() sets IFF_BONDING flag and ->ndo_del_slave() unsets
IFF_BONDING flag.

bond0<--bond1

Both bond0 and bond1 are bonding device and these should keep having
IFF_BONDING flag until they are removed.
But bond1 would lose IFF_BONDING at ->ndo_del_slave() because that routine
do not check whether the slave device is the bonding type or not.
This patch adds the interface type check routine before removing
IFF_BONDING flag.

Test commands:
    ip link add bond0 type bond
    ip link add bond1 type bond
    ip link set bond1 master bond0
    ip link set bond1 nomaster
    ip link del bond1 type bond
    ip link add bond1 type bond

Splat looks like:
[  226.665555] proc_dir_entry 'bonding/bond1' already registered
[  226.666440] WARNING: CPU: 0 PID: 737 at fs/proc/generic.c:361 proc_register+0x2a9/0x3e0
[  226.667571] Modules linked in: bonding af_packet sch_fq_codel ip_tables x_tables unix
[  226.668662] CPU: 0 PID: 737 Comm: ip Not tainted 5.4.0-rc3+ #96
[  226.669508] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
[  226.670652] RIP: 0010:proc_register+0x2a9/0x3e0
[  226.671612] Code: 89 fa 48 c1 ea 03 80 3c 02 00 0f 85 39 01 00 00 48 8b 04 24 48 89 ea 48 c7 c7 a0 0b 14 9f 48 8b b0 e
0 00 00 00 e8 07 e7 88 ff <0f> 0b 48 c7 c7 40 2d a5 9f e8 59 d6 23 01 48 8b 4c 24 10 48 b8 00
[  226.675007] RSP: 0018:ffff888050e17078 EFLAGS: 00010282
[  226.675761] RAX: dffffc0000000008 RBX: ffff88805fdd0f10 RCX: ffffffff9dd344e2
[  226.676757] RDX: 0000000000000001 RSI: 0000000000000008 RDI: ffff88806c9f6b8c
[  226.677751] RBP: ffff8880507160f3 R08: ffffed100d940019 R09: ffffed100d940019
[  226.678761] R10: 0000000000000001 R11: ffffed100d940018 R12: ffff888050716008
[  226.679757] R13: ffff8880507160f2 R14: dffffc0000000000 R15: ffffed100a0e2c1e
[  226.680758] FS:  00007fdc217cc0c0(0000) GS:ffff88806c800000(0000) knlGS:0000000000000000
[  226.681886] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  226.682719] CR2: 00007f49313424d0 CR3: 0000000050e46001 CR4: 00000000000606f0
[  226.683727] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  226.684725] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[  226.685681] Call Trace:
[  226.687089]  proc_create_seq_private+0xb3/0xf0
[  226.687778]  bond_create_proc_entry+0x1b3/0x3f0 [bonding]
[  226.691458]  bond_netdev_event+0x433/0x970 [bonding]
[  226.692139]  ? __module_text_address+0x13/0x140
[  226.692779]  notifier_call_chain+0x90/0x160
[  226.693401]  register_netdevice+0x9b3/0xd80
[  226.694010]  ? alloc_netdev_mqs+0x854/0xc10
[  226.694629]  ? netdev_change_features+0xa0/0xa0
[  226.695278]  ? rtnl_create_link+0x2ed/0xad0
[  226.695849]  bond_newlink+0x2a/0x60 [bonding]
[  226.696422]  __rtnl_newlink+0xb9f/0x11b0
[  226.696968]  ? rtnl_link_unregister+0x220/0x220
[ ... ]

Fixes: 0b680e753724 ("[PATCH] bonding: Add priv_flag to avoid event mishandling")
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/net/bonding/bond_main.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 1bf4f54c2bef..e31b4c7d2522 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1719,7 +1719,8 @@ err_detach:
 	slave_disable_netpoll(new_slave);
 
 err_close:
-	slave_dev->priv_flags &= ~IFF_BONDING;
+	if (!netif_is_bond_master(slave_dev))
+		slave_dev->priv_flags &= ~IFF_BONDING;
 	dev_close(slave_dev);
 
 err_restore_mac:
@@ -1915,7 +1916,8 @@ static int __bond_release_one(struct net_device *bond_dev,
 
 	dev_set_mtu(slave_dev, slave->original_mtu);
 
-	slave_dev->priv_flags &= ~IFF_BONDING;
+	if (!netif_is_bond_master(slave_dev))
+		slave_dev->priv_flags &= ~IFF_BONDING;
 
 	bond_free_slave(slave);
 

From 260f8a60277530a4066b184d519533791c70d10b Mon Sep 17 00:00:00 2001
From: Nikhil Badola <nikhil.badola@freescale.com>
Date: Mon, 21 Oct 2019 18:21:51 +0800
Subject: [PATCH 29/81] usb: fsl: Check memory resource before releasing it

[ Upstream commit bc1e3a2dd0c9954fd956ac43ca2876bbea018c01 ]

Check memory resource existence before releasing it to avoid NULL
pointer dereference

Signed-off-by: Nikhil Badola <nikhil.badola@freescale.com>
Reviewed-by: Ran Wang <ran.wang_1@nxp.com>
Reviewed-by: Peter Chen <peter.chen@nxp.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/usb/gadget/udc/fsl_udc_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/gadget/udc/fsl_udc_core.c b/drivers/usb/gadget/udc/fsl_udc_core.c
index 8991a4070792..bd98557caa28 100644
--- a/drivers/usb/gadget/udc/fsl_udc_core.c
+++ b/drivers/usb/gadget/udc/fsl_udc_core.c
@@ -2570,7 +2570,7 @@ static int fsl_udc_remove(struct platform_device *pdev)
 	dma_pool_destroy(udc_controller->td_pool);
 	free_irq(udc_controller->irq, udc_controller);
 	iounmap(dr_regs);
-	if (pdata->operating_mode == FSL_USB2_DR_DEVICE)
+	if (res && (pdata->operating_mode == FSL_USB2_DR_DEVICE))
 		release_mem_region(res->start, resource_size(res));
 
 	/* free udc --wait for the release() finished */

From 8748e69e7e3dccc622709a9771751a63fe39f26c Mon Sep 17 00:00:00 2001
From: Cristian Birsan <cristian.birsan@microchip.com>
Date: Fri, 4 Oct 2019 20:10:54 +0300
Subject: [PATCH 30/81] usb: gadget: udc: atmel: Fix interrupt storm in FIFO
 mode.

[ Upstream commit ba3a1a915c49cc3023e4ddfc88f21e7514e82aa4 ]

Fix interrupt storm generated by endpoints when working in FIFO mode.
The TX_COMPLETE interrupt is used only by control endpoints processing.
Do not enable it for other types of endpoints.

Fixes: 914a3f3b3754 ("USB: add atmel_usba_udc driver")
Signed-off-by: Cristian Birsan <cristian.birsan@microchip.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/usb/gadget/udc/atmel_usba_udc.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/gadget/udc/atmel_usba_udc.c b/drivers/usb/gadget/udc/atmel_usba_udc.c
index 585cb8734f50..668ac5e8681b 100644
--- a/drivers/usb/gadget/udc/atmel_usba_udc.c
+++ b/drivers/usb/gadget/udc/atmel_usba_udc.c
@@ -403,9 +403,11 @@ static void submit_request(struct usba_ep *ep, struct usba_request *req)
 		next_fifo_transaction(ep, req);
 		if (req->last_transaction) {
 			usba_ep_writel(ep, CTL_DIS, USBA_TX_PK_RDY);
-			usba_ep_writel(ep, CTL_ENB, USBA_TX_COMPLETE);
+			if (ep_is_control(ep))
+				usba_ep_writel(ep, CTL_ENB, USBA_TX_COMPLETE);
 		} else {
-			usba_ep_writel(ep, CTL_DIS, USBA_TX_COMPLETE);
+			if (ep_is_control(ep))
+				usba_ep_writel(ep, CTL_DIS, USBA_TX_COMPLETE);
 			usba_ep_writel(ep, CTL_ENB, USBA_TX_PK_RDY);
 		}
 	}

From ef4590495118b05727f936622ce0d9813ba1901f Mon Sep 17 00:00:00 2001
From: Chandana Kishori Chiluveru <cchiluve@codeaurora.org>
Date: Tue, 1 Oct 2019 13:16:48 +0530
Subject: [PATCH 31/81] usb: gadget: composite: Fix possible double free memory
 bug

[ Upstream commit 1c20c89b0421b52b2417bb0f62a611bc669eda1d ]

composite_dev_cleanup call from the failure of configfs_composite_bind
frees up the cdev->os_desc_req and cdev->req. If the previous calls of
bind and unbind is successful these will carry stale values.

Consider the below sequence of function calls:
configfs_composite_bind()
        composite_dev_prepare()
                - Allocate cdev->req, cdev->req->buf
        composite_os_desc_req_prepare()
                - Allocate cdev->os_desc_req, cdev->os_desc_req->buf
configfs_composite_unbind()
        composite_dev_cleanup()
                - free the cdev->os_desc_req->buf and cdev->req->buf
Next composition switch
configfs_composite_bind()
        - If it fails goto err_comp_cleanup will call the
	  composite_dev_cleanup() function
        composite_dev_cleanup()
	        - calls kfree up with the stale values of cdev->req->buf and
		  cdev->os_desc_req from the previous configfs_composite_bind
		  call. The free call on these stale values leads to double free.

Hence, Fix this issue by setting request and buffer pointer to NULL after
kfree.

Signed-off-by: Chandana Kishori Chiluveru <cchiluve@codeaurora.org>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/usb/gadget/composite.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c
index 351a406b97af..0f2d1e98481f 100644
--- a/drivers/usb/gadget/composite.c
+++ b/drivers/usb/gadget/composite.c
@@ -2068,14 +2068,18 @@ void composite_dev_cleanup(struct usb_composite_dev *cdev)
 			usb_ep_dequeue(cdev->gadget->ep0, cdev->os_desc_req);
 
 		kfree(cdev->os_desc_req->buf);
+		cdev->os_desc_req->buf = NULL;
 		usb_ep_free_request(cdev->gadget->ep0, cdev->os_desc_req);
+		cdev->os_desc_req = NULL;
 	}
 	if (cdev->req) {
 		if (cdev->setup_pending)
 			usb_ep_dequeue(cdev->gadget->ep0, cdev->req);
 
 		kfree(cdev->req->buf);
+		cdev->req->buf = NULL;
 		usb_ep_free_request(cdev->gadget->ep0, cdev->req);
+		cdev->req = NULL;
 	}
 	cdev->next_string_id = 0;
 	device_remove_file(&cdev->gadget->dev, &dev_attr_suspended);

From a6af4089638e3376a515ea3efc789ed48040c860 Mon Sep 17 00:00:00 2001
From: Peter Chen <peter.chen@nxp.com>
Date: Mon, 26 Aug 2019 15:10:55 -0400
Subject: [PATCH 32/81] usb: gadget: configfs: fix concurrent issue between
 composite APIs

[ Upstream commit 1a1c851bbd706ea9f3a9756c2d3db28523506d3b ]

We meet several NULL pointer issues if configfs_composite_unbind
and composite_setup (or composite_disconnect) are running together.
These issues occur when do the function switch stress test, the
configfs_compsoite_unbind is called from user mode by
echo "" to /sys/../UDC entry, and meanwhile, the setup interrupt
or disconnect interrupt occurs by hardware. The composite_setup
will get the cdev from get_gadget_data, but configfs_composite_unbind
will set gadget data as NULL, so the NULL pointer issue occurs.
This concurrent is hard to reproduce by native kernel, but can be
reproduced by android kernel.

In this commit, we introduce one spinlock belongs to structure
gadget_info since we can't use the same spinlock in usb_composite_dev
due to exclusive running together between composite_setup and
configfs_composite_unbind. And one bit flag 'unbind' to indicate the
code is at unbind routine, this bit is needed due to we release the
lock at during configfs_composite_unbind sometimes, and composite_setup
may be run at that time.

Several oops:

oops 1:
android_work: sent uevent USB_STATE=CONNECTED
configfs-gadget gadget: super-speed config #1: b
android_work: sent uevent USB_STATE=CONFIGURED
init: Received control message 'start' for 'adbd' from pid: 3515 (system_server)
Unable to handle kernel NULL pointer dereference at virtual address 0000002a
init: Received control message 'stop' for 'adbd' from pid: 3375 (/vendor/bin/hw/android.hardware.usb@1.1-servic)
Mem abort info:
  Exception class = DABT (current EL), IL = 32 bits
  SET = 0, FnV = 0
  EA = 0, S1PTW = 0
Data abort info:
  ISV = 0, ISS = 0x00000004
  CM = 0, WnR = 0
user pgtable: 4k pages, 48-bit VAs, pgd = ffff8008f1b7f000
[000000000000002a] *pgd=0000000000000000
Internal error: Oops: 96000004 [#1] PREEMPT SMP
Modules linked in:
CPU: 4 PID: 2457 Comm: irq/125-5b11000 Not tainted 4.14.98-07846-g0b40a9b-dirty #16
Hardware name: Freescale i.MX8QM MEK (DT)
task: ffff8008f2a98000 task.stack: ffff00000b7b8000
PC is at composite_setup+0x44/0x1508
LR is at android_setup+0xb8/0x13c
pc : [<ffff0000089ffb3c>] lr : [<ffff000008a032fc>] pstate: 800001c5
sp : ffff00000b7bbb80
x29: ffff00000b7bbb80 x28: ffff8008f2a3c010
x27: 0000000000000001 x26: 0000000000000000                                                          [1232/1897]
audit: audit_lost=25791 audit_rate_limit=5 audit_backlog_limit=64
x25: 00000000ffffffa1 x24: ffff8008f2a3c010
audit: rate limit exceeded
x23: 0000000000000409 x22: ffff000009c8e000
x21: ffff8008f7a8b428 x20: ffff00000afae000
x19: ffff0000089ff000 x18: 0000000000000000
x17: 0000000000000000 x16: ffff0000082b7c9c
x15: 0000000000000000 x14: f1866f5b952aca46
x13: e35502e30d44349c x12: 0000000000000008
x11: 0000000000000008 x10: 0000000000000a30
x9 : ffff00000b7bbd00 x8 : ffff8008f2a98a90
x7 : ffff8008f27a9c90 x6 : 0000000000000001
x5 : 0000000000000000 x4 : 0000000000000001
x3 : 0000000000000000 x2 : 0000000000000006
x1 : ffff0000089ff8d0 x0 : 732a010310b9ed00

X7: 0xffff8008f27a9c10:
9c10  00000002 00000000 00000001 00000000 13110000 ffff0000 00000002 00208040
9c30  00000000 00000000 00000000 00000000 00000000 00000005 00000029 00000000
9c50  00051778 00000001 f27a8e00 ffff8008 00000005 00000000 00000078 00000078
9c70  00000078 00000000 09031d48 ffff0000 00100000 00000000 00400000 00000000
9c90  00000001 00000000 00000000 00000000 00000000 00000000 ffefb1a0 ffff8008
9cb0  f27a9ca8 ffff8008 00000000 00000000 b9d88037 00000173 1618a3eb 00000001
9cd0  870a792a 0000002e 16188fe6 00000001 0000242b 00000000 00000000 00000000
using random self ethernet address
9cf0  019a4646 00000000 000547f3 00000000 ecfd6c33 00000002 00000000
using random host ethernet address
 00000000

X8: 0xffff8008f2a98a10:
8a10  00000000 00000000 f7788d00 ffff8008 00000001 00000000 00000000 00000000
8a30  eb218000 ffff8008 f2a98000 ffff8008 f2a98000 ffff8008 09885000 ffff0000
8a50  f34df480 ffff8008 00000000 00000000 f2a98648 ffff8008 09c8e000 ffff0000
8a70  fff2c800 ffff8008 09031d48 ffff0000 0b7bbd00 ffff0000 0b7bbd00 ffff0000
8a90  080861bc ffff0000 00000000 00000000 00000000 00000000 00000000 00000000
8ab0  00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
8ad0  00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
8af0  00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000

X21: 0xffff8008f7a8b3a8:
b3a8  00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
b3c8  00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
b3e8  00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
b408  00000000 00000000 00000000 00000000 00000000 00000000 00000001 00000000
b428  00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
b448  0053004d 00540046 00300031 00010030 eb07b520 ffff8008 20011201 00000003
b468  e418d109 0104404e 00010302 00000000 eb07b558 ffff8008 eb07b558 ffff8008
b488  f7a8b488 ffff8008 f7a8b488 ffff8008 f7a8b300 ffff8008 00000000 00000000

X24: 0xffff8008f2a3bf90:
bf90  00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
bfb0  00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
bfd0  00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
bff0  00000000 00000000 00000000 00000000 f76c8010 ffff8008 f76c8010 ffff8008
c010  00000000 00000000 f2a3c018 ffff8008 f2a3c018 ffff8008 08a067dc ffff0000
c030  f2a5a000 ffff8008 091c3650 ffff0000 f716fd18 ffff8008 f716fe30 ffff8008
c050  f2ce4a30 ffff8008 00000000 00000005 00000000 00000000 095d1568 ffff0000
c070  f76c8010 ffff8008 f2ce4b00 ffff8008 095cac68 ffff0000 f2a5a028 ffff8008

X28: 0xffff8008f2a3bf90:
bf90  00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
bfb0  00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
bfd0  00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
bff0  00000000 00000000 00000000 00000000 f76c8010 ffff8008 f76c8010 ffff8008
c010  00000000 00000000 f2a3c018 ffff8008 f2a3c018 ffff8008 08a067dc ffff0000
c030  f2a5a000 ffff8008 091c3650 ffff0000 f716fd18 ffff8008 f716fe30 ffff8008
c050  f2ce4a30 ffff8008 00000000 00000005 00000000 00000000 095d1568 ffff0000
c070  f76c8010 ffff8008 f2ce4b00 ffff8008 095cac68 ffff0000 f2a5a028 ffff8008

Process irq/125-5b11000 (pid: 2457, stack limit = 0xffff00000b7b8000)
Call trace:
Exception stack(0xffff00000b7bba40 to 0xffff00000b7bbb80)
ba40: 732a010310b9ed00 ffff0000089ff8d0 0000000000000006 0000000000000000
ba60: 0000000000000001 0000000000000000 0000000000000001 ffff8008f27a9c90
ba80: ffff8008f2a98a90 ffff00000b7bbd00 0000000000000a30 0000000000000008
baa0: 0000000000000008 e35502e30d44349c f1866f5b952aca46 0000000000000000
bac0: ffff0000082b7c9c 0000000000000000 0000000000000000 ffff0000089ff000
bae0: ffff00000afae000 ffff8008f7a8b428 ffff000009c8e000 0000000000000409
bb00: ffff8008f2a3c010 00000000ffffffa1 0000000000000000 0000000000000001
bb20: ffff8008f2a3c010 ffff00000b7bbb80 ffff000008a032fc ffff00000b7bbb80
bb40: ffff0000089ffb3c 00000000800001c5 ffff00000b7bbb80 732a010310b9ed00
bb60: ffffffffffffffff ffff0000080f777c ffff00000b7bbb80 ffff0000089ffb3c
[<ffff0000089ffb3c>] composite_setup+0x44/0x1508
[<ffff000008a032fc>] android_setup+0xb8/0x13c
[<ffff0000089bd9a8>] cdns3_ep0_delegate_req+0x44/0x70
[<ffff0000089bdff4>] cdns3_check_ep0_interrupt_proceed+0x33c/0x654
[<ffff0000089bca44>] cdns3_device_thread_irq_handler+0x4b0/0x4bc
[<ffff0000089b77b4>] cdns3_thread_irq+0x48/0x68
[<ffff000008145bf0>] irq_thread_fn+0x28/0x88
[<ffff000008145e38>] irq_thread+0x13c/0x228
[<ffff0000080fed70>] kthread+0x104/0x130
[<ffff000008085064>] ret_from_fork+0x10/0x18

oops2:
composite_disconnect: Calling disconnect on a Gadget that is                      not connected
android_work: did not send uevent (0 0           (null))
init: Received control message 'stop' for 'adbd' from pid: 3359 (/vendor/bin/hw/android.hardware.usb@1.1-service.imx)
init: Sending signal 9 to service 'adbd' (pid 22343) process group...
------------[ cut here ]------------
audit: audit_lost=180038 audit_rate_limit=5 audit_backlog_limit=64
audit: rate limit exceeded
WARNING: CPU: 0 PID: 3468 at kernel_imx/drivers/usb/gadget/composite.c:2009 composite_disconnect+0x80/0x88
Modules linked in:
CPU: 0 PID: 3468 Comm: HWC-UEvent-Thre Not tainted 4.14.98-07846-g0b40a9b-dirty #16
Hardware name: Freescale i.MX8QM MEK (DT)
task: ffff8008f2349c00 task.stack: ffff00000b0a8000
PC is at composite_disconnect+0x80/0x88
LR is at composite_disconnect+0x80/0x88
pc : [<ffff0000089ff9b0>] lr : [<ffff0000089ff9b0>] pstate: 600001c5
sp : ffff000008003dd0
x29: ffff000008003dd0 x28: ffff8008f2349c00
x27: ffff000009885018 x26: ffff000008004000
Timeout for IPC response!
x25: ffff000009885018 x24: ffff000009c8e280
x23: ffff8008f2d98010 x22: 00000000000001c0
x21: ffff8008f2d98394 x20: ffff8008f2d98010
x19: 0000000000000000 x18: 0000e3956f4f075a
fxos8700 4-001e: i2c block read acc failed
x17: 0000e395735727e8 x16: ffff00000829f4d4
x15: ffffffffffffffff x14: 7463656e6e6f6320
x13: 746f6e2009090920 x12: 7369207461687420
x11: 7465676461472061 x10: 206e6f207463656e
x9 : 6e6f637369642067 x8 : ffff000009c8e280
x7 : ffff0000086ca6cc x6 : ffff000009f15e78
x5 : 0000000000000000 x4 : 0000000000000000
x3 : ffffffffffffffff x2 : c3f28b86000c3900
x1 : c3f28b86000c3900 x0 : 000000000000004e

X20: 0xffff8008f2d97f90:
7f90  00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
7fb0  00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
libprocessgroup: Failed to kill process cgroup uid 0 pid 22343 in 215ms, 1 processes remain
7fd0
Timeout for IPC response!
 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
using random self ethernet address
7ff0  00000000 00000000 00000000 00000000 f76c8010 ffff8008 f76c8010 ffff8008
8010  00000100 00000000 f2d98018 ffff8008 f2d98018 ffff8008 08a067dc
using random host ethernet address
 ffff0000
8030  f206d800 ffff8008 091c3650 ffff0000 f7957b18 ffff8008 f7957730 ffff8008
8050  f716a630 ffff8008 00000000 00000005 00000000 00000000 095d1568 ffff0000
8070  f76c8010 ffff8008 f716a800 ffff8008 095cac68 ffff0000 f206d828 ffff8008

X21: 0xffff8008f2d98314:
8314  ffff8008 00000000 00000000 00000000 00000000 00000000 00000000 00000000
8334  00000000 00000000 00000000 00000000 00000000 08a04cf4 ffff0000 00000000
8354  00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
8374  00000000 00000000 00000000 00001001 00000000 00000000 00000000 00000000
8394  e4bbe4bb 0f230000 ffff0000 0afae000 ffff0000 ae001000 00000000 f206d400
Timeout for IPC response!
83b4  ffff8008 00000000 00000000 f7957b18 ffff8008 f7957718 ffff8008 f7957018
83d4  ffff8008 f7957118 ffff8008 f7957618 ffff8008 f7957818 ffff8008 f7957918
83f4  ffff8008 f7957d18 ffff8008 00000000 00000000 00000000 00000000 00000000

X23: 0xffff8008f2d97f90:
7f90  00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
7fb0  00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
7fd0  00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
7ff0  00000000 00000000 00000000 00000000 f76c8010 ffff8008 f76c8010 ffff8008
8010  00000100 00000000 f2d98018 ffff8008 f2d98018 ffff8008 08a067dc ffff0000
8030  f206d800 ffff8008 091c3650 ffff0000 f7957b18 ffff8008 f7957730 ffff8008
8050  f716a630 ffff8008 00000000 00000005 00000000 00000000 095d1568 ffff0000
8070  f76c8010 ffff8008 f716a800 ffff8008 095cac68 ffff0000 f206d828 ffff8008

X28: 0xffff8008f2349b80:
9b80  00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
9ba0  00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
9bc0  00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
9be0  00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
9c00  00000022 00000000 ffffffff ffffffff 00010001 00000000 00000000 00000000
9c20  0b0a8000 ffff0000 00000002 00404040 00000000 00000000 00000000 00000000
9c40  00000001 00000000 00000001 00000000 001ebd44 00000001 f390b800 ffff8008
9c60  00000000 00000001 00000070 00000070 00000070 00000000 09031d48 ffff0000

Call trace:
Exception stack(0xffff000008003c90 to 0xffff000008003dd0)
3c80:                                   000000000000004e c3f28b86000c3900
3ca0: c3f28b86000c3900 ffffffffffffffff 0000000000000000 0000000000000000
3cc0: ffff000009f15e78 ffff0000086ca6cc ffff000009c8e280 6e6f637369642067
3ce0: 206e6f207463656e 7465676461472061 7369207461687420 746f6e2009090920
3d00: 7463656e6e6f6320 ffffffffffffffff ffff00000829f4d4 0000e395735727e8
3d20: 0000e3956f4f075a 0000000000000000 ffff8008f2d98010 ffff8008f2d98394
3d40: 00000000000001c0 ffff8008f2d98010 ffff000009c8e280 ffff000009885018
3d60: ffff000008004000 ffff000009885018 ffff8008f2349c00 ffff000008003dd0
3d80: ffff0000089ff9b0 ffff000008003dd0 ffff0000089ff9b0 00000000600001c5
3da0: ffff8008f33f2cd8 0000000000000000 0000ffffffffffff 0000000000000000
init: Received control message 'start' for 'adbd' from pid: 3359 (/vendor/bin/hw/android.hardware.usb@1.1-service.imx)
3dc0: ffff000008003dd0 ffff0000089ff9b0
[<ffff0000089ff9b0>] composite_disconnect+0x80/0x88
[<ffff000008a044d4>] android_disconnect+0x3c/0x68
[<ffff0000089ba9f8>] cdns3_device_irq_handler+0xfc/0x2c8
[<ffff0000089b84c0>] cdns3_irq+0x44/0x94
[<ffff00000814494c>] __handle_irq_event_percpu+0x60/0x24c
[<ffff000008144c0c>] handle_irq_event+0x58/0xc0
[<ffff00000814873c>] handle_fasteoi_irq+0x98/0x180
[<ffff000008143a10>] generic_handle_irq+0x24/0x38
[<ffff000008144170>] __handle_domain_irq+0x60/0xac
[<ffff0000080819c4>] gic_handle_irq+0xd4/0x17c

Signed-off-by: Peter Chen <peter.chen@nxp.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/usb/gadget/configfs.c | 110 ++++++++++++++++++++++++++++++++--
 1 file changed, 105 insertions(+), 5 deletions(-)

diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c
index d412e234f336..f91a43140694 100644
--- a/drivers/usb/gadget/configfs.c
+++ b/drivers/usb/gadget/configfs.c
@@ -62,6 +62,8 @@ struct gadget_info {
 	bool use_os_desc;
 	char b_vendor_code;
 	char qw_sign[OS_STRING_QW_SIGN_LEN];
+	spinlock_t spinlock;
+	bool unbind;
 };
 
 static inline struct gadget_info *to_gadget_info(struct config_item *item)
@@ -1246,6 +1248,7 @@ static int configfs_composite_bind(struct usb_gadget *gadget,
 	int				ret;
 
 	/* the gi->lock is hold by the caller */
+	gi->unbind = 0;
 	cdev->gadget = gadget;
 	set_gadget_data(gadget, cdev);
 	ret = composite_dev_prepare(composite, cdev);
@@ -1378,31 +1381,128 @@ static void configfs_composite_unbind(struct usb_gadget *gadget)
 {
 	struct usb_composite_dev	*cdev;
 	struct gadget_info		*gi;
+	unsigned long flags;
 
 	/* the gi->lock is hold by the caller */
 
 	cdev = get_gadget_data(gadget);
 	gi = container_of(cdev, struct gadget_info, cdev);
+	spin_lock_irqsave(&gi->spinlock, flags);
+	gi->unbind = 1;
+	spin_unlock_irqrestore(&gi->spinlock, flags);
 
 	kfree(otg_desc[0]);
 	otg_desc[0] = NULL;
 	purge_configs_funcs(gi);
 	composite_dev_cleanup(cdev);
 	usb_ep_autoconfig_reset(cdev->gadget);
+	spin_lock_irqsave(&gi->spinlock, flags);
 	cdev->gadget = NULL;
 	set_gadget_data(gadget, NULL);
+	spin_unlock_irqrestore(&gi->spinlock, flags);
+}
+
+static int configfs_composite_setup(struct usb_gadget *gadget,
+		const struct usb_ctrlrequest *ctrl)
+{
+	struct usb_composite_dev *cdev;
+	struct gadget_info *gi;
+	unsigned long flags;
+	int ret;
+
+	cdev = get_gadget_data(gadget);
+	if (!cdev)
+		return 0;
+
+	gi = container_of(cdev, struct gadget_info, cdev);
+	spin_lock_irqsave(&gi->spinlock, flags);
+	cdev = get_gadget_data(gadget);
+	if (!cdev || gi->unbind) {
+		spin_unlock_irqrestore(&gi->spinlock, flags);
+		return 0;
+	}
+
+	ret = composite_setup(gadget, ctrl);
+	spin_unlock_irqrestore(&gi->spinlock, flags);
+	return ret;
+}
+
+static void configfs_composite_disconnect(struct usb_gadget *gadget)
+{
+	struct usb_composite_dev *cdev;
+	struct gadget_info *gi;
+	unsigned long flags;
+
+	cdev = get_gadget_data(gadget);
+	if (!cdev)
+		return;
+
+	gi = container_of(cdev, struct gadget_info, cdev);
+	spin_lock_irqsave(&gi->spinlock, flags);
+	cdev = get_gadget_data(gadget);
+	if (!cdev || gi->unbind) {
+		spin_unlock_irqrestore(&gi->spinlock, flags);
+		return;
+	}
+
+	composite_disconnect(gadget);
+	spin_unlock_irqrestore(&gi->spinlock, flags);
+}
+
+static void configfs_composite_suspend(struct usb_gadget *gadget)
+{
+	struct usb_composite_dev *cdev;
+	struct gadget_info *gi;
+	unsigned long flags;
+
+	cdev = get_gadget_data(gadget);
+	if (!cdev)
+		return;
+
+	gi = container_of(cdev, struct gadget_info, cdev);
+	spin_lock_irqsave(&gi->spinlock, flags);
+	cdev = get_gadget_data(gadget);
+	if (!cdev || gi->unbind) {
+		spin_unlock_irqrestore(&gi->spinlock, flags);
+		return;
+	}
+
+	composite_suspend(gadget);
+	spin_unlock_irqrestore(&gi->spinlock, flags);
+}
+
+static void configfs_composite_resume(struct usb_gadget *gadget)
+{
+	struct usb_composite_dev *cdev;
+	struct gadget_info *gi;
+	unsigned long flags;
+
+	cdev = get_gadget_data(gadget);
+	if (!cdev)
+		return;
+
+	gi = container_of(cdev, struct gadget_info, cdev);
+	spin_lock_irqsave(&gi->spinlock, flags);
+	cdev = get_gadget_data(gadget);
+	if (!cdev || gi->unbind) {
+		spin_unlock_irqrestore(&gi->spinlock, flags);
+		return;
+	}
+
+	composite_resume(gadget);
+	spin_unlock_irqrestore(&gi->spinlock, flags);
 }
 
 static const struct usb_gadget_driver configfs_driver_template = {
 	.bind           = configfs_composite_bind,
 	.unbind         = configfs_composite_unbind,
 
-	.setup          = composite_setup,
-	.reset          = composite_disconnect,
-	.disconnect     = composite_disconnect,
+	.setup          = configfs_composite_setup,
+	.reset          = configfs_composite_disconnect,
+	.disconnect     = configfs_composite_disconnect,
 
-	.suspend	= composite_suspend,
-	.resume		= composite_resume,
+	.suspend	= configfs_composite_suspend,
+	.resume		= configfs_composite_resume,
 
 	.max_speed	= USB_SPEED_SUPER,
 	.driver = {

From 123d58e1ac8a0f120b79cb5566549ff7bdf6928d Mon Sep 17 00:00:00 2001
From: Kim Phillips <kim.phillips@amd.com>
Date: Wed, 23 Oct 2019 10:09:54 -0500
Subject: [PATCH 33/81] perf/x86/amd/ibs: Fix reading of the IBS OpData
 register and thus precise RIP validity

[ Upstream commit 317b96bb14303c7998dbcd5bc606bd8038fdd4b4 ]

The loop that reads all the IBS MSRs into *buf stopped one MSR short of
reading the IbsOpData register, which contains the RipInvalid status bit.

Fix the offset_max assignment so the MSR gets read, so the RIP invalid
evaluation is based on what the IBS h/w output, instead of what was
left in memory.

Signed-off-by: Kim Phillips <kim.phillips@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Fixes: d47e8238cd76 ("perf/x86-ibs: Take instruction pointer from ibs sample")
Link: https://lkml.kernel.org/r/20191023150955.30292-1-kim.phillips@amd.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_amd_ibs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
index 989d3c215d2b..66ca6ec09bd4 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -555,7 +555,7 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
 	if (event->attr.sample_type & PERF_SAMPLE_RAW)
 		offset_max = perf_ibs->offset_max;
 	else if (check_rip)
-		offset_max = 2;
+		offset_max = 3;
 	else
 		offset_max = 1;
 	do {

From c4dc7c7af4ccabcfcaef2789286ccc49094e484e Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Mon, 28 Oct 2019 10:52:35 -0400
Subject: [PATCH 34/81] USB: Skip endpoints with 0 maxpacket length

[ Upstream commit d482c7bb0541d19dea8bff437a9f3c5563b5b2d2 ]

Endpoints with a maxpacket length of 0 are probably useless.  They
can't transfer any data, and it's not at all unlikely that an HCD will
crash or hang when trying to handle an URB for such an endpoint.

Currently the USB core does not check for endpoints having a maxpacket
value of 0.  This patch adds a check, printing a warning and skipping
over any endpoints it catches.

Now, the USB spec does not rule out endpoints having maxpacket = 0.
But since they wouldn't have any practical use, there doesn't seem to
be any good reason for us to accept them.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>

Link: https://lore.kernel.org/r/Pine.LNX.4.44L0.1910281050420.1485-100000@iolanthe.rowland.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/usb/core/config.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c
index 5abc4e5434ec..cbd064fae23b 100644
--- a/drivers/usb/core/config.c
+++ b/drivers/usb/core/config.c
@@ -314,6 +314,11 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum,
 
 	/* Validate the wMaxPacketSize field */
 	maxp = usb_endpoint_maxp(&endpoint->desc);
+	if (maxp == 0) {
+		dev_warn(ddev, "config %d interface %d altsetting %d endpoint 0x%X has wMaxPacketSize 0, skipping\n",
+		    cfgno, inum, asnum, d->bEndpointAddress);
+		goto skip_to_next_endpoint_or_interface_descriptor;
+	}
 
 	/* Find the highest legal maxpacket size for this endpoint */
 	i = 0;		/* additional transactions per microframe */

From 913a2c73e1e6756b786bbf4d63421f1fd473c75d Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Thu, 24 Oct 2019 16:38:04 +1000
Subject: [PATCH 35/81] scsi: qla2xxx: stop timer in shutdown path

[ Upstream commit d3566abb1a1e7772116e4d50fb6a58d19c9802e5 ]

In shutdown/reboot paths, the timer is not stopped:

  qla2x00_shutdown
  pci_device_shutdown
  device_shutdown
  kernel_restart_prepare
  kernel_restart
  sys_reboot

This causes lockups (on powerpc) when firmware config space access calls
are interrupted by smp_send_stop later in reboot.

Fixes: e30d1756480dc ("[SCSI] qla2xxx: Addition of shutdown callback handler.")
Link: https://lore.kernel.org/r/20191024063804.14538-1-npiggin@gmail.com
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Acked-by: Himanshu Madhani <hmadhani@marvell.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/scsi/qla2xxx/qla_os.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index ff5df33fc740..611a127f08d8 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -2962,6 +2962,10 @@ qla2x00_shutdown(struct pci_dev *pdev)
 	/* Stop currently executing firmware. */
 	qla2x00_try_to_stop_firmware(vha);
 
+	/* Disable timer */
+	if (vha->timer_active)
+		qla2x00_stop_timer(vha);
+
 	/* Turn adapter off line */
 	vha->flags.online = 0;
 

From 5c2da8de678bb5b19c45bb510a4f9022d4709127 Mon Sep 17 00:00:00 2001
From: Jiangfeng Xiao <xiaojiangfeng@huawei.com>
Date: Fri, 25 Oct 2019 21:48:22 +0800
Subject: [PATCH 36/81] net: hisilicon: Fix "Trying to free already-free IRQ"

[ Upstream commit 63a41746827cb16dc6ad0d4d761ab4e7dda7a0c3 ]

When rmmod hip04_eth.ko, we can get the following warning:

Task track: rmmod(1623)>bash(1591)>login(1581)>init(1)
------------[ cut here ]------------
WARNING: CPU: 0 PID: 1623 at kernel/irq/manage.c:1557 __free_irq+0xa4/0x2ac()
Trying to free already-free IRQ 200
Modules linked in: ping(O) pramdisk(O) cpuinfo(O) rtos_snapshot(O) interrupt_ctrl(O) mtdblock mtd_blkdevrtfs nfs_acl nfs lockd grace sunrpc xt_tcpudp ipt_REJECT iptable_filter ip_tables x_tables nf_reject_ipv
CPU: 0 PID: 1623 Comm: rmmod Tainted: G           O    4.4.193 #1
Hardware name: Hisilicon A15
[<c020b408>] (rtos_unwind_backtrace) from [<c0206624>] (show_stack+0x10/0x14)
[<c0206624>] (show_stack) from [<c03f2be4>] (dump_stack+0xa0/0xd8)
[<c03f2be4>] (dump_stack) from [<c021a780>] (warn_slowpath_common+0x84/0xb0)
[<c021a780>] (warn_slowpath_common) from [<c021a7e8>] (warn_slowpath_fmt+0x3c/0x68)
[<c021a7e8>] (warn_slowpath_fmt) from [<c026876c>] (__free_irq+0xa4/0x2ac)
[<c026876c>] (__free_irq) from [<c0268a14>] (free_irq+0x60/0x7c)
[<c0268a14>] (free_irq) from [<c0469e80>] (release_nodes+0x1c4/0x1ec)
[<c0469e80>] (release_nodes) from [<c0466924>] (__device_release_driver+0xa8/0x104)
[<c0466924>] (__device_release_driver) from [<c0466a80>] (driver_detach+0xd0/0xf8)
[<c0466a80>] (driver_detach) from [<c0465e18>] (bus_remove_driver+0x64/0x8c)
[<c0465e18>] (bus_remove_driver) from [<c02935b0>] (SyS_delete_module+0x198/0x1e0)
[<c02935b0>] (SyS_delete_module) from [<c0202ed0>] (__sys_trace_return+0x0/0x10)
---[ end trace bb25d6123d849b44 ]---

Currently "rmmod hip04_eth.ko" call free_irq more than once
as devres_release_all and hip04_remove both call free_irq.
This results in a 'Trying to free already-free IRQ' warning.
To solve the problem free_irq has been moved out of hip04_remove.

Signed-off-by: Jiangfeng Xiao <xiaojiangfeng@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/net/ethernet/hisilicon/hip04_eth.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c
index e8b7dc1bcfa6..2a7dfac20546 100644
--- a/drivers/net/ethernet/hisilicon/hip04_eth.c
+++ b/drivers/net/ethernet/hisilicon/hip04_eth.c
@@ -950,7 +950,6 @@ static int hip04_remove(struct platform_device *pdev)
 
 	hip04_free_ring(ndev, d);
 	unregister_netdev(ndev);
-	free_irq(ndev->irq, ndev);
 	of_node_put(priv->phy_node);
 	cancel_work_sync(&priv->tx_timeout_task);
 	free_netdev(ndev);

From 9eecc1310ff052857e7e6bf6864f790f20e9515a Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trondmy@gmail.com>
Date: Thu, 31 Oct 2019 18:40:32 -0400
Subject: [PATCH 37/81] NFSv4: Don't allow a cached open with a revoked
 delegation

[ Upstream commit be3df3dd4c70ee020587a943a31b98a0fb4b6424 ]

If the delegation is marked as being revoked, we must not use it
for cached opens.

Fixes: 869f9dfa4d6d ("NFSv4: Fix races between nfs_remove_bad_delegation() and delegation return")
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 fs/nfs/delegation.c | 10 ++++++++++
 fs/nfs/delegation.h |  1 +
 fs/nfs/nfs4proc.c   |  7 ++-----
 3 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 7af5eeabc80e..5dac3382405c 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -52,6 +52,16 @@ nfs4_is_valid_delegation(const struct nfs_delegation *delegation,
 	return false;
 }
 
+struct nfs_delegation *nfs4_get_valid_delegation(const struct inode *inode)
+{
+	struct nfs_delegation *delegation;
+
+	delegation = rcu_dereference(NFS_I(inode)->delegation);
+	if (nfs4_is_valid_delegation(delegation, 0))
+		return delegation;
+	return NULL;
+}
+
 static int
 nfs4_do_check_delegation(struct inode *inode, fmode_t flags, bool mark)
 {
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 333063e032f0..26a8af7bdca3 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -58,6 +58,7 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
 int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, const nfs4_stateid *stateid);
 bool nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode, fmode_t flags);
 
+struct nfs_delegation *nfs4_get_valid_delegation(const struct inode *inode);
 void nfs_mark_delegation_referenced(struct nfs_delegation *delegation);
 int nfs4_have_delegation(struct inode *inode, fmode_t flags);
 int nfs4_check_delegation(struct inode *inode, fmode_t flags);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 900a62a9ad4e..08207001d475 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1243,8 +1243,6 @@ static int can_open_delegated(struct nfs_delegation *delegation, fmode_t fmode,
 		return 0;
 	if ((delegation->type & fmode) != fmode)
 		return 0;
-	if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags))
-		return 0;
 	switch (claim) {
 	case NFS4_OPEN_CLAIM_NULL:
 	case NFS4_OPEN_CLAIM_FH:
@@ -1473,7 +1471,6 @@ static void nfs4_return_incompatible_delegation(struct inode *inode, fmode_t fmo
 static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
 {
 	struct nfs4_state *state = opendata->state;
-	struct nfs_inode *nfsi = NFS_I(state->inode);
 	struct nfs_delegation *delegation;
 	int open_mode = opendata->o_arg.open_flags;
 	fmode_t fmode = opendata->o_arg.fmode;
@@ -1490,7 +1487,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
 		}
 		spin_unlock(&state->owner->so_lock);
 		rcu_read_lock();
-		delegation = rcu_dereference(nfsi->delegation);
+		delegation = nfs4_get_valid_delegation(state->inode);
 		if (!can_open_delegated(delegation, fmode, claim)) {
 			rcu_read_unlock();
 			break;
@@ -1981,7 +1978,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
 		if (can_open_cached(data->state, data->o_arg.fmode, data->o_arg.open_flags))
 			goto out_no_action;
 		rcu_read_lock();
-		delegation = rcu_dereference(NFS_I(data->state->inode)->delegation);
+		delegation = nfs4_get_valid_delegation(data->state->inode);
 		if (can_open_delegated(delegation, data->o_arg.fmode, claim))
 			goto unlock_no_action;
 		rcu_read_unlock();

From 1b374b0762da20bd4c8c3866de426ef9035b8d3d Mon Sep 17 00:00:00 2001
From: Manfred Rudigier <manfred.rudigier@omicronenergy.com>
Date: Thu, 15 Aug 2019 13:55:20 -0700
Subject: [PATCH 38/81] igb: Fix constant media auto sense switching when no
 cable is connected

[ Upstream commit 8d5cfd7f76a2414e23c74bb8858af7540365d985 ]

At least on the i350 there is an annoying behavior that is maybe also
present on 82580 devices, but was probably not noticed yet as MAS is not
widely used.

If no cable is connected on both fiber/copper ports the media auto sense
code will constantly swap between them as part of the watchdog task and
produce many unnecessary kernel log messages.

The swap code responsible for this behavior (switching to fiber) should
not be executed if the current media type is copper and there is no signal
detected on the fiber port. In this case we can safely wait until the
AUTOSENSE_EN bit is cleared.

Signed-off-by: Manfred Rudigier <manfred.rudigier@omicronenergy.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/net/ethernet/intel/igb/igb_main.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 70ed5e5c3514..9404f38d9d0d 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -1673,7 +1673,8 @@ static void igb_check_swap_media(struct igb_adapter *adapter)
 	if ((hw->phy.media_type == e1000_media_type_copper) &&
 	    (!(connsw & E1000_CONNSW_AUTOSENSE_EN))) {
 		swap_now = true;
-	} else if (!(connsw & E1000_CONNSW_SERDESD)) {
+	} else if ((hw->phy.media_type != e1000_media_type_copper) &&
+		   !(connsw & E1000_CONNSW_SERDESD)) {
 		/* copper signal takes time to appear */
 		if (adapter->copper_tries < 4) {
 			adapter->copper_tries++;

From 47886f2933610d90f0af055c29e017a43303f181 Mon Sep 17 00:00:00 2001
From: Wenwen Wang <wenwen@cs.uga.edu>
Date: Mon, 12 Aug 2019 00:59:21 -0500
Subject: [PATCH 39/81] e1000: fix memory leaks

[ Upstream commit 8472ba62154058b64ebb83d5f57259a352d28697 ]

In e1000_set_ringparam(), 'tx_old' and 'rx_old' are not deallocated if
e1000_up() fails, leading to memory leaks. Refactor the code to fix this
issue.

Signed-off-by: Wenwen Wang <wenwen@cs.uga.edu>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/net/ethernet/intel/e1000/e1000_ethtool.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c
index d70b2e5d5222..cbb0bdf85177 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c
@@ -628,6 +628,7 @@ static int e1000_set_ringparam(struct net_device *netdev,
 	for (i = 0; i < adapter->num_rx_queues; i++)
 		rxdr[i].count = rxdr->count;
 
+	err = 0;
 	if (netif_running(adapter->netdev)) {
 		/* Try to get new resources before deleting old */
 		err = e1000_setup_all_rx_resources(adapter);
@@ -648,14 +649,13 @@ static int e1000_set_ringparam(struct net_device *netdev,
 		adapter->rx_ring = rxdr;
 		adapter->tx_ring = txdr;
 		err = e1000_up(adapter);
-		if (err)
-			goto err_setup;
 	}
 	kfree(tx_old);
 	kfree(rx_old);
 
 	clear_bit(__E1000_RESETTING, &adapter->flags);
-	return 0;
+	return err;
+
 err_setup_tx:
 	e1000_free_all_rx_resources(adapter);
 err_setup_rx:
@@ -667,7 +667,6 @@ err_alloc_rx:
 err_alloc_tx:
 	if (netif_running(adapter->netdev))
 		e1000_up(adapter);
-err_setup:
 	clear_bit(__E1000_RESETTING, &adapter->flags);
 	return err;
 }

From a2c0d9a9349967a14f2ccec174c92223cb00e43c Mon Sep 17 00:00:00 2001
From: Joakim Zhang <qiangqing.zhang@nxp.com>
Date: Thu, 15 Aug 2019 08:00:26 +0000
Subject: [PATCH 40/81] can: flexcan: disable completely the ECC mechanism

[ Upstream commit 5e269324db5adb2f5f6ec9a93a9c7b0672932b47 ]

The ECC (memory error detection and correction) mechanism can be
activated or not, controlled by the ECCDIS bit in CAN_MECR. When
disabled, updates on indications and reporting registers are stopped.
So if want to disable ECC completely, had better assert ECCDIS bit, not
just mask the related interrupts.

Fixes: cdce844865be ("can: flexcan: add vf610 support for FlexCAN")
Signed-off-by: Joakim Zhang <qiangqing.zhang@nxp.com>
Cc: linux-stable <stable@vger.kernel.org>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/net/can/flexcan.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c
index baef09b9449f..6b866d0451b2 100644
--- a/drivers/net/can/flexcan.c
+++ b/drivers/net/can/flexcan.c
@@ -923,6 +923,7 @@ static int flexcan_chip_start(struct net_device *dev)
 		reg_mecr = flexcan_read(&regs->mecr);
 		reg_mecr &= ~FLEXCAN_MECR_ECRWRDIS;
 		flexcan_write(reg_mecr, &regs->mecr);
+		reg_mecr |= FLEXCAN_MECR_ECCDIS;
 		reg_mecr &= ~(FLEXCAN_MECR_NCEFAFRZ | FLEXCAN_MECR_HANCEI_MSK |
 			      FLEXCAN_MECR_FANCEI_MSK);
 		flexcan_write(reg_mecr, &regs->mecr);

From bfb01e6a4fb95470171cfdc476911da3e2b290e2 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Date: Mon, 23 Sep 2019 15:34:45 -0700
Subject: [PATCH 41/81] mm/filemap.c: don't initiate writeback if mapping has
 no dirty pages

commit c3aab9a0bd91b696a852169479b7db1ece6cbf8c upstream.

Functions like filemap_write_and_wait_range() should do nothing if inode
has no dirty pages or pages currently under writeback.  But they anyway
construct struct writeback_control and this does some atomic operations if
CONFIG_CGROUP_WRITEBACK=y - on fast path it locks inode->i_lock and
updates state of writeback ownership, on slow path might be more work.
Current this path is safely avoided only when inode mapping has no pages.

For example generic_file_read_iter() calls filemap_write_and_wait_range()
at each O_DIRECT read - pretty hot path.

This patch skips starting new writeback if mapping has no dirty tags set.
If writeback is already in progress filemap_write_and_wait_range() will
wait for it.

Link: http://lkml.kernel.org/r/156378816804.1087.8607636317907921438.stgit@buzz
Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 mm/filemap.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index 21e750b6e810..f217120973eb 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -340,7 +340,8 @@ int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
 		.range_end = end,
 	};
 
-	if (!mapping_cap_writeback_dirty(mapping))
+	if (!mapping_cap_writeback_dirty(mapping) ||
+	    !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
 		return 0;
 
 	wbc_attach_fdatawrite_inode(&wbc, mapping->host);

From 6c9b44791dfea9b7735137242cc4f4746ffedced Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 8 Nov 2019 12:18:29 -0800
Subject: [PATCH 42/81] cgroup,writeback: don't switch wbs immediately on dead
 wbs if the memcg is dead

commit 65de03e251382306a4575b1779c57c87889eee49 upstream.

cgroup writeback tries to refresh the associated wb immediately if the
current wb is dead.  This is to avoid keeping issuing IOs on the stale
wb after memcg - blkcg association has changed (ie. when blkcg got
disabled / enabled higher up in the hierarchy).

Unfortunately, the logic gets triggered spuriously on inodes which are
associated with dead cgroups.  When the logic is triggered on dead
cgroups, the attempt fails only after doing quite a bit of work
allocating and initializing a new wb.

While c3aab9a0bd91 ("mm/filemap.c: don't initiate writeback if mapping
has no dirty pages") alleviated the issue significantly as it now only
triggers when the inode has dirty pages.  However, the condition can
still be triggered before the inode is switched to a different cgroup
and the logic simply doesn't make sense.

Skip the immediate switching if the associated memcg is dying.

This is a simplified version of the following two patches:

 * https://lore.kernel.org/linux-mm/20190513183053.GA73423@dennisz-mbp/
 * http://lkml.kernel.org/r/156355839560.2063.5265687291430814589.stgit@buzz

Cc: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Fixes: e8a7abf5a5bd ("writeback: disassociate inodes from dying bdi_writebacks")
Acked-by: Dennis Zhou <dennis@kernel.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/fs-writeback.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 76597dd8cfe8..80ea03034017 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -582,10 +582,13 @@ void wbc_attach_and_unlock_inode(struct writeback_control *wbc,
 	spin_unlock(&inode->i_lock);
 
 	/*
-	 * A dying wb indicates that the memcg-blkcg mapping has changed
-	 * and a new wb is already serving the memcg.  Switch immediately.
+	 * A dying wb indicates that either the blkcg associated with the
+	 * memcg changed or the associated memcg is dying.  In the first
+	 * case, a replacement wb should already be available and we should
+	 * refresh the wb immediately.  In the second case, trying to
+	 * refresh will keep failing.
 	 */
-	if (unlikely(wb_dying(wbc->wb)))
+	if (unlikely(wb_dying(wbc->wb) && !css_is_dying(wbc->wb->memcg_css)))
 		inode_switch_wbs(inode, wbc->wb_id);
 }
 

From b575bf8be32348bb1053141d7afb4b7a7336e7f6 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 4 Nov 2019 21:38:43 -0800
Subject: [PATCH 43/81] net: prevent load/store tearing on sk->sk_stamp

[ Upstream commit f75359f3ac855940c5718af10ba089b8977bf339 ]

Add a couple of READ_ONCE() and WRITE_ONCE() to prevent
load-tearing and store-tearing in sock_read_timestamp()
and sock_write_timestamp()

This might prevent another KCSAN report.

Fixes: 3a0ed3e96197 ("sock: Make sock->sk_stamp thread-safe")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Deepa Dinamani <deepa.kernel@gmail.com>
Acked-by: Deepa Dinamani <deepa.kernel@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 include/net/sock.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index 1571ab68de16..5ed4786a2058 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2174,7 +2174,7 @@ static inline ktime_t sock_read_timestamp(struct sock *sk)
 
 	return kt;
 #else
-	return sk->sk_stamp;
+	return READ_ONCE(sk->sk_stamp);
 #endif
 }
 
@@ -2185,7 +2185,7 @@ static inline void sock_write_timestamp(struct sock *sk, ktime_t kt)
 	sk->sk_stamp = kt;
 	write_sequnlock(&sk->sk_stamp_seq);
 #else
-	sk->sk_stamp = kt;
+	WRITE_ONCE(sk->sk_stamp, kt);
 #endif
 }
 

From 6d0cfddc7afc715835f0e17827106f832b14dd2a Mon Sep 17 00:00:00 2001
From: Jon Bloomfield <jon.bloomfield@intel.com>
Date: Thu, 12 Jul 2018 19:53:10 +0100
Subject: [PATCH 44/81] drm/i915/gtt: Add read only pages to gen8_pte_encode

commit 25dda4dabeeb12af5209b0183c788ef2a88dabbe upstream.

We can set a bit inside the ppGTT PTE to indicate a page is read-only;
writes from the GPU will be discarded. We can use this to protect pages
and in particular support read-only userptr mappings (necessary for
importing PROT_READ vma).

Signed-off-by: Jon Bloomfield <jon.bloomfield@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Matthew Auld <matthew.william.auld@gmail.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Matthew Auld <matthew.william.auld@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180712185315.3288-1-chris@chris-wilson.co.uk
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index b37fe0df743e..272b03b4f1ec 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -172,11 +172,14 @@ static void ppgtt_unbind_vma(struct i915_vma *vma)
 
 static gen8_pte_t gen8_pte_encode(dma_addr_t addr,
 				  enum i915_cache_level level,
-				  bool valid)
+				  bool valid, u32 flags)
 {
 	gen8_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
 	pte |= addr;
 
+	if (unlikely(flags & PTE_READ_ONLY))
+		pte &= ~_PAGE_RW;
+
 	switch (level) {
 	case I915_CACHE_NONE:
 		pte |= PPAT_UNCACHED_INDEX;
@@ -460,7 +463,7 @@ static void gen8_initialize_pt(struct i915_address_space *vm,
 	gen8_pte_t scratch_pte;
 
 	scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
-				      I915_CACHE_LLC, true);
+				      I915_CACHE_LLC, true, 0);
 
 	fill_px(vm->dev, pt, scratch_pte);
 }
@@ -757,8 +760,9 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
 {
 	struct i915_hw_ppgtt *ppgtt =
 		container_of(vm, struct i915_hw_ppgtt, base);
-	gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
-						 I915_CACHE_LLC, use_scratch);
+	gen8_pte_t scratch_pte =
+		gen8_pte_encode(px_dma(vm->scratch_page),
+				I915_CACHE_LLC, use_scratch, 0);
 
 	if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
 		gen8_ppgtt_clear_pte_range(vm, &ppgtt->pdp, start, length,
@@ -799,7 +803,7 @@ gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm,
 
 		pt_vaddr[pte] =
 			gen8_pte_encode(sg_page_iter_dma_address(sg_iter),
-					cache_level, true);
+					cache_level, true, 0);
 		if (++pte == GEN8_PTES) {
 			kunmap_px(ppgtt, pt_vaddr);
 			pt_vaddr = NULL;
@@ -1447,7 +1451,7 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
 	uint64_t start = ppgtt->base.start;
 	uint64_t length = ppgtt->base.total;
 	gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
-						 I915_CACHE_LLC, true);
+						 I915_CACHE_LLC, true, 0);
 
 	if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
 		gen8_dump_pdp(&ppgtt->pdp, start, length, scratch_pte, m);
@@ -2357,7 +2361,7 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 		addr = sg_dma_address(sg_iter.sg) +
 			(sg_iter.sg_pgoffset << PAGE_SHIFT);
 		gen8_set_pte(&gtt_entries[i],
-			     gen8_pte_encode(addr, level, true));
+			     gen8_pte_encode(addr, level, true, 0));
 		i++;
 	}
 
@@ -2370,7 +2374,7 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 	 */
 	if (i != 0)
 		WARN_ON(readq(&gtt_entries[i-1])
-			!= gen8_pte_encode(addr, level, true));
+			!= gen8_pte_encode(addr, level, true, 0));
 
 	/* This next bit makes the above posting read even more important. We
 	 * want to flush the TLBs only after we're certain all the PTE updates
@@ -2444,7 +2448,7 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm,
 
 	scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
 				      I915_CACHE_LLC,
-				      use_scratch);
+				      use_scratch, 0);
 	for (i = 0; i < num_entries; i++)
 		gen8_set_pte(&gtt_base[i], scratch_pte);
 	readl(gtt_base);

From 774b68aa2105c70b40c3b1777feb7ab500d716dd Mon Sep 17 00:00:00 2001
From: Jon Bloomfield <jon.bloomfield@intel.com>
Date: Mon, 6 Aug 2018 14:10:48 -0700
Subject: [PATCH 45/81] drm/i915/gtt: Read-only pages for insert_entries on
 bdw+

commit 250f8c8140ac0a5e5acb91891d6813f12778b224 upstream.

Hook up the flags to allow read-only ppGTT mappings for gen8+

v2: Include a selftest to check that writes to a readonly PTE are
dropped
v3: Don't duplicate cpu_check() as we can just reuse it, and even worse
don't wholesale copy the theory-of-operation comment from igt_ctx_exec
without changing it to explain the intention behind the new test!
v4: Joonas really likes magic mystery values

Signed-off-by: Jon Bloomfield <jon.bloomfield@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Matthew Auld <matthew.william.auld@gmail.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Matthew Auld <matthew.william.auld@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180712185315.3288-2-chris@chris-wilson.co.uk
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c     | 30 ++++++++++++++++---------
 drivers/gpu/drm/i915/i915_gem_gtt.h     |  3 +++
 drivers/gpu/drm/i915/intel_ringbuffer.c | 10 +++++++--
 3 files changed, 31 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 272b03b4f1ec..3a80446e4e37 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -152,7 +152,8 @@ static int ppgtt_bind_vma(struct i915_vma *vma,
 {
 	u32 pte_flags = 0;
 
-	/* Currently applicable only to VLV */
+	/* Applicable to VLV, and gen8+ */
+	pte_flags = 0;
 	if (vma->obj->gt_ro)
 		pte_flags |= PTE_READ_ONLY;
 
@@ -783,7 +784,8 @@ gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm,
 			      struct i915_page_directory_pointer *pdp,
 			      struct sg_page_iter *sg_iter,
 			      uint64_t start,
-			      enum i915_cache_level cache_level)
+			      enum i915_cache_level cache_level,
+			      u32 flags)
 {
 	struct i915_hw_ppgtt *ppgtt =
 		container_of(vm, struct i915_hw_ppgtt, base);
@@ -803,7 +805,7 @@ gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm,
 
 		pt_vaddr[pte] =
 			gen8_pte_encode(sg_page_iter_dma_address(sg_iter),
-					cache_level, true, 0);
+					cache_level, true, flags);
 		if (++pte == GEN8_PTES) {
 			kunmap_px(ppgtt, pt_vaddr);
 			pt_vaddr = NULL;
@@ -824,7 +826,7 @@ static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
 				      struct sg_table *pages,
 				      uint64_t start,
 				      enum i915_cache_level cache_level,
-				      u32 unused)
+				      u32 flags)
 {
 	struct i915_hw_ppgtt *ppgtt =
 		container_of(vm, struct i915_hw_ppgtt, base);
@@ -834,7 +836,7 @@ static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
 
 	if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
 		gen8_ppgtt_insert_pte_entries(vm, &ppgtt->pdp, &sg_iter, start,
-					      cache_level);
+					      cache_level, flags);
 	} else {
 		struct i915_page_directory_pointer *pdp;
 		uint64_t templ4, pml4e;
@@ -842,7 +844,7 @@ static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
 
 		gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, templ4, pml4e) {
 			gen8_ppgtt_insert_pte_entries(vm, pdp, &sg_iter,
-						      start, cache_level);
+						      start, cache_level, flags);
 		}
 	}
 }
@@ -1519,6 +1521,10 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 	ppgtt->base.clear_range = gen8_ppgtt_clear_range;
 	ppgtt->base.unbind_vma = ppgtt_unbind_vma;
 	ppgtt->base.bind_vma = ppgtt_bind_vma;
+
+	/* From bdw, there is support for read-only pages in the PPGTT */
+	ppgtt->base.has_read_only = true;
+
 	ppgtt->debug_dump = gen8_dump_ppgtt;
 
 	if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
@@ -2347,7 +2353,7 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
 static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 				     struct sg_table *st,
 				     uint64_t start,
-				     enum i915_cache_level level, u32 unused)
+				     enum i915_cache_level level, u32 flags)
 {
 	struct drm_i915_private *dev_priv = vm->dev->dev_private;
 	unsigned first_entry = start >> PAGE_SHIFT;
@@ -2361,7 +2367,7 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 		addr = sg_dma_address(sg_iter.sg) +
 			(sg_iter.sg_pgoffset << PAGE_SHIFT);
 		gen8_set_pte(&gtt_entries[i],
-			     gen8_pte_encode(addr, level, true, 0));
+			     gen8_pte_encode(addr, level, true, flags));
 		i++;
 	}
 
@@ -2374,7 +2380,7 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 	 */
 	if (i != 0)
 		WARN_ON(readq(&gtt_entries[i-1])
-			!= gen8_pte_encode(addr, level, true, 0));
+			!= gen8_pte_encode(addr, level, true, flags));
 
 	/* This next bit makes the above posting read even more important. We
 	 * want to flush the TLBs only after we're certain all the PTE updates
@@ -2514,7 +2520,8 @@ static int ggtt_bind_vma(struct i915_vma *vma,
 	if (ret)
 		return ret;
 
-	/* Currently applicable only to VLV */
+	/* Applicable to VLV (gen8+ do not support RO in the GGTT) */
+	pte_flags = 0;
 	if (obj->gt_ro)
 		pte_flags |= PTE_READ_ONLY;
 
@@ -2657,6 +2664,9 @@ static int i915_gem_setup_global_gtt(struct drm_device *dev,
 	i915_address_space_init(ggtt_vm, dev_priv);
 	ggtt_vm->total += PAGE_SIZE;
 
+	/* Only VLV supports read-only GGTT mappings */
+	ggtt_vm->has_read_only = IS_VALLEYVIEW(dev_priv);
+
 	if (intel_vgpu_active(dev)) {
 		ret = intel_vgt_balloon(dev);
 		if (ret)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index a216397ead52..d36f2d77576a 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -307,6 +307,9 @@ struct i915_address_space {
 	 */
 	struct list_head inactive_list;
 
+	/* Some systems support read-only mappings for GGTT and/or PPGTT */
+	bool has_read_only:1;
+
 	/* FIXME: Need a more generic return type */
 	gen6_pte_t (*pte_encode)(dma_addr_t addr,
 				 enum i915_cache_level level,
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 9d48443bca2e..df6547f60a5c 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2058,6 +2058,8 @@ static void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
 static int intel_alloc_ringbuffer_obj(struct drm_device *dev,
 				      struct intel_ringbuffer *ringbuf)
 {
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct i915_address_space *vm = &dev_priv->gtt.base;
 	struct drm_i915_gem_object *obj;
 
 	obj = NULL;
@@ -2068,8 +2070,12 @@ static int intel_alloc_ringbuffer_obj(struct drm_device *dev,
 	if (obj == NULL)
 		return -ENOMEM;
 
-	/* mark ring buffers as read-only from GPU side by default */
-	obj->gt_ro = 1;
+	/*
+	 * Mark ring buffers as read-only from GPU side (so no stray overwrites)
+	 * if supported by the platform's GGTT.
+	 */
+	if (vm->has_read_only)
+		obj->gt_ro = 1;
 
 	ringbuf->obj = obj;
 

From 3fd1c2e65c60c1c513155e1d1d74138b141aa8a3 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Thu, 12 Jul 2018 19:53:12 +0100
Subject: [PATCH 46/81] drm/i915/gtt: Disable read-only support under GVT

commit c9e666880de5a1fed04dc412b046916d542b72dd upstream.

GVT is not propagating the PTE bits, and is always setting the
read-write bit, thus breaking read-only support.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Zhenyu Wang <zhenyuw@linux.intel.com>
Cc: Jon Bloomfield <jon.bloomfield@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Matthew Auld <matthew.william.auld@gmail.com>
Reviewed-by: Jon Bloomfield <jon.bloomfield@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180712185315.3288-3-chris@chris-wilson.co.uk
Signed-off-by: Jon Bloomfield <jon.bloomfield@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 3a80446e4e37..974c309a9392 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1522,8 +1522,12 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 	ppgtt->base.unbind_vma = ppgtt_unbind_vma;
 	ppgtt->base.bind_vma = ppgtt_bind_vma;
 
-	/* From bdw, there is support for read-only pages in the PPGTT */
-	ppgtt->base.has_read_only = true;
+	/*
+	 * From bdw, there is support for read-only pages in the PPGTT.
+	 *
+	 * XXX GVT is not honouring the lack of RW in the PTE bits.
+	 */
+	ppgtt->base.has_read_only = !intel_vgpu_active(ppgtt->base.dev);
 
 	ppgtt->debug_dump = gen8_dump_ppgtt;
 

From e5e3c0154c19f2d8213e0af88b7a10d9de7fbafd Mon Sep 17 00:00:00 2001
From: Jon Bloomfield <jon.bloomfield@intel.com>
Date: Fri, 20 Apr 2018 14:26:01 -0700
Subject: [PATCH 47/81] drm/i915: Rename gen7 cmdparser tables

commit 0a2f661b6c21815a7fa60e30babe975fee8e73c6 upstream.

We're about to introduce some new tables for later gens, and the
current naming for the gen7 tables will no longer make sense.

v2: rebase

Signed-off-by: Jon Bloomfield <jon.bloomfield@intel.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Dave Airlie <airlied@redhat.com>
Cc: Takashi Iwai <tiwai@suse.de>
Cc: Tyler Hicks <tyhicks@canonical.com>
Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Reviewed-by: Chris Wilson <chris.p.wilson@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 70 +++++++++++++-------------
 1 file changed, 35 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index db58c8d664c2..2c3f13dfe26f 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -113,7 +113,7 @@
 
 /*            Command                          Mask   Fixed Len   Action
 	      ---------------------------------------------------------- */
-static const struct drm_i915_cmd_descriptor common_cmds[] = {
+static const struct drm_i915_cmd_descriptor gen7_common_cmds[] = {
 	CMD(  MI_NOOP,                          SMI,    F,  1,      S  ),
 	CMD(  MI_USER_INTERRUPT,                SMI,    F,  1,      R  ),
 	CMD(  MI_WAIT_FOR_EVENT,                SMI,    F,  1,      M  ),
@@ -146,7 +146,7 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = {
 	CMD(  MI_BATCH_BUFFER_START,            SMI,   !F,  0xFF,   S  ),
 };
 
-static const struct drm_i915_cmd_descriptor render_cmds[] = {
+static const struct drm_i915_cmd_descriptor gen7_render_cmds[] = {
 	CMD(  MI_FLUSH,                         SMI,    F,  1,      S  ),
 	CMD(  MI_ARB_ON_OFF,                    SMI,    F,  1,      R  ),
 	CMD(  MI_PREDICATE,                     SMI,    F,  1,      S  ),
@@ -229,7 +229,7 @@ static const struct drm_i915_cmd_descriptor hsw_render_cmds[] = {
 	CMD(  GFX_OP_3DSTATE_BINDING_TABLE_EDIT_PS,  S3D,   !F,  0x1FF,  S  ),
 };
 
-static const struct drm_i915_cmd_descriptor video_cmds[] = {
+static const struct drm_i915_cmd_descriptor gen7_video_cmds[] = {
 	CMD(  MI_ARB_ON_OFF,                    SMI,    F,  1,      R  ),
 	CMD(  MI_SET_APPID,                     SMI,    F,  1,      S  ),
 	CMD(  MI_STORE_DWORD_IMM,               SMI,   !F,  0xFF,   B,
@@ -273,7 +273,7 @@ static const struct drm_i915_cmd_descriptor video_cmds[] = {
 	CMD(  MFX_WAIT,                         SMFX,   F,  1,      S  ),
 };
 
-static const struct drm_i915_cmd_descriptor vecs_cmds[] = {
+static const struct drm_i915_cmd_descriptor gen7_vecs_cmds[] = {
 	CMD(  MI_ARB_ON_OFF,                    SMI,    F,  1,      R  ),
 	CMD(  MI_SET_APPID,                     SMI,    F,  1,      S  ),
 	CMD(  MI_STORE_DWORD_IMM,               SMI,   !F,  0xFF,   B,
@@ -311,7 +311,7 @@ static const struct drm_i915_cmd_descriptor vecs_cmds[] = {
 	      }},						       ),
 };
 
-static const struct drm_i915_cmd_descriptor blt_cmds[] = {
+static const struct drm_i915_cmd_descriptor gen7_blt_cmds[] = {
 	CMD(  MI_DISPLAY_FLIP,                  SMI,   !F,  0xFF,   R  ),
 	CMD(  MI_STORE_DWORD_IMM,               SMI,   !F,  0x3FF,  B,
 	      .bits = {{
@@ -361,35 +361,35 @@ static const struct drm_i915_cmd_descriptor hsw_blt_cmds[] = {
 #undef B
 #undef M
 
-static const struct drm_i915_cmd_table gen7_render_cmds[] = {
-	{ common_cmds, ARRAY_SIZE(common_cmds) },
-	{ render_cmds, ARRAY_SIZE(render_cmds) },
+static const struct drm_i915_cmd_table gen7_render_cmd_table[] = {
+	{ gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
+	{ gen7_render_cmds, ARRAY_SIZE(gen7_render_cmds) },
 };
 
-static const struct drm_i915_cmd_table hsw_render_ring_cmds[] = {
-	{ common_cmds, ARRAY_SIZE(common_cmds) },
-	{ render_cmds, ARRAY_SIZE(render_cmds) },
+static const struct drm_i915_cmd_table hsw_render_ring_cmd_table[] = {
+	{ gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
+	{ gen7_render_cmds, ARRAY_SIZE(gen7_render_cmds) },
 	{ hsw_render_cmds, ARRAY_SIZE(hsw_render_cmds) },
 };
 
-static const struct drm_i915_cmd_table gen7_video_cmds[] = {
-	{ common_cmds, ARRAY_SIZE(common_cmds) },
-	{ video_cmds, ARRAY_SIZE(video_cmds) },
+static const struct drm_i915_cmd_table gen7_video_cmd_table[] = {
+	{ gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
+	{ gen7_video_cmds, ARRAY_SIZE(gen7_video_cmds) },
 };
 
-static const struct drm_i915_cmd_table hsw_vebox_cmds[] = {
-	{ common_cmds, ARRAY_SIZE(common_cmds) },
-	{ vecs_cmds, ARRAY_SIZE(vecs_cmds) },
+static const struct drm_i915_cmd_table hsw_vebox_cmd_table[] = {
+	{ gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
+	{ gen7_vecs_cmds, ARRAY_SIZE(gen7_vecs_cmds) },
 };
 
-static const struct drm_i915_cmd_table gen7_blt_cmds[] = {
-	{ common_cmds, ARRAY_SIZE(common_cmds) },
-	{ blt_cmds, ARRAY_SIZE(blt_cmds) },
+static const struct drm_i915_cmd_table gen7_blt_cmd_table[] = {
+	{ gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
+	{ gen7_blt_cmds, ARRAY_SIZE(gen7_blt_cmds) },
 };
 
-static const struct drm_i915_cmd_table hsw_blt_ring_cmds[] = {
-	{ common_cmds, ARRAY_SIZE(common_cmds) },
-	{ blt_cmds, ARRAY_SIZE(blt_cmds) },
+static const struct drm_i915_cmd_table hsw_blt_ring_cmd_table[] = {
+	{ gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
+	{ gen7_blt_cmds, ARRAY_SIZE(gen7_blt_cmds) },
 	{ hsw_blt_cmds, ARRAY_SIZE(hsw_blt_cmds) },
 };
 
@@ -697,12 +697,12 @@ int i915_cmd_parser_init_ring(struct intel_engine_cs *ring)
 	switch (ring->id) {
 	case RCS:
 		if (IS_HASWELL(ring->dev)) {
-			cmd_tables = hsw_render_ring_cmds;
+			cmd_tables = hsw_render_ring_cmd_table;
 			cmd_table_count =
-				ARRAY_SIZE(hsw_render_ring_cmds);
+				ARRAY_SIZE(hsw_render_ring_cmd_table);
 		} else {
-			cmd_tables = gen7_render_cmds;
-			cmd_table_count = ARRAY_SIZE(gen7_render_cmds);
+			cmd_tables = gen7_render_cmd_table;
+			cmd_table_count = ARRAY_SIZE(gen7_render_cmd_table);
 		}
 
 		ring->reg_table = gen7_render_regs;
@@ -719,17 +719,17 @@ int i915_cmd_parser_init_ring(struct intel_engine_cs *ring)
 		ring->get_cmd_length_mask = gen7_render_get_cmd_length_mask;
 		break;
 	case VCS:
-		cmd_tables = gen7_video_cmds;
-		cmd_table_count = ARRAY_SIZE(gen7_video_cmds);
+		cmd_tables = gen7_video_cmd_table;
+		cmd_table_count = ARRAY_SIZE(gen7_video_cmd_table);
 		ring->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask;
 		break;
 	case BCS:
 		if (IS_HASWELL(ring->dev)) {
-			cmd_tables = hsw_blt_ring_cmds;
-			cmd_table_count = ARRAY_SIZE(hsw_blt_ring_cmds);
+			cmd_tables = hsw_blt_ring_cmd_table;
+			cmd_table_count = ARRAY_SIZE(hsw_blt_ring_cmd_table);
 		} else {
-			cmd_tables = gen7_blt_cmds;
-			cmd_table_count = ARRAY_SIZE(gen7_blt_cmds);
+			cmd_tables = gen7_blt_cmd_table;
+			cmd_table_count = ARRAY_SIZE(gen7_blt_cmd_table);
 		}
 
 		ring->reg_table = gen7_blt_regs;
@@ -746,8 +746,8 @@ int i915_cmd_parser_init_ring(struct intel_engine_cs *ring)
 		ring->get_cmd_length_mask = gen7_blt_get_cmd_length_mask;
 		break;
 	case VECS:
-		cmd_tables = hsw_vebox_cmds;
-		cmd_table_count = ARRAY_SIZE(hsw_vebox_cmds);
+		cmd_tables = hsw_vebox_cmd_table;
+		cmd_table_count = ARRAY_SIZE(hsw_vebox_cmd_table);
 		/* VECS can use the same length_mask function as VCS */
 		ring->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask;
 		break;

From 3122671a5df3ee13f5cf22b7bdacf422b7b4319a Mon Sep 17 00:00:00 2001
From: Jon Bloomfield <jon.bloomfield@intel.com>
Date: Fri, 8 Jun 2018 08:53:46 -0700
Subject: [PATCH 48/81] drm/i915: Disable Secure Batches for gen6+

commit 44157641d448cbc0c4b73c5231d2b911f0cb0427 upstream.

Retroactively stop reporting support for secure batches
through the api for gen6+ so that older binaries trigger
the fallback path instead.

Older binaries use secure batches pre gen6 to access resources
that are not available to normal usermode processes. However,
all known userspace explicitly checks for HAS_SECURE_BATCHES
before relying on the secure batch feature.

Since there are no known binaries relying on this for newer gens
we can kill secure batches from gen6, via I915_PARAM_HAS_SECURE_BATCHES.

v2: rebase (Mika)
v3: rebase (Mika)

Signed-off-by: Jon Bloomfield <jon.bloomfield@intel.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Dave Airlie <airlied@redhat.com>
Cc: Takashi Iwai <tiwai@suse.de>
Cc: Tyler Hicks <tyhicks@canonical.com>
Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Reviewed-by: Chris Wilson <chris.p.wilson@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/i915/i915_dma.c            | 2 +-
 drivers/gpu/drm/i915/i915_drv.h            | 3 +++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 4 ++++
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 61fcb3b22297..816ece568ffc 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -133,7 +133,7 @@ static int i915_getparam(struct drm_device *dev, void *data,
 		value = 1;
 		break;
 	case I915_PARAM_HAS_SECURE_BATCHES:
-		value = capable(CAP_SYS_ADMIN);
+		value = HAS_SECURE_BATCHES(dev_priv) && capable(CAP_SYS_ADMIN);
 		break;
 	case I915_PARAM_HAS_PINNED_BATCHES:
 		value = 1;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5044f2257e89..6c0a1fa23447 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2539,6 +2539,9 @@ struct drm_i915_cmd_table {
 #define HAS_BSD2(dev)		(INTEL_INFO(dev)->ring_mask & BSD2_RING)
 #define HAS_BLT(dev)		(INTEL_INFO(dev)->ring_mask & BLT_RING)
 #define HAS_VEBOX(dev)		(INTEL_INFO(dev)->ring_mask & VEBOX_RING)
+
+#define HAS_SECURE_BATCHES(dev_priv) (INTEL_INFO(dev_priv)->gen < 6)
+
 #define HAS_LLC(dev)		(INTEL_INFO(dev)->has_llc)
 #define HAS_WT(dev)		((IS_HASWELL(dev) || IS_BROADWELL(dev)) && \
 				 __I915__(dev)->ellc_size)
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 6ed7d63a0688..280167ef4f9d 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1351,6 +1351,10 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 
 	dispatch_flags = 0;
 	if (args->flags & I915_EXEC_SECURE) {
+		/* Return -EPERM to trigger fallback code on old binaries. */
+		if (!HAS_SECURE_BATCHES(dev_priv))
+			return -EPERM;
+
 		if (!file->is_master || !capable(CAP_SYS_ADMIN))
 		    return -EPERM;
 

From 544fd7d9d4cfe32357beab2f1dc543637d42e69f Mon Sep 17 00:00:00 2001
From: Jon Bloomfield <jon.bloomfield@intel.com>
Date: Fri, 8 Jun 2018 10:05:26 -0700
Subject: [PATCH 49/81] drm/i915: Remove Master tables from cmdparser

commit 66d8aba1cd6db34af10de465c0d52af679288cb6 upstream.

The previous patch has killed support for secure batches
on gen6+, and hence the cmdparsers master tables are
now dead code. Remove them.

Signed-off-by: Jon Bloomfield <jon.bloomfield@intel.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Dave Airlie <airlied@redhat.com>
Cc: Takashi Iwai <tiwai@suse.de>
Cc: Tyler Hicks <tyhicks@canonical.com>
Reviewed-by: Chris Wilson <chris.p.wilson@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/i915/i915_cmd_parser.c     | 72 ++++------------------
 drivers/gpu/drm/i915/i915_drv.h            |  3 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  9 +--
 3 files changed, 15 insertions(+), 69 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 2c3f13dfe26f..7fdbbf2f9c2e 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -50,13 +50,11 @@
  * granting userspace undue privileges. There are three categories of privilege.
  *
  * First, commands which are explicitly defined as privileged or which should
- * only be used by the kernel driver. The parser generally rejects such
- * commands, though it may allow some from the drm master process.
+ * only be used by the kernel driver. The parser rejects such commands
  *
  * Second, commands which access registers. To support correct/enhanced
  * userspace functionality, particularly certain OpenGL extensions, the parser
- * provides a whitelist of registers which userspace may safely access (for both
- * normal and drm master processes).
+ * provides a whitelist of registers which userspace may safely access
  *
  * Third, commands which access privileged memory (i.e. GGTT, HWS page, etc).
  * The parser always rejects such commands.
@@ -81,9 +79,9 @@
  * in the per-ring command tables.
  *
  * Other command table entries map fairly directly to high level categories
- * mentioned above: rejected, master-only, register whitelist. The parser
- * implements a number of checks, including the privileged memory checks, via a
- * general bitmasking mechanism.
+ * mentioned above: rejected, register whitelist. The parser implements a number
+ * of checks, including the privileged memory checks, via a general bitmasking
+ * mechanism.
  */
 
 #define STD_MI_OPCODE_MASK  0xFF800000
@@ -109,14 +107,13 @@
 #define R CMD_DESC_REJECT
 #define W CMD_DESC_REGISTER
 #define B CMD_DESC_BITMASK
-#define M CMD_DESC_MASTER
 
 /*            Command                          Mask   Fixed Len   Action
 	      ---------------------------------------------------------- */
 static const struct drm_i915_cmd_descriptor gen7_common_cmds[] = {
 	CMD(  MI_NOOP,                          SMI,    F,  1,      S  ),
 	CMD(  MI_USER_INTERRUPT,                SMI,    F,  1,      R  ),
-	CMD(  MI_WAIT_FOR_EVENT,                SMI,    F,  1,      M  ),
+	CMD(  MI_WAIT_FOR_EVENT,                SMI,    F,  1,      R  ),
 	CMD(  MI_ARB_CHECK,                     SMI,    F,  1,      S  ),
 	CMD(  MI_REPORT_HEAD,                   SMI,    F,  1,      S  ),
 	CMD(  MI_SUSPEND_FLUSH,                 SMI,    F,  1,      S  ),
@@ -213,7 +210,7 @@ static const struct drm_i915_cmd_descriptor hsw_render_cmds[] = {
 	CMD(  MI_URB_ATOMIC_ALLOC,              SMI,    F,  1,      S  ),
 	CMD(  MI_SET_APPID,                     SMI,    F,  1,      S  ),
 	CMD(  MI_RS_CONTEXT,                    SMI,    F,  1,      S  ),
-	CMD(  MI_LOAD_SCAN_LINES_INCL,          SMI,   !F,  0x3F,   M  ),
+	CMD(  MI_LOAD_SCAN_LINES_INCL,          SMI,   !F,  0x3F,   R  ),
 	CMD(  MI_LOAD_SCAN_LINES_EXCL,          SMI,   !F,  0x3F,   R  ),
 	CMD(  MI_LOAD_REGISTER_REG,             SMI,   !F,  0xFF,   R  ),
 	CMD(  MI_RS_STORE_DATA_IMM,             SMI,   !F,  0xFF,   S  ),
@@ -345,7 +342,7 @@ static const struct drm_i915_cmd_descriptor gen7_blt_cmds[] = {
 };
 
 static const struct drm_i915_cmd_descriptor hsw_blt_cmds[] = {
-	CMD(  MI_LOAD_SCAN_LINES_INCL,          SMI,   !F,  0x3F,   M  ),
+	CMD(  MI_LOAD_SCAN_LINES_INCL,          SMI,   !F,  0x3F,   R  ),
 	CMD(  MI_LOAD_SCAN_LINES_EXCL,          SMI,   !F,  0x3F,   R  ),
 };
 
@@ -359,7 +356,6 @@ static const struct drm_i915_cmd_descriptor hsw_blt_cmds[] = {
 #undef R
 #undef W
 #undef B
-#undef M
 
 static const struct drm_i915_cmd_table gen7_render_cmd_table[] = {
 	{ gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
@@ -479,19 +475,6 @@ static const struct drm_i915_reg_descriptor gen7_blt_regs[] = {
 	REG32(BCS_SWCTRL),
 };
 
-static const struct drm_i915_reg_descriptor ivb_master_regs[] = {
-	REG32(FORCEWAKE_MT),
-	REG32(DERRMR),
-	REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_A)),
-	REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_B)),
-	REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_C)),
-};
-
-static const struct drm_i915_reg_descriptor hsw_master_regs[] = {
-	REG32(FORCEWAKE_MT),
-	REG32(DERRMR),
-};
-
 #undef REG64
 #undef REG32
 
@@ -608,9 +591,7 @@ static bool check_sorted(int ring_id,
 
 static bool validate_regs_sorted(struct intel_engine_cs *ring)
 {
-	return check_sorted(ring->id, ring->reg_table, ring->reg_count) &&
-		check_sorted(ring->id, ring->master_reg_table,
-			     ring->master_reg_count);
+	return check_sorted(ring->id, ring->reg_table, ring->reg_count);
 }
 
 struct cmd_node {
@@ -708,14 +689,6 @@ int i915_cmd_parser_init_ring(struct intel_engine_cs *ring)
 		ring->reg_table = gen7_render_regs;
 		ring->reg_count = ARRAY_SIZE(gen7_render_regs);
 
-		if (IS_HASWELL(ring->dev)) {
-			ring->master_reg_table = hsw_master_regs;
-			ring->master_reg_count = ARRAY_SIZE(hsw_master_regs);
-		} else {
-			ring->master_reg_table = ivb_master_regs;
-			ring->master_reg_count = ARRAY_SIZE(ivb_master_regs);
-		}
-
 		ring->get_cmd_length_mask = gen7_render_get_cmd_length_mask;
 		break;
 	case VCS:
@@ -735,14 +708,6 @@ int i915_cmd_parser_init_ring(struct intel_engine_cs *ring)
 		ring->reg_table = gen7_blt_regs;
 		ring->reg_count = ARRAY_SIZE(gen7_blt_regs);
 
-		if (IS_HASWELL(ring->dev)) {
-			ring->master_reg_table = hsw_master_regs;
-			ring->master_reg_count = ARRAY_SIZE(hsw_master_regs);
-		} else {
-			ring->master_reg_table = ivb_master_regs;
-			ring->master_reg_count = ARRAY_SIZE(ivb_master_regs);
-		}
-
 		ring->get_cmd_length_mask = gen7_blt_get_cmd_length_mask;
 		break;
 	case VECS:
@@ -972,7 +937,6 @@ bool i915_needs_cmd_parser(struct intel_engine_cs *ring)
 static bool check_cmd(const struct intel_engine_cs *ring,
 		      const struct drm_i915_cmd_descriptor *desc,
 		      const u32 *cmd, u32 length,
-		      const bool is_master,
 		      bool *oacontrol_set)
 {
 	if (desc->flags & CMD_DESC_REJECT) {
@@ -980,12 +944,6 @@ static bool check_cmd(const struct intel_engine_cs *ring,
 		return false;
 	}
 
-	if ((desc->flags & CMD_DESC_MASTER) && !is_master) {
-		DRM_DEBUG_DRIVER("CMD: Rejected master-only command: 0x%08X\n",
-				 *cmd);
-		return false;
-	}
-
 	if (desc->flags & CMD_DESC_REGISTER) {
 		/*
 		 * Get the distance between individual register offset
@@ -1002,11 +960,6 @@ static bool check_cmd(const struct intel_engine_cs *ring,
 				find_reg(ring->reg_table, ring->reg_count,
 					 reg_addr);
 
-			if (!reg && is_master)
-				reg = find_reg(ring->master_reg_table,
-					       ring->master_reg_count,
-					       reg_addr);
-
 			if (!reg) {
 				DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (ring=%d)\n",
 						 reg_addr, *cmd, ring->id);
@@ -1100,7 +1053,6 @@ static bool check_cmd(const struct intel_engine_cs *ring,
  * @shadow_batch_obj: copy of the batch buffer in question
  * @batch_start_offset: byte offset in the batch at which execution starts
  * @batch_len: length of the commands in batch_obj
- * @is_master: is the submitting process the drm master?
  *
  * Parses the specified batch buffer looking for privilege violations as
  * described in the overview.
@@ -1112,8 +1064,7 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
 		    struct drm_i915_gem_object *batch_obj,
 		    struct drm_i915_gem_object *shadow_batch_obj,
 		    u32 batch_start_offset,
-		    u32 batch_len,
-		    bool is_master)
+		    u32 batch_len)
 {
 	u32 *cmd, *batch_base, *batch_end;
 	struct drm_i915_cmd_descriptor default_desc = { 0 };
@@ -1174,8 +1125,7 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
 			break;
 		}
 
-		if (!check_cmd(ring, desc, cmd, length, is_master,
-			       &oacontrol_set)) {
+		if (!check_cmd(ring, desc, cmd, length, &oacontrol_set)) {
 			ret = -EINVAL;
 			break;
 		}
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 6c0a1fa23447..3760b96eb855 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3287,8 +3287,7 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
 		    struct drm_i915_gem_object *batch_obj,
 		    struct drm_i915_gem_object *shadow_batch_obj,
 		    u32 batch_start_offset,
-		    u32 batch_len,
-		    bool is_master);
+		    u32 batch_len);
 
 /* i915_suspend.c */
 extern int i915_save_state(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 280167ef4f9d..08842e4842a7 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1129,8 +1129,7 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *ring,
 			  struct eb_vmas *eb,
 			  struct drm_i915_gem_object *batch_obj,
 			  u32 batch_start_offset,
-			  u32 batch_len,
-			  bool is_master)
+			  u32 batch_len)
 {
 	struct drm_i915_gem_object *shadow_batch_obj;
 	struct i915_vma *vma;
@@ -1145,8 +1144,7 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *ring,
 			      batch_obj,
 			      shadow_batch_obj,
 			      batch_start_offset,
-			      batch_len,
-			      is_master);
+			      batch_len);
 	if (ret)
 		goto err;
 
@@ -1501,8 +1499,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 						      eb,
 						      batch_obj,
 						      args->batch_start_offset,
-						      args->batch_len,
-						      file->is_master);
+						      args->batch_len);
 		if (IS_ERR(parsed_batch_obj)) {
 			ret = PTR_ERR(parsed_batch_obj);
 			goto err;

From 17e89f38212d8b3cba470efca91b997ac03c592c Mon Sep 17 00:00:00 2001
From: Jon Bloomfield <jon.bloomfield@intel.com>
Date: Wed, 1 Aug 2018 09:33:59 -0700
Subject: [PATCH 50/81] drm/i915: Add support for mandatory cmdparsing

commit 311a50e76a33d1e029563c24b2ff6db0c02b5afe upstream.

The existing cmdparser for gen7 can be bypassed by specifying
batch_len=0 in the execbuf call. This is safe because bypassing
simply reduces the cmd-set available.

In a later patch we will introduce cmdparsing for gen9, as a
security measure, which must be strictly enforced since without
it we are vulnerable to DoS attacks.

Introduce the concept of 'required' cmd parsing that cannot be
bypassed by submitting zero-length bb's.

v2: rebase (Mika)
v2: rebase (Mika)
v3: fix conflict on engine flags (Mika)

Signed-off-by: Jon Bloomfield <jon.bloomfield@intel.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Dave Airlie <airlied@redhat.com>
Cc: Takashi Iwai <tiwai@suse.de>
Cc: Tyler Hicks <tyhicks@canonical.com>
Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Reviewed-by: Chris Wilson <chris.p.wilson@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/i915/i915_cmd_parser.c     | 24 ++--------------------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  9 +++++++-
 drivers/gpu/drm/i915/intel_ringbuffer.h    |  3 ++-
 3 files changed, 12 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 7fdbbf2f9c2e..bd11913518d4 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -734,7 +734,7 @@ int i915_cmd_parser_init_ring(struct intel_engine_cs *ring)
 		return ret;
 	}
 
-	ring->needs_cmd_parser = true;
+	ring->using_cmd_parser = true;
 
 	return 0;
 }
@@ -748,7 +748,7 @@ int i915_cmd_parser_init_ring(struct intel_engine_cs *ring)
  */
 void i915_cmd_parser_fini_ring(struct intel_engine_cs *ring)
 {
-	if (!ring->needs_cmd_parser)
+	if (!ring->using_cmd_parser)
 		return;
 
 	fini_hash_table(ring);
@@ -914,26 +914,6 @@ unpin_src:
 	return ret ? ERR_PTR(ret) : dst;
 }
 
-/**
- * i915_needs_cmd_parser() - should a given ring use software command parsing?
- * @ring: the ring in question
- *
- * Only certain platforms require software batch buffer command parsing, and
- * only when enabled via module parameter.
- *
- * Return: true if the ring requires software command parsing
- */
-bool i915_needs_cmd_parser(struct intel_engine_cs *ring)
-{
-	if (!ring->needs_cmd_parser)
-		return false;
-
-	if (!USES_PPGTT(ring->dev))
-		return false;
-
-	return (i915.enable_cmd_parser == 1);
-}
-
 static bool check_cmd(const struct intel_engine_cs *ring,
 		      const struct drm_i915_cmd_descriptor *desc,
 		      const u32 *cmd, u32 length,
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 08842e4842a7..137cca662365 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1320,6 +1320,13 @@ eb_get_batch(struct eb_vmas *eb)
 	return vma->obj;
 }
 
+static inline bool use_cmdparser(const struct intel_engine_cs *ring,
+				 u32 batch_len)
+{
+	return ring->requires_cmd_parser ||
+		(ring->using_cmd_parser && batch_len && USES_PPGTT(ring->dev));
+}
+
 static int
 i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		       struct drm_file *file,
@@ -1491,7 +1498,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	}
 
 	params->args_batch_start_offset = args->batch_start_offset;
-	if (i915_needs_cmd_parser(ring) && args->batch_len) {
+	if (use_cmdparser(ring, args->batch_len)) {
 		struct drm_i915_gem_object *parsed_batch_obj;
 
 		parsed_batch_obj = i915_gem_execbuffer_parse(ring,
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 49fa41dc0eb6..56c872b89a92 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -314,7 +314,8 @@ struct  intel_engine_cs {
 		volatile u32 *cpu_page;
 	} scratch;
 
-	bool needs_cmd_parser;
+	bool using_cmd_parser;
+	bool requires_cmd_parser;
 
 	/*
 	 * Table of commands the command parser needs to know about

From 77524398bccea3592a25cbe92a9a54fa555013af Mon Sep 17 00:00:00 2001
From: Jon Bloomfield <jon.bloomfield@intel.com>
Date: Tue, 22 May 2018 13:59:06 -0700
Subject: [PATCH 51/81] drm/i915: Support ro ppgtt mapped cmdparser shadow
 buffers

commit 4f7af1948abcb18b4772fe1bcd84d7d27d96258c upstream.

For Gen7, the original cmdparser motive was to permit limited
use of register read/write instructions in unprivileged BB's.
This worked by copying the user supplied bb to a kmd owned
bb, and running it in secure mode, from the ggtt, only if
the scanner finds no unsafe commands or registers.

For Gen8+ we can't use this same technique because running bb's
from the ggtt also disables access to ppgtt space. But we also
do not actually require 'secure' execution since we are only
trying to reduce the available command/register set. Instead we
will copy the user buffer to a kmd owned read-only bb in ppgtt,
and run in the usual non-secure mode.

Note that ro pages are only supported by ppgtt (not ggtt), but
luckily that's exactly what we need.

Add the required paths to map the shadow buffer to ppgtt ro for Gen8+

v2: IS_GEN7/IS_GEN (Mika)
v3: rebase
v4: rebase
v5: rebase

Signed-off-by: Jon Bloomfield <jon.bloomfield@intel.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Dave Airlie <airlied@redhat.com>
Cc: Takashi Iwai <tiwai@suse.de>
Cc: Tyler Hicks <tyhicks@canonical.com>
Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Reviewed-by: Chris Wilson <chris.p.wilson@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/i915/i915_drv.h            |  6 +++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 62 ++++++++++++++++------
 2 files changed, 52 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 3760b96eb855..91ab878bd32d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2556,6 +2556,12 @@ struct drm_i915_cmd_table {
 #define HAS_OVERLAY(dev)		(INTEL_INFO(dev)->has_overlay)
 #define OVERLAY_NEEDS_PHYSICAL(dev)	(INTEL_INFO(dev)->overlay_needs_physical)
 
+/*
+ * The Gen7 cmdparser copies the scanned buffer to the ggtt for execution
+ * All later gens can run the final buffer from the ppgtt
+ */
+#define CMDPARSER_USES_GGTT(dev_priv) IS_GEN7(dev_priv)
+
 /* Early gen2 have a totally busted CS tlb and require pinned batches. */
 #define HAS_BROKEN_CS_TLB(dev)		(IS_I830(dev) || IS_845G(dev))
 /*
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 137cca662365..1392e8a72797 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1123,10 +1123,41 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev,
 	return 0;
 }
 
+static struct i915_vma*
+shadow_batch_pin(struct drm_i915_gem_object *obj, struct i915_address_space *vm)
+{
+	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
+	struct i915_address_space *pin_vm = vm;
+	u64 flags;
+	int ret;
+
+	/*
+	 * PPGTT backed shadow buffers must be mapped RO, to prevent
+	 * post-scan tampering
+	 */
+	if (CMDPARSER_USES_GGTT(dev_priv)) {
+		flags = PIN_GLOBAL;
+		pin_vm = &dev_priv->gtt.base;
+	} else if (vm->has_read_only) {
+		flags = PIN_USER;
+		obj->gt_ro = 1;
+	} else {
+		DRM_DEBUG("Cannot prevent post-scan tampering without RO capable vm\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	ret = i915_gem_object_pin(obj, pin_vm, 0, flags);
+	if (ret)
+		return ERR_PTR(ret);
+	else
+		return i915_gem_obj_to_vma(obj, pin_vm);
+}
+
 static struct drm_i915_gem_object*
 i915_gem_execbuffer_parse(struct intel_engine_cs *ring,
 			  struct drm_i915_gem_exec_object2 *shadow_exec_entry,
 			  struct eb_vmas *eb,
+			  struct i915_address_space *vm,
 			  struct drm_i915_gem_object *batch_obj,
 			  u32 batch_start_offset,
 			  u32 batch_len)
@@ -1148,15 +1179,16 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *ring,
 	if (ret)
 		goto err;
 
-	ret = i915_gem_obj_ggtt_pin(shadow_batch_obj, 0, 0);
-	if (ret)
+	vma = shadow_batch_pin(shadow_batch_obj, vm);
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
 		goto err;
+	}
 
 	i915_gem_object_unpin_pages(shadow_batch_obj);
 
 	memset(shadow_exec_entry, 0, sizeof(*shadow_exec_entry));
 
-	vma = i915_gem_obj_to_ggtt(shadow_batch_obj);
 	vma->exec_entry = shadow_exec_entry;
 	vma->exec_entry->flags = __EXEC_OBJECT_HAS_PIN;
 	drm_gem_object_reference(&shadow_batch_obj->base);
@@ -1168,7 +1200,14 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *ring,
 
 err:
 	i915_gem_object_unpin_pages(shadow_batch_obj);
-	if (ret == -EACCES) /* unhandled chained batch */
+
+	/*
+	 * Unsafe GGTT-backed buffers can still be submitted safely
+	 * as non-secure.
+	 * For PPGTT backing however, we have no choice but to forcibly
+	 * reject unsafe buffers
+	 */
+	if (CMDPARSER_USES_GGTT(batch_obj->base.dev) && (ret == -EACCES))
 		return batch_obj;
 	else
 		return ERR_PTR(ret);
@@ -1503,7 +1542,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 
 		parsed_batch_obj = i915_gem_execbuffer_parse(ring,
 						      &shadow_exec_entry,
-						      eb,
+						      eb, vm,
 						      batch_obj,
 						      args->batch_start_offset,
 						      args->batch_len);
@@ -1516,18 +1555,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		 * parsed_batch_obj == batch_obj means batch not fully parsed:
 		 * Accept, but don't promote to secure.
 		 */
-
 		if (parsed_batch_obj != batch_obj) {
-			/*
-			 * Batch parsed and accepted:
-			 *
-			 * Set the DISPATCH_SECURE bit to remove the NON_SECURE
-			 * bit from MI_BATCH_BUFFER_START commands issued in
-			 * the dispatch_execbuffer implementations. We
-			 * specifically don't want that set on batches the
-			 * command parser has accepted.
-			 */
-			dispatch_flags |= I915_DISPATCH_SECURE;
+			if (CMDPARSER_USES_GGTT(dev_priv))
+				dispatch_flags |= I915_DISPATCH_SECURE;
 			params->args_batch_start_offset = 0;
 			batch_obj = parsed_batch_obj;
 		}

From 2ac501479a1325d00aca5012887ebfece8358032 Mon Sep 17 00:00:00 2001
From: Jon Bloomfield <jon.bloomfield@intel.com>
Date: Wed, 1 Aug 2018 09:45:50 -0700
Subject: [PATCH 52/81] drm/i915: Allow parsing of unsized batches

commit 435e8fc059dbe0eec823a75c22da2972390ba9e0 upstream.

In "drm/i915: Add support for mandatory cmdparsing" we introduced the
concept of mandatory parsing. This allows the cmdparser to be invoked
even when user passes batch_len=0 to the execbuf ioctl's.

However, the cmdparser needs to know the extents of the buffer being
scanned. Refactor the code to ensure the cmdparser uses the actual
object size, instead of the incoming length, if user passes 0.

Signed-off-by: Jon Bloomfield <jon.bloomfield@intel.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Dave Airlie <airlied@redhat.com>
Cc: Takashi Iwai <tiwai@suse.de>
Cc: Tyler Hicks <tyhicks@canonical.com>
Reviewed-by: Chris Wilson <chris.p.wilson@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 1392e8a72797..556849350f27 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1540,12 +1540,17 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	if (use_cmdparser(ring, args->batch_len)) {
 		struct drm_i915_gem_object *parsed_batch_obj;
 
+		u32 batch_off = args->batch_start_offset;
+		u32 batch_len = args->batch_len;
+		if (batch_len == 0)
+			batch_len = batch_obj->base.size - batch_off;
+
 		parsed_batch_obj = i915_gem_execbuffer_parse(ring,
 						      &shadow_exec_entry,
 						      eb, vm,
 						      batch_obj,
-						      args->batch_start_offset,
-						      args->batch_len);
+						      batch_off,
+						      batch_len);
 		if (IS_ERR(parsed_batch_obj)) {
 			ret = PTR_ERR(parsed_batch_obj);
 			goto err;

From 57c2c8f58ca07e8045f020e4e2548ac3bc3a5aab Mon Sep 17 00:00:00 2001
From: Jon Bloomfield <jon.bloomfield@intel.com>
Date: Mon, 23 Apr 2018 11:12:15 -0700
Subject: [PATCH 53/81] drm/i915: Add gen9 BCS cmdparsing

commit 0f2f39758341df70202ae1c42d5a1e4ee392b6d3 upstream.

For gen9 we enable cmdparsing on the BCS ring, specifically
to catch inadvertent accesses to sensitive registers

Unlike gen7/hsw, we use the parser only to block certain
registers. We can rely on h/w to block restricted commands,
so the command tables only provide enough info to allow the
parser to delineate each command, and identify commands that
access registers.

Note: This patch deliberately ignores checkpatch issues in
favour of matching the style of the surrounding code. We'll
correct the entire file in one go in a later patch.

v3: rebase (Mika)
v4: Add RING_TIMESTAMP registers to whitelist (Jon)

Signed-off-by: Jon Bloomfield <jon.bloomfield@intel.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Dave Airlie <airlied@redhat.com>
Cc: Takashi Iwai <tiwai@suse.de>
Cc: Tyler Hicks <tyhicks@canonical.com>
Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Reviewed-by: Chris Wilson <chris.p.wilson@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 121 ++++++++++++++++++++++---
 drivers/gpu/drm/i915/i915_dma.c        |   2 +-
 drivers/gpu/drm/i915/i915_drv.h        |   2 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c    |   3 +-
 drivers/gpu/drm/i915/i915_reg.h        |   5 +
 5 files changed, 119 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index bd11913518d4..d37c6d272a2e 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -346,6 +346,47 @@ static const struct drm_i915_cmd_descriptor hsw_blt_cmds[] = {
 	CMD(  MI_LOAD_SCAN_LINES_EXCL,          SMI,   !F,  0x3F,   R  ),
 };
 
+/*
+ * For Gen9 we can still rely on the h/w to enforce cmd security, and only
+ * need to re-enforce the register access checks. We therefore only need to
+ * teach the cmdparser how to find the end of each command, and identify
+ * register accesses. The table doesn't need to reject any commands, and so
+ * the only commands listed here are:
+ *   1) Those that touch registers
+ *   2) Those that do not have the default 8-bit length
+ *
+ * Note that the default MI length mask chosen for this table is 0xFF, not
+ * the 0x3F used on older devices. This is because the vast majority of MI
+ * cmds on Gen9 use a standard 8-bit Length field.
+ * All the Gen9 blitter instructions are standard 0xFF length mask, and
+ * none allow access to non-general registers, so in fact no BLT cmds are
+ * included in the table at all.
+ *
+ */
+static const struct drm_i915_cmd_descriptor gen9_blt_cmds[] = {
+	CMD(  MI_NOOP,                          SMI,    F,  1,      S  ),
+	CMD(  MI_USER_INTERRUPT,                SMI,    F,  1,      S  ),
+	CMD(  MI_WAIT_FOR_EVENT,                SMI,    F,  1,      S  ),
+	CMD(  MI_FLUSH,                         SMI,    F,  1,      S  ),
+	CMD(  MI_ARB_CHECK,                     SMI,    F,  1,      S  ),
+	CMD(  MI_REPORT_HEAD,                   SMI,    F,  1,      S  ),
+	CMD(  MI_ARB_ON_OFF,                    SMI,    F,  1,      S  ),
+	CMD(  MI_SUSPEND_FLUSH,                 SMI,    F,  1,      S  ),
+	CMD(  MI_LOAD_SCAN_LINES_INCL,          SMI,   !F,  0x3F,   S  ),
+	CMD(  MI_LOAD_SCAN_LINES_EXCL,          SMI,   !F,  0x3F,   S  ),
+	CMD(  MI_STORE_DWORD_IMM,               SMI,   !F,  0x3FF,  S  ),
+	CMD(  MI_LOAD_REGISTER_IMM(1),          SMI,   !F,  0xFF,   W,
+	      .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 2 }    ),
+	CMD(  MI_UPDATE_GTT,                    SMI,   !F,  0x3FF,  S  ),
+	CMD(  MI_STORE_REGISTER_MEM_GEN8,       SMI,    F,  4,      W,
+	      .reg = { .offset = 1, .mask = 0x007FFFFC }               ),
+	CMD(  MI_FLUSH_DW,                      SMI,   !F,  0x3F,   S  ),
+	CMD(  MI_LOAD_REGISTER_MEM_GEN8,        SMI,    F,  4,      W,
+	      .reg = { .offset = 1, .mask = 0x007FFFFC }               ),
+	CMD(  MI_LOAD_REGISTER_REG,             SMI,    !F,  0xFF,  W,
+	      .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 1 }    ),
+};
+
 #undef CMD
 #undef SMI
 #undef S3D
@@ -389,6 +430,11 @@ static const struct drm_i915_cmd_table hsw_blt_ring_cmd_table[] = {
 	{ hsw_blt_cmds, ARRAY_SIZE(hsw_blt_cmds) },
 };
 
+static const struct drm_i915_cmd_table gen9_blt_cmd_table[] = {
+	{ gen9_blt_cmds, ARRAY_SIZE(gen9_blt_cmds) },
+};
+
+
 /*
  * Register whitelists, sorted by increasing register offset.
  */
@@ -422,6 +468,10 @@ struct drm_i915_reg_descriptor {
 #define REG64(addr)                                     \
 	REG32(addr), REG32(addr + sizeof(u32))
 
+#define REG64_IDX(_reg, idx) \
+	{ .addr = _reg(idx) }, \
+	{ .addr = _reg ## _UDW(idx) }
+
 static const struct drm_i915_reg_descriptor gen7_render_regs[] = {
 	REG64(GPGPU_THREADS_DISPATCHED),
 	REG64(HS_INVOCATION_COUNT),
@@ -475,6 +525,29 @@ static const struct drm_i915_reg_descriptor gen7_blt_regs[] = {
 	REG32(BCS_SWCTRL),
 };
 
+static const struct drm_i915_reg_descriptor gen9_blt_regs[] = {
+	REG64_IDX(RING_TIMESTAMP, RENDER_RING_BASE),
+	REG64_IDX(RING_TIMESTAMP, BSD_RING_BASE),
+	REG32(BCS_SWCTRL),
+	REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE),
+	REG64_IDX(BCS_GPR, 0),
+	REG64_IDX(BCS_GPR, 1),
+	REG64_IDX(BCS_GPR, 2),
+	REG64_IDX(BCS_GPR, 3),
+	REG64_IDX(BCS_GPR, 4),
+	REG64_IDX(BCS_GPR, 5),
+	REG64_IDX(BCS_GPR, 6),
+	REG64_IDX(BCS_GPR, 7),
+	REG64_IDX(BCS_GPR, 8),
+	REG64_IDX(BCS_GPR, 9),
+	REG64_IDX(BCS_GPR, 10),
+	REG64_IDX(BCS_GPR, 11),
+	REG64_IDX(BCS_GPR, 12),
+	REG64_IDX(BCS_GPR, 13),
+	REG64_IDX(BCS_GPR, 14),
+	REG64_IDX(BCS_GPR, 15),
+};
+
 #undef REG64
 #undef REG32
 
@@ -533,6 +606,17 @@ static u32 gen7_blt_get_cmd_length_mask(u32 cmd_header)
 	return 0;
 }
 
+static u32 gen9_blt_get_cmd_length_mask(u32 cmd_header)
+{
+	u32 client = (cmd_header & INSTR_CLIENT_MASK) >> INSTR_CLIENT_SHIFT;
+
+	if (client == INSTR_MI_CLIENT || client == INSTR_BC_CLIENT)
+		return 0xFF;
+
+	DRM_DEBUG_DRIVER("CMD: Abnormal blt cmd length! 0x%08X\n", cmd_header);
+	return 0;
+}
+
 static bool validate_cmds_sorted(struct intel_engine_cs *ring,
 				 const struct drm_i915_cmd_table *cmd_tables,
 				 int cmd_table_count)
@@ -672,7 +756,7 @@ int i915_cmd_parser_init_ring(struct intel_engine_cs *ring)
 	int cmd_table_count;
 	int ret;
 
-	if (!IS_GEN7(ring->dev))
+	if (!IS_GEN7(ring->dev) && !(IS_GEN9(ring->dev) && ring->id == BCS))
 		return 0;
 
 	switch (ring->id) {
@@ -697,7 +781,17 @@ int i915_cmd_parser_init_ring(struct intel_engine_cs *ring)
 		ring->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask;
 		break;
 	case BCS:
-		if (IS_HASWELL(ring->dev)) {
+		ring->get_cmd_length_mask = gen7_blt_get_cmd_length_mask;
+		if (IS_GEN9(ring->dev)) {
+			cmd_tables = gen9_blt_cmd_table;
+			cmd_table_count = ARRAY_SIZE(gen9_blt_cmd_table);
+			ring->get_cmd_length_mask =
+				gen9_blt_get_cmd_length_mask;
+
+			/* BCS Engine unsafe without parser */
+			ring->requires_cmd_parser = 1;
+		}
+		else if (IS_HASWELL(ring->dev)) {
 			cmd_tables = hsw_blt_ring_cmd_table;
 			cmd_table_count = ARRAY_SIZE(hsw_blt_ring_cmd_table);
 		} else {
@@ -705,10 +799,14 @@ int i915_cmd_parser_init_ring(struct intel_engine_cs *ring)
 			cmd_table_count = ARRAY_SIZE(gen7_blt_cmd_table);
 		}
 
-		ring->reg_table = gen7_blt_regs;
-		ring->reg_count = ARRAY_SIZE(gen7_blt_regs);
+		if (IS_GEN9(ring->dev)) {
+			ring->reg_table = gen9_blt_regs;
+			ring->reg_count = ARRAY_SIZE(gen9_blt_regs);
+		} else {
+			ring->reg_table = gen7_blt_regs;
+			ring->reg_count = ARRAY_SIZE(gen7_blt_regs);
+		}
 
-		ring->get_cmd_length_mask = gen7_blt_get_cmd_length_mask;
 		break;
 	case VECS:
 		cmd_tables = hsw_vebox_cmd_table;
@@ -1082,9 +1180,9 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
 		}
 
 		/*
-		 * If the batch buffer contains a chained batch, return an
-		 * error that tells the caller to abort and dispatch the
-		 * workload as a non-secure batch.
+		 * We don't try to handle BATCH_BUFFER_START because it adds
+		 * non-trivial complexity. Instead we abort the scan and return
+		 * and error to indicate that the batch is unsafe.
 		 */
 		if (desc->cmd.value == MI_BATCH_BUFFER_START) {
 			ret = -EACCES;
@@ -1106,7 +1204,7 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
 		}
 
 		if (!check_cmd(ring, desc, cmd, length, &oacontrol_set)) {
-			ret = -EINVAL;
+			ret = CMDPARSER_USES_GGTT(ring->dev) ? -EINVAL : -EACCES;
 			break;
 		}
 
@@ -1136,7 +1234,7 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
  *
  * Return: the current version number of the cmd parser
  */
-int i915_cmd_parser_get_version(void)
+int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv)
 {
 	/*
 	 * Command parser version history
@@ -1148,6 +1246,7 @@ int i915_cmd_parser_get_version(void)
 	 * 3. Allow access to the GPGPU_THREADS_DISPATCHED register.
 	 * 4. L3 atomic chicken bits of HSW_SCRATCH1 and HSW_ROW_CHICKEN3.
 	 * 5. GPGPU dispatch compute indirect registers.
+	 * 10. Gen9 only - Supports the new ppgtt based BLIT parser
 	 */
-	return 5;
+	return CMDPARSER_USES_GGTT(dev_priv) ? 5 : 10;
 }
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 816ece568ffc..7b61078c2330 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -145,7 +145,7 @@ static int i915_getparam(struct drm_device *dev, void *data,
 		value = 1;
 		break;
 	case I915_PARAM_CMD_PARSER_VERSION:
-		value = i915_cmd_parser_get_version();
+		value = i915_cmd_parser_get_version(dev_priv);
 		break;
 	case I915_PARAM_HAS_COHERENT_PHYS_GTT:
 		value = 1;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 91ab878bd32d..6064ca4aaaef 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3285,7 +3285,7 @@ void i915_get_extra_instdone(struct drm_device *dev, uint32_t *instdone);
 const char *i915_cache_level_str(struct drm_i915_private *i915, int type);
 
 /* i915_cmd_parser.c */
-int i915_cmd_parser_get_version(void);
+int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv);
 int i915_cmd_parser_init_ring(struct intel_engine_cs *ring);
 void i915_cmd_parser_fini_ring(struct intel_engine_cs *ring);
 bool i915_needs_cmd_parser(struct intel_engine_cs *ring);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 974c309a9392..65a53ee398b8 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -119,7 +119,8 @@ static int sanitize_enable_ppgtt(struct drm_device *dev, int enable_ppgtt)
 	    (enable_ppgtt == 0 || !has_aliasing_ppgtt))
 		return 0;
 
-	if (enable_ppgtt == 1)
+	/* Full PPGTT is required by the Gen9 cmdparser */
+	if (enable_ppgtt == 1 && INTEL_INFO(dev)->gen != 9)
 		return 1;
 
 	if (enable_ppgtt == 2 && has_full_ppgtt)
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index cace154bbdc0..e45e68c3dea2 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -511,6 +511,10 @@
  */
 #define BCS_SWCTRL 0x22200
 
+/* There are 16 GPR registers */
+#define BCS_GPR(n)	(0x22600 + (n) * 8)
+#define BCS_GPR_UDW(n)	(0x22600 + (n) * 8 + 4)
+
 #define GPGPU_THREADS_DISPATCHED        0x2290
 #define HS_INVOCATION_COUNT             0x2300
 #define DS_INVOCATION_COUNT             0x2308
@@ -1567,6 +1571,7 @@ enum skl_disp_power_wells {
 #define RING_IMR(base)		((base)+0xa8)
 #define RING_HWSTAM(base)	((base)+0x98)
 #define RING_TIMESTAMP(base)	((base)+0x358)
+#define RING_TIMESTAMP_UDW(base) ((base) + 0x358 + 4)
 #define   TAIL_ADDR		0x001FFFF8
 #define   HEAD_WRAP_COUNT	0xFFE00000
 #define   HEAD_WRAP_ONE		0x00200000

From d88d2d3fc6076760e903e78135f5bef028e6e813 Mon Sep 17 00:00:00 2001
From: Jon Bloomfield <jon.bloomfield@intel.com>
Date: Fri, 21 Sep 2018 13:18:09 -0700
Subject: [PATCH 54/81] drm/i915/cmdparser: Add support for backward jumps

commit f8c08d8faee5567803c8c533865296ca30286bbf upstream.

To keep things manageable, the pre-gen9 cmdparser does not
attempt to track any form of nested BB_START's. This did not
prevent usermode from using nested starts, or even chained
batches because the cmdparser is not strictly enforced pre gen9.

Instead, the existence of a nested BB_START would cause the batch
to be emitted in insecure mode, and any privileged capabilities
would not be available.

For Gen9, the cmdparser becomes mandatory (for BCS at least), and
so not providing any form of nested BB_START support becomes
overly restrictive. Any such batch will simply not run.

We make heavy use of backward jumps in igt, and it is much easier
to add support for this restricted subset of nested jumps, than to
rewrite the whole of our test suite to avoid them.

Add the required logic to support limited backward jumps, to
instructions that have already been validated by the parser.

Note that it's not sufficient to simply approve any BB_START
that jumps backwards in the buffer because this would allow an
attacker to embed a rogue instruction sequence within the
operand words of a harmless instruction (say LRI) and jump to
that.

We introduce a bit array to track every instr offset successfully
validated, and test the target of BB_START against this. If the
target offset hits, it is re-written to the same offset in the
shadow buffer and the BB_START cmd is allowed.

Note: This patch deliberately ignores checkpatch issues in the
cmdtables, in order to match the style of the surrounding code.
We'll correct the entire file in one go in a later patch.

v2: set dispatch secure late (Mika)
v3: rebase (Mika)
v4: Clear whitelist on each parse
Minor review updates (Chris)
v5: Correct backward jump batching
v6: fix compilation error due to struct eb shuffle (Mika)

Cc: Tony Luck <tony.luck@intel.com>
Cc: Dave Airlie <airlied@redhat.com>
Cc: Takashi Iwai <tiwai@suse.de>
Cc: Tyler Hicks <tyhicks@canonical.com>
Signed-off-by: Jon Bloomfield <jon.bloomfield@intel.com>
Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Reviewed-by: Chris Wilson <chris.p.wilson@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/i915/i915_cmd_parser.c     | 148 ++++++++++++++++++---
 drivers/gpu/drm/i915/i915_drv.h            |  16 ++-
 drivers/gpu/drm/i915/i915_gem_context.c    |   5 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  35 +++--
 4 files changed, 175 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index d37c6d272a2e..80e7154f17c7 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -385,6 +385,17 @@ static const struct drm_i915_cmd_descriptor gen9_blt_cmds[] = {
 	      .reg = { .offset = 1, .mask = 0x007FFFFC }               ),
 	CMD(  MI_LOAD_REGISTER_REG,             SMI,    !F,  0xFF,  W,
 	      .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 1 }    ),
+
+	/*
+	 * We allow BB_START but apply further checks. We just sanitize the
+	 * basic fields here.
+	 */
+	CMD( MI_BATCH_BUFFER_START_GEN8,       SMI,    !F,  0xFF,  B,
+	     .bits = {{
+			.offset = 0,
+			.mask = ~SMI,
+			.expected = (MI_BATCH_PPGTT_HSW | 1),
+	      }},					     ),
 };
 
 #undef CMD
@@ -1012,7 +1023,7 @@ unpin_src:
 	return ret ? ERR_PTR(ret) : dst;
 }
 
-static bool check_cmd(const struct intel_engine_cs *ring,
+static int check_cmd(const struct intel_engine_cs *ring,
 		      const struct drm_i915_cmd_descriptor *desc,
 		      const u32 *cmd, u32 length,
 		      bool *oacontrol_set)
@@ -1122,15 +1133,114 @@ static bool check_cmd(const struct intel_engine_cs *ring,
 	return true;
 }
 
+static int check_bbstart(struct intel_context *ctx,
+			 u32 *cmd, u64 offset, u32 length,
+			 u32 batch_len,
+			 u64 batch_start,
+			 u64 shadow_batch_start)
+{
+
+	u64 jump_offset, jump_target;
+	u32 target_cmd_offset, target_cmd_index;
+
+	/* For igt compatibility on older platforms */
+	if (CMDPARSER_USES_GGTT(ctx->i915)) {
+		DRM_DEBUG("CMD: Rejecting BB_START for ggtt based submission\n");
+		return -EACCES;
+	}
+
+	if (length != 3) {
+		DRM_DEBUG("CMD: Recursive BB_START with bad length(%u)\n",
+				 length);
+		return -EINVAL;
+	}
+
+	jump_target = *(u64*)(cmd+1);
+	jump_offset = jump_target - batch_start;
+
+	/*
+	 * Any underflow of jump_target is guaranteed to be outside the range
+	 * of a u32, so >= test catches both too large and too small
+	 */
+	if (jump_offset >= batch_len) {
+		DRM_DEBUG("CMD: BB_START to 0x%llx jumps out of BB\n",
+			  jump_target);
+		return -EINVAL;
+	}
+
+	/*
+	 * This cannot overflow a u32 because we already checked jump_offset
+	 * is within the BB, and the batch_len is a u32
+	 */
+	target_cmd_offset = lower_32_bits(jump_offset);
+	target_cmd_index = target_cmd_offset / sizeof(u32);
+
+	*(u64*)(cmd + 1) = shadow_batch_start + target_cmd_offset;
+
+	if (target_cmd_index == offset)
+		return 0;
+
+	if (ctx->jump_whitelist_cmds <= target_cmd_index) {
+		DRM_DEBUG("CMD: Rejecting BB_START - truncated whitelist array\n");
+		return -EINVAL;
+	} else if (!test_bit(target_cmd_index, ctx->jump_whitelist)) {
+		DRM_DEBUG("CMD: BB_START to 0x%llx not a previously executed cmd\n",
+			  jump_target);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void init_whitelist(struct intel_context *ctx, u32 batch_len)
+{
+	const u32 batch_cmds = DIV_ROUND_UP(batch_len, sizeof(u32));
+	const u32 exact_size = BITS_TO_LONGS(batch_cmds);
+	u32 next_size = BITS_TO_LONGS(roundup_pow_of_two(batch_cmds));
+	unsigned long *next_whitelist;
+
+	if (CMDPARSER_USES_GGTT(ctx->i915))
+		return;
+
+	if (batch_cmds <= ctx->jump_whitelist_cmds) {
+		memset(ctx->jump_whitelist, 0, exact_size * sizeof(u32));
+		return;
+	}
+
+again:
+	next_whitelist = kcalloc(next_size, sizeof(long), GFP_KERNEL);
+	if (next_whitelist) {
+		kfree(ctx->jump_whitelist);
+		ctx->jump_whitelist = next_whitelist;
+		ctx->jump_whitelist_cmds =
+			next_size * BITS_PER_BYTE * sizeof(long);
+		return;
+	}
+
+	if (next_size > exact_size) {
+		next_size = exact_size;
+		goto again;
+	}
+
+	DRM_DEBUG("CMD: Failed to extend whitelist. BB_START may be disallowed\n");
+	memset(ctx->jump_whitelist, 0,
+		BITS_TO_LONGS(ctx->jump_whitelist_cmds) * sizeof(u32));
+
+	return;
+}
+
 #define LENGTH_BIAS 2
 
 /**
  * i915_parse_cmds() - parse a submitted batch buffer for privilege violations
+ * @ctx: the context in which the batch is to execute
  * @ring: the ring on which the batch is to execute
  * @batch_obj: the batch buffer in question
- * @shadow_batch_obj: copy of the batch buffer in question
+ * @user_batch_start: Canonical base address of original user batch
  * @batch_start_offset: byte offset in the batch at which execution starts
  * @batch_len: length of the commands in batch_obj
+ * @shadow_batch_obj: copy of the batch buffer in question
+ * @shadow_batch_start: Canonical base address of shadow_batch_obj
  *
  * Parses the specified batch buffer looking for privilege violations as
  * described in the overview.
@@ -1138,13 +1248,16 @@ static bool check_cmd(const struct intel_engine_cs *ring,
  * Return: non-zero if the parser finds violations or otherwise fails; -EACCES
  * if the batch appears legal but should use hardware parsing
  */
-int i915_parse_cmds(struct intel_engine_cs *ring,
+int i915_parse_cmds(struct intel_context *ctx,
+		    struct intel_engine_cs *ring,
 		    struct drm_i915_gem_object *batch_obj,
-		    struct drm_i915_gem_object *shadow_batch_obj,
+		    u64 user_batch_start,
 		    u32 batch_start_offset,
-		    u32 batch_len)
+		    u32 batch_len,
+		    struct drm_i915_gem_object *shadow_batch_obj,
+		    u64 shadow_batch_start)
 {
-	u32 *cmd, *batch_base, *batch_end;
+	u32 *cmd, *batch_base, *batch_end, offset = 0;
 	struct drm_i915_cmd_descriptor default_desc = { 0 };
 	bool oacontrol_set = false; /* OACONTROL tracking. See check_cmd() */
 	int ret = 0;
@@ -1156,6 +1269,8 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
 		return PTR_ERR(batch_base);
 	}
 
+	init_whitelist(ctx, batch_len);
+
 	/*
 	 * We use the batch length as size because the shadow object is as
 	 * large or larger and copy_batch() will write MI_NOPs to the extra
@@ -1179,16 +1294,6 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
 			break;
 		}
 
-		/*
-		 * We don't try to handle BATCH_BUFFER_START because it adds
-		 * non-trivial complexity. Instead we abort the scan and return
-		 * and error to indicate that the batch is unsafe.
-		 */
-		if (desc->cmd.value == MI_BATCH_BUFFER_START) {
-			ret = -EACCES;
-			break;
-		}
-
 		if (desc->flags & CMD_DESC_FIXED)
 			length = desc->length.fixed;
 		else
@@ -1208,7 +1313,18 @@ int i915_parse_cmds(struct intel_engine_cs *ring,
 			break;
 		}
 
+		if (desc->cmd.value == MI_BATCH_BUFFER_START) {
+			ret = check_bbstart(ctx, cmd, offset, length,
+					    batch_len, user_batch_start,
+					    shadow_batch_start);
+			break;
+		}
+
+		if (ctx->jump_whitelist_cmds > offset)
+			set_bit(offset, ctx->jump_whitelist);
+
 		cmd += length;
+		offset += length;
 	}
 
 	if (oacontrol_set) {
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 6064ca4aaaef..2a815dc2af91 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -891,6 +891,12 @@ struct intel_context {
 		int pin_count;
 	} engine[I915_NUM_RINGS];
 
+	/* jump_whitelist: Bit array for tracking cmds during cmdparsing */
+	unsigned long *jump_whitelist;
+
+	/* jump_whitelist_cmds: No of cmd slots available */
+	uint32_t jump_whitelist_cmds;
+
 	struct list_head link;
 };
 
@@ -3289,11 +3295,15 @@ int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv);
 int i915_cmd_parser_init_ring(struct intel_engine_cs *ring);
 void i915_cmd_parser_fini_ring(struct intel_engine_cs *ring);
 bool i915_needs_cmd_parser(struct intel_engine_cs *ring);
-int i915_parse_cmds(struct intel_engine_cs *ring,
+int i915_parse_cmds(struct intel_context *cxt,
+		    struct intel_engine_cs *ring,
 		    struct drm_i915_gem_object *batch_obj,
-		    struct drm_i915_gem_object *shadow_batch_obj,
+		    u64 user_batch_start,
 		    u32 batch_start_offset,
-		    u32 batch_len);
+		    u32 batch_len,
+		    struct drm_i915_gem_object *shadow_batch_obj,
+		    u64 shadow_batch_start);
+
 
 /* i915_suspend.c */
 extern int i915_save_state(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 0433d25f9d23..20fb0ee1df4f 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -157,6 +157,8 @@ void i915_gem_context_free(struct kref *ctx_ref)
 	if (i915.enable_execlists)
 		intel_lr_context_free(ctx);
 
+	kfree(ctx->jump_whitelist);
+
 	/*
 	 * This context is going away and we need to remove all VMAs still
 	 * around. This is to handle imported shared objects for which
@@ -246,6 +248,9 @@ __create_hw_context(struct drm_device *dev,
 
 	ctx->hang_stats.ban_period_seconds = DRM_I915_CTX_BAN_PERIOD;
 
+	ctx->jump_whitelist = NULL;
+	ctx->jump_whitelist_cmds = 0;
+
 	return ctx;
 
 err_out:
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 556849350f27..c373c45ae3d3 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1154,7 +1154,8 @@ shadow_batch_pin(struct drm_i915_gem_object *obj, struct i915_address_space *vm)
 }
 
 static struct drm_i915_gem_object*
-i915_gem_execbuffer_parse(struct intel_engine_cs *ring,
+i915_gem_execbuffer_parse(struct intel_context *ctx,
+			  struct intel_engine_cs *ring,
 			  struct drm_i915_gem_exec_object2 *shadow_exec_entry,
 			  struct eb_vmas *eb,
 			  struct i915_address_space *vm,
@@ -1164,6 +1165,10 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *ring,
 {
 	struct drm_i915_gem_object *shadow_batch_obj;
 	struct i915_vma *vma;
+	struct i915_vma *user_vma = list_entry(eb->vmas.prev,
+					typeof(*user_vma), exec_list);
+	u64 batch_start;
+	u64 shadow_batch_start;
 	int ret;
 
 	shadow_batch_obj = i915_gem_batch_pool_get(&ring->batch_pool,
@@ -1171,20 +1176,30 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *ring,
 	if (IS_ERR(shadow_batch_obj))
 		return shadow_batch_obj;
 
-	ret = i915_parse_cmds(ring,
-			      batch_obj,
-			      shadow_batch_obj,
-			      batch_start_offset,
-			      batch_len);
-	if (ret)
-		goto err;
-
 	vma = shadow_batch_pin(shadow_batch_obj, vm);
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
 		goto err;
 	}
 
+	batch_start = user_vma->node.start + batch_start_offset;
+
+	shadow_batch_start = vma->node.start;
+
+	ret = i915_parse_cmds(ctx,
+			      ring,
+			      batch_obj,
+			      batch_start,
+			      batch_start_offset,
+			      batch_len,
+			      shadow_batch_obj,
+			      shadow_batch_start);
+	if (ret) {
+		WARN_ON(vma->pin_count == 0);
+		vma->pin_count--;
+		goto err;
+	}
+
 	i915_gem_object_unpin_pages(shadow_batch_obj);
 
 	memset(shadow_exec_entry, 0, sizeof(*shadow_exec_entry));
@@ -1545,7 +1560,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		if (batch_len == 0)
 			batch_len = batch_obj->base.size - batch_off;
 
-		parsed_batch_obj = i915_gem_execbuffer_parse(ring,
+		parsed_batch_obj = i915_gem_execbuffer_parse(ctx, ring,
 						      &shadow_exec_entry,
 						      eb, vm,
 						      batch_obj,

From 362917ebcfacbd9c2b5172d5a5fe8cbef3ab838f Mon Sep 17 00:00:00 2001
From: Jon Bloomfield <jon.bloomfield@intel.com>
Date: Thu, 20 Sep 2018 09:45:10 -0700
Subject: [PATCH 55/81] drm/i915/cmdparser: Ignore Length operands during
 command matching

commit 926abff21a8f29ef159a3ac893b05c6e50e043c3 upstream.

Some of the gen instruction macros (e.g. MI_DISPLAY_FLIP) have the
length directly encoded in them. Since these are used directly in
the tables, the Length becomes part of the comparison used for
matching during parsing. Thus, if the cmd being parsed has a
different length to that in the table, it is not matched and the
cmd is accepted via the default variable length path.

Fix by masking out everything except the Opcode in the cmd tables

Signed-off-by: Jon Bloomfield <jon.bloomfield@intel.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Dave Airlie <airlied@redhat.com>
Cc: Takashi Iwai <tiwai@suse.de>
Cc: Tyler Hicks <tyhicks@canonical.com>
Reviewed-by: Chris Wilson <chris.p.wilson@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 80e7154f17c7..25e6fa20f163 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -92,7 +92,7 @@
 #define CMD(op, opm, f, lm, fl, ...)				\
 	{							\
 		.flags = (fl) | ((f) ? CMD_DESC_FIXED : 0),	\
-		.cmd = { (op), (opm) },				\
+		.cmd = { (op) & (opm), (opm) },			\
 		.length = { (lm) },				\
 		__VA_ARGS__					\
 	}

From 1433b8d41b1aa346e100b839c19fc033871ac5a6 Mon Sep 17 00:00:00 2001
From: Uma Shankar <uma.shankar@intel.com>
Date: Tue, 7 Aug 2018 21:15:35 +0530
Subject: [PATCH 56/81] drm/i915: Lower RM timeout to avoid DSI hard hangs

commit 1d85a299c4db57c55e0229615132c964d17aa765 upstream.

In BXT/APL, device 2 MMIO reads from MIPI controller requires its PLL
to be turned ON. When MIPI PLL is turned off (MIPI Display is not
active or connected), and someone (host or GT engine) tries to read
MIPI registers, it causes hard hang. This is a hardware restriction
or limitation.

Driver by itself doesn't read MIPI registers when MIPI display is off.
But any userspace application can submit unprivileged batch buffer for
execution. In that batch buffer there can be mmio reads. And these
reads are allowed even for unprivileged applications. If these
register reads are for MIPI DSI controller and MIPI display is not
active during that time, then the MMIO read operation causes system
hard hang and only way to recover is hard reboot. A genuine
process/application won't submit batch buffer like this and doesn't
cause any issue. But on a compromised system, a malign userspace
process/app can generate such batch buffer and can trigger system
hard hang (denial of service attack).

The fix is to lower the internal MMIO timeout value to an optimum
value of 950us as recommended by hardware team. If the timeout is
beyond 1ms (which will hit for any value we choose if MMIO READ on a
DSI specific register is performed without PLL ON), it causes the
system hang. But if the timeout value is lower than it will be below
the threshold (even if timeout happens) and system will not get into
a hung state. This will avoid a system hang without losing any
programming or GT interrupts, taking the worst case of lowest CDCLK
frequency and early DC5 abort into account.

Signed-off-by: Uma Shankar <uma.shankar@intel.com>
Signed-off-by: Jon Bloomfield <jon.bloomfield@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/i915/i915_reg.h | 4 ++++
 drivers/gpu/drm/i915/intel_pm.c | 8 ++++++++
 2 files changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index e45e68c3dea2..fb54d6e6cfc6 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -5709,6 +5709,10 @@ enum skl_disp_power_wells {
 #define GAMMA_MODE_MODE_12BIT	(2 << 0)
 #define GAMMA_MODE_MODE_SPLIT	(3 << 0)
 
+/* Display Internal Timeout Register */
+#define RM_TIMEOUT		0x42060
+#define  MMIO_TIMEOUT_US(us)	((us) << 0)
+
 /* interrupts */
 #define DE_MASTER_IRQ_CONTROL   (1 << 31)
 #define DE_SPRITEB_FLIP_DONE    (1 << 29)
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index fd4690ed93c0..922c0c815b30 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -66,6 +66,14 @@ static void bxt_init_clock_gating(struct drm_device *dev)
 	 */
 	I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
 		   GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ);
+
+	/*
+	 * Lower the display internal timeout.
+	 * This is needed to avoid any hard hangs when DSI port PLL
+	 * is off and a MMIO access is attempted by any privilege
+	 * application, using batch buffers or any other means.
+	 */
+	I915_WRITE(RM_TIMEOUT, MMIO_TIMEOUT_US(950));
 }
 
 static void i915_pineview_get_mem_freq(struct drm_device *dev)

From 284d38667f7ed7171fd8f168c42490f9087c824c Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Mon, 9 Jul 2018 18:24:27 +0300
Subject: [PATCH 57/81] drm/i915/gen8+: Add RC6 CTX corruption WA

commit 7e34f4e4aad3fd34c02b294a3cf2321adf5b4438 upstream.

In some circumstances the RC6 context can get corrupted. We can detect
this and take the required action, that is disable RC6 and runtime PM.
The HW recovers from the corrupted state after a system suspend/resume
cycle, so detect the recovery and re-enable RC6 and runtime PM.

v2: rebase (Mika)
v3:
- Move intel_suspend_gt_powersave() to the end of the GEM suspend
  sequence.
- Add commit message.
v4:
- Rebased on intel_uncore_forcewake_put(i915->uncore, ...) API
  change.
v5: rebased on gem/gt split (Mika)

Signed-off-by: Imre Deak <imre.deak@intel.com>
Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/i915/i915_drv.c      |   4 +
 drivers/gpu/drm/i915/i915_drv.h      |   5 +
 drivers/gpu/drm/i915/i915_reg.h      |   2 +
 drivers/gpu/drm/i915/intel_display.c |   9 ++
 drivers/gpu/drm/i915/intel_drv.h     |   3 +
 drivers/gpu/drm/i915/intel_pm.c      | 165 ++++++++++++++++++++++++---
 6 files changed, 173 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index a6ad938f44a6..697b2499c7a1 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -698,6 +698,8 @@ static int i915_drm_suspend_late(struct drm_device *drm_dev, bool hibernation)
 		return ret;
 	}
 
+	i915_rc6_ctx_wa_suspend(dev_priv);
+
 	pci_disable_device(drm_dev->pdev);
 	/*
 	 * During hibernation on some platforms the BIOS may try to access
@@ -849,6 +851,8 @@ static int i915_drm_resume_early(struct drm_device *dev)
 	intel_uncore_sanitize(dev);
 	intel_power_domains_init_hw(dev_priv);
 
+	i915_rc6_ctx_wa_resume(dev_priv);
+
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 2a815dc2af91..adbbcaf14af6 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1159,6 +1159,7 @@ struct intel_gen6_power_mgmt {
 	bool client_boost;
 
 	bool enabled;
+	bool ctx_corrupted;
 	struct delayed_work delayed_resume_work;
 	unsigned boosts;
 
@@ -2570,6 +2571,10 @@ struct drm_i915_cmd_table {
 
 /* Early gen2 have a totally busted CS tlb and require pinned batches. */
 #define HAS_BROKEN_CS_TLB(dev)		(IS_I830(dev) || IS_845G(dev))
+
+#define NEEDS_RC6_CTX_CORRUPTION_WA(dev)	\
+	(IS_BROADWELL(dev) || INTEL_INFO(dev)->gen == 9)
+
 /*
  * dp aux and gmbus irq on gen4 seems to be able to generate legacy interrupts
  * even when in MSI mode. This results in spurious interrupt warnings if the
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index fb54d6e6cfc6..603d8cdfc5f1 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -170,6 +170,8 @@
 #define   ECOCHK_PPGTT_WT_HSW		(0x2<<3)
 #define   ECOCHK_PPGTT_WB_HSW		(0x3<<3)
 
+#define GEN8_RC6_CTX_INFO		0x8504
+
 #define GAC_ECO_BITS			0x14090
 #define   ECOBITS_SNB_BIT		(1<<13)
 #define   ECOBITS_PPGTT_CACHE64B	(3<<8)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 4f5d07bb3511..a9166ff48a26 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -10747,6 +10747,10 @@ void intel_mark_busy(struct drm_device *dev)
 		return;
 
 	intel_runtime_pm_get(dev_priv);
+
+	if (NEEDS_RC6_CTX_CORRUPTION_WA(dev_priv))
+		intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
+
 	i915_update_gfx_val(dev_priv);
 	if (INTEL_INFO(dev)->gen >= 6)
 		gen6_rps_busy(dev_priv);
@@ -10765,6 +10769,11 @@ void intel_mark_idle(struct drm_device *dev)
 	if (INTEL_INFO(dev)->gen >= 6)
 		gen6_rps_idle(dev->dev_private);
 
+	if (NEEDS_RC6_CTX_CORRUPTION_WA(dev_priv)) {
+		i915_rc6_ctx_wa_check(dev_priv);
+		intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+	}
+
 	intel_runtime_pm_put(dev_priv);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 722aa159cd28..78503e481313 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1410,6 +1410,9 @@ void intel_enable_gt_powersave(struct drm_device *dev);
 void intel_disable_gt_powersave(struct drm_device *dev);
 void intel_suspend_gt_powersave(struct drm_device *dev);
 void intel_reset_gt_powersave(struct drm_device *dev);
+bool i915_rc6_ctx_wa_check(struct drm_i915_private *i915);
+void i915_rc6_ctx_wa_suspend(struct drm_i915_private *i915);
+void i915_rc6_ctx_wa_resume(struct drm_i915_private *i915);
 void gen6_update_ring_freq(struct drm_device *dev);
 void gen6_rps_busy(struct drm_i915_private *dev_priv);
 void gen6_rps_reset_ei(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 922c0c815b30..81bd84f9156b 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -4599,30 +4599,42 @@ void intel_set_rps(struct drm_device *dev, u8 val)
 		gen6_set_rps(dev, val);
 }
 
-static void gen9_disable_rps(struct drm_device *dev)
+static void gen9_disable_rc6(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
 	I915_WRITE(GEN6_RC_CONTROL, 0);
+}
+
+static void gen9_disable_rps(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
 	I915_WRITE(GEN9_PG_ENABLE, 0);
 }
 
+static void gen6_disable_rc6(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	I915_WRITE(GEN6_RC_CONTROL, 0);
+}
+
 static void gen6_disable_rps(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
-	I915_WRITE(GEN6_RC_CONTROL, 0);
 	I915_WRITE(GEN6_RPNSWREQ, 1 << 31);
 }
 
-static void cherryview_disable_rps(struct drm_device *dev)
+static void cherryview_disable_rc6(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
 	I915_WRITE(GEN6_RC_CONTROL, 0);
 }
 
-static void valleyview_disable_rps(struct drm_device *dev)
+static void valleyview_disable_rc6(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
@@ -4826,7 +4838,8 @@ static void gen9_enable_rc6(struct drm_device *dev)
 	I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 25);
 
 	/* 3a: Enable RC6 */
-	if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
+	if (!dev_priv->rps.ctx_corrupted &&
+	    intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
 		rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
 	DRM_INFO("RC6 %s\n", (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ?
 			"on" : "off");
@@ -4849,7 +4862,7 @@ static void gen9_enable_rc6(struct drm_device *dev)
 	 * WaRsDisableCoarsePowerGating:skl,bxt - Render/Media PG need to be disabled with RC6.
 	 */
 	if ((IS_BROXTON(dev) && (INTEL_REVID(dev) < BXT_REVID_B0)) ||
-	    ((IS_SKL_GT3(dev) || IS_SKL_GT4(dev)) && (INTEL_REVID(dev) <= SKL_REVID_F0)))
+	    INTEL_INFO(dev)->gen == 9)
 		I915_WRITE(GEN9_PG_ENABLE, 0);
 	else
 		I915_WRITE(GEN9_PG_ENABLE, (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ?
@@ -4892,7 +4905,8 @@ static void gen8_enable_rps(struct drm_device *dev)
 		I915_WRITE(GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */
 
 	/* 3: Enable RC6 */
-	if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
+	if (!dev_priv->rps.ctx_corrupted &&
+	    intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
 		rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
 	intel_print_rc6_info(dev, rc6_mask);
 	if (IS_BROADWELL(dev))
@@ -6136,10 +6150,101 @@ static void intel_init_emon(struct drm_device *dev)
 	dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK);
 }
 
+static bool i915_rc6_ctx_corrupted(struct drm_i915_private *dev_priv)
+{
+	return !I915_READ(GEN8_RC6_CTX_INFO);
+}
+
+static void i915_rc6_ctx_wa_init(struct drm_i915_private *i915)
+{
+	if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915))
+		return;
+
+	if (i915_rc6_ctx_corrupted(i915)) {
+		DRM_INFO("RC6 context corrupted, disabling runtime power management\n");
+		i915->rps.ctx_corrupted = true;
+		intel_runtime_pm_get(i915);
+	}
+}
+
+static void i915_rc6_ctx_wa_cleanup(struct drm_i915_private *i915)
+{
+	if (i915->rps.ctx_corrupted) {
+		intel_runtime_pm_put(i915);
+		i915->rps.ctx_corrupted = false;
+	}
+}
+
+/**
+ * i915_rc6_ctx_wa_suspend - system suspend sequence for the RC6 CTX WA
+ * @i915: i915 device
+ *
+ * Perform any steps needed to clean up the RC6 CTX WA before system suspend.
+ */
+void i915_rc6_ctx_wa_suspend(struct drm_i915_private *i915)
+{
+	if (i915->rps.ctx_corrupted)
+		intel_runtime_pm_put(i915);
+}
+
+/**
+ * i915_rc6_ctx_wa_resume - system resume sequence for the RC6 CTX WA
+ * @i915: i915 device
+ *
+ * Perform any steps needed to re-init the RC6 CTX WA after system resume.
+ */
+void i915_rc6_ctx_wa_resume(struct drm_i915_private *i915)
+{
+	if (!i915->rps.ctx_corrupted)
+		return;
+
+	if (i915_rc6_ctx_corrupted(i915)) {
+		intel_runtime_pm_get(i915);
+		return;
+	}
+
+	DRM_INFO("RC6 context restored, re-enabling runtime power management\n");
+	i915->rps.ctx_corrupted = false;
+}
+
+static void intel_disable_rc6(struct drm_device *dev);
+
+/**
+ * i915_rc6_ctx_wa_check - check for a new RC6 CTX corruption
+ * @i915: i915 device
+ *
+ * Check if an RC6 CTX corruption has happened since the last check and if so
+ * disable RC6 and runtime power management.
+ *
+ * Return false if no context corruption has happened since the last call of
+ * this function, true otherwise.
+*/
+bool i915_rc6_ctx_wa_check(struct drm_i915_private *i915)
+{
+	if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915))
+		return false;
+
+	if (i915->rps.ctx_corrupted)
+		return false;
+
+	if (!i915_rc6_ctx_corrupted(i915))
+		return false;
+
+	DRM_NOTE("RC6 context corruption, disabling runtime power management\n");
+
+	intel_disable_rc6(i915->dev);
+	i915->rps.ctx_corrupted = true;
+	intel_runtime_pm_get_noresume(i915);
+
+	return true;
+}
+
 void intel_init_gt_powersave(struct drm_device *dev)
 {
 	i915.enable_rc6 = sanitize_rc6_option(dev, i915.enable_rc6);
 
+	i915_rc6_ctx_wa_init(to_i915(dev));
+
 	if (IS_CHERRYVIEW(dev))
 		cherryview_init_gt_powersave(dev);
 	else if (IS_VALLEYVIEW(dev))
@@ -6152,6 +6257,8 @@ void intel_cleanup_gt_powersave(struct drm_device *dev)
 		return;
 	else if (IS_VALLEYVIEW(dev))
 		valleyview_cleanup_gt_powersave(dev);
+
+	i915_rc6_ctx_wa_cleanup(to_i915(dev));
 }
 
 static void gen6_suspend_rps(struct drm_device *dev)
@@ -6184,6 +6291,38 @@ void intel_suspend_gt_powersave(struct drm_device *dev)
 	gen6_rps_idle(dev_priv);
 }
 
+static void __intel_disable_rc6(struct drm_device *dev)
+{
+	if (INTEL_INFO(dev)->gen >= 9)
+		gen9_disable_rc6(dev);
+	else if (IS_CHERRYVIEW(dev))
+		cherryview_disable_rc6(dev);
+	else if (IS_VALLEYVIEW(dev))
+		valleyview_disable_rc6(dev);
+	else
+		gen6_disable_rc6(dev);
+}
+
+static void intel_disable_rc6(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = to_i915(dev);
+
+	mutex_lock(&dev_priv->rps.hw_lock);
+	__intel_disable_rc6(dev);
+	mutex_unlock(&dev_priv->rps.hw_lock);
+}
+
+static void intel_disable_rps(struct drm_device *dev)
+{
+	if (IS_CHERRYVIEW(dev) || IS_VALLEYVIEW(dev))
+		return;
+
+	if (INTEL_INFO(dev)->gen >= 9)
+		gen9_disable_rps(dev);
+	else
+		gen6_disable_rps(dev);
+}
+
 void intel_disable_gt_powersave(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -6194,16 +6333,12 @@ void intel_disable_gt_powersave(struct drm_device *dev)
 		intel_suspend_gt_powersave(dev);
 
 		mutex_lock(&dev_priv->rps.hw_lock);
-		if (INTEL_INFO(dev)->gen >= 9)
-			gen9_disable_rps(dev);
-		else if (IS_CHERRYVIEW(dev))
-			cherryview_disable_rps(dev);
-		else if (IS_VALLEYVIEW(dev))
-			valleyview_disable_rps(dev);
-		else
-			gen6_disable_rps(dev);
+
+		__intel_disable_rc6(dev);
+		intel_disable_rps(dev);
 
 		dev_priv->rps.enabled = false;
+
 		mutex_unlock(&dev_priv->rps.hw_lock);
 	}
 }

From 6dd52bae8a01af77236b88917e84e84dbcfe06db Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Mon, 11 Nov 2019 08:13:24 -0800
Subject: [PATCH 58/81] drm/i915/cmdparser: Fix jump whitelist clearing

commit ea0b163b13ffc52818c079adb00d55e227a6da6f upstream.

When a jump_whitelist bitmap is reused, it needs to be cleared.
Currently this is done with memset() and the size calculation assumes
bitmaps are made of 32-bit words, not longs.  So on 64-bit
architectures, only the first half of the bitmap is cleared.

If some whitelist bits are carried over between successive batches
submitted on the same context, this will presumably allow embedding
the rogue instructions that we're trying to reject.

Use bitmap_zero() instead, which gets the calculation right.

Fixes: f8c08d8faee5 ("drm/i915/cmdparser: Add support for backward jumps")
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Jon Bloomfield <jon.bloomfield@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 25e6fa20f163..6188b70c2790 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -1203,7 +1203,7 @@ static void init_whitelist(struct intel_context *ctx, u32 batch_len)
 		return;
 
 	if (batch_cmds <= ctx->jump_whitelist_cmds) {
-		memset(ctx->jump_whitelist, 0, exact_size * sizeof(u32));
+		bitmap_zero(ctx->jump_whitelist, batch_cmds);
 		return;
 	}
 
@@ -1223,8 +1223,7 @@ again:
 	}
 
 	DRM_DEBUG("CMD: Failed to extend whitelist. BB_START may be disallowed\n");
-	memset(ctx->jump_whitelist, 0,
-		BITS_TO_LONGS(ctx->jump_whitelist_cmds) * sizeof(u32));
+	bitmap_zero(ctx->jump_whitelist, ctx->jump_whitelist_cmds);
 
 	return;
 }

From 6186d66524c25c70d634206dd460bd6388e7e9f9 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Tue, 12 Nov 2019 19:13:37 +0100
Subject: [PATCH 59/81] Linux 4.4.201

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 8715489f3462..a86c8aa98dbe 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 4
-SUBLEVEL = 200
+SUBLEVEL = 201
 EXTRAVERSION =
 NAME = Blurry Fish Butt
 

From e4575a2d22e1a6ff335ee354bc2a081639b5e99f Mon Sep 17 00:00:00 2001
From: Cody Schuffelen <schuffelen@google.com>
Date: Wed, 13 Nov 2019 18:25:50 -0800
Subject: [PATCH 60/81] commit e82b9b0727ff ("vhost: introduce
 vhost_exceeds_weight()")

Complete backport of vhost_exceeds_weight to android-4.4-p

Test: Compiles
Bug: 143972019
Change-Id: Ifafc3321332d9033be75123e761d7da7d8586e4c
Signed-off-by: Cody Schuffelen <schuffelen@google.com>
---
 drivers/vhost/vsock.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 72e914de473e..81754c33c3a9 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -21,6 +21,14 @@
 #include "vhost.h"
 
 #define VHOST_VSOCK_DEFAULT_HOST_CID	2
+/* Max number of bytes transferred before requeueing the job.
+ * Using this limit prevents one virtqueue from starving others. */
+#define VHOST_VSOCK_WEIGHT 0x80000
+/* Max number of packets transferred before requeueing the job.
+ * Using this limit prevents one virtqueue from starving others with
+ * small pkts.
+ */
+#define VHOST_VSOCK_PKT_WEIGHT 256
 
 enum {
 	VHOST_VSOCK_FEATURES = VHOST_FEATURES,
@@ -529,7 +537,8 @@ static int vhost_vsock_dev_open(struct inode *inode, struct file *file)
 	vsock->vqs[VSOCK_VQ_TX].handle_kick = vhost_vsock_handle_tx_kick;
 	vsock->vqs[VSOCK_VQ_RX].handle_kick = vhost_vsock_handle_rx_kick;
 
-	vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs));
+	vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs),
+		       VHOST_VSOCK_PKT_WEIGHT, VHOST_VSOCK_WEIGHT);
 
 	file->private_data = vsock;
 	spin_lock_init(&vsock->send_pkt_list_lock);

From 411b1be44169c62006c31261d2a9ac1385e348cd Mon Sep 17 00:00:00 2001
From: Junaid Shahid <junaids@google.com>
Date: Mon, 11 Nov 2019 16:17:05 -0800
Subject: [PATCH 61/81] kvm: mmu: Don't read PDPTEs when paging is not enabled

[ Upstream commit d35b34a9a70edae7ef923f100e51b8b5ae9fe899 ]

kvm should not attempt to read guest PDPTEs when CR0.PG = 0 and
CR4.PAE = 1.

Signed-off-by: Junaid Shahid <junaids@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 arch/x86/kvm/x86.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2b47fd3d4b8c..ad8e19fee71e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -575,7 +575,7 @@ static bool pdptrs_changed(struct kvm_vcpu *vcpu)
 	gfn_t gfn;
 	int r;
 
-	if (is_long_mode(vcpu) || !is_pae(vcpu))
+	if (is_long_mode(vcpu) || !is_pae(vcpu) || !is_paging(vcpu))
 		return false;
 
 	if (!test_bit(VCPU_EXREG_PDPTR,
@@ -7168,7 +7168,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 		kvm_update_cpuid(vcpu);
 
 	idx = srcu_read_lock(&vcpu->kvm->srcu);
-	if (!is_long_mode(vcpu) && is_pae(vcpu)) {
+	if (!is_long_mode(vcpu) && is_pae(vcpu) && is_paging(vcpu)) {
 		load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
 		mmu_reset_needed = 1;
 	}

From 615ae6716945d7263d424a7a735022fd3ef17099 Mon Sep 17 00:00:00 2001
From: Jonas Gorski <jonas.gorski@gmail.com>
Date: Mon, 10 Dec 2018 12:40:38 +0100
Subject: [PATCH 62/81] MIPS: BCM63XX: fix switch core reset on BCM6368

commit 8a38dacf87180738d42b058334c951eba15d2d47 upstream.

The Ethernet Switch core mask was set to 0, causing the switch core to
be not reset on BCM6368 on boot. Provide the proper mask so the switch
core gets reset to a known good state.

Fixes: 799faa626c71 ("MIPS: BCM63XX: add core reset helper")
Signed-off-by: Jonas Gorski <jonas.gorski@gmail.com>
Signed-off-by: Paul Burton <paul.burton@mips.com>
Cc: linux-mips@vger.kernel.org
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: James Hogan <jhogan@kernel.org>
Cc: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Amit Pundir <amit.pundir@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/bcm63xx/reset.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/mips/bcm63xx/reset.c b/arch/mips/bcm63xx/reset.c
index d1fe51edf5e6..4d411da2497b 100644
--- a/arch/mips/bcm63xx/reset.c
+++ b/arch/mips/bcm63xx/reset.c
@@ -119,7 +119,7 @@
 #define BCM6368_RESET_DSL	0
 #define BCM6368_RESET_SAR	SOFTRESET_6368_SAR_MASK
 #define BCM6368_RESET_EPHY	SOFTRESET_6368_EPHY_MASK
-#define BCM6368_RESET_ENETSW	0
+#define BCM6368_RESET_ENETSW	SOFTRESET_6368_ENETSW_MASK
 #define BCM6368_RESET_PCM	SOFTRESET_6368_PCM_MASK
 #define BCM6368_RESET_MPI	SOFTRESET_6368_MPI_MASK
 #define BCM6368_RESET_PCIE	0

From bd52aa88e296f9cdfe30b4e0bd482ddd959d8cc2 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Tue, 9 Aug 2016 22:43:46 +1000
Subject: [PATCH 63/81] powerpc/Makefile: Use cflags-y/aflags-y for setting
 endian options

commit 164af597ce945751e2dcd53d0a86e84203a6d117 upstream.

When we introduced the little endian support, we added the endian flags
to CC directly using override. I don't know the history of why we did
that, I suspect no one does.

Although this mostly works, it has one bug, which is that CROSS32CC
doesn't get -mbig-endian. That means when the compiler is little endian
by default and the user is building big endian, vdso32 is incorrectly
compiled as little endian and the kernel fails to build.

Instead we can add the endian flags to cflags-y/aflags-y, and then
append those to KBUILD_CFLAGS/KBUILD_AFLAGS.

This has the advantage of being 1) less ugly, 2) the documented way of
adding flags in the arch Makefile and 3) it fixes building vdso32 with a
LE toolchain.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Cc: Andrew Donnellan <ajd@linux.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/Makefile | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 96efd8213c1c..79a5ad2c8c85 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -66,29 +66,28 @@ endif
 UTS_MACHINE := $(OLDARCH)
 
 ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y)
-override CC	+= -mlittle-endian
-ifneq ($(cc-name),clang)
-override CC	+= -mno-strict-align
-endif
-override AS	+= -mlittle-endian
 override LD	+= -EL
-override CROSS32CC += -mlittle-endian
 override CROSS32AS += -mlittle-endian
 LDEMULATION	:= lppc
 GNUTARGET	:= powerpcle
 MULTIPLEWORD	:= -mno-multiple
 KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-save-toc-indirect)
 else
-ifeq ($(call cc-option-yn,-mbig-endian),y)
-override CC	+= -mbig-endian
-override AS	+= -mbig-endian
-endif
 override LD	+= -EB
 LDEMULATION	:= ppc
 GNUTARGET	:= powerpc
 MULTIPLEWORD	:= -mmultiple
 endif
 
+cflags-$(CONFIG_CPU_BIG_ENDIAN)		+= $(call cc-option,-mbig-endian)
+cflags-$(CONFIG_CPU_LITTLE_ENDIAN)	+= -mlittle-endian
+ifneq ($(cc-name),clang)
+  cflags-$(CONFIG_CPU_LITTLE_ENDIAN)	+= -mno-strict-align
+endif
+
+aflags-$(CONFIG_CPU_BIG_ENDIAN)		+= $(call cc-option,-mbig-endian)
+aflags-$(CONFIG_CPU_LITTLE_ENDIAN)	+= -mlittle-endian
+
 ifeq ($(HAS_BIARCH),y)
 override AS	+= -a$(CONFIG_WORD_SIZE)
 override LD	+= -m elf$(CONFIG_WORD_SIZE)$(LDEMULATION)
@@ -212,6 +211,9 @@ cpu-as-$(CONFIG_E200)		+= -Wa,-me200
 KBUILD_AFLAGS += $(cpu-as-y)
 KBUILD_CFLAGS += $(cpu-as-y)
 
+KBUILD_AFLAGS += $(aflags-y)
+KBUILD_CFLAGS += $(cflags-y)
+
 head-y				:= arch/powerpc/kernel/head_$(CONFIG_WORD_SIZE).o
 head-$(CONFIG_8xx)		:= arch/powerpc/kernel/head_8xx.o
 head-$(CONFIG_40x)		:= arch/powerpc/kernel/head_40x.o

From c311fe2c7457c6cd150fcf0465e34f79fc7267d7 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sun, 27 Nov 2016 13:46:20 +1100
Subject: [PATCH 64/81] powerpc: Fix compiling a BE kernel with a powerpc64le
 toolchain

commit 4dc831aa88132f835cefe876aa0206977c4d7710 upstream.

GCC can compile with either endian, but the default ABI version is set
based on the default endianness of the toolchain. Alan Modra says:

  you need both -mbig and -mabi=elfv1 to make a powerpc64le gcc
  generate powerpc64 code

The opposite is true for powerpc64 when generating -mlittle it
requires -mabi=elfv2 to generate v2 ABI, which we were already doing.

This change adds ABI annotations together with endianness for all cases,
LE and BE. This fixes the case of building a BE kernel with a toolchain
that is LE by default.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Tested-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Cc: Andrew Donnellan <ajd@linux.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/Makefile | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 79a5ad2c8c85..d7eb035a9c96 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -79,8 +79,15 @@ GNUTARGET	:= powerpc
 MULTIPLEWORD	:= -mmultiple
 endif
 
-cflags-$(CONFIG_CPU_BIG_ENDIAN)		+= $(call cc-option,-mbig-endian)
+ifdef CONFIG_PPC64
+cflags-$(CONFIG_CPU_BIG_ENDIAN)		+= $(call cc-option,-mabi=elfv1)
+cflags-$(CONFIG_CPU_BIG_ENDIAN)		+= $(call cc-option,-mcall-aixdesc)
+aflags-$(CONFIG_CPU_BIG_ENDIAN)		+= $(call cc-option,-mabi=elfv1)
+aflags-$(CONFIG_CPU_LITTLE_ENDIAN)	+= -mabi=elfv2
+endif
+
 cflags-$(CONFIG_CPU_LITTLE_ENDIAN)	+= -mlittle-endian
+cflags-$(CONFIG_CPU_BIG_ENDIAN)		+= $(call cc-option,-mbig-endian)
 ifneq ($(cc-name),clang)
   cflags-$(CONFIG_CPU_LITTLE_ENDIAN)	+= -mno-strict-align
 endif
@@ -120,7 +127,9 @@ ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y)
 CFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mabi=elfv2,$(call cc-option,-mcall-aixdesc))
 AFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mabi=elfv2)
 else
+CFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mabi=elfv1)
 CFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mcall-aixdesc)
+AFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mabi=elfv1)
 endif
 CFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mcmodel=medium,$(call cc-option,-mminimal-toc))
 CFLAGS-$(CONFIG_PPC64)	+= $(call cc-option,-mno-pointers-to-nested-functions)

From 0b42886a355c83e26fcf0a379451d91e8ab7f7af Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Mon, 28 Nov 2016 12:42:26 +1100
Subject: [PATCH 65/81] powerpc/boot: Request no dynamic linker for boot
 wrapper

commit ff45000fcb56b5b0f1a14a865d3541746d838a0a upstream.

The boot wrapper performs its own relocations and does not require
PT_INTERP segment. However currently we don't tell the linker that.

Prior to binutils 2.28 that works OK. But since binutils commit
1a9ccd70f9a7 ("Fix the linker so that it will not silently generate ELF
binaries with invalid program headers. Fix readelf to report such
invalid binaries.") binutils tries to create a program header segment
due to PT_INTERP, and the link fails because there is no space for it:

  ld: arch/powerpc/boot/zImage.pseries: Not enough room for program headers, try linking with -N
  ld: final link failed: Bad value

So tell the linker not to do that, by passing --no-dynamic-linker.

Cc: stable@vger.kernel.org
Reported-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
[mpe: Drop dependency on ld-version.sh and massage change log]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
[ajd: backport to v4.4 (resolve conflict with a comment line)]
Signed-off-by: Andrew Donnellan <ajd@linux.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/boot/wrapper | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper
index ceaa75d5a684..be4831acda22 100755
--- a/arch/powerpc/boot/wrapper
+++ b/arch/powerpc/boot/wrapper
@@ -161,6 +161,28 @@ case "$elfformat" in
     elf32-powerpc)	format=elf32ppc	;;
 esac
 
+ld_version()
+{
+    # Poached from scripts/ld-version.sh, but we don't want to call that because
+    # this script (wrapper) is distributed separately from the kernel source.
+    # Extract linker version number from stdin and turn into single number.
+    awk '{
+	gsub(".*\\)", "");
+	gsub(".*version ", "");
+	gsub("-.*", "");
+	split($1,a, ".");
+	print a[1]*100000000 + a[2]*1000000 + a[3]*10000;
+	exit
+    }'
+}
+
+# Do not include PT_INTERP segment when linking pie. Non-pie linking
+# just ignores this option.
+LD_VERSION=$(${CROSS}ld --version | ld_version)
+LD_NO_DL_MIN_VERSION=$(echo 2.26 | ld_version)
+if [ "$LD_VERSION" -ge "$LD_NO_DL_MIN_VERSION" ] ; then
+	nodl="--no-dynamic-linker"
+fi
 
 platformo=$object/"$platform".o
 lds=$object/zImage.lds
@@ -412,7 +434,7 @@ if [ "$platform" != "miboot" ]; then
     if [ -n "$link_address" ] ; then
         text_start="-Ttext $link_address"
     fi
-    ${CROSS}ld -m $format -T $lds $text_start $pie -o "$ofile" \
+    ${CROSS}ld -m $format -T $lds $text_start $pie $nodl -o "$ofile" \
 	$platformo $tmp $object/wrapper.a
     rm $tmp
 fi

From 873e2f25ccb7112f8818d7ddb77456e91e188691 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Tue, 8 Oct 2019 23:35:30 +0100
Subject: [PATCH 66/81] KVM: Introduce kvm_get_arch_capabilities()

Extracted from commit 5b76a3cff011 "KVM: VMX: Tell the nested
hypervisor to skip L1D flush on vmentry".  We will need this to let a
nested hypervisor know that we have applied the mitigation for TAA.

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/vmx.c              |  3 +--
 arch/x86/kvm/x86.c              | 10 ++++++++++
 3 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 39f202462029..dc60648c55eb 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1226,6 +1226,7 @@ void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu);
 void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
 					   unsigned long address);
 
+u64 kvm_get_arch_capabilities(void);
 void kvm_define_shared_msr(unsigned index, u32 msr);
 int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f8f9d1b368bf..f6d79f4f7e47 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5079,8 +5079,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 		++vmx->nmsrs;
 	}
 
-	if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
-		rdmsrl(MSR_IA32_ARCH_CAPABILITIES, vmx->arch_capabilities);
+	vmx->arch_capabilities = kvm_get_arch_capabilities();
 
 	vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl);
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ad8e19fee71e..9df5bc3d62f1 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -995,6 +995,16 @@ static u32 emulated_msrs[] = {
 
 static unsigned num_emulated_msrs;
 
+u64 kvm_get_arch_capabilities(void)
+{
+	u64 data;
+
+	rdmsrl_safe(MSR_IA32_ARCH_CAPABILITIES, &data);
+
+	return data;
+}
+EXPORT_SYMBOL_GPL(kvm_get_arch_capabilities);
+
 static bool __kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
 {
 	if (efer & EFER_FFXSR) {

From eb5a31b4b374ed29aedf597d75394bee4505e30b Mon Sep 17 00:00:00 2001
From: Sean Christopherson <sean.j.christopherson@intel.com>
Date: Thu, 7 Mar 2019 15:43:02 -0800
Subject: [PATCH 67/81] KVM: x86: Emulate MSR_IA32_ARCH_CAPABILITIES on AMD
 hosts

commit 0cf9135b773bf32fba9dd8e6699c1b331ee4b749 upstream.

The CPUID flag ARCH_CAPABILITIES is unconditioinally exposed to host
userspace for all x86 hosts, i.e. KVM advertises ARCH_CAPABILITIES
regardless of hardware support under the pretense that KVM fully
emulates MSR_IA32_ARCH_CAPABILITIES.  Unfortunately, only VMX hosts
handle accesses to MSR_IA32_ARCH_CAPABILITIES (despite KVM_GET_MSRS
also reporting MSR_IA32_ARCH_CAPABILITIES for all hosts).

Move the MSR_IA32_ARCH_CAPABILITIES handling to common x86 code so
that it's emulated on AMD hosts.

Fixes: 1eaafe91a0df4 ("kvm: x86: IA32_ARCH_CAPABILITIES is always supported")
Cc: stable@vger.kernel.org
Reported-by: Xiaoyao Li <xiaoyao.li@linux.intel.com>
Cc: Jim Mattson <jmattson@google.com>
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
[bwh: Backported to 4.4: adjust context]
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/vmx.c              | 14 --------------
 arch/x86/kvm/x86.c              | 12 ++++++++++++
 3 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index dc60648c55eb..dac449879113 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -408,6 +408,7 @@ struct kvm_vcpu_arch {
 	u64 smbase;
 	bool tpr_access_reporting;
 	u64 ia32_xss;
+	u64 arch_capabilities;
 
 	/*
 	 * Paging state of the vcpu
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f6d79f4f7e47..1b3a432f6fd5 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -546,7 +546,6 @@ struct vcpu_vmx {
 	u64 		      msr_guest_kernel_gs_base;
 #endif
 
-	u64 		      arch_capabilities;
 	u64 		      spec_ctrl;
 
 	u32 vm_entry_controls_shadow;
@@ -2866,12 +2865,6 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 
 		msr_info->data = to_vmx(vcpu)->spec_ctrl;
 		break;
-	case MSR_IA32_ARCH_CAPABILITIES:
-		if (!msr_info->host_initiated &&
-		    !guest_cpuid_has_arch_capabilities(vcpu))
-			return 1;
-		msr_info->data = to_vmx(vcpu)->arch_capabilities;
-		break;
 	case MSR_IA32_SYSENTER_CS:
 		msr_info->data = vmcs_read32(GUEST_SYSENTER_CS);
 		break;
@@ -3028,11 +3021,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD,
 					      MSR_TYPE_W);
 		break;
-	case MSR_IA32_ARCH_CAPABILITIES:
-		if (!msr_info->host_initiated)
-			return 1;
-		vmx->arch_capabilities = data;
-		break;
 	case MSR_IA32_CR_PAT:
 		if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
 			if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
@@ -5079,8 +5067,6 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 		++vmx->nmsrs;
 	}
 
-	vmx->arch_capabilities = kvm_get_arch_capabilities();
-
 	vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl);
 
 	/* 22.2.1, 20.8.1 */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9df5bc3d62f1..51d2cd15db92 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2080,6 +2080,11 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_AMD64_BU_CFG2:
 		break;
 
+	case MSR_IA32_ARCH_CAPABILITIES:
+		if (!msr_info->host_initiated)
+			return 1;
+		vcpu->arch.arch_capabilities = data;
+		break;
 	case MSR_EFER:
 		return set_efer(vcpu, msr_info);
 	case MSR_K7_HWCR:
@@ -2354,6 +2359,12 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_IA32_UCODE_REV:
 		msr_info->data = 0x100000000ULL;
 		break;
+	case MSR_IA32_ARCH_CAPABILITIES:
+		if (!msr_info->host_initiated &&
+		    !guest_cpuid_has_arch_capabilities(vcpu))
+			return 1;
+		msr_info->data = vcpu->arch.arch_capabilities;
+		break;
 	case MSR_MTRRcap:
 	case 0x200 ... 0x2ff:
 		return kvm_mtrr_get_msr(vcpu, msr_info->index, &msr_info->data);
@@ -7402,6 +7413,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 {
 	int r;
 
+	vcpu->arch.arch_capabilities = kvm_get_arch_capabilities();
 	kvm_vcpu_mtrr_init(vcpu);
 	r = vcpu_load(vcpu);
 	if (r)

From f5850490b49c0bdde44c873fac6221d38ff252d0 Mon Sep 17 00:00:00 2001
From: Jim Mattson <jmattson@google.com>
Date: Wed, 9 May 2018 14:29:35 -0700
Subject: [PATCH 68/81] kvm: x86: IA32_ARCH_CAPABILITIES is always supported
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 1eaafe91a0df4157521b6417b3dd8430bf5f52f0 upstream.

If there is a possibility that a VM may migrate to a Skylake host,
then the hypervisor should report IA32_ARCH_CAPABILITIES.RSBA[bit 2]
as being set (future work, of course). This implies that
CPUID.(EAX=7,ECX=0):EDX.ARCH_CAPABILITIES[bit 29] should be
set. Therefore, kvm should report this CPUID bit as being supported
whether or not the host supports it.  Userspace is still free to clear
the bit if it chooses.

For more information on RSBA, see Intel's white paper, "Retpoline: A
Branch Target Injection Mitigation" (Document Number 337131-001),
currently available at https://bugzilla.kernel.org/show_bug.cgi?id=199511.

Since the IA32_ARCH_CAPABILITIES MSR is emulated in kvm, there is no
dependency on hardware support for this feature.

Signed-off-by: Jim Mattson <jmattson@google.com>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Fixes: 28c1c9fabf48 ("KVM/VMX: Emulate MSR_IA32_ARCH_CAPABILITIES")
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kvm/cpuid.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 53918abccbc3..78ca1e13d77c 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -447,6 +447,11 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 			entry->ebx |= F(TSC_ADJUST);
 			entry->edx &= kvm_cpuid_7_0_edx_x86_features;
 			cpuid_mask(&entry->edx, CPUID_7_EDX);
+			/*
+			 * We emulate ARCH_CAPABILITIES in software even
+			 * if the host doesn't support it.
+			 */
+			entry->edx |= F(ARCH_CAPABILITIES);
 		} else {
 			entry->ebx = 0;
 			entry->edx = 0;

From b6b8d3bde6be69dce236117062949575d5fbaa73 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 19 Aug 2019 17:24:07 +0200
Subject: [PATCH 69/81] KVM: x86: use Intel speculation bugs and features as
 derived in generic x86 code

commit 0c54914d0c52a15db9954a76ce80fee32cf318f4 upstream.

Similar to AMD bits, set the Intel bits from the vendor-independent
feature and bug flags, because KVM_GET_SUPPORTED_CPUID does not care
about the vendor and they should be set on AMD processors as well.

Suggested-by: Jim Mattson <jmattson@google.com>
Reviewed-by: Jim Mattson <jmattson@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
[bwh: Backported to 4.4: adjust context]
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kvm/cpuid.c | 7 +++++++
 arch/x86/kvm/x86.c   | 8 ++++++++
 2 files changed, 15 insertions(+)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 78ca1e13d77c..40e415fedcee 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -447,6 +447,13 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 			entry->ebx |= F(TSC_ADJUST);
 			entry->edx &= kvm_cpuid_7_0_edx_x86_features;
 			cpuid_mask(&entry->edx, CPUID_7_EDX);
+			if (boot_cpu_has(X86_FEATURE_IBPB) &&
+			    boot_cpu_has(X86_FEATURE_IBRS))
+				entry->edx |= F(SPEC_CTRL);
+			if (boot_cpu_has(X86_FEATURE_STIBP))
+				entry->edx |= F(INTEL_STIBP);
+			if (boot_cpu_has(X86_FEATURE_SSBD))
+				entry->edx |= F(SPEC_CTRL_SSBD);
 			/*
 			 * We emulate ARCH_CAPABILITIES in software even
 			 * if the host doesn't support it.
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 51d2cd15db92..dd23586ccd1b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1001,8 +1001,16 @@ u64 kvm_get_arch_capabilities(void)
 
 	rdmsrl_safe(MSR_IA32_ARCH_CAPABILITIES, &data);
 
+	if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
+		data |= ARCH_CAP_RDCL_NO;
+	if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
+		data |= ARCH_CAP_SSB_NO;
+	if (!boot_cpu_has_bug(X86_BUG_MDS))
+		data |= ARCH_CAP_MDS_NO;
+
 	return data;
 }
+
 EXPORT_SYMBOL_GPL(kvm_get_arch_capabilities);
 
 static bool __kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)

From 124635392ef394772850172bd5370e62cfe781b4 Mon Sep 17 00:00:00 2001
From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
Date: Wed, 23 Oct 2019 10:45:50 +0200
Subject: [PATCH 70/81] x86/msr: Add the IA32_TSX_CTRL MSR

commit c2955f270a84762343000f103e0640d29c7a96f3 upstream.

Transactional Synchronization Extensions (TSX) may be used on certain
processors as part of a speculative side channel attack.  A microcode
update for existing processors that are vulnerable to this attack will
add a new MSR - IA32_TSX_CTRL to allow the system administrator the
option to disable TSX as one of the possible mitigations.

The CPUs which get this new MSR after a microcode upgrade are the ones
which do not set MSR_IA32_ARCH_CAPABILITIES.MDS_NO (bit 5) because those
CPUs have CPUID.MD_CLEAR, i.e., the VERW implementation which clears all
CPU buffers takes care of the TAA case as well.

  [ Note that future processors that are not vulnerable will also
    support the IA32_TSX_CTRL MSR. ]

Add defines for the new IA32_TSX_CTRL MSR and its bits.

TSX has two sub-features:

1. Restricted Transactional Memory (RTM) is an explicitly-used feature
   where new instructions begin and end TSX transactions.
2. Hardware Lock Elision (HLE) is implicitly used when certain kinds of
   "old" style locks are used by software.

Bit 7 of the IA32_ARCH_CAPABILITIES indicates the presence of the
IA32_TSX_CTRL MSR.

There are two control bits in IA32_TSX_CTRL MSR:

  Bit 0: When set, it disables the Restricted Transactional Memory (RTM)
         sub-feature of TSX (will force all transactions to abort on the
	 XBEGIN instruction).

  Bit 1: When set, it disables the enumeration of the RTM and HLE feature
         (i.e. it will make CPUID(EAX=7).EBX{bit4} and
	  CPUID(EAX=7).EBX{bit11} read as 0).

The other TSX sub-feature, Hardware Lock Elision (HLE), is
unconditionally disabled by the new microcode but still enumerated
as present by CPUID(EAX=7).EBX{bit4}, unless disabled by
IA32_TSX_CTRL_MSR[1] - TSX_CTRL_CPUID_CLEAR.

Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Neelima Krishnan <neelima.krishnan@intel.com>
Reviewed-by: Mark Gross <mgross@linux.intel.com>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
[bwh: Backported to 4.4: adjust context]
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/msr-index.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 30183770132a..1a749bb8fcb6 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -71,10 +71,15 @@
 						  * Microarchitectural Data
 						  * Sampling (MDS) vulnerabilities.
 						  */
+#define ARCH_CAP_TSX_CTRL_MSR		BIT(7)	/* MSR for TSX control is available. */
 
 #define MSR_IA32_BBL_CR_CTL		0x00000119
 #define MSR_IA32_BBL_CR_CTL3		0x0000011e
 
+#define MSR_IA32_TSX_CTRL		0x00000122
+#define TSX_CTRL_RTM_DISABLE		BIT(0)	/* Disable RTM feature */
+#define TSX_CTRL_CPUID_CLEAR		BIT(1)	/* Disable TSX enumeration */
+
 #define MSR_IA32_SYSENTER_CS		0x00000174
 #define MSR_IA32_SYSENTER_ESP		0x00000175
 #define MSR_IA32_SYSENTER_EIP		0x00000176

From 725afc0d60cf8b9e3bf023920c881f4a4261dc49 Mon Sep 17 00:00:00 2001
From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
Date: Wed, 23 Oct 2019 10:52:35 +0200
Subject: [PATCH 71/81] x86/cpu: Add a helper function x86_read_arch_cap_msr()

commit 286836a70433fb64131d2590f4bf512097c255e1 upstream.

Add a helper function to read the IA32_ARCH_CAPABILITIES MSR.

Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Neelima Krishnan <neelima.krishnan@intel.com>
Reviewed-by: Mark Gross <mgross@linux.intel.com>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/common.c | 15 +++++++++++----
 arch/x86/kernel/cpu/cpu.h    |  2 ++
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 3965235973c8..ef8c98bc775c 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -918,19 +918,26 @@ static bool __init cpu_matches(unsigned long which)
 	return m && !!(m->driver_data & which);
 }
 
-static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
+u64 x86_read_arch_cap_msr(void)
 {
 	u64 ia32_cap = 0;
 
+	if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
+		rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
+
+	return ia32_cap;
+}
+
+static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
+{
+	u64 ia32_cap = x86_read_arch_cap_msr();
+
 	if (cpu_matches(NO_SPECULATION))
 		return;
 
 	setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
 	setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
 
-	if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES))
-		rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
-
 	if (!cpu_matches(NO_SSB) && !(ia32_cap & ARCH_CAP_SSB_NO) &&
 	   !cpu_has(c, X86_FEATURE_AMD_SSB_NO))
 		setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS);
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 3b19d82f7932..d4731aab3511 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -49,4 +49,6 @@ extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
 
 extern void x86_spec_ctrl_setup_ap(void);
 
+extern u64 x86_read_arch_cap_msr(void);
+
 #endif /* ARCH_X86_CPU_H */

From ab7b39b99f030a15f4f398ea6c2fbe9600e4818c Mon Sep 17 00:00:00 2001
From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
Date: Wed, 23 Oct 2019 11:01:53 +0200
Subject: [PATCH 72/81] x86/cpu: Add a "tsx=" cmdline option with TSX disabled
 by default

commit 95c5824f75f3ba4c9e8e5a4b1a623c95390ac266 upstream.

Add a kernel cmdline parameter "tsx" to control the Transactional
Synchronization Extensions (TSX) feature. On CPUs that support TSX
control, use "tsx=on|off" to enable or disable TSX. Not specifying this
option is equivalent to "tsx=off". This is because on certain processors
TSX may be used as a part of a speculative side channel attack.

Carve out the TSX controlling functionality into a separate compilation
unit because TSX is a CPU feature while the TSX async abort control
machinery will go to cpu/bugs.c.

 [ bp: - Massage, shorten and clear the arg buffer.
       - Clarifications of the tsx= possible options - Josh.
       - Expand on TSX_CTRL availability - Pawan. ]

Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
[bwh: Backported to 4.4:
 - Drop __ro_after_init attribute
 - Adjust filenames, context]
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/kernel-parameters.txt |  26 ++++++
 arch/x86/kernel/cpu/Makefile        |   2 +-
 arch/x86/kernel/cpu/common.c        |   2 +
 arch/x86/kernel/cpu/cpu.h           |  16 ++++
 arch/x86/kernel/cpu/intel.c         |   5 ++
 arch/x86/kernel/cpu/tsx.c           | 125 ++++++++++++++++++++++++++++
 6 files changed, 175 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/kernel/cpu/tsx.c

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 5b94c0bfba85..58421b7f62b0 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -4052,6 +4052,32 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			platforms where RDTSC is slow and this accounting
 			can add overhead.
 
+	tsx=		[X86] Control Transactional Synchronization
+			Extensions (TSX) feature in Intel processors that
+			support TSX control.
+
+			This parameter controls the TSX feature. The options are:
+
+			on	- Enable TSX on the system. Although there are
+				mitigations for all known security vulnerabilities,
+				TSX has been known to be an accelerator for
+				several previous speculation-related CVEs, and
+				so there may be unknown	security risks associated
+				with leaving it enabled.
+
+			off	- Disable TSX on the system. (Note that this
+				option takes effect only on newer CPUs which are
+				not vulnerable to MDS, i.e., have
+				MSR_IA32_ARCH_CAPABILITIES.MDS_NO=1 and which get
+				the new IA32_TSX_CTRL MSR through a microcode
+				update. This new MSR allows for the reliable
+				deactivation of the TSX functionality.)
+
+			Not specifying this option is equivalent to tsx=off.
+
+			See Documentation/hw-vuln/tsx_async_abort.rst
+			for more details.
+
 	turbografx.map[2|3]=	[HW,JOY]
 			TurboGraFX parallel port interface
 			Format:
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 924b65794abd..32cfabbced8c 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -21,7 +21,7 @@ obj-y			+= bugs.o
 obj-$(CONFIG_PROC_FS)	+= proc.o
 obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
 
-obj-$(CONFIG_CPU_SUP_INTEL)		+= intel.o
+obj-$(CONFIG_CPU_SUP_INTEL)		+= intel.o tsx.o
 obj-$(CONFIG_CPU_SUP_AMD)		+= amd.o
 obj-$(CONFIG_CPU_SUP_CYRIX_32)		+= cyrix.o
 obj-$(CONFIG_CPU_SUP_CENTAUR)		+= centaur.o
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index ef8c98bc775c..c8201b2df52d 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1294,6 +1294,8 @@ void __init identify_boot_cpu(void)
 	enable_sep_cpu();
 #endif
 	cpu_detect_tlb(&boot_cpu_data);
+
+	tsx_init();
 }
 
 void identify_secondary_cpu(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index d4731aab3511..c42cc1acd668 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -44,6 +44,22 @@ struct _tlb_table {
 extern const struct cpu_dev *const __x86_cpu_dev_start[],
 			    *const __x86_cpu_dev_end[];
 
+#ifdef CONFIG_CPU_SUP_INTEL
+enum tsx_ctrl_states {
+	TSX_CTRL_ENABLE,
+	TSX_CTRL_DISABLE,
+	TSX_CTRL_NOT_SUPPORTED,
+};
+
+extern enum tsx_ctrl_states tsx_ctrl_state;
+
+extern void __init tsx_init(void);
+extern void tsx_enable(void);
+extern void tsx_disable(void);
+#else
+static inline void tsx_init(void) { }
+#endif /* CONFIG_CPU_SUP_INTEL */
+
 extern void get_cpu_cap(struct cpuinfo_x86 *c);
 extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
 
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index b0e0c7a12e61..7beef3da5904 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -582,6 +582,11 @@ static void init_intel(struct cpuinfo_x86 *c)
 		detect_vmx_virtcap(c);
 
 	init_intel_energy_perf(c);
+
+	if (tsx_ctrl_state == TSX_CTRL_ENABLE)
+		tsx_enable();
+	if (tsx_ctrl_state == TSX_CTRL_DISABLE)
+		tsx_disable();
 }
 
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c
new file mode 100644
index 000000000000..0f8a60f43206
--- /dev/null
+++ b/arch/x86/kernel/cpu/tsx.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Intel Transactional Synchronization Extensions (TSX) control.
+ *
+ * Copyright (C) 2019 Intel Corporation
+ *
+ * Author:
+ *	Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+ */
+
+#include <linux/cpufeature.h>
+
+#include <asm/cmdline.h>
+
+#include "cpu.h"
+
+enum tsx_ctrl_states tsx_ctrl_state = TSX_CTRL_NOT_SUPPORTED;
+
+void tsx_disable(void)
+{
+	u64 tsx;
+
+	rdmsrl(MSR_IA32_TSX_CTRL, tsx);
+
+	/* Force all transactions to immediately abort */
+	tsx |= TSX_CTRL_RTM_DISABLE;
+
+	/*
+	 * Ensure TSX support is not enumerated in CPUID.
+	 * This is visible to userspace and will ensure they
+	 * do not waste resources trying TSX transactions that
+	 * will always abort.
+	 */
+	tsx |= TSX_CTRL_CPUID_CLEAR;
+
+	wrmsrl(MSR_IA32_TSX_CTRL, tsx);
+}
+
+void tsx_enable(void)
+{
+	u64 tsx;
+
+	rdmsrl(MSR_IA32_TSX_CTRL, tsx);
+
+	/* Enable the RTM feature in the cpu */
+	tsx &= ~TSX_CTRL_RTM_DISABLE;
+
+	/*
+	 * Ensure TSX support is enumerated in CPUID.
+	 * This is visible to userspace and will ensure they
+	 * can enumerate and use the TSX feature.
+	 */
+	tsx &= ~TSX_CTRL_CPUID_CLEAR;
+
+	wrmsrl(MSR_IA32_TSX_CTRL, tsx);
+}
+
+static bool __init tsx_ctrl_is_supported(void)
+{
+	u64 ia32_cap = x86_read_arch_cap_msr();
+
+	/*
+	 * TSX is controlled via MSR_IA32_TSX_CTRL.  However, support for this
+	 * MSR is enumerated by ARCH_CAP_TSX_MSR bit in MSR_IA32_ARCH_CAPABILITIES.
+	 *
+	 * TSX control (aka MSR_IA32_TSX_CTRL) is only available after a
+	 * microcode update on CPUs that have their MSR_IA32_ARCH_CAPABILITIES
+	 * bit MDS_NO=1. CPUs with MDS_NO=0 are not planned to get
+	 * MSR_IA32_TSX_CTRL support even after a microcode update. Thus,
+	 * tsx= cmdline requests will do nothing on CPUs without
+	 * MSR_IA32_TSX_CTRL support.
+	 */
+	return !!(ia32_cap & ARCH_CAP_TSX_CTRL_MSR);
+}
+
+void __init tsx_init(void)
+{
+	char arg[4] = {};
+	int ret;
+
+	if (!tsx_ctrl_is_supported())
+		return;
+
+	ret = cmdline_find_option(boot_command_line, "tsx", arg, sizeof(arg));
+	if (ret >= 0) {
+		if (!strcmp(arg, "on")) {
+			tsx_ctrl_state = TSX_CTRL_ENABLE;
+		} else if (!strcmp(arg, "off")) {
+			tsx_ctrl_state = TSX_CTRL_DISABLE;
+		} else {
+			tsx_ctrl_state = TSX_CTRL_DISABLE;
+			pr_err("tsx: invalid option, defaulting to off\n");
+		}
+	} else {
+		/* tsx= not provided, defaulting to off */
+		tsx_ctrl_state = TSX_CTRL_DISABLE;
+	}
+
+	if (tsx_ctrl_state == TSX_CTRL_DISABLE) {
+		tsx_disable();
+
+		/*
+		 * tsx_disable() will change the state of the
+		 * RTM CPUID bit.  Clear it here since it is now
+		 * expected to be not set.
+		 */
+		setup_clear_cpu_cap(X86_FEATURE_RTM);
+	} else if (tsx_ctrl_state == TSX_CTRL_ENABLE) {
+
+		/*
+		 * HW defaults TSX to be enabled at bootup.
+		 * We may still need the TSX enable support
+		 * during init for special cases like
+		 * kexec after TSX is disabled.
+		 */
+		tsx_enable();
+
+		/*
+		 * tsx_enable() will change the state of the
+		 * RTM CPUID bit.  Force it here since it is now
+		 * expected to be set.
+		 */
+		setup_force_cpu_cap(X86_FEATURE_RTM);
+	}
+}

From 1cdc174d7996a5fb413cd9c1a2b66581a0e80a99 Mon Sep 17 00:00:00 2001
From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
Date: Wed, 23 Oct 2019 11:30:45 +0200
Subject: [PATCH 73/81] x86/speculation/taa: Add mitigation for TSX Async Abort

commit 1b42f017415b46c317e71d41c34ec088417a1883 upstream.

TSX Async Abort (TAA) is a side channel vulnerability to the internal
buffers in some Intel processors similar to Microachitectural Data
Sampling (MDS). In this case, certain loads may speculatively pass
invalid data to dependent operations when an asynchronous abort
condition is pending in a TSX transaction.

This includes loads with no fault or assist condition. Such loads may
speculatively expose stale data from the uarch data structures as in
MDS. Scope of exposure is within the same-thread and cross-thread. This
issue affects all current processors that support TSX, but do not have
ARCH_CAP_TAA_NO (bit 8) set in MSR_IA32_ARCH_CAPABILITIES.

On CPUs which have their IA32_ARCH_CAPABILITIES MSR bit MDS_NO=0,
CPUID.MD_CLEAR=1 and the MDS mitigation is clearing the CPU buffers
using VERW or L1D_FLUSH, there is no additional mitigation needed for
TAA. On affected CPUs with MDS_NO=1 this issue can be mitigated by
disabling the Transactional Synchronization Extensions (TSX) feature.

A new MSR IA32_TSX_CTRL in future and current processors after a
microcode update can be used to control the TSX feature. There are two
bits in that MSR:

* TSX_CTRL_RTM_DISABLE disables the TSX sub-feature Restricted
Transactional Memory (RTM).

* TSX_CTRL_CPUID_CLEAR clears the RTM enumeration in CPUID. The other
TSX sub-feature, Hardware Lock Elision (HLE), is unconditionally
disabled with updated microcode but still enumerated as present by
CPUID(EAX=7).EBX{bit4}.

The second mitigation approach is similar to MDS which is clearing the
affected CPU buffers on return to user space and when entering a guest.
Relevant microcode update is required for the mitigation to work.  More
details on this approach can be found here:

  https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html

The TSX feature can be controlled by the "tsx" command line parameter.
If it is force-enabled then "Clear CPU buffers" (MDS mitigation) is
deployed. The effective mitigation state can be read from sysfs.

 [ bp:
   - massage + comments cleanup
   - s/TAA_MITIGATION_TSX_DISABLE/TAA_MITIGATION_TSX_DISABLED/g - Josh.
   - remove partial TAA mitigation in update_mds_branch_idle() - Josh.
   - s/tsx_async_abort_cmdline/tsx_async_abort_parse_cmdline/g
 ]

Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
[bwh: Backported to 4.4:
 - Add #include "cpu.h" in bugs.c
 - Drop __ro_after_init attribute
 - Drop "nosmt" support
 - Adjust context, indentation]
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/cpufeatures.h   |   1 +
 arch/x86/include/asm/msr-index.h     |   4 ++
 arch/x86/include/asm/nospec-branch.h |   4 +-
 arch/x86/include/asm/processor.h     |   7 ++
 arch/x86/kernel/cpu/bugs.c           | 103 +++++++++++++++++++++++++++
 arch/x86/kernel/cpu/common.c         |  15 ++++
 6 files changed, 132 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 113cb01ebaac..28c717060fe6 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -340,5 +340,6 @@
 #define X86_BUG_MDS		X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */
 #define X86_BUG_MSBDS_ONLY	X86_BUG(20) /* CPU is only affected by the  MSDBS variant of BUG_MDS */
 #define X86_BUG_SWAPGS		X86_BUG(21) /* CPU is affected by speculation through SWAPGS */
+#define X86_BUG_TAA		X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */
 
 #endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 1a749bb8fcb6..40f18a4e6c0d 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -72,6 +72,10 @@
 						  * Sampling (MDS) vulnerabilities.
 						  */
 #define ARCH_CAP_TSX_CTRL_MSR		BIT(7)	/* MSR for TSX control is available. */
+#define ARCH_CAP_TAA_NO			BIT(8)	/*
+						 * Not susceptible to
+						 * TSX Async Abort (TAA) vulnerabilities.
+						 */
 
 #define MSR_IA32_BBL_CR_CTL		0x00000119
 #define MSR_IA32_BBL_CR_CTL3		0x0000011e
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index c3138ac80db2..783f0711895b 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -268,7 +268,7 @@ DECLARE_STATIC_KEY_FALSE(mds_idle_clear);
 #include <asm/segment.h>
 
 /**
- * mds_clear_cpu_buffers - Mitigation for MDS vulnerability
+ * mds_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability
  *
  * This uses the otherwise unused and obsolete VERW instruction in
  * combination with microcode which triggers a CPU buffer flush when the
@@ -291,7 +291,7 @@ static inline void mds_clear_cpu_buffers(void)
 }
 
 /**
- * mds_user_clear_cpu_buffers - Mitigation for MDS vulnerability
+ * mds_user_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability
  *
  * Clear CPU buffers if the corresponding static key is enabled
  */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index dab73faef9b0..cac54e61c299 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -852,4 +852,11 @@ enum mds_mitigations {
 	MDS_MITIGATION_VMWERV,
 };
 
+enum taa_mitigations {
+	TAA_MITIGATION_OFF,
+	TAA_MITIGATION_UCODE_NEEDED,
+	TAA_MITIGATION_VERW,
+	TAA_MITIGATION_TSX_DISABLED,
+};
+
 #endif /* _ASM_X86_PROCESSOR_H */
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 917c63aa1599..36634edd865d 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -30,11 +30,14 @@
 #include <asm/intel-family.h>
 #include <asm/e820.h>
 
+#include "cpu.h"
+
 static void __init spectre_v1_select_mitigation(void);
 static void __init spectre_v2_select_mitigation(void);
 static void __init ssb_select_mitigation(void);
 static void __init l1tf_select_mitigation(void);
 static void __init mds_select_mitigation(void);
+static void __init taa_select_mitigation(void);
 
 /* The base value of the SPEC_CTRL MSR that always has to be preserved. */
 u64 x86_spec_ctrl_base;
@@ -94,6 +97,7 @@ void __init check_bugs(void)
 	ssb_select_mitigation();
 	l1tf_select_mitigation();
 	mds_select_mitigation();
+	taa_select_mitigation();
 
 	arch_smt_update();
 
@@ -246,6 +250,93 @@ static int __init mds_cmdline(char *str)
 }
 early_param("mds", mds_cmdline);
 
+#undef pr_fmt
+#define pr_fmt(fmt)	"TAA: " fmt
+
+/* Default mitigation for TAA-affected CPUs */
+static enum taa_mitigations taa_mitigation = TAA_MITIGATION_VERW;
+
+static const char * const taa_strings[] = {
+	[TAA_MITIGATION_OFF]		= "Vulnerable",
+	[TAA_MITIGATION_UCODE_NEEDED]	= "Vulnerable: Clear CPU buffers attempted, no microcode",
+	[TAA_MITIGATION_VERW]		= "Mitigation: Clear CPU buffers",
+	[TAA_MITIGATION_TSX_DISABLED]	= "Mitigation: TSX disabled",
+};
+
+static void __init taa_select_mitigation(void)
+{
+	u64 ia32_cap;
+
+	if (!boot_cpu_has_bug(X86_BUG_TAA)) {
+		taa_mitigation = TAA_MITIGATION_OFF;
+		return;
+	}
+
+	/* TSX previously disabled by tsx=off */
+	if (!boot_cpu_has(X86_FEATURE_RTM)) {
+		taa_mitigation = TAA_MITIGATION_TSX_DISABLED;
+		goto out;
+	}
+
+	if (cpu_mitigations_off()) {
+		taa_mitigation = TAA_MITIGATION_OFF;
+		return;
+	}
+
+	/* TAA mitigation is turned off on the cmdline (tsx_async_abort=off) */
+	if (taa_mitigation == TAA_MITIGATION_OFF)
+		goto out;
+
+	if (boot_cpu_has(X86_FEATURE_MD_CLEAR))
+		taa_mitigation = TAA_MITIGATION_VERW;
+	else
+		taa_mitigation = TAA_MITIGATION_UCODE_NEEDED;
+
+	/*
+	 * VERW doesn't clear the CPU buffers when MD_CLEAR=1 and MDS_NO=1.
+	 * A microcode update fixes this behavior to clear CPU buffers. It also
+	 * adds support for MSR_IA32_TSX_CTRL which is enumerated by the
+	 * ARCH_CAP_TSX_CTRL_MSR bit.
+	 *
+	 * On MDS_NO=1 CPUs if ARCH_CAP_TSX_CTRL_MSR is not set, microcode
+	 * update is required.
+	 */
+	ia32_cap = x86_read_arch_cap_msr();
+	if ( (ia32_cap & ARCH_CAP_MDS_NO) &&
+	    !(ia32_cap & ARCH_CAP_TSX_CTRL_MSR))
+		taa_mitigation = TAA_MITIGATION_UCODE_NEEDED;
+
+	/*
+	 * TSX is enabled, select alternate mitigation for TAA which is
+	 * the same as MDS. Enable MDS static branch to clear CPU buffers.
+	 *
+	 * For guests that can't determine whether the correct microcode is
+	 * present on host, enable the mitigation for UCODE_NEEDED as well.
+	 */
+	static_branch_enable(&mds_user_clear);
+
+out:
+	pr_info("%s\n", taa_strings[taa_mitigation]);
+}
+
+static int __init tsx_async_abort_parse_cmdline(char *str)
+{
+	if (!boot_cpu_has_bug(X86_BUG_TAA))
+		return 0;
+
+	if (!str)
+		return -EINVAL;
+
+	if (!strcmp(str, "off")) {
+		taa_mitigation = TAA_MITIGATION_OFF;
+	} else if (!strcmp(str, "full")) {
+		taa_mitigation = TAA_MITIGATION_VERW;
+	}
+
+	return 0;
+}
+early_param("tsx_async_abort", tsx_async_abort_parse_cmdline);
+
 #undef pr_fmt
 #define pr_fmt(fmt)     "Spectre V1 : " fmt
 
@@ -758,6 +849,7 @@ static void update_mds_branch_idle(void)
 }
 
 #define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n"
+#define TAA_MSG_SMT "TAA CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html for more details.\n"
 
 void arch_smt_update(void)
 {
@@ -790,6 +882,17 @@ void arch_smt_update(void)
 		break;
 	}
 
+	switch (taa_mitigation) {
+	case TAA_MITIGATION_VERW:
+	case TAA_MITIGATION_UCODE_NEEDED:
+		if (sched_smt_active())
+			pr_warn_once(TAA_MSG_SMT);
+		break;
+	case TAA_MITIGATION_TSX_DISABLED:
+	case TAA_MITIGATION_OFF:
+		break;
+	}
+
 	mutex_unlock(&spec_ctrl_mutex);
 }
 
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index c8201b2df52d..53f4efe94a5e 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -954,6 +954,21 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
 	if (!cpu_matches(NO_SWAPGS))
 		setup_force_cpu_bug(X86_BUG_SWAPGS);
 
+	/*
+	 * When the CPU is not mitigated for TAA (TAA_NO=0) set TAA bug when:
+	 *	- TSX is supported or
+	 *	- TSX_CTRL is present
+	 *
+	 * TSX_CTRL check is needed for cases when TSX could be disabled before
+	 * the kernel boot e.g. kexec.
+	 * TSX_CTRL check alone is not sufficient for cases when the microcode
+	 * update is not present or running as guest that don't get TSX_CTRL.
+	 */
+	if (!(ia32_cap & ARCH_CAP_TAA_NO) &&
+	    (cpu_has(c, X86_FEATURE_RTM) ||
+	     (ia32_cap & ARCH_CAP_TSX_CTRL_MSR)))
+		setup_force_cpu_bug(X86_BUG_TAA);
+
 	if (cpu_matches(NO_MELTDOWN))
 		return;
 

From d601096be42c88a20240dddd426c6273899f2468 Mon Sep 17 00:00:00 2001
From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
Date: Wed, 23 Oct 2019 12:19:51 +0200
Subject: [PATCH 74/81] x86/speculation/taa: Add sysfs reporting for TSX Async
 Abort

commit 6608b45ac5ecb56f9e171252229c39580cc85f0f upstream.

Add the sysfs reporting file for TSX Async Abort. It exposes the
vulnerability and the mitigation state similar to the existing files for
the other hardware vulnerabilities.

Sysfs file path is:
/sys/devices/system/cpu/vulnerabilities/tsx_async_abort

Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Neelima Krishnan <neelima.krishnan@intel.com>
Reviewed-by: Mark Gross <mgross@linux.intel.com>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/bugs.c | 23 +++++++++++++++++++++++
 drivers/base/cpu.c         |  9 +++++++++
 include/linux/cpu.h        |  3 +++
 3 files changed, 35 insertions(+)

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 36634edd865d..df103f7358fe 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -1300,6 +1300,21 @@ static ssize_t mds_show_state(char *buf)
 		       sched_smt_active() ? "vulnerable" : "disabled");
 }
 
+static ssize_t tsx_async_abort_show_state(char *buf)
+{
+	if ((taa_mitigation == TAA_MITIGATION_TSX_DISABLED) ||
+	    (taa_mitigation == TAA_MITIGATION_OFF))
+		return sprintf(buf, "%s\n", taa_strings[taa_mitigation]);
+
+	if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
+		return sprintf(buf, "%s; SMT Host state unknown\n",
+			       taa_strings[taa_mitigation]);
+	}
+
+	return sprintf(buf, "%s; SMT %s\n", taa_strings[taa_mitigation],
+		       sched_smt_active() ? "vulnerable" : "disabled");
+}
+
 static char *stibp_state(void)
 {
 	if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
@@ -1365,6 +1380,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
 	case X86_BUG_MDS:
 		return mds_show_state(buf);
 
+	case X86_BUG_TAA:
+		return tsx_async_abort_show_state(buf);
+
 	default:
 		break;
 	}
@@ -1401,4 +1419,9 @@ ssize_t cpu_show_mds(struct device *dev, struct device_attribute *attr, char *bu
 {
 	return cpu_show_common(dev, attr, buf, X86_BUG_MDS);
 }
+
+ssize_t cpu_show_tsx_async_abort(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	return cpu_show_common(dev, attr, buf, X86_BUG_TAA);
+}
 #endif
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 3934aaf9d157..639683b3c939 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -536,12 +536,20 @@ ssize_t __weak cpu_show_mds(struct device *dev,
 	return sprintf(buf, "Not affected\n");
 }
 
+ssize_t __weak cpu_show_tsx_async_abort(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	return sprintf(buf, "Not affected\n");
+}
+
 static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
 static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
 static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
 static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL);
 static DEVICE_ATTR(l1tf, 0444, cpu_show_l1tf, NULL);
 static DEVICE_ATTR(mds, 0444, cpu_show_mds, NULL);
+static DEVICE_ATTR(tsx_async_abort, 0444, cpu_show_tsx_async_abort, NULL);
 
 static struct attribute *cpu_root_vulnerabilities_attrs[] = {
 	&dev_attr_meltdown.attr,
@@ -550,6 +558,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = {
 	&dev_attr_spec_store_bypass.attr,
 	&dev_attr_l1tf.attr,
 	&dev_attr_mds.attr,
+	&dev_attr_tsx_async_abort.attr,
 	NULL
 };
 
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 664f892d6e73..ce8c22491d05 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -52,6 +52,9 @@ extern ssize_t cpu_show_l1tf(struct device *dev,
 			     struct device_attribute *attr, char *buf);
 extern ssize_t cpu_show_mds(struct device *dev,
 			    struct device_attribute *attr, char *buf);
+extern ssize_t cpu_show_tsx_async_abort(struct device *dev,
+					struct device_attribute *attr,
+					char *buf);
 
 extern __printf(4, 5)
 struct device *cpu_device_create(struct device *parent, void *drvdata,

From 5b3b71385e07830aab9ee1c6f03bf140736e1fdc Mon Sep 17 00:00:00 2001
From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
Date: Wed, 23 Oct 2019 12:23:33 +0200
Subject: [PATCH 75/81] kvm/x86: Export MDS_NO=0 to guests when TSX is enabled

commit e1d38b63acd843cfdd4222bf19a26700fd5c699e upstream.

Export the IA32_ARCH_CAPABILITIES MSR bit MDS_NO=0 to guests on TSX
Async Abort(TAA) affected hosts that have TSX enabled and updated
microcode. This is required so that the guests don't complain,

  "Vulnerable: Clear CPU buffers attempted, no microcode"

when the host has the updated microcode to clear CPU buffers.

Microcode update also adds support for MSR_IA32_TSX_CTRL which is
enumerated by the ARCH_CAP_TSX_CTRL bit in IA32_ARCH_CAPABILITIES MSR.
Guests can't do this check themselves when the ARCH_CAP_TSX_CTRL bit is
not exported to the guests.

In this case export MDS_NO=0 to the guests. When guests have
CPUID.MD_CLEAR=1, they deploy MDS mitigation which also mitigates TAA.

Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Neelima Krishnan <neelima.krishnan@intel.com>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kvm/x86.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index dd23586ccd1b..3b711cd261d7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1008,6 +1008,25 @@ u64 kvm_get_arch_capabilities(void)
 	if (!boot_cpu_has_bug(X86_BUG_MDS))
 		data |= ARCH_CAP_MDS_NO;
 
+	/*
+	 * On TAA affected systems, export MDS_NO=0 when:
+	 *	- TSX is enabled on the host, i.e. X86_FEATURE_RTM=1.
+	 *	- Updated microcode is present. This is detected by
+	 *	  the presence of ARCH_CAP_TSX_CTRL_MSR and ensures
+	 *	  that VERW clears CPU buffers.
+	 *
+	 * When MDS_NO=0 is exported, guests deploy clear CPU buffer
+	 * mitigation and don't complain:
+	 *
+	 *	"Vulnerable: Clear CPU buffers attempted, no microcode"
+	 *
+	 * If TSX is disabled on the system, guests are also mitigated against
+	 * TAA and clear CPU buffer mitigation is not required for guests.
+	 */
+	if (boot_cpu_has_bug(X86_BUG_TAA) && boot_cpu_has(X86_FEATURE_RTM) &&
+	    (data & ARCH_CAP_TSX_CTRL_MSR))
+		data &= ~ARCH_CAP_MDS_NO;
+
 	return data;
 }
 

From f6eabafe0fa2a007219af36340414f961bedde9d Mon Sep 17 00:00:00 2001
From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
Date: Wed, 23 Oct 2019 12:28:57 +0200
Subject: [PATCH 76/81] x86/tsx: Add "auto" option to the tsx= cmdline
 parameter

commit 7531a3596e3272d1f6841e0d601a614555dc6b65 upstream.

Platforms which are not affected by X86_BUG_TAA may want the TSX feature
enabled. Add "auto" option to the TSX cmdline parameter. When tsx=auto
disable TSX when X86_BUG_TAA is present, otherwise enable TSX.

More details on X86_BUG_TAA can be found here:
https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html

 [ bp: Extend the arg buffer to accommodate "auto\0". ]

Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
[bwh: Backported to 4.4: adjust filename]
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/kernel-parameters.txt | 3 +++
 arch/x86/kernel/cpu/tsx.c           | 7 ++++++-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 58421b7f62b0..49dd31f30022 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -4073,6 +4073,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 				update. This new MSR allows for the reliable
 				deactivation of the TSX functionality.)
 
+			auto	- Disable TSX if X86_BUG_TAA is present,
+				  otherwise enable TSX on the system.
+
 			Not specifying this option is equivalent to tsx=off.
 
 			See Documentation/hw-vuln/tsx_async_abort.rst
diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c
index 0f8a60f43206..d73bc1ae932b 100644
--- a/arch/x86/kernel/cpu/tsx.c
+++ b/arch/x86/kernel/cpu/tsx.c
@@ -75,7 +75,7 @@ static bool __init tsx_ctrl_is_supported(void)
 
 void __init tsx_init(void)
 {
-	char arg[4] = {};
+	char arg[5] = {};
 	int ret;
 
 	if (!tsx_ctrl_is_supported())
@@ -87,6 +87,11 @@ void __init tsx_init(void)
 			tsx_ctrl_state = TSX_CTRL_ENABLE;
 		} else if (!strcmp(arg, "off")) {
 			tsx_ctrl_state = TSX_CTRL_DISABLE;
+		} else if (!strcmp(arg, "auto")) {
+			if (boot_cpu_has_bug(X86_BUG_TAA))
+				tsx_ctrl_state = TSX_CTRL_DISABLE;
+			else
+				tsx_ctrl_state = TSX_CTRL_ENABLE;
 		} else {
 			tsx_ctrl_state = TSX_CTRL_DISABLE;
 			pr_err("tsx: invalid option, defaulting to off\n");

From d4ce17c877b104cec728b80f66d880d3dc0d5a59 Mon Sep 17 00:00:00 2001
From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
Date: Wed, 23 Oct 2019 12:32:55 +0200
Subject: [PATCH 77/81] x86/speculation/taa: Add documentation for TSX Async
 Abort

commit a7a248c593e4fd7a67c50b5f5318fe42a0db335e upstream.

Add the documenation for TSX Async Abort. Include the description of
the issue, how to check the mitigation state, control the mitigation,
guidance for system administrators.

 [ bp: Add proper SPDX tags, touch ups by Josh and me. ]

Co-developed-by: Antonio Gomez Iglesias <antonio.gomez.iglesias@intel.com>

Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
Signed-off-by: Antonio Gomez Iglesias <antonio.gomez.iglesias@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Mark Gross <mgross@linux.intel.com>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
[bwh: Backported to 4.4:
 - Drop changes to ReST index files
 - Drop "nosmt" documentation
 - Adjust filenames, context]
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../ABI/testing/sysfs-devices-system-cpu      |   1 +
 Documentation/hw-vuln/tsx_async_abort.rst     | 268 ++++++++++++++++++
 Documentation/kernel-parameters.txt           |  33 +++
 Documentation/x86/tsx_async_abort.rst         | 117 ++++++++
 4 files changed, 419 insertions(+)
 create mode 100644 Documentation/hw-vuln/tsx_async_abort.rst
 create mode 100644 Documentation/x86/tsx_async_abort.rst

diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index e4cd3be77663..9eafe1bdaaf4 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -279,6 +279,7 @@ What:		/sys/devices/system/cpu/vulnerabilities
 		/sys/devices/system/cpu/vulnerabilities/spec_store_bypass
 		/sys/devices/system/cpu/vulnerabilities/l1tf
 		/sys/devices/system/cpu/vulnerabilities/mds
+		/sys/devices/system/cpu/vulnerabilities/tsx_async_abort
 Date:		January 2018
 Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
 Description:	Information about CPU vulnerabilities
diff --git a/Documentation/hw-vuln/tsx_async_abort.rst b/Documentation/hw-vuln/tsx_async_abort.rst
new file mode 100644
index 000000000000..38beda735f39
--- /dev/null
+++ b/Documentation/hw-vuln/tsx_async_abort.rst
@@ -0,0 +1,268 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+TAA - TSX Asynchronous Abort
+======================================
+
+TAA is a hardware vulnerability that allows unprivileged speculative access to
+data which is available in various CPU internal buffers by using asynchronous
+aborts within an Intel TSX transactional region.
+
+Affected processors
+-------------------
+
+This vulnerability only affects Intel processors that support Intel
+Transactional Synchronization Extensions (TSX) when the TAA_NO bit (bit 8)
+is 0 in the IA32_ARCH_CAPABILITIES MSR.  On processors where the MDS_NO bit
+(bit 5) is 0 in the IA32_ARCH_CAPABILITIES MSR, the existing MDS mitigations
+also mitigate against TAA.
+
+Whether a processor is affected or not can be read out from the TAA
+vulnerability file in sysfs. See :ref:`tsx_async_abort_sys_info`.
+
+Related CVEs
+------------
+
+The following CVE entry is related to this TAA issue:
+
+   ==============  =====  ===================================================
+   CVE-2019-11135  TAA    TSX Asynchronous Abort (TAA) condition on some
+                          microprocessors utilizing speculative execution may
+                          allow an authenticated user to potentially enable
+                          information disclosure via a side channel with
+                          local access.
+   ==============  =====  ===================================================
+
+Problem
+-------
+
+When performing store, load or L1 refill operations, processors write
+data into temporary microarchitectural structures (buffers). The data in
+those buffers can be forwarded to load operations as an optimization.
+
+Intel TSX is an extension to the x86 instruction set architecture that adds
+hardware transactional memory support to improve performance of multi-threaded
+software. TSX lets the processor expose and exploit concurrency hidden in an
+application due to dynamically avoiding unnecessary synchronization.
+
+TSX supports atomic memory transactions that are either committed (success) or
+aborted. During an abort, operations that happened within the transactional region
+are rolled back. An asynchronous abort takes place, among other options, when a
+different thread accesses a cache line that is also used within the transactional
+region when that access might lead to a data race.
+
+Immediately after an uncompleted asynchronous abort, certain speculatively
+executed loads may read data from those internal buffers and pass it to dependent
+operations. This can be then used to infer the value via a cache side channel
+attack.
+
+Because the buffers are potentially shared between Hyper-Threads cross
+Hyper-Thread attacks are possible.
+
+The victim of a malicious actor does not need to make use of TSX. Only the
+attacker needs to begin a TSX transaction and raise an asynchronous abort
+which in turn potenitally leaks data stored in the buffers.
+
+More detailed technical information is available in the TAA specific x86
+architecture section: :ref:`Documentation/x86/tsx_async_abort.rst <tsx_async_abort>`.
+
+
+Attack scenarios
+----------------
+
+Attacks against the TAA vulnerability can be implemented from unprivileged
+applications running on hosts or guests.
+
+As for MDS, the attacker has no control over the memory addresses that can
+be leaked. Only the victim is responsible for bringing data to the CPU. As
+a result, the malicious actor has to sample as much data as possible and
+then postprocess it to try to infer any useful information from it.
+
+A potential attacker only has read access to the data. Also, there is no direct
+privilege escalation by using this technique.
+
+
+.. _tsx_async_abort_sys_info:
+
+TAA system information
+-----------------------
+
+The Linux kernel provides a sysfs interface to enumerate the current TAA status
+of mitigated systems. The relevant sysfs file is:
+
+/sys/devices/system/cpu/vulnerabilities/tsx_async_abort
+
+The possible values in this file are:
+
+.. list-table::
+
+   * - 'Vulnerable'
+     - The CPU is affected by this vulnerability and the microcode and kernel mitigation are not applied.
+   * - 'Vulnerable: Clear CPU buffers attempted, no microcode'
+     - The system tries to clear the buffers but the microcode might not support the operation.
+   * - 'Mitigation: Clear CPU buffers'
+     - The microcode has been updated to clear the buffers. TSX is still enabled.
+   * - 'Mitigation: TSX disabled'
+     - TSX is disabled.
+   * - 'Not affected'
+     - The CPU is not affected by this issue.
+
+.. _ucode_needed:
+
+Best effort mitigation mode
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+If the processor is vulnerable, but the availability of the microcode-based
+mitigation mechanism is not advertised via CPUID the kernel selects a best
+effort mitigation mode.  This mode invokes the mitigation instructions
+without a guarantee that they clear the CPU buffers.
+
+This is done to address virtualization scenarios where the host has the
+microcode update applied, but the hypervisor is not yet updated to expose the
+CPUID to the guest. If the host has updated microcode the protection takes
+effect; otherwise a few CPU cycles are wasted pointlessly.
+
+The state in the tsx_async_abort sysfs file reflects this situation
+accordingly.
+
+
+Mitigation mechanism
+--------------------
+
+The kernel detects the affected CPUs and the presence of the microcode which is
+required. If a CPU is affected and the microcode is available, then the kernel
+enables the mitigation by default.
+
+
+The mitigation can be controlled at boot time via a kernel command line option.
+See :ref:`taa_mitigation_control_command_line`.
+
+.. _virt_mechanism:
+
+Virtualization mitigation
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Affected systems where the host has TAA microcode and TAA is mitigated by
+having disabled TSX previously, are not vulnerable regardless of the status
+of the VMs.
+
+In all other cases, if the host either does not have the TAA microcode or
+the kernel is not mitigated, the system might be vulnerable.
+
+
+.. _taa_mitigation_control_command_line:
+
+Mitigation control on the kernel command line
+---------------------------------------------
+
+The kernel command line allows to control the TAA mitigations at boot time with
+the option "tsx_async_abort=". The valid arguments for this option are:
+
+  ============  =============================================================
+  off		This option disables the TAA mitigation on affected platforms.
+                If the system has TSX enabled (see next parameter) and the CPU
+                is affected, the system is vulnerable.
+
+  full	        TAA mitigation is enabled. If TSX is enabled, on an affected
+                system it will clear CPU buffers on ring transitions. On
+                systems which are MDS-affected and deploy MDS mitigation,
+                TAA is also mitigated. Specifying this option on those
+                systems will have no effect.
+  ============  =============================================================
+
+Not specifying this option is equivalent to "tsx_async_abort=full".
+
+The kernel command line also allows to control the TSX feature using the
+parameter "tsx=" on CPUs which support TSX control. MSR_IA32_TSX_CTRL is used
+to control the TSX feature and the enumeration of the TSX feature bits (RTM
+and HLE) in CPUID.
+
+The valid options are:
+
+  ============  =============================================================
+  off		Disables TSX on the system.
+
+                Note that this option takes effect only on newer CPUs which are
+                not vulnerable to MDS, i.e., have MSR_IA32_ARCH_CAPABILITIES.MDS_NO=1
+                and which get the new IA32_TSX_CTRL MSR through a microcode
+                update. This new MSR allows for the reliable deactivation of
+                the TSX functionality.
+
+  on		Enables TSX.
+
+                Although there are mitigations for all known security
+                vulnerabilities, TSX has been known to be an accelerator for
+                several previous speculation-related CVEs, and so there may be
+                unknown security risks associated with leaving it enabled.
+
+  auto		Disables TSX if X86_BUG_TAA is present, otherwise enables TSX
+                on the system.
+  ============  =============================================================
+
+Not specifying this option is equivalent to "tsx=off".
+
+The following combinations of the "tsx_async_abort" and "tsx" are possible. For
+affected platforms tsx=auto is equivalent to tsx=off and the result will be:
+
+  =========  ==========================   =========================================
+  tsx=on     tsx_async_abort=full         The system will use VERW to clear CPU
+                                          buffers. Cross-thread attacks are still
+					  possible on SMT machines.
+  tsx=on     tsx_async_abort=off          The system is vulnerable.
+  tsx=off    tsx_async_abort=full         TSX might be disabled if microcode
+                                          provides a TSX control MSR. If so,
+					  system is not vulnerable.
+  tsx=off    tsx_async_abort=off          ditto
+  =========  ==========================   =========================================
+
+
+For unaffected platforms "tsx=on" and "tsx_async_abort=full" does not clear CPU
+buffers.  For platforms without TSX control (MSR_IA32_ARCH_CAPABILITIES.MDS_NO=0)
+"tsx" command line argument has no effect.
+
+For the affected platforms below table indicates the mitigation status for the
+combinations of CPUID bit MD_CLEAR and IA32_ARCH_CAPABILITIES MSR bits MDS_NO
+and TSX_CTRL_MSR.
+
+  =======  =========  =============  ========================================
+  MDS_NO   MD_CLEAR   TSX_CTRL_MSR   Status
+  =======  =========  =============  ========================================
+    0          0            0        Vulnerable (needs microcode)
+    0          1            0        MDS and TAA mitigated via VERW
+    1          1            0        MDS fixed, TAA vulnerable if TSX enabled
+                                     because MD_CLEAR has no meaning and
+                                     VERW is not guaranteed to clear buffers
+    1          X            1        MDS fixed, TAA can be mitigated by
+                                     VERW or TSX_CTRL_MSR
+  =======  =========  =============  ========================================
+
+Mitigation selection guide
+--------------------------
+
+1. Trusted userspace and guests
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+If all user space applications are from a trusted source and do not execute
+untrusted code which is supplied externally, then the mitigation can be
+disabled. The same applies to virtualized environments with trusted guests.
+
+
+2. Untrusted userspace and guests
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+If there are untrusted applications or guests on the system, enabling TSX
+might allow a malicious actor to leak data from the host or from other
+processes running on the same physical core.
+
+If the microcode is available and the TSX is disabled on the host, attacks
+are prevented in a virtualized environment as well, even if the VMs do not
+explicitly enable the mitigation.
+
+
+.. _taa_default_mitigations:
+
+Default mitigations
+-------------------
+
+The kernel's default action for vulnerable processors is:
+
+  - Deploy TSX disable mitigation (tsx_async_abort=full tsx=off).
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 49dd31f30022..70f05fc3873c 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2189,6 +2189,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 					       spectre_v2_user=off [X86]
 					       spec_store_bypass_disable=off [X86]
 					       mds=off [X86]
+					       tsx_async_abort=off [X86]
 
 			auto (default)
 				Mitigate all CPU vulnerabilities, but leave SMT
@@ -4081,6 +4082,38 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			See Documentation/hw-vuln/tsx_async_abort.rst
 			for more details.
 
+	tsx_async_abort= [X86,INTEL] Control mitigation for the TSX Async
+			Abort (TAA) vulnerability.
+
+			Similar to Micro-architectural Data Sampling (MDS)
+			certain CPUs that support Transactional
+			Synchronization Extensions (TSX) are vulnerable to an
+			exploit against CPU internal buffers which can forward
+			information to a disclosure gadget under certain
+			conditions.
+
+			In vulnerable processors, the speculatively forwarded
+			data can be used in a cache side channel attack, to
+			access data to which the attacker does not have direct
+			access.
+
+			This parameter controls the TAA mitigation.  The
+			options are:
+
+			full       - Enable TAA mitigation on vulnerable CPUs
+				     if TSX is enabled.
+
+			off        - Unconditionally disable TAA mitigation
+
+			Not specifying this option is equivalent to
+			tsx_async_abort=full.  On CPUs which are MDS affected
+			and deploy MDS mitigation, TAA mitigation is not
+			required and doesn't provide any additional
+			mitigation.
+
+			For details see:
+			Documentation/hw-vuln/tsx_async_abort.rst
+
 	turbografx.map[2|3]=	[HW,JOY]
 			TurboGraFX parallel port interface
 			Format:
diff --git a/Documentation/x86/tsx_async_abort.rst b/Documentation/x86/tsx_async_abort.rst
new file mode 100644
index 000000000000..4a4336a89372
--- /dev/null
+++ b/Documentation/x86/tsx_async_abort.rst
@@ -0,0 +1,117 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+TSX Async Abort (TAA) mitigation
+================================
+
+.. _tsx_async_abort:
+
+Overview
+--------
+
+TSX Async Abort (TAA) is a side channel attack on internal buffers in some
+Intel processors similar to Microachitectural Data Sampling (MDS).  In this
+case certain loads may speculatively pass invalid data to dependent operations
+when an asynchronous abort condition is pending in a Transactional
+Synchronization Extensions (TSX) transaction.  This includes loads with no
+fault or assist condition. Such loads may speculatively expose stale data from
+the same uarch data structures as in MDS, with same scope of exposure i.e.
+same-thread and cross-thread. This issue affects all current processors that
+support TSX.
+
+Mitigation strategy
+-------------------
+
+a) TSX disable - one of the mitigations is to disable TSX. A new MSR
+IA32_TSX_CTRL will be available in future and current processors after
+microcode update which can be used to disable TSX. In addition, it
+controls the enumeration of the TSX feature bits (RTM and HLE) in CPUID.
+
+b) Clear CPU buffers - similar to MDS, clearing the CPU buffers mitigates this
+vulnerability. More details on this approach can be found in
+:ref:`Documentation/hw-vuln/mds.rst <mds>`.
+
+Kernel internal mitigation modes
+--------------------------------
+
+ =============    ============================================================
+ off              Mitigation is disabled. Either the CPU is not affected or
+                  tsx_async_abort=off is supplied on the kernel command line.
+
+ tsx disabled     Mitigation is enabled. TSX feature is disabled by default at
+                  bootup on processors that support TSX control.
+
+ verw             Mitigation is enabled. CPU is affected and MD_CLEAR is
+                  advertised in CPUID.
+
+ ucode needed     Mitigation is enabled. CPU is affected and MD_CLEAR is not
+                  advertised in CPUID. That is mainly for virtualization
+                  scenarios where the host has the updated microcode but the
+                  hypervisor does not expose MD_CLEAR in CPUID. It's a best
+                  effort approach without guarantee.
+ =============    ============================================================
+
+If the CPU is affected and the "tsx_async_abort" kernel command line parameter is
+not provided then the kernel selects an appropriate mitigation depending on the
+status of RTM and MD_CLEAR CPUID bits.
+
+Below tables indicate the impact of tsx=on|off|auto cmdline options on state of
+TAA mitigation, VERW behavior and TSX feature for various combinations of
+MSR_IA32_ARCH_CAPABILITIES bits.
+
+1. "tsx=off"
+
+=========  =========  ============  ============  ==============  ===================  ======================
+MSR_IA32_ARCH_CAPABILITIES bits     Result with cmdline tsx=off
+----------------------------------  -------------------------------------------------------------------------
+TAA_NO     MDS_NO     TSX_CTRL_MSR  TSX state     VERW can clear  TAA mitigation       TAA mitigation
+                                    after bootup  CPU buffers     tsx_async_abort=off  tsx_async_abort=full
+=========  =========  ============  ============  ==============  ===================  ======================
+    0          0           0         HW default         Yes           Same as MDS           Same as MDS
+    0          0           1        Invalid case   Invalid case       Invalid case          Invalid case
+    0          1           0         HW default         No         Need ucode update     Need ucode update
+    0          1           1          Disabled          Yes           TSX disabled          TSX disabled
+    1          X           1          Disabled           X             None needed           None needed
+=========  =========  ============  ============  ==============  ===================  ======================
+
+2. "tsx=on"
+
+=========  =========  ============  ============  ==============  ===================  ======================
+MSR_IA32_ARCH_CAPABILITIES bits     Result with cmdline tsx=on
+----------------------------------  -------------------------------------------------------------------------
+TAA_NO     MDS_NO     TSX_CTRL_MSR  TSX state     VERW can clear  TAA mitigation       TAA mitigation
+                                    after bootup  CPU buffers     tsx_async_abort=off  tsx_async_abort=full
+=========  =========  ============  ============  ==============  ===================  ======================
+    0          0           0         HW default        Yes            Same as MDS          Same as MDS
+    0          0           1        Invalid case   Invalid case       Invalid case         Invalid case
+    0          1           0         HW default        No          Need ucode update     Need ucode update
+    0          1           1          Enabled          Yes               None              Same as MDS
+    1          X           1          Enabled          X              None needed          None needed
+=========  =========  ============  ============  ==============  ===================  ======================
+
+3. "tsx=auto"
+
+=========  =========  ============  ============  ==============  ===================  ======================
+MSR_IA32_ARCH_CAPABILITIES bits     Result with cmdline tsx=auto
+----------------------------------  -------------------------------------------------------------------------
+TAA_NO     MDS_NO     TSX_CTRL_MSR  TSX state     VERW can clear  TAA mitigation       TAA mitigation
+                                    after bootup  CPU buffers     tsx_async_abort=off  tsx_async_abort=full
+=========  =========  ============  ============  ==============  ===================  ======================
+    0          0           0         HW default    Yes                Same as MDS           Same as MDS
+    0          0           1        Invalid case  Invalid case        Invalid case          Invalid case
+    0          1           0         HW default    No              Need ucode update     Need ucode update
+    0          1           1          Disabled      Yes               TSX disabled          TSX disabled
+    1          X           1          Enabled       X                 None needed           None needed
+=========  =========  ============  ============  ==============  ===================  ======================
+
+In the tables, TSX_CTRL_MSR is a new bit in MSR_IA32_ARCH_CAPABILITIES that
+indicates whether MSR_IA32_TSX_CTRL is supported.
+
+There are two control bits in IA32_TSX_CTRL MSR:
+
+      Bit 0: When set it disables the Restricted Transactional Memory (RTM)
+             sub-feature of TSX (will force all transactions to abort on the
+             XBEGIN instruction).
+
+      Bit 1: When set it disables the enumeration of the RTM and HLE feature
+             (i.e. it will make CPUID(EAX=7).EBX{bit4} and
+             CPUID(EAX=7).EBX{bit11} read as 0).

From faaa0750f5c426102e095b9343b50b868a3a45f0 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Wed, 23 Oct 2019 12:35:50 +0200
Subject: [PATCH 78/81] x86/tsx: Add config options to set tsx=on|off|auto

commit db616173d787395787ecc93eef075fa975227b10 upstream.

There is a general consensus that TSX usage is not largely spread while
the history shows there is a non trivial space for side channel attacks
possible. Therefore the tsx is disabled by default even on platforms
that might have a safe implementation of TSX according to the current
knowledge. This is a fair trade off to make.

There are, however, workloads that really do benefit from using TSX and
updating to a newer kernel with TSX disabled might introduce a
noticeable regressions. This would be especially a problem for Linux
distributions which will provide TAA mitigations.

Introduce config options X86_INTEL_TSX_MODE_OFF, X86_INTEL_TSX_MODE_ON
and X86_INTEL_TSX_MODE_AUTO to control the TSX feature. The config
setting can be overridden by the tsx cmdline options.

 [ bp: Text cleanups from Josh. ]

Suggested-by: Borislav Petkov <bpetkov@suse.de>
Signed-off-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
[bwh: Backported to 4.4: adjust doc filename]
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/Kconfig          | 45 +++++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/cpu/tsx.c | 22 +++++++++++++------
 2 files changed, 61 insertions(+), 6 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 4d1262cf630c..53b429811aef 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1695,6 +1695,51 @@ config X86_INTEL_MPX
 
 	  If unsure, say N.
 
+choice
+	prompt "TSX enable mode"
+	depends on CPU_SUP_INTEL
+	default X86_INTEL_TSX_MODE_OFF
+	help
+	  Intel's TSX (Transactional Synchronization Extensions) feature
+	  allows to optimize locking protocols through lock elision which
+	  can lead to a noticeable performance boost.
+
+	  On the other hand it has been shown that TSX can be exploited
+	  to form side channel attacks (e.g. TAA) and chances are there
+	  will be more of those attacks discovered in the future.
+
+	  Therefore TSX is not enabled by default (aka tsx=off). An admin
+	  might override this decision by tsx=on the command line parameter.
+	  Even with TSX enabled, the kernel will attempt to enable the best
+	  possible TAA mitigation setting depending on the microcode available
+	  for the particular machine.
+
+	  This option allows to set the default tsx mode between tsx=on, =off
+	  and =auto. See Documentation/kernel-parameters.txt for more
+	  details.
+
+	  Say off if not sure, auto if TSX is in use but it should be used on safe
+	  platforms or on if TSX is in use and the security aspect of tsx is not
+	  relevant.
+
+config X86_INTEL_TSX_MODE_OFF
+	bool "off"
+	help
+	  TSX is disabled if possible - equals to tsx=off command line parameter.
+
+config X86_INTEL_TSX_MODE_ON
+	bool "on"
+	help
+	  TSX is always enabled on TSX capable HW - equals the tsx=on command
+	  line parameter.
+
+config X86_INTEL_TSX_MODE_AUTO
+	bool "auto"
+	help
+	  TSX is enabled on TSX capable HW that is believed to be safe against
+	  side channel attacks- equals the tsx=auto command line parameter.
+endchoice
+
 config EFI
 	bool "EFI runtime service support"
 	depends on ACPI
diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c
index d73bc1ae932b..c2a9dd816c5c 100644
--- a/arch/x86/kernel/cpu/tsx.c
+++ b/arch/x86/kernel/cpu/tsx.c
@@ -73,6 +73,14 @@ static bool __init tsx_ctrl_is_supported(void)
 	return !!(ia32_cap & ARCH_CAP_TSX_CTRL_MSR);
 }
 
+static enum tsx_ctrl_states x86_get_tsx_auto_mode(void)
+{
+	if (boot_cpu_has_bug(X86_BUG_TAA))
+		return TSX_CTRL_DISABLE;
+
+	return TSX_CTRL_ENABLE;
+}
+
 void __init tsx_init(void)
 {
 	char arg[5] = {};
@@ -88,17 +96,19 @@ void __init tsx_init(void)
 		} else if (!strcmp(arg, "off")) {
 			tsx_ctrl_state = TSX_CTRL_DISABLE;
 		} else if (!strcmp(arg, "auto")) {
-			if (boot_cpu_has_bug(X86_BUG_TAA))
-				tsx_ctrl_state = TSX_CTRL_DISABLE;
-			else
-				tsx_ctrl_state = TSX_CTRL_ENABLE;
+			tsx_ctrl_state = x86_get_tsx_auto_mode();
 		} else {
 			tsx_ctrl_state = TSX_CTRL_DISABLE;
 			pr_err("tsx: invalid option, defaulting to off\n");
 		}
 	} else {
-		/* tsx= not provided, defaulting to off */
-		tsx_ctrl_state = TSX_CTRL_DISABLE;
+		/* tsx= not provided */
+		if (IS_ENABLED(CONFIG_X86_INTEL_TSX_MODE_AUTO))
+			tsx_ctrl_state = x86_get_tsx_auto_mode();
+		else if (IS_ENABLED(CONFIG_X86_INTEL_TSX_MODE_OFF))
+			tsx_ctrl_state = TSX_CTRL_DISABLE;
+		else
+			tsx_ctrl_state = TSX_CTRL_ENABLE;
 	}
 
 	if (tsx_ctrl_state == TSX_CTRL_DISABLE) {

From b081576edd675fe37690080eb9093c1779422657 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Wed, 6 Nov 2019 20:26:46 -0600
Subject: [PATCH 79/81] x86/speculation/taa: Fix printing of TAA_MSG_SMT on
 IBRS_ALL CPUs

commit 012206a822a8b6ac09125bfaa210a95b9eb8f1c1 upstream.

For new IBRS_ALL CPUs, the Enhanced IBRS check at the beginning of
cpu_bugs_smt_update() causes the function to return early, unintentionally
skipping the MDS and TAA logic.

This is not a problem for MDS, because there appears to be no overlap
between IBRS_ALL and MDS-affected CPUs.  So the MDS mitigation would be
disabled and nothing would need to be done in this function anyway.

But for TAA, the TAA_MSG_SMT string will never get printed on Cascade
Lake and newer.

The check is superfluous anyway: when 'spectre_v2_enabled' is
SPECTRE_V2_IBRS_ENHANCED, 'spectre_v2_user' is always
SPECTRE_V2_USER_NONE, and so the 'spectre_v2_user' switch statement
handles it appropriately by doing nothing.  So just remove the check.

Fixes: 1b42f017415b ("x86/speculation/taa: Add mitigation for TSX Async Abort")
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Tyler Hicks <tyhicks@canonical.com>
Reviewed-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/bugs.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index df103f7358fe..4c4c62666d06 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -853,10 +853,6 @@ static void update_mds_branch_idle(void)
 
 void arch_smt_update(void)
 {
-	/* Enhanced IBRS implies STIBP. No update required. */
-	if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
-		return;
-
 	mutex_lock(&spec_ctrl_mutex);
 
 	switch (spectre_v2_user) {

From 6534fc5ddf887ca38ee2e04a23b062b4503b3d20 Mon Sep 17 00:00:00 2001
From: Vineela Tummalapalli <vineela.tummalapalli@intel.com>
Date: Mon, 4 Nov 2019 12:22:01 +0100
Subject: [PATCH 80/81] x86/bugs: Add ITLB_MULTIHIT bug infrastructure

commit db4d30fbb71b47e4ecb11c4efa5d8aad4b03dfae upstream.

Some processors may incur a machine check error possibly resulting in an
unrecoverable CPU lockup when an instruction fetch encounters a TLB
multi-hit in the instruction TLB. This can occur when the page size is
changed along with either the physical address or cache type. The relevant
erratum can be found here:

   https://bugzilla.kernel.org/show_bug.cgi?id=205195

There are other processors affected for which the erratum does not fully
disclose the impact.

This issue affects both bare-metal x86 page tables and EPT.

It can be mitigated by either eliminating the use of large pages or by
using careful TLB invalidations when changing the page size in the page
tables.

Just like Spectre, Meltdown, L1TF and MDS, a new bit has been allocated in
MSR_IA32_ARCH_CAPABILITIES (PSCHANGE_MC_NO) and will be set on CPUs which
are mitigated against this issue.

Signed-off-by: Vineela Tummalapalli <vineela.tummalapalli@intel.com>
Co-developed-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
[bwh: Backported to 4.4:
 - No support for X86_VENDOR_HYGON, ATOM_AIRMONT_NP
 - Adjust context, indentation]
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../ABI/testing/sysfs-devices-system-cpu      |  1 +
 arch/x86/include/asm/cpufeatures.h            |  1 +
 arch/x86/include/asm/msr-index.h              |  7 +++
 arch/x86/kernel/cpu/bugs.c                    | 13 ++++
 arch/x86/kernel/cpu/common.c                  | 59 ++++++++++---------
 drivers/base/cpu.c                            |  8 +++
 include/linux/cpu.h                           |  2 +
 7 files changed, 64 insertions(+), 27 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index 9eafe1bdaaf4..f97d1aaec1f9 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -280,6 +280,7 @@ What:		/sys/devices/system/cpu/vulnerabilities
 		/sys/devices/system/cpu/vulnerabilities/l1tf
 		/sys/devices/system/cpu/vulnerabilities/mds
 		/sys/devices/system/cpu/vulnerabilities/tsx_async_abort
+		/sys/devices/system/cpu/vulnerabilities/itlb_multihit
 Date:		January 2018
 Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
 Description:	Information about CPU vulnerabilities
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 28c717060fe6..94491e4d21a7 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -341,5 +341,6 @@
 #define X86_BUG_MSBDS_ONLY	X86_BUG(20) /* CPU is only affected by the  MSDBS variant of BUG_MDS */
 #define X86_BUG_SWAPGS		X86_BUG(21) /* CPU is affected by speculation through SWAPGS */
 #define X86_BUG_TAA		X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */
+#define X86_BUG_ITLB_MULTIHIT	X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */
 
 #endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 40f18a4e6c0d..854a20efa771 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -71,6 +71,13 @@
 						  * Microarchitectural Data
 						  * Sampling (MDS) vulnerabilities.
 						  */
+#define ARCH_CAP_PSCHANGE_MC_NO		BIT(6)	 /*
+						  * The processor is not susceptible to a
+						  * machine check error due to modifying the
+						  * code page size along with either the
+						  * physical address or cache type
+						  * without TLB invalidation.
+						  */
 #define ARCH_CAP_TSX_CTRL_MSR		BIT(7)	/* MSR for TSX control is available. */
 #define ARCH_CAP_TAA_NO			BIT(8)	/*
 						 * Not susceptible to
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 4c4c62666d06..7fd0a13ae0ba 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -1277,6 +1277,11 @@ static void __init l1tf_select_mitigation(void)
 
 #ifdef CONFIG_SYSFS
 
+static ssize_t itlb_multihit_show_state(char *buf)
+{
+	return sprintf(buf, "Processor vulnerable\n");
+}
+
 static ssize_t mds_show_state(char *buf)
 {
 #ifdef CONFIG_HYPERVISOR_GUEST
@@ -1379,6 +1384,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
 	case X86_BUG_TAA:
 		return tsx_async_abort_show_state(buf);
 
+	case X86_BUG_ITLB_MULTIHIT:
+		return itlb_multihit_show_state(buf);
+
 	default:
 		break;
 	}
@@ -1420,4 +1428,9 @@ ssize_t cpu_show_tsx_async_abort(struct device *dev, struct device_attribute *at
 {
 	return cpu_show_common(dev, attr, buf, X86_BUG_TAA);
 }
+
+ssize_t cpu_show_itlb_multihit(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	return cpu_show_common(dev, attr, buf, X86_BUG_ITLB_MULTIHIT);
+}
 #endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 53f4efe94a5e..e8fa12c7ad5b 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -847,13 +847,14 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
 #endif
 }
 
-#define NO_SPECULATION	BIT(0)
-#define NO_MELTDOWN	BIT(1)
-#define NO_SSB		BIT(2)
-#define NO_L1TF		BIT(3)
-#define NO_MDS		BIT(4)
-#define MSBDS_ONLY	BIT(5)
-#define NO_SWAPGS	BIT(6)
+#define NO_SPECULATION		BIT(0)
+#define NO_MELTDOWN		BIT(1)
+#define NO_SSB			BIT(2)
+#define NO_L1TF			BIT(3)
+#define NO_MDS			BIT(4)
+#define MSBDS_ONLY		BIT(5)
+#define NO_SWAPGS		BIT(6)
+#define NO_ITLB_MULTIHIT	BIT(7)
 
 #define VULNWL(_vendor, _family, _model, _whitelist)	\
 	{ X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist }
@@ -871,26 +872,26 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
 	VULNWL(NSC,	5, X86_MODEL_ANY,	NO_SPECULATION),
 
 	/* Intel Family 6 */
-	VULNWL_INTEL(ATOM_SALTWELL,		NO_SPECULATION),
-	VULNWL_INTEL(ATOM_SALTWELL_TABLET,	NO_SPECULATION),
-	VULNWL_INTEL(ATOM_SALTWELL_MID,		NO_SPECULATION),
-	VULNWL_INTEL(ATOM_BONNELL,		NO_SPECULATION),
-	VULNWL_INTEL(ATOM_BONNELL_MID,		NO_SPECULATION),
+	VULNWL_INTEL(ATOM_SALTWELL,		NO_SPECULATION | NO_ITLB_MULTIHIT),
+	VULNWL_INTEL(ATOM_SALTWELL_TABLET,	NO_SPECULATION | NO_ITLB_MULTIHIT),
+	VULNWL_INTEL(ATOM_SALTWELL_MID,		NO_SPECULATION | NO_ITLB_MULTIHIT),
+	VULNWL_INTEL(ATOM_BONNELL,		NO_SPECULATION | NO_ITLB_MULTIHIT),
+	VULNWL_INTEL(ATOM_BONNELL_MID,		NO_SPECULATION | NO_ITLB_MULTIHIT),
 
-	VULNWL_INTEL(ATOM_SILVERMONT,		NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
-	VULNWL_INTEL(ATOM_SILVERMONT_X,		NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
-	VULNWL_INTEL(ATOM_SILVERMONT_MID,	NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
-	VULNWL_INTEL(ATOM_AIRMONT,		NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
-	VULNWL_INTEL(XEON_PHI_KNL,		NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
-	VULNWL_INTEL(XEON_PHI_KNM,		NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
+	VULNWL_INTEL(ATOM_SILVERMONT,		NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
+	VULNWL_INTEL(ATOM_SILVERMONT_X,		NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
+	VULNWL_INTEL(ATOM_SILVERMONT_MID,	NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
+	VULNWL_INTEL(ATOM_AIRMONT,		NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
+	VULNWL_INTEL(XEON_PHI_KNL,		NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
+	VULNWL_INTEL(XEON_PHI_KNM,		NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
 
 	VULNWL_INTEL(CORE_YONAH,		NO_SSB),
 
-	VULNWL_INTEL(ATOM_AIRMONT_MID,		NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
+	VULNWL_INTEL(ATOM_AIRMONT_MID,		NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
 
-	VULNWL_INTEL(ATOM_GOLDMONT,		NO_MDS | NO_L1TF | NO_SWAPGS),
-	VULNWL_INTEL(ATOM_GOLDMONT_X,		NO_MDS | NO_L1TF | NO_SWAPGS),
-	VULNWL_INTEL(ATOM_GOLDMONT_PLUS,	NO_MDS | NO_L1TF | NO_SWAPGS),
+	VULNWL_INTEL(ATOM_GOLDMONT,		NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
+	VULNWL_INTEL(ATOM_GOLDMONT_X,		NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
+	VULNWL_INTEL(ATOM_GOLDMONT_PLUS,	NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
 
 	/*
 	 * Technically, swapgs isn't serializing on AMD (despite it previously
@@ -901,13 +902,13 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
 	 */
 
 	/* AMD Family 0xf - 0x12 */
-	VULNWL_AMD(0x0f,	NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),
-	VULNWL_AMD(0x10,	NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),
-	VULNWL_AMD(0x11,	NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),
-	VULNWL_AMD(0x12,	NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),
+	VULNWL_AMD(0x0f,	NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
+	VULNWL_AMD(0x10,	NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
+	VULNWL_AMD(0x11,	NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
+	VULNWL_AMD(0x12,	NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
 
 	/* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */
-	VULNWL_AMD(X86_FAMILY_ANY,	NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS),
+	VULNWL_AMD(X86_FAMILY_ANY,	NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
 	{}
 };
 
@@ -932,6 +933,10 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
 {
 	u64 ia32_cap = x86_read_arch_cap_msr();
 
+	/* Set ITLB_MULTIHIT bug if cpu is not in the whitelist and not mitigated */
+	if (!cpu_matches(NO_ITLB_MULTIHIT) && !(ia32_cap & ARCH_CAP_PSCHANGE_MC_NO))
+		setup_force_cpu_bug(X86_BUG_ITLB_MULTIHIT);
+
 	if (cpu_matches(NO_SPECULATION))
 		return;
 
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 639683b3c939..9666effc0799 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -543,6 +543,12 @@ ssize_t __weak cpu_show_tsx_async_abort(struct device *dev,
 	return sprintf(buf, "Not affected\n");
 }
 
+ssize_t __weak cpu_show_itlb_multihit(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "Not affected\n");
+}
+
 static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
 static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
 static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
@@ -550,6 +556,7 @@ static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL);
 static DEVICE_ATTR(l1tf, 0444, cpu_show_l1tf, NULL);
 static DEVICE_ATTR(mds, 0444, cpu_show_mds, NULL);
 static DEVICE_ATTR(tsx_async_abort, 0444, cpu_show_tsx_async_abort, NULL);
+static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL);
 
 static struct attribute *cpu_root_vulnerabilities_attrs[] = {
 	&dev_attr_meltdown.attr,
@@ -559,6 +566,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = {
 	&dev_attr_l1tf.attr,
 	&dev_attr_mds.attr,
 	&dev_attr_tsx_async_abort.attr,
+	&dev_attr_itlb_multihit.attr,
 	NULL
 };
 
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index ce8c22491d05..9f97a5e0cb78 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -55,6 +55,8 @@ extern ssize_t cpu_show_mds(struct device *dev,
 extern ssize_t cpu_show_tsx_async_abort(struct device *dev,
 					struct device_attribute *attr,
 					char *buf);
+extern ssize_t cpu_show_itlb_multihit(struct device *dev,
+				      struct device_attribute *attr, char *buf);
 
 extern __printf(4, 5)
 struct device *cpu_device_create(struct device *parent, void *drvdata,

From bc69c961f59512012af64efd4ff20b3cb67c99ce Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sat, 16 Nov 2019 10:27:52 +0100
Subject: [PATCH 81/81] Linux 4.4.202

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index a86c8aa98dbe..e7fecd28672e 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 4
-SUBLEVEL = 201
+SUBLEVEL = 202
 EXTRAVERSION =
 NAME = Blurry Fish Butt