From 911bd54922cdcc259daf73cb1a3a9d0305b9061d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 31 Jan 2017 15:24:03 +0100 Subject: [PATCH 01/22] timerfd: Protect the might cancel mechanism proper commit 1e38da300e1e395a15048b0af1e5305bd91402f6 upstream. The handling of the might_cancel queueing is not properly protected, so parallel operations on the file descriptor can race with each other and lead to list corruptions or use after free. Protect the context for these operations with a seperate lock. The wait queue lock cannot be reused for this because that would create a lock inversion scenario vs. the cancel lock. Replacing might_cancel with an atomic (atomic_t or atomic bit) does not help either because it still can race vs. the actual list operation. Reported-by: Dmitry Vyukov Signed-off-by: Thomas Gleixner Cc: "linux-fsdevel@vger.kernel.org" Cc: syzkaller Cc: Al Viro Cc: linux-fsdevel@vger.kernel.org Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1701311521430.3457@nanos Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- fs/timerfd.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/fs/timerfd.c b/fs/timerfd.c index 053818dd6c18..1327a02ec778 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -40,6 +40,7 @@ struct timerfd_ctx { short unsigned settime_flags; /* to show in fdinfo */ struct rcu_head rcu; struct list_head clist; + spinlock_t cancel_lock; bool might_cancel; }; @@ -112,7 +113,7 @@ void timerfd_clock_was_set(void) rcu_read_unlock(); } -static void timerfd_remove_cancel(struct timerfd_ctx *ctx) +static void __timerfd_remove_cancel(struct timerfd_ctx *ctx) { if (ctx->might_cancel) { ctx->might_cancel = false; @@ -122,6 +123,13 @@ static void timerfd_remove_cancel(struct timerfd_ctx *ctx) } } +static void timerfd_remove_cancel(struct timerfd_ctx *ctx) +{ + spin_lock(&ctx->cancel_lock); + __timerfd_remove_cancel(ctx); + spin_unlock(&ctx->cancel_lock); +} + static bool timerfd_canceled(struct timerfd_ctx *ctx) { if (!ctx->might_cancel || ctx->moffs.tv64 != KTIME_MAX) @@ -132,6 +140,7 @@ static bool timerfd_canceled(struct timerfd_ctx *ctx) static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags) { + spin_lock(&ctx->cancel_lock); if ((ctx->clockid == CLOCK_REALTIME || ctx->clockid == CLOCK_REALTIME_ALARM) && (flags & TFD_TIMER_ABSTIME) && (flags & TFD_TIMER_CANCEL_ON_SET)) { @@ -141,9 +150,10 @@ static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags) list_add_rcu(&ctx->clist, &cancel_list); spin_unlock(&cancel_lock); } - } else if (ctx->might_cancel) { - timerfd_remove_cancel(ctx); + } else { + __timerfd_remove_cancel(ctx); } + spin_unlock(&ctx->cancel_lock); } static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx) @@ -395,6 +405,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) return -ENOMEM; init_waitqueue_head(&ctx->wqh); + spin_lock_init(&ctx->cancel_lock); ctx->clockid = clockid; if (isalarm(ctx)) From 6c106b55eb477c33c3e7626586cae899df0efce7 Mon Sep 17 00:00:00 2001 From: Sachin Prabhu Date: Fri, 3 Mar 2017 15:41:38 -0800 Subject: [PATCH 02/22] Handle mismatched open calls commit 38bd49064a1ecb67baad33598e3d824448ab11ec upstream. A signal can interrupt a SendReceive call which result in incoming responses to the call being ignored. This is a problem for calls such as open which results in the successful response being ignored. This results in an open file resource on the server. The patch looks into responses which were cancelled after being sent and in case of successful open closes the open fids. For this patch, the check is only done in SendReceive2() RH-bz: 1403319 Signed-off-by: Sachin Prabhu Reviewed-by: Pavel Shilovsky Acked-by: Sachin Prabhu Signed-off-by: Pavel Shilovsky Signed-off-by: Greg Kroah-Hartman --- fs/cifs/cifsglob.h | 11 ++++++++ fs/cifs/cifssmb.c | 4 +++ fs/cifs/connect.c | 13 +++++++-- fs/cifs/smb2misc.c | 44 ++++++++++++++++++++++++++++++ fs/cifs/smb2ops.c | 4 +++ fs/cifs/smb2proto.h | 7 +++++ fs/cifs/smb2transport.c | 60 ++++++++++++++++++++++++++++++++++++----- fs/cifs/transport.c | 2 ++ 8 files changed, 136 insertions(+), 9 deletions(-) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 94906aaa9b7c..e2f6a79e9b01 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -227,6 +227,7 @@ struct smb_version_operations { /* verify the message */ int (*check_message)(char *, unsigned int); bool (*is_oplock_break)(char *, struct TCP_Server_Info *); + int (*handle_cancelled_mid)(char *, struct TCP_Server_Info *); void (*downgrade_oplock)(struct TCP_Server_Info *, struct cifsInodeInfo *, bool); /* process transaction2 response */ @@ -1289,12 +1290,19 @@ struct mid_q_entry { void *callback_data; /* general purpose pointer for callback */ void *resp_buf; /* pointer to received SMB header */ int mid_state; /* wish this were enum but can not pass to wait_event */ + unsigned int mid_flags; __le16 command; /* smb command code */ bool large_buf:1; /* if valid response, is pointer to large buf */ bool multiRsp:1; /* multiple trans2 responses for one request */ bool multiEnd:1; /* both received */ }; +struct close_cancelled_open { + struct cifs_fid fid; + struct cifs_tcon *tcon; + struct work_struct work; +}; + /* Make code in transport.c a little cleaner by moving update of optional stats into function below */ #ifdef CONFIG_CIFS_STATS2 @@ -1426,6 +1434,9 @@ static inline void free_dfs_info_array(struct dfs_info3_param *param, #define MID_RESPONSE_MALFORMED 0x10 #define MID_SHUTDOWN 0x20 +/* Flags */ +#define MID_WAIT_CANCELLED 1 /* Cancelled while waiting for response */ + /* Types of response buffer returned from SendReceive2 */ #define CIFS_NO_BUFFER 0 /* Response buffer not returned */ #define CIFS_SMALL_BUFFER 1 diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index b1104ed8f54c..5e2f8b8ca08a 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1424,6 +1424,8 @@ cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid) length = discard_remaining_data(server); dequeue_mid(mid, rdata->result); + mid->resp_buf = server->smallbuf; + server->smallbuf = NULL; return length; } @@ -1538,6 +1540,8 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid) return cifs_readv_discard(server, mid); dequeue_mid(mid, false); + mid->resp_buf = server->smallbuf; + server->smallbuf = NULL; return length; } diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 5d59f25521ce..156bc18eac69 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -924,10 +924,19 @@ cifs_demultiplex_thread(void *p) server->lstrp = jiffies; if (mid_entry != NULL) { + if ((mid_entry->mid_flags & MID_WAIT_CANCELLED) && + mid_entry->mid_state == MID_RESPONSE_RECEIVED && + server->ops->handle_cancelled_mid) + server->ops->handle_cancelled_mid( + mid_entry->resp_buf, + server); + if (!mid_entry->multiRsp || mid_entry->multiEnd) mid_entry->callback(mid_entry); - } else if (!server->ops->is_oplock_break || - !server->ops->is_oplock_break(buf, server)) { + } else if (server->ops->is_oplock_break && + server->ops->is_oplock_break(buf, server)) { + cifs_dbg(FYI, "Received oplock break\n"); + } else { cifs_dbg(VFS, "No task to wake, unknown frame received! NumMids %d\n", atomic_read(&midCount)); cifs_dump_mem("Received Data is: ", buf, diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index e5bc85e49be7..76ccf20fbfb7 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -630,3 +630,47 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server) cifs_dbg(FYI, "Can not process oplock break for non-existent connection\n"); return false; } + +void +smb2_cancelled_close_fid(struct work_struct *work) +{ + struct close_cancelled_open *cancelled = container_of(work, + struct close_cancelled_open, work); + + cifs_dbg(VFS, "Close unmatched open\n"); + + SMB2_close(0, cancelled->tcon, cancelled->fid.persistent_fid, + cancelled->fid.volatile_fid); + cifs_put_tcon(cancelled->tcon); + kfree(cancelled); +} + +int +smb2_handle_cancelled_mid(char *buffer, struct TCP_Server_Info *server) +{ + struct smb2_hdr *hdr = (struct smb2_hdr *)buffer; + struct smb2_create_rsp *rsp = (struct smb2_create_rsp *)buffer; + struct cifs_tcon *tcon; + struct close_cancelled_open *cancelled; + + if (hdr->Command != SMB2_CREATE || hdr->Status != STATUS_SUCCESS) + return 0; + + cancelled = kzalloc(sizeof(*cancelled), GFP_KERNEL); + if (!cancelled) + return -ENOMEM; + + tcon = smb2_find_smb_tcon(server, hdr->SessionId, hdr->TreeId); + if (!tcon) { + kfree(cancelled); + return -ENOENT; + } + + cancelled->fid.persistent_fid = rsp->PersistentFileId; + cancelled->fid.volatile_fid = rsp->VolatileFileId; + cancelled->tcon = tcon; + INIT_WORK(&cancelled->work, smb2_cancelled_close_fid); + queue_work(cifsiod_wq, &cancelled->work); + + return 0; +} diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index be34b4860675..087918c4612a 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -1511,6 +1511,7 @@ struct smb_version_operations smb20_operations = { .clear_stats = smb2_clear_stats, .print_stats = smb2_print_stats, .is_oplock_break = smb2_is_valid_oplock_break, + .handle_cancelled_mid = smb2_handle_cancelled_mid, .downgrade_oplock = smb2_downgrade_oplock, .need_neg = smb2_need_neg, .negotiate = smb2_negotiate, @@ -1589,6 +1590,7 @@ struct smb_version_operations smb21_operations = { .clear_stats = smb2_clear_stats, .print_stats = smb2_print_stats, .is_oplock_break = smb2_is_valid_oplock_break, + .handle_cancelled_mid = smb2_handle_cancelled_mid, .downgrade_oplock = smb2_downgrade_oplock, .need_neg = smb2_need_neg, .negotiate = smb2_negotiate, @@ -1670,6 +1672,7 @@ struct smb_version_operations smb30_operations = { .print_stats = smb2_print_stats, .dump_share_caps = smb2_dump_share_caps, .is_oplock_break = smb2_is_valid_oplock_break, + .handle_cancelled_mid = smb2_handle_cancelled_mid, .downgrade_oplock = smb2_downgrade_oplock, .need_neg = smb2_need_neg, .negotiate = smb2_negotiate, @@ -1757,6 +1760,7 @@ struct smb_version_operations smb311_operations = { .print_stats = smb2_print_stats, .dump_share_caps = smb2_dump_share_caps, .is_oplock_break = smb2_is_valid_oplock_break, + .handle_cancelled_mid = smb2_handle_cancelled_mid, .downgrade_oplock = smb2_downgrade_oplock, .need_neg = smb2_need_neg, .negotiate = smb2_negotiate, diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 0a406ae78129..adc5234486c3 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -47,6 +47,10 @@ extern struct mid_q_entry *smb2_setup_request(struct cifs_ses *ses, struct smb_rqst *rqst); extern struct mid_q_entry *smb2_setup_async_request( struct TCP_Server_Info *server, struct smb_rqst *rqst); +extern struct cifs_ses *smb2_find_smb_ses(struct TCP_Server_Info *server, + __u64 ses_id); +extern struct cifs_tcon *smb2_find_smb_tcon(struct TCP_Server_Info *server, + __u64 ses_id, __u32 tid); extern int smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server); extern int smb3_calc_signature(struct smb_rqst *rqst, @@ -157,6 +161,9 @@ extern int SMB2_set_compression(const unsigned int xid, struct cifs_tcon *tcon, extern int SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon, const u64 persistent_fid, const u64 volatile_fid, const __u8 oplock_level); +extern int smb2_handle_cancelled_mid(char *buffer, + struct TCP_Server_Info *server); +void smb2_cancelled_close_fid(struct work_struct *work); extern int SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_file_id, u64 volatile_file_id, struct kstatfs *FSData); diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index d4c5b6f109a7..69e3b322bbfe 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -115,22 +115,68 @@ smb3_crypto_shash_allocate(struct TCP_Server_Info *server) } static struct cifs_ses * -smb2_find_smb_ses(struct smb2_hdr *smb2hdr, struct TCP_Server_Info *server) +smb2_find_smb_ses_unlocked(struct TCP_Server_Info *server, __u64 ses_id) { struct cifs_ses *ses; - spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { - if (ses->Suid != smb2hdr->SessionId) + if (ses->Suid != ses_id) continue; - spin_unlock(&cifs_tcp_ses_lock); return ses; } - spin_unlock(&cifs_tcp_ses_lock); return NULL; } +struct cifs_ses * +smb2_find_smb_ses(struct TCP_Server_Info *server, __u64 ses_id) +{ + struct cifs_ses *ses; + + spin_lock(&cifs_tcp_ses_lock); + ses = smb2_find_smb_ses_unlocked(server, ses_id); + spin_unlock(&cifs_tcp_ses_lock); + + return ses; +} + +static struct cifs_tcon * +smb2_find_smb_sess_tcon_unlocked(struct cifs_ses *ses, __u32 tid) +{ + struct cifs_tcon *tcon; + + list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { + if (tcon->tid != tid) + continue; + ++tcon->tc_count; + return tcon; + } + + return NULL; +} + +/* + * Obtain tcon corresponding to the tid in the given + * cifs_ses + */ + +struct cifs_tcon * +smb2_find_smb_tcon(struct TCP_Server_Info *server, __u64 ses_id, __u32 tid) +{ + struct cifs_ses *ses; + struct cifs_tcon *tcon; + + spin_lock(&cifs_tcp_ses_lock); + ses = smb2_find_smb_ses_unlocked(server, ses_id); + if (!ses) { + spin_unlock(&cifs_tcp_ses_lock); + return NULL; + } + tcon = smb2_find_smb_sess_tcon_unlocked(ses, tid); + spin_unlock(&cifs_tcp_ses_lock); + + return tcon; +} int smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) @@ -143,7 +189,7 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) struct smb2_hdr *smb2_pdu = (struct smb2_hdr *)iov[0].iov_base; struct cifs_ses *ses; - ses = smb2_find_smb_ses(smb2_pdu, server); + ses = smb2_find_smb_ses(server, smb2_pdu->SessionId); if (!ses) { cifs_dbg(VFS, "%s: Could not find session\n", __func__); return 0; @@ -314,7 +360,7 @@ smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) struct smb2_hdr *smb2_pdu = (struct smb2_hdr *)iov[0].iov_base; struct cifs_ses *ses; - ses = smb2_find_smb_ses(smb2_pdu, server); + ses = smb2_find_smb_ses(server, smb2_pdu->SessionId); if (!ses) { cifs_dbg(VFS, "%s: Could not find session\n", __func__); return 0; diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 87abe8ed074c..54af10204e83 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -786,9 +786,11 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses, rc = wait_for_response(ses->server, midQ); if (rc != 0) { + cifs_dbg(FYI, "Cancelling wait for mid %llu\n", midQ->mid); send_cancel(ses->server, buf, midQ); spin_lock(&GlobalMid_Lock); if (midQ->mid_state == MID_REQUEST_SUBMITTED) { + midQ->mid_flags |= MID_WAIT_CANCELLED; midQ->callback = DeleteMidQEntry; spin_unlock(&GlobalMid_Lock); cifs_small_buf_release(buf); From a2b3b19acfde4a01a327f257bd6a0a85ef6159d8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 24 Apr 2017 14:09:55 +0200 Subject: [PATCH 03/22] ASoC: intel: Fix PM and non-atomic crash in bytcr drivers commit 6e4cac23c5a648d50b107d1b53e9c4e1120c7943 upstream. The FE setups of Intel SST bytcr_rt5640 and bytcr_rt5651 drivers carry the ignore_suspend flag, and this prevents the suspend/resume working properly while the stream is running, since SST core code has the check of the running streams and returns -EBUSY. Drop these superfluous flags for fixing the behavior. Also, the bytcr_rt5640 driver lacks of nonatomic flag in some FE definitions, which leads to the kernel Oops at suspend/resume like: BUG: scheduling while atomic: systemd-sleep/3144/0x00000003 Call Trace: dump_stack+0x5c/0x7a __schedule_bug+0x55/0x70 __schedule+0x63c/0x8c0 schedule+0x3d/0x90 schedule_timeout+0x16b/0x320 ? del_timer_sync+0x50/0x50 ? sst_wait_timeout+0xa9/0x170 [snd_intel_sst_core] ? sst_wait_timeout+0xa9/0x170 [snd_intel_sst_core] ? remove_wait_queue+0x60/0x60 ? sst_prepare_and_post_msg+0x275/0x960 [snd_intel_sst_core] ? sst_pause_stream+0x9b/0x110 [snd_intel_sst_core] .... This patch addresses these appropriately, too. [tiwai: applied only to bytcr_rt5640 as bytcr_rt5651 isn't present in 4.4.x yet] Signed-off-by: Takashi Iwai Acked-by: Vinod Koul Signed-off-by: Mark Brown Cc: # v4.1+ Signed-off-by: Greg Kroah-Hartman --- sound/soc/intel/boards/bytcr_rt5640.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c index 7a5c9a36c1db..daba8c56b43b 100644 --- a/sound/soc/intel/boards/bytcr_rt5640.c +++ b/sound/soc/intel/boards/bytcr_rt5640.c @@ -139,7 +139,7 @@ static struct snd_soc_dai_link byt_dailink[] = { .codec_dai_name = "snd-soc-dummy-dai", .codec_name = "snd-soc-dummy", .platform_name = "sst-mfld-platform", - .ignore_suspend = 1, + .nonatomic = true, .dynamic = 1, .dpcm_playback = 1, .dpcm_capture = 1, @@ -166,6 +166,7 @@ static struct snd_soc_dai_link byt_dailink[] = { | SND_SOC_DAIFMT_CBS_CFS, .be_hw_params_fixup = byt_codec_fixup, .ignore_suspend = 1, + .nonatomic = true, .dpcm_playback = 1, .dpcm_capture = 1, .ops = &byt_be_ssp2_ops, From 938206b8d7980c60644d2ac46b13c3d236cb3eab Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 4 Jul 2016 17:07:45 +0200 Subject: [PATCH 04/22] ALSA: ppc/awacs: shut up maybe-uninitialized warning commit b268c34e5ee92a4cc3099b0caaf26e6bfbdf0f18 upstream. The awacs sound driver produces a false-positive warning in ppc64_defconfig: sound/ppc/awacs.c: In function 'snd_pmac_awacs_init': include/sound/control.h:219:9: warning: 'master_vol' may be used uninitialized in this function [-Wmaybe-uninitialized] I haven't come up with a good way to rewrite the code to avoid the warning, so here is a bad one: I initialize the variable before the conditionall initialization so gcc no longer has to worry about it. Signed-off-by: Arnd Bergmann Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/ppc/awacs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/ppc/awacs.c b/sound/ppc/awacs.c index 09da7b52bc2e..1468e4b7bf93 100644 --- a/sound/ppc/awacs.c +++ b/sound/ppc/awacs.c @@ -991,6 +991,7 @@ snd_pmac_awacs_init(struct snd_pmac *chip) if (err < 0) return err; } + master_vol = NULL; if (pm7500) err = build_mixers(chip, ARRAY_SIZE(snd_pmac_awacs_mixers_pmac7500), From ee6b88767e872fcc7af8e786a05d29d1ba229e75 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Fri, 20 Mar 2015 15:47:22 +0100 Subject: [PATCH 05/22] drbd: avoid redefinition of BITS_PER_PAGE commit 2630628b2dbc3fc320aafaf84836119e4e3d62f1 upstream. Apparently we now implicitly get definitions for BITS_PER_PAGE and BITS_PER_PAGE_MASK from the pid_namespace.h Instead of renaming our defines, I chose to define only if not yet defined, but to double check the value if already defined. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg Signed-off-by: Jens Axboe Cc: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- drivers/block/drbd/drbd_bitmap.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 9462d2752850..8bdc34dbaedf 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -479,8 +479,14 @@ void drbd_bm_cleanup(struct drbd_device *device) * this masks out the remaining bits. * Returns the number of bits cleared. */ +#ifndef BITS_PER_PAGE #define BITS_PER_PAGE (1UL << (PAGE_SHIFT + 3)) #define BITS_PER_PAGE_MASK (BITS_PER_PAGE - 1) +#else +# if BITS_PER_PAGE != (1UL << (PAGE_SHIFT + 3)) +# error "ambiguous BITS_PER_PAGE" +# endif +#endif #define BITS_PER_LONG_MASK (BITS_PER_LONG - 1) static int bm_clear_surplus(struct drbd_bitmap *b) { From fd79e436325841863d070f8e72246b437c0f5a15 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 29 Feb 2016 13:20:28 +0100 Subject: [PATCH 06/22] mtd: avoid stack overflow in MTD CFI code commit fddcca5107051adf9e4481d2a79ae0616577fd2c upstream. When map_word gets too large, we use a lot of kernel stack, and for MTD_MAP_BANK_WIDTH_32, this means we use more than the recommended 1024 bytes in a number of functions: drivers/mtd/chips/cfi_cmdset_0020.c: In function 'cfi_staa_write_buffers': drivers/mtd/chips/cfi_cmdset_0020.c:651:1: warning: the frame size of 1336 bytes is larger than 1024 bytes [-Wframe-larger-than=] drivers/mtd/chips/cfi_cmdset_0020.c: In function 'cfi_staa_erase_varsize': drivers/mtd/chips/cfi_cmdset_0020.c:972:1: warning: the frame size of 1208 bytes is larger than 1024 bytes [-Wframe-larger-than=] drivers/mtd/chips/cfi_cmdset_0001.c: In function 'do_write_buffer': drivers/mtd/chips/cfi_cmdset_0001.c:1835:1: warning: the frame size of 1240 bytes is larger than 1024 bytes [-Wframe-larger-than=] This can be avoided if all operations on the map word are done indirectly and the stack gets reused between the calls. We can mostly achieve this by selecting MTD_COMPLEX_MAPPINGS whenever MTD_MAP_BANK_WIDTH_32 is set, but for the case that no other bank width is enabled, we also need to use a non-constant map_bankwidth() to convince the compiler to use less stack. Signed-off-by: Arnd Bergmann [Brian: this patch mostly achieves its goal by forcing MTD_COMPLEX_MAPPINGS (and the accompanying indirection) for 256-bit mappings; the rest of the change is mostly a wash, though it helps reduce stack size slightly. If we really care about supporting 256-bit mappings though, we should consider rewriting some of this code to avoid keeping and assigning so many 256-bit objects on the stack.] Signed-off-by: Brian Norris Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/chips/Kconfig | 1 + include/linux/mtd/map.h | 19 +++++++------------ 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/drivers/mtd/chips/Kconfig b/drivers/mtd/chips/Kconfig index 54479c481a7a..8a25adced79f 100644 --- a/drivers/mtd/chips/Kconfig +++ b/drivers/mtd/chips/Kconfig @@ -111,6 +111,7 @@ config MTD_MAP_BANK_WIDTH_16 config MTD_MAP_BANK_WIDTH_32 bool "Support 256-bit buswidth" if MTD_CFI_GEOMETRY + select MTD_COMPLEX_MAPPINGS if HAS_IOMEM default n help If you wish to support CFI devices on a physical bus which is diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h index 366cf77953b5..806d0ab845e0 100644 --- a/include/linux/mtd/map.h +++ b/include/linux/mtd/map.h @@ -122,18 +122,13 @@ #endif #ifdef CONFIG_MTD_MAP_BANK_WIDTH_32 -# ifdef map_bankwidth -# undef map_bankwidth -# define map_bankwidth(map) ((map)->bankwidth) -# undef map_bankwidth_is_large -# define map_bankwidth_is_large(map) (map_bankwidth(map) > BITS_PER_LONG/8) -# undef map_words -# define map_words(map) map_calc_words(map) -# else -# define map_bankwidth(map) 32 -# define map_bankwidth_is_large(map) (1) -# define map_words(map) map_calc_words(map) -# endif +/* always use indirect access for 256-bit to preserve kernel stack */ +# undef map_bankwidth +# define map_bankwidth(map) ((map)->bankwidth) +# undef map_bankwidth_is_large +# define map_bankwidth_is_large(map) (map_bankwidth(map) > BITS_PER_LONG/8) +# undef map_words +# define map_words(map) map_calc_words(map) #define map_bankwidth_is_32(map) (map_bankwidth(map) == 32) #undef MAX_MAP_BANKWIDTH #define MAX_MAP_BANKWIDTH 32 From ec2170f98f9a218c62b9d49163dbbd0d8897a2a4 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 29 Jan 2016 12:39:15 +0100 Subject: [PATCH 07/22] net: tg3: avoid uninitialized variable warning commit e434e04110704eb91acfecbd0fb8ca8e2da9c29b upstream. The tg3_set_eeprom() function correctly initializes the 'start' variable, but gcc generates a false warning: drivers/net/ethernet/broadcom/tg3.c: In function 'tg3_set_eeprom': drivers/net/ethernet/broadcom/tg3.c:12057:4: warning: 'start' may be used uninitialized in this function [-Wmaybe-uninitialized] I have not come up with a way to restructure the code in a way that avoids the warning without making it less readable, so this adds an initialization for the declaration to shut up that warning. Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/tg3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 49056c33be74..21e5b9ed1ead 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -12031,7 +12031,7 @@ static int tg3_set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom, int ret; u32 offset, len, b_offset, odd_len; u8 *buf; - __be32 start, end; + __be32 start = 0, end; if (tg3_flag(tp, NO_NVRAM) || eeprom->magic != TG3_EEPROM_MAGIC) From 69a9e016f0cc091f225a2c485c66bd7703f144ee Mon Sep 17 00:00:00 2001 From: Uma Krishnan Date: Fri, 2 Sep 2016 15:38:48 -0500 Subject: [PATCH 08/22] scsi: cxlflash: Scan host only after the port is ready for I/O commit bbbfae962b7c221237c0f92547ee0c83f7204747 upstream. When a port link is established, the AFU sends a 'link up' interrupt. After the link is up, corresponding initialization steps are performed on the card. Following that, when the card is ready for I/O, the AFU sends 'login succeeded' interrupt. Today, cxlflash invokes scsi_scan_host() upon receipt of both interrupts. SCSI commands sent to the port prior to the 'login succeeded' interrupt will fail with 'port not available' error. This is not desirable. Moreover, when async_scan is active for the host, subsequent scan calls are terminated with error. Due to this, the scsi_scan_host() call performed after 'login succeeded' interrupt could portentially return error and the devices may not be scanned properly. To avoid this problem, scsi_scan_host() should be called only after the 'login succeeded' interrupt. Signed-off-by: Uma Krishnan Acked-by: Matthew R. Ochs Signed-off-by: Martin K. Petersen Cc: Sumit Semwal Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/cxlflash/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c index 2882bcac918a..6150d29e1d68 100644 --- a/drivers/scsi/cxlflash/main.c +++ b/drivers/scsi/cxlflash/main.c @@ -1137,7 +1137,7 @@ static const struct asyc_intr_info ainfo[] = { {SISL_ASTATUS_FC0_LOGI_F, "login failed", 0, CLR_FC_ERROR}, {SISL_ASTATUS_FC0_LOGI_S, "login succeeded", 0, SCAN_HOST}, {SISL_ASTATUS_FC0_LINK_DN, "link down", 0, 0}, - {SISL_ASTATUS_FC0_LINK_UP, "link up", 0, SCAN_HOST}, + {SISL_ASTATUS_FC0_LINK_UP, "link up", 0, 0}, {SISL_ASTATUS_FC1_OTHER, "other error", 1, CLR_FC_ERROR | LINK_RESET}, {SISL_ASTATUS_FC1_LOGO, "target initiated LOGO", 1, 0}, {SISL_ASTATUS_FC1_CRC_T, "CRC threshold exceeded", 1, LINK_RESET}, @@ -1145,7 +1145,7 @@ static const struct asyc_intr_info ainfo[] = { {SISL_ASTATUS_FC1_LOGI_F, "login failed", 1, CLR_FC_ERROR}, {SISL_ASTATUS_FC1_LOGI_S, "login succeeded", 1, SCAN_HOST}, {SISL_ASTATUS_FC1_LINK_DN, "link down", 1, 0}, - {SISL_ASTATUS_FC1_LINK_UP, "link up", 1, SCAN_HOST}, + {SISL_ASTATUS_FC1_LINK_UP, "link up", 1, 0}, {0x0, "", 0, 0} /* terminator */ }; From 24d17d7853fa64189d32ac6416c6cacc3fed449c Mon Sep 17 00:00:00 2001 From: "Matthew R. Ochs" Date: Fri, 2 Sep 2016 15:39:30 -0500 Subject: [PATCH 09/22] scsi: cxlflash: Fix to avoid EEH and host reset collisions commit 1d3324c382b1a617eb567e3650dcb51f22dfec9a upstream. The EEH reset handler is ignorant to the current state of the driver when processing a frozen event and initiating a device reset. This can be an issue if an EEH event occurs while a user or stack initiated reset is executing. More specifically, if an EEH occurs while the SCSI host reset handler is active, the reset initiated by the EEH thread will likely collide with the host reset thread. This can leave the device in an inconsistent state, or worse, cause a system crash. As a remedy, the EEH handler is updated to evaluate the device state and take appropriate action (proceed, wait, or disconnect host). The host reset handler is also updated to handle situations where an EEH occurred during a host reset. In such situations, the host reset handler will delay reporting back a success to give the EEH reset an opportunity to complete. Signed-off-by: Matthew R. Ochs Acked-by: Uma Krishnan Signed-off-by: Martin K. Petersen Cc: Sumit Semwal Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/cxlflash/main.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c index 6150d29e1d68..d0eed076fa72 100644 --- a/drivers/scsi/cxlflash/main.c +++ b/drivers/scsi/cxlflash/main.c @@ -1962,6 +1962,11 @@ retry: * cxlflash_eh_host_reset_handler() - reset the host adapter * @scp: SCSI command from stack identifying host. * + * Following a reset, the state is evaluated again in case an EEH occurred + * during the reset. In such a scenario, the host reset will either yield + * until the EEH recovery is complete or return success or failure based + * upon the current device state. + * * Return: * SUCCESS as defined in scsi/scsi.h * FAILED as defined in scsi/scsi.h @@ -1993,7 +1998,8 @@ static int cxlflash_eh_host_reset_handler(struct scsi_cmnd *scp) } else cfg->state = STATE_NORMAL; wake_up_all(&cfg->reset_waitq); - break; + ssleep(1); + /* fall through */ case STATE_RESET: wait_event(cfg->reset_waitq, cfg->state != STATE_RESET); if (cfg->state == STATE_NORMAL) @@ -2534,6 +2540,9 @@ static void drain_ioctls(struct cxlflash_cfg *cfg) * @pdev: PCI device struct. * @state: PCI channel state. * + * When an EEH occurs during an active reset, wait until the reset is + * complete and then take action based upon the device state. + * * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT */ static pci_ers_result_t cxlflash_pci_error_detected(struct pci_dev *pdev, @@ -2547,6 +2556,10 @@ static pci_ers_result_t cxlflash_pci_error_detected(struct pci_dev *pdev, switch (state) { case pci_channel_io_frozen: + wait_event(cfg->reset_waitq, cfg->state != STATE_RESET); + if (cfg->state == STATE_FAILTERM) + return PCI_ERS_RESULT_DISCONNECT; + cfg->state = STATE_RESET; scsi_block_requests(cfg->host); drain_ioctls(cfg); From 6f81dea4037c878e27e3cb25cabc85c2f897fdfd Mon Sep 17 00:00:00 2001 From: "Matthew R. Ochs" Date: Fri, 2 Sep 2016 15:40:03 -0500 Subject: [PATCH 10/22] scsi: cxlflash: Improve EEH recovery time commit 05dab43230fdc0d14ca885b473a2740fe017ecb1 upstream. When an EEH occurs during device initialization, the port timeout logic can cause excessive delays as MMIO reads will fail. Depending on where they are experienced, these delays can lead to a prolonged reset, causing an unnecessary triggering of other timeout logic in the SCSI stack or user applications. To expedite recovery, the port timeout logic is updated to decay the timeout at a much faster rate when in the presence of a likely EEH frozen event. Signed-off-by: Matthew R. Ochs Acked-by: Uma Krishnan Signed-off-by: Martin K. Petersen Cc: Sumit Semwal Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/cxlflash/main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c index d0eed076fa72..0b096730c72a 100644 --- a/drivers/scsi/cxlflash/main.c +++ b/drivers/scsi/cxlflash/main.c @@ -996,6 +996,8 @@ static int wait_port_online(__be64 __iomem *fc_regs, u32 delay_us, u32 nretry) do { msleep(delay_us / 1000); status = readq_be(&fc_regs[FC_MTIP_STATUS / 8]); + if (status == U64_MAX) + nretry /= 2; } while ((status & FC_MTIP_STATUS_MASK) != FC_MTIP_STATUS_ONLINE && nretry--); @@ -1027,6 +1029,8 @@ static int wait_port_offline(__be64 __iomem *fc_regs, u32 delay_us, u32 nretry) do { msleep(delay_us / 1000); status = readq_be(&fc_regs[FC_MTIP_STATUS / 8]); + if (status == U64_MAX) + nretry /= 2; } while ((status & FC_MTIP_STATUS_MASK) != FC_MTIP_STATUS_OFFLINE && nretry--); From 35c9bfa51154cc3519e93970c69afe13f93757ac Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Wed, 28 Dec 2016 16:42:00 -0200 Subject: [PATCH 11/22] 8250_pci: Fix potential use-after-free in error path commit c130b666a9a711f985a0a44b58699ebe14bb7245 upstream. Commit f209fa03fc9d ("serial: 8250_pci: Detach low-level driver during PCI error recovery") introduces a potential use-after-free in case the pciserial_init_ports call in serial8250_io_resume fails, which may happen if a memory allocation fails or if the .init quirk failed for whatever reason). If this happen, further pci_get_drvdata will return a pointer to freed memory. This patch reworks the PCI recovery resume hook to restore the old priv structure in this case, which should be ok, since the ports were already detached. Such error during recovery causes us to give up on the recovery. Fixes: f209fa03fc9d ("serial: 8250_pci: Detach low-level driver during PCI error recovery") Reported-by: Michal Suchanek Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Guilherme G. Piccoli Cc: Sumit Semwal Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_pci.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index 83ff1724ec79..cf3da51a3536 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -5850,17 +5850,15 @@ static pci_ers_result_t serial8250_io_slot_reset(struct pci_dev *dev) static void serial8250_io_resume(struct pci_dev *dev) { struct serial_private *priv = pci_get_drvdata(dev); - const struct pciserial_board *board; + struct serial_private *new; if (!priv) return; - board = priv->board; - kfree(priv); - priv = pciserial_init_ports(dev, board); - - if (!IS_ERR(priv)) { - pci_set_drvdata(dev, priv); + new = pciserial_init_ports(dev, priv->board); + if (!IS_ERR(new)) { + pci_set_drvdata(dev, new); + kfree(priv); } } From a8d47b4b3cb654c161c7f24d731ea4f3963748b1 Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Wed, 3 May 2017 09:44:19 +0100 Subject: [PATCH 12/22] netlink: Allow direct reclaim for fallback allocation The backport of d35c99ff77ec ("netlink: do not enter direct reclaim from netlink_dump()") to the 4.4 branch (first in 4.4.32) mistakenly removed direct claim from the initial large allocation _and_ the fallback allocation which means that allocations can spuriously fail. Fix the issue by adding back the direct reclaim flag to the fallback allocation. Fixes: 6d123f1d396b ("netlink: do not enter direct reclaim from netlink_dump()") Signed-off-by: Ross Lagerwall Signed-off-by: Greg Kroah-Hartman --- net/netlink/af_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 8e33019d8e7b..acfb16fdcd55 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2107,7 +2107,7 @@ static int netlink_dump(struct sock *sk) if (!skb) { alloc_size = alloc_min_size; skb = netlink_alloc_skb(sk, alloc_size, nlk->portid, - (GFP_KERNEL & ~__GFP_DIRECT_RECLAIM)); + GFP_KERNEL); } if (!skb) goto errout_skb; From 56cd2ed3e25ad14a49b99cbd4068aeaaa422fa32 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 4 May 2017 22:52:02 +0200 Subject: [PATCH 13/22] IB/qib: rename BITS_PER_PAGE to RVT_BITS_PER_PAGE We get this build warning on arm64 drivers/infiniband/hw/qib/qib_qp.c:44:0: error: "BITS_PER_PAGE" redefined [-Werror] #define BITS_PER_PAGE (PAGE_SIZE*BITS_PER_BYTE) This is fixed upstream in commit 898fa52b4ac3 ("IB/qib: Remove qpn, qp tables and related variables from qib"), which does a lot of other things as well. Instead, I just backport the rename of the local BITS_PER_PAGE definition to RVT_BITS_PER_PAGE. The driver first showed up in linux-2.6.35, and the fixup should still apply to that. The upstream fix went into v4.6, so we could apply this workaround to both 3.18 and 4.4. Fixes: f931551bafe1 ("IB/qib: Add new qib driver for QLogic PCIe InfiniBand adapters") Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/qib/qib_qp.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index 3eff35c2d453..2684605fe67f 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -41,13 +41,13 @@ #include "qib.h" -#define BITS_PER_PAGE (PAGE_SIZE*BITS_PER_BYTE) -#define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1) +#define RVT_BITS_PER_PAGE (PAGE_SIZE*BITS_PER_BYTE) +#define RVT_BITS_PER_PAGE_MASK (RVT_BITS_PER_PAGE-1) static inline unsigned mk_qpn(struct qib_qpn_table *qpt, struct qpn_map *map, unsigned off) { - return (map - qpt->map) * BITS_PER_PAGE + off; + return (map - qpt->map) * RVT_BITS_PER_PAGE + off; } static inline unsigned find_next_offset(struct qib_qpn_table *qpt, @@ -59,7 +59,7 @@ static inline unsigned find_next_offset(struct qib_qpn_table *qpt, if (((off & qpt->mask) >> 1) >= n) off = (off | qpt->mask) + 2; } else - off = find_next_zero_bit(map->page, BITS_PER_PAGE, off); + off = find_next_zero_bit(map->page, RVT_BITS_PER_PAGE, off); return off; } @@ -147,8 +147,8 @@ static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt, qpn = 2; if (qpt->mask && ((qpn & qpt->mask) >> 1) >= dd->n_krcv_queues) qpn = (qpn | qpt->mask) + 2; - offset = qpn & BITS_PER_PAGE_MASK; - map = &qpt->map[qpn / BITS_PER_PAGE]; + offset = qpn & RVT_BITS_PER_PAGE_MASK; + map = &qpt->map[qpn / RVT_BITS_PER_PAGE]; max_scan = qpt->nmaps - !offset; for (i = 0;;) { if (unlikely(!map->page)) { @@ -173,7 +173,7 @@ static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt, * We just need to be sure we don't loop * forever. */ - } while (offset < BITS_PER_PAGE && qpn < QPN_MAX); + } while (offset < RVT_BITS_PER_PAGE && qpn < QPN_MAX); /* * In order to keep the number of pages allocated to a * minimum, we scan the all existing pages before increasing @@ -204,9 +204,9 @@ static void free_qpn(struct qib_qpn_table *qpt, u32 qpn) { struct qpn_map *map; - map = qpt->map + qpn / BITS_PER_PAGE; + map = qpt->map + qpn / RVT_BITS_PER_PAGE; if (map->page) - clear_bit(qpn & BITS_PER_PAGE_MASK, map->page); + clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page); } static inline unsigned qpn_hash(struct qib_ibdev *dev, u32 qpn) From fee1f42b961e6cc3ec570e1fdba224d7d49b517d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 4 May 2017 22:52:03 +0200 Subject: [PATCH 14/22] IB/ehca: fix maybe-uninitialized warnings The driver causes two warnings about possibly uninitialized variables: drivers/infiniband/hw/ehca/ehca_mrmw.c: In function 'ehca_set_pagebuf': drivers/infiniband/hw/ehca/ehca_mrmw.c:1908:4: warning: 'prev_pgaddr' may be used uninitialized in this function [-Wmaybe-uninitialized] drivers/infiniband/hw/ehca/ehca_mrmw.c:1924:14: note: 'prev_pgaddr' was declared here drivers/infiniband/hw/ehca/ehca_mrmw.c: In function 'ehca_reg_mr': drivers/infiniband/hw/ehca/ehca_mrmw.c:2430:5: warning: 'hret' may be used uninitialized in this function [-Wmaybe-uninitialized] The first one is definitely a false positive, the second one may or may not be one. In both cases, adding an intialization is the safe and easy workaround. The driver was removed in mainline in commit e581d111dad3 ("staging/rdma: remove deprecated ehca driver"), in linux-4.6. In 4.4, the file is located in drivers/staging/rdma/ehca/ehca_mrmw.c, and the fix still applies. Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rdma/ehca/ehca_mrmw.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/staging/rdma/ehca/ehca_mrmw.c b/drivers/staging/rdma/ehca/ehca_mrmw.c index f914b30999f8..4d52ca42644a 100644 --- a/drivers/staging/rdma/ehca/ehca_mrmw.c +++ b/drivers/staging/rdma/ehca/ehca_mrmw.c @@ -1921,7 +1921,7 @@ static int ehca_set_pagebuf_user2(struct ehca_mr_pginfo *pginfo, u64 *kpage) { int ret = 0; - u64 pgaddr, prev_pgaddr; + u64 pgaddr, prev_pgaddr = 0; u32 j = 0; int kpages_per_hwpage = pginfo->hwpage_size / PAGE_SIZE; int nr_kpages = kpages_per_hwpage; @@ -2417,6 +2417,7 @@ static int ehca_reg_bmap_mr_rpages(struct ehca_shca *shca, ehca_err(&shca->ib_device, "kpage alloc failed"); return -ENOMEM; } + hret = H_SUCCESS; for (top = 0; top < EHCA_MAP_ENTRIES; top++) { if (!ehca_bmap_valid(ehca_bmap->top[top])) continue; From e2968fb8e7980dccc199dac2593ad476db20969f Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Fri, 30 Sep 2016 01:49:55 -0400 Subject: [PATCH 15/22] ext4: require encryption feature for EXT4_IOC_SET_ENCRYPTION_POLICY commit 9a200d075e5d05be1fcad4547a0f8aee4e2f9a04 upstream. ...otherwise an user can enable encryption for certain files even when the filesystem is unable to support it. Such a case would be a filesystem created by mkfs.ext4's default settings, 1KiB block size. Ext4 supports encyption only when block size is equal to PAGE_SIZE. But this constraint is only checked when the encryption feature flag is set. Signed-off-by: Richard Weinberger Signed-off-by: Theodore Ts'o Signed-off-by: Eric Biggers Signed-off-by: Greg Kroah-Hartman --- fs/ext4/ioctl.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 789e2d6724a9..bcd7c4788903 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -622,6 +622,9 @@ resizefs_out: struct ext4_encryption_policy policy; int err = 0; + if (!ext4_has_feature_encrypt(sb)) + return -EOPNOTSUPP; + if (copy_from_user(&policy, (struct ext4_encryption_policy __user *)arg, sizeof(policy))) { From 2faff9d1dfc560da3bfba6a8ab0c30246fd6c1cb Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 7 Feb 2016 19:35:05 -0500 Subject: [PATCH 16/22] ext4 crypto: revalidate dentry after adding or removing the key commit 28b4c263961c47da84ed8b5be0b5116bad1133eb upstream. Add a validation check for dentries for encrypted directory to make sure we're not caching stale data after a key has been added or removed. Also check to make sure that status of the encryption key is updated when readdir(2) is executed. Signed-off-by: Theodore Ts'o Signed-off-by: Eric Biggers Signed-off-by: Greg Kroah-Hartman --- fs/ext4/crypto.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++ fs/ext4/dir.c | 6 ++++++ fs/ext4/ext4.h | 1 + fs/ext4/namei.c | 18 +++++++++++++++++ 4 files changed, 75 insertions(+) diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c index 1a0835073663..13a69e0fb90d 100644 --- a/fs/ext4/crypto.c +++ b/fs/ext4/crypto.c @@ -469,3 +469,53 @@ uint32_t ext4_validate_encryption_key_size(uint32_t mode, uint32_t size) return size; return 0; } + +/* + * Validate dentries for encrypted directories to make sure we aren't + * potentially caching stale data after a key has been added or + * removed. + */ +static int ext4_d_revalidate(struct dentry *dentry, unsigned int flags) +{ + struct inode *dir = d_inode(dentry->d_parent); + struct ext4_crypt_info *ci = EXT4_I(dir)->i_crypt_info; + int dir_has_key, cached_with_key; + + if (!ext4_encrypted_inode(dir)) + return 0; + + /* this should eventually be an flag in d_flags */ + cached_with_key = dentry->d_fsdata != NULL; + dir_has_key = (ci != NULL); + + /* + * If the dentry was cached without the key, and it is a + * negative dentry, it might be a valid name. We can't check + * if the key has since been made available due to locking + * reasons, so we fail the validation so ext4_lookup() can do + * this check. + * + * We also fail the validation if the dentry was created with + * the key present, but we no longer have the key, or vice versa. + */ + if ((!cached_with_key && d_is_negative(dentry)) || + (!cached_with_key && dir_has_key) || + (cached_with_key && !dir_has_key)) { +#if 0 /* Revalidation debug */ + char buf[80]; + char *cp = simple_dname(dentry, buf, sizeof(buf)); + + if (IS_ERR(cp)) + cp = (char *) "???"; + pr_err("revalidate: %s %p %d %d %d\n", cp, dentry->d_fsdata, + cached_with_key, d_is_negative(dentry), + dir_has_key); +#endif + return 0; + } + return 1; +} + +const struct dentry_operations ext4_encrypted_d_ops = { + .d_revalidate = ext4_d_revalidate, +}; diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 1d1bca74f844..6d17f31a31d7 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -111,6 +111,12 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) int dir_has_error = 0; struct ext4_str fname_crypto_str = {.name = NULL, .len = 0}; + if (ext4_encrypted_inode(inode)) { + err = ext4_get_encryption_info(inode); + if (err && err != -ENOKEY) + return err; + } + if (is_dx_dir(inode)) { err = ext4_dx_readdir(file, ctx); if (err != ERR_BAD_DX_DIR) { diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 362d59b24f1d..3de9bb357b4f 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2268,6 +2268,7 @@ struct page *ext4_encrypt(struct inode *inode, struct page *plaintext_page); int ext4_decrypt(struct page *page); int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex); +extern const struct dentry_operations ext4_encrypted_d_ops; #ifdef CONFIG_EXT4_FS_ENCRYPTION int ext4_init_crypto(void); diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 573b4cbb0cb9..fafa903ab3c0 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1557,6 +1557,24 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi struct ext4_dir_entry_2 *de; struct buffer_head *bh; + if (ext4_encrypted_inode(dir)) { + int res = ext4_get_encryption_info(dir); + + /* + * This should be a properly defined flag for + * dentry->d_flags when we uplift this to the VFS. + * d_fsdata is set to (void *) 1 if if the dentry is + * created while the directory was encrypted and we + * don't have access to the key. + */ + dentry->d_fsdata = NULL; + if (ext4_encryption_info(dir)) + dentry->d_fsdata = (void *) 1; + d_set_d_op(dentry, &ext4_encrypted_d_ops); + if (res && res != -ENOKEY) + return ERR_PTR(res); + } + if (dentry->d_name.len > EXT4_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); From 41948f88a521b95e8abc793602015a6d45f52a06 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 26 Mar 2016 16:15:42 -0400 Subject: [PATCH 17/22] ext4 crypto: use dget_parent() in ext4_d_revalidate() commit 3d43bcfef5f0548845a425365011c499875491b0 upstream. This avoids potential problems caused by a race where the inode gets renamed out from its parent directory and the parent directory is deleted while ext4_d_revalidate() is running. Fixes: 28b4c263961c Reported-by: Al Viro Signed-off-by: Theodore Ts'o Signed-off-by: Eric Biggers Signed-off-by: Greg Kroah-Hartman --- fs/ext4/crypto.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c index 13a69e0fb90d..1259ade2f10a 100644 --- a/fs/ext4/crypto.c +++ b/fs/ext4/crypto.c @@ -477,16 +477,21 @@ uint32_t ext4_validate_encryption_key_size(uint32_t mode, uint32_t size) */ static int ext4_d_revalidate(struct dentry *dentry, unsigned int flags) { - struct inode *dir = d_inode(dentry->d_parent); - struct ext4_crypt_info *ci = EXT4_I(dir)->i_crypt_info; + struct dentry *dir; + struct ext4_crypt_info *ci; int dir_has_key, cached_with_key; - if (!ext4_encrypted_inode(dir)) + dir = dget_parent(dentry); + if (!ext4_encrypted_inode(d_inode(dir))) { + dput(dir); return 0; + } + ci = EXT4_I(d_inode(dir))->i_crypt_info; /* this should eventually be an flag in d_flags */ cached_with_key = dentry->d_fsdata != NULL; dir_has_key = (ci != NULL); + dput(dir); /* * If the dentry was cached without the key, and it is a From 16fb859f9b2bb246a18b4d0edc605774193c33b7 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 12 Apr 2016 16:05:36 -0700 Subject: [PATCH 18/22] ext4/fscrypto: avoid RCU lookup in d_revalidate commit 03a8bb0e53d9562276045bdfcf2b5de2e4cff5a1 upstream. As Al pointed, d_revalidate should return RCU lookup before using d_inode. This was originally introduced by: commit 34286d666230 ("fs: rcu-walk aware d_revalidate method"). Reported-by: Al Viro Signed-off-by: Jaegeuk Kim Cc: Theodore Ts'o Signed-off-by: Eric Biggers Signed-off-by: Greg Kroah-Hartman --- fs/ext4/crypto.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c index 1259ade2f10a..9d6c2dcf1bd0 100644 --- a/fs/ext4/crypto.c +++ b/fs/ext4/crypto.c @@ -34,6 +34,7 @@ #include #include #include +#include #include "ext4_extents.h" #include "xattr.h" @@ -481,6 +482,9 @@ static int ext4_d_revalidate(struct dentry *dentry, unsigned int flags) struct ext4_crypt_info *ci; int dir_has_key, cached_with_key; + if (flags & LOOKUP_RCU) + return -ECHILD; + dir = dget_parent(dentry); if (!ext4_encrypted_inode(d_inode(dir))) { dput(dir); From 35e13333c217f0255a1e657ce075f0f72bc6383c Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 25 Apr 2017 16:21:34 -0400 Subject: [PATCH 19/22] nfsd4: minor NFSv2/v3 write decoding cleanup commit db44bac41bbfc0c0d9dd943092d8bded3c9db19b upstream. Use a couple shortcuts that will simplify a following bugfix. (Minor backporting required to account for a change from f34b95689d2c "The NFSv2/NFSv3 server does not handle zero length WRITE requests correctly".) Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs3xdr.c | 8 ++++---- fs/nfsd/nfsxdr.c | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 00575d776d91..34f4d1b74a23 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -358,6 +358,7 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, { unsigned int len, v, hdr, dlen; u32 max_blocksize = svc_max_payload(rqstp); + struct kvec *head = rqstp->rq_arg.head; p = decode_fh(p, &args->fh); if (!p) @@ -377,9 +378,8 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, * Check to make sure that we got the right number of * bytes. */ - hdr = (void*)p - rqstp->rq_arg.head[0].iov_base; - dlen = rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len - - hdr; + hdr = (void*)p - head->iov_base; + dlen = head->iov_len + rqstp->rq_arg.page_len - hdr; /* * Round the length of the data which was specified up to * the next multiple of XDR units and then compare that @@ -396,7 +396,7 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, len = args->len = max_blocksize; } rqstp->rq_vec[0].iov_base = (void*)p; - rqstp->rq_vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - hdr; + rqstp->rq_vec[0].iov_len = head->iov_len - hdr; v = 0; while (len > rqstp->rq_vec[v].iov_len) { len -= rqstp->rq_vec[v].iov_len; diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 79d964aa8079..894f2a8ffe7a 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -280,6 +280,7 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_writeargs *args) { unsigned int len, hdr, dlen; + struct kvec *head = rqstp->rq_arg.head; int v; p = decode_fh(p, &args->fh); @@ -300,9 +301,8 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, * Check to make sure that we got the right number of * bytes. */ - hdr = (void*)p - rqstp->rq_arg.head[0].iov_base; - dlen = rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len - - hdr; + hdr = (void*)p - head->iov_base; + dlen = head->iov_len + rqstp->rq_arg.page_len - hdr; /* * Round the length of the data which was specified up to @@ -316,7 +316,7 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, return 0; rqstp->rq_vec[0].iov_base = (void*)p; - rqstp->rq_vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - hdr; + rqstp->rq_vec[0].iov_len = head->iov_len - hdr; v = 0; while (len > rqstp->rq_vec[v].iov_len) { len -= rqstp->rq_vec[v].iov_len; From da1ce38aaac7f08d319b4b76130aa4fd27c4489f Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 21 Apr 2017 15:26:30 -0400 Subject: [PATCH 20/22] nfsd: stricter decoding of write-like NFSv2/v3 ops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 13bf9fbff0e5e099e2b6f003a0ab8ae145436309 upstream. The NFSv2/v3 code does not systematically check whether we decode past the end of the buffer. This generally appears to be harmless, but there are a few places where we do arithmetic on the pointers involved and don't account for the possibility that a length could be negative. Add checks to catch these. Reported-by: Tuomas Haanpää Reported-by: Ari Kauppi Reviewed-by: NeilBrown Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs3xdr.c | 4 ++++ fs/nfsd/nfsxdr.c | 2 ++ 2 files changed, 6 insertions(+) diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 34f4d1b74a23..7162ab7bc093 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -368,6 +368,8 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, args->count = ntohl(*p++); args->stable = ntohl(*p++); len = args->len = ntohl(*p++); + if ((void *)p > head->iov_base + head->iov_len) + return 0; /* * The count must equal the amount of data passed. */ @@ -471,6 +473,8 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p, /* first copy and check from the first page */ old = (char*)p; vec = &rqstp->rq_arg.head[0]; + if ((void *)old > vec->iov_base + vec->iov_len) + return 0; avail = vec->iov_len - (old - (char*)vec->iov_base); while (len && avail && *old) { *new++ = *old++; diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 894f2a8ffe7a..bf913201a6ad 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -302,6 +302,8 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, * bytes. */ hdr = (void*)p - head->iov_base; + if (hdr > head->iov_len) + return 0; dlen = head->iov_len + rqstp->rq_arg.page_len - hdr; /* From 3b9983bb354eb107808a1fcf2eac569fd51c0595 Mon Sep 17 00:00:00 2001 From: Adrian Salido Date: Thu, 27 Apr 2017 10:32:55 -0700 Subject: [PATCH 21/22] dm ioctl: prevent stack leak in dm ioctl call commit 4617f564c06117c7d1b611be49521a4430042287 upstream. When calling a dm ioctl that doesn't process any data (IOCTL_FLAGS_NO_PARAMS), the contents of the data field in struct dm_ioctl are left initialized. Current code is incorrectly extending the size of data copied back to user, causing the contents of kernel stack to be leaked to user. Fix by only copying contents before data and allow the functions processing the ioctl to override. Signed-off-by: Adrian Salido Reviewed-by: Alasdair G Kergon Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 80a439543259..e503279c34fc 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1843,7 +1843,7 @@ static int ctl_ioctl(uint command, struct dm_ioctl __user *user) if (r) goto out; - param->data_size = sizeof(*param); + param->data_size = offsetof(struct dm_ioctl, data); r = fn(param, input_param_size); if (unlikely(param->flags & DM_BUFFER_FULL_FLAG) && From dec1dc2a0ee8e929c12360c01f211daad862e96c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 8 May 2017 07:46:17 +0200 Subject: [PATCH 22/22] Linux 4.4.67 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1cd052823c03..c987902ae1ee 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 4 -SUBLEVEL = 66 +SUBLEVEL = 67 EXTRAVERSION = NAME = Blurry Fish Butt