diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 71e28e4a047d..a7516e4c8ce8 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -220,6 +220,14 @@ tcp_base_mss - INTEGER
 	Path MTU discovery (MTU probing). If MTU probing is enabled,
 	this is the initial MSS used by the connection.
 
+tcp_min_snd_mss - INTEGER
+	TCP SYN and SYNACK messages usually advertise an ADVMSS option,
+	as described in RFC 1122 and RFC 6691.
+	If this ADVMSS option is smaller than tcp_min_snd_mss,
+	it is silently capped to tcp_min_snd_mss.
+
+	Default : 48 (at least 8 bytes of payload per segment)
+
 tcp_congestion_control - STRING
 	Set the congestion control algorithm to be used for new
 	connections. The algorithm "reno" is always available, but
diff --git a/Makefile b/Makefile
index ec0c7baa5a2c..1aa2bfeb00a2 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 4
-SUBLEVEL = 181
+SUBLEVEL = 182
 EXTRAVERSION =
 NAME = Blurry Fish Butt
 
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index bff9f44161c2..91fe6c39c804 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -3130,6 +3130,7 @@ static void binder_transaction(struct binder_proc *proc,
 
 	if (target_node && target_node->txn_security_ctx) {
 		u32 secid;
+		size_t added_size;
 
 		security_task_getsecid(proc->tsk, &secid);
 		ret = security_secid_to_secctx(secid, &secctx, &secctx_sz);
@@ -3139,7 +3140,15 @@ static void binder_transaction(struct binder_proc *proc,
 			return_error_line = __LINE__;
 			goto err_get_secctx_failed;
 		}
-		extra_buffers_size += ALIGN(secctx_sz, sizeof(u64));
+		added_size = ALIGN(secctx_sz, sizeof(u64));
+		extra_buffers_size += added_size;
+		if (extra_buffers_size < added_size) {
+			/* integer overflow of extra_buffers_size */
+			return_error = BR_FAILED_REPLY;
+			return_error_param = EINVAL;
+			return_error_line = __LINE__;
+			goto err_bad_extra_size;
+		}
 	}
 
 	trace_binder_transaction(reply, t, target_node);
@@ -3441,6 +3450,7 @@ err_copy_data_failed:
 	t->buffer->transaction = NULL;
 	binder_alloc_free_buf(&target_proc->alloc, t->buffer);
 err_binder_alloc_buf_failed:
+err_bad_extra_size:
 	if (secctx)
 		security_release_secctx(secctx, secctx_sz);
 err_get_secctx_failed:
diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c
index baecb4bf9d9b..c8614f3c70c4 100644
--- a/drivers/android/binder_alloc.c
+++ b/drivers/android/binder_alloc.c
@@ -924,15 +924,14 @@ enum lru_status binder_alloc_free_page(struct list_head *item,
 
 	index = page - alloc->pages;
 	page_addr = (uintptr_t)alloc->buffer + index * PAGE_SIZE;
+
+	mm = alloc->vma_vm_mm;
+	/* Same as mmget_not_zero() in later kernel versions */
+	if (!atomic_inc_not_zero(&alloc->vma_vm_mm->mm_users))
+		goto err_mmget;
+	if (!down_write_trylock(&mm->mmap_sem))
+		goto err_down_write_mmap_sem_failed;
 	vma = alloc->vma;
-	if (vma) {
-		/* Same as mmget_not_zero() in later kernel versions */
-		if (!atomic_inc_not_zero(&alloc->vma_vm_mm->mm_users))
-			goto err_mmget;
-		mm = alloc->vma_vm_mm;
-		if (!down_write_trylock(&mm->mmap_sem))
-			goto err_down_write_mmap_sem_failed;
-	}
 
 	list_lru_isolate(lru, item);
 	spin_unlock(lock);
@@ -946,10 +945,9 @@ enum lru_status binder_alloc_free_page(struct list_head *item,
 			       PAGE_SIZE, NULL);
 
 		trace_binder_unmap_user_end(alloc, index);
-
-		up_write(&mm->mmap_sem);
-		mmput(mm);
 	}
+	up_write(&mm->mmap_sem);
+	mmput(mm);
 
 	trace_binder_unmap_kernel_start(alloc, index);
 
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 747404dbe506..085da1707cea 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -419,4 +419,7 @@ static inline void tcp_saved_syn_free(struct tcp_sock *tp)
 	tp->saved_syn = NULL;
 }
 
+int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount,
+		  int shiftlen);
+
 #endif	/* _LINUX_TCP_H */
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index c68926b4899c..61c38f87ea07 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -88,6 +88,7 @@ struct netns_ipv4 {
 	int sysctl_tcp_fwmark_accept;
 	int sysctl_tcp_mtu_probing;
 	int sysctl_tcp_base_mss;
+	int sysctl_tcp_min_snd_mss;
 	int sysctl_tcp_probe_threshold;
 	u32 sysctl_tcp_probe_interval;
 
diff --git a/include/net/tcp.h b/include/net/tcp.h
index dd161b625cd7..b9b5bc27b844 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -54,6 +54,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 
 #define MAX_TCP_HEADER	(128 + MAX_HEADER)
 #define MAX_TCP_OPTION_SPACE 40
+#define TCP_MIN_SND_MSS		48
+#define TCP_MIN_GSO_SIZE	(TCP_MIN_SND_MSS - MAX_TCP_OPTION_SPACE)
 
 /*
  * Never offer a window over 32767 without using window scaling. Some
diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index 9de808ebce05..422183f396d5 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -281,6 +281,7 @@ enum
 	LINUX_MIB_TCPKEEPALIVE,			/* TCPKeepAlive */
 	LINUX_MIB_TCPMTUPFAIL,			/* TCPMTUPFail */
 	LINUX_MIB_TCPMTUPSUCCESS,		/* TCPMTUPSuccess */
+	LINUX_MIB_TCPWQUEUETOOBIG,		/* TCPWqueueTooBig */
 	__LINUX_MIB_MAX
 };
 
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index b001ad668108..555586fc7840 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -303,6 +303,7 @@ static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("TCPKeepAlive", LINUX_MIB_TCPKEEPALIVE),
 	SNMP_MIB_ITEM("TCPMTUPFail", LINUX_MIB_TCPMTUPFAIL),
 	SNMP_MIB_ITEM("TCPMTUPSuccess", LINUX_MIB_TCPMTUPSUCCESS),
+	SNMP_MIB_ITEM("TCPWqueueTooBig", LINUX_MIB_TCPWQUEUETOOBIG),
 	SNMP_MIB_SENTINEL
 };
 
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 81739ffc4168..a56b3eb930b9 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -36,6 +36,8 @@ static int ip_local_port_range_min[] = { 1, 1 };
 static int ip_local_port_range_max[] = { 65535, 65535 };
 static int tcp_adv_win_scale_min = -31;
 static int tcp_adv_win_scale_max = 31;
+static int tcp_min_snd_mss_min = TCP_MIN_SND_MSS;
+static int tcp_min_snd_mss_max = 65535;
 static int ip_ttl_min = 1;
 static int ip_ttl_max = 255;
 static int tcp_syn_retries_min = 1;
@@ -993,6 +995,15 @@ static struct ctl_table ipv4_net_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
+	{
+		.procname	= "tcp_min_snd_mss",
+		.data		= &init_net.ipv4.sysctl_tcp_min_snd_mss,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &tcp_min_snd_mss_min,
+		.extra2		= &tcp_min_snd_mss_max,
+	},
 	{
 		.procname	= "tcp_probe_threshold",
 		.data		= &init_net.ipv4.sysctl_tcp_probe_threshold,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 16d2b59989ec..0fe1eaf0bc31 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3207,6 +3207,7 @@ void __init tcp_init(void)
 	int max_rshare, max_wshare, cnt;
 	unsigned int i;
 
+	BUILD_BUG_ON(TCP_MIN_SND_MSS <= MAX_TCP_OPTION_SPACE);
 	sock_skb_cb_check_size(sizeof(struct tcp_skb_cb));
 
 	percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 95273c47a513..2fb43a1f259d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1276,7 +1276,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 	TCP_SKB_CB(skb)->seq += shifted;
 
 	tcp_skb_pcount_add(prev, pcount);
-	BUG_ON(tcp_skb_pcount(skb) < pcount);
+	WARN_ON_ONCE(tcp_skb_pcount(skb) < pcount);
 	tcp_skb_pcount_add(skb, -pcount);
 
 	/* When we're adding to gso_segs == 1, gso_size will be zero,
@@ -1338,6 +1338,21 @@ static int skb_can_shift(const struct sk_buff *skb)
 	return !skb_headlen(skb) && skb_is_nonlinear(skb);
 }
 
+int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from,
+		  int pcount, int shiftlen)
+{
+	/* TCP min gso_size is 8 bytes (TCP_MIN_GSO_SIZE)
+	 * Since TCP_SKB_CB(skb)->tcp_gso_segs is 16 bits, we need
+	 * to make sure not storing more than 65535 * 8 bytes per skb,
+	 * even if current MSS is bigger.
+	 */
+	if (unlikely(to->len + shiftlen >= 65535 * TCP_MIN_GSO_SIZE))
+		return 0;
+	if (unlikely(tcp_skb_pcount(to) + pcount > 65535))
+		return 0;
+	return skb_shift(to, from, shiftlen);
+}
+
 /* Try collapsing SACK blocks spanning across multiple skbs to a single
  * skb.
  */
@@ -1349,6 +1364,7 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *prev;
 	int mss;
+	int next_pcount;
 	int pcount = 0;
 	int len;
 	int in_sack;
@@ -1443,7 +1459,7 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
 	if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una))
 		goto fallback;
 
-	if (!skb_shift(prev, skb, len))
+	if (!tcp_skb_shift(prev, skb, pcount, len))
 		goto fallback;
 	if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack))
 		goto out;
@@ -1462,11 +1478,11 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
 		goto out;
 
 	len = skb->len;
-	if (skb_shift(prev, skb, len)) {
-		pcount += tcp_skb_pcount(skb);
-		tcp_shifted_skb(sk, skb, state, tcp_skb_pcount(skb), len, mss, 0);
+	next_pcount = tcp_skb_pcount(skb);
+	if (tcp_skb_shift(prev, skb, next_pcount, len)) {
+		pcount += next_pcount;
+		tcp_shifted_skb(sk, skb, state, next_pcount, len, mss, 0);
 	}
-
 out:
 	state->fack_count += pcount;
 	return prev;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index d1b911cde946..d2d95cf398e8 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2435,6 +2435,7 @@ static int __net_init tcp_sk_init(struct net *net)
 	net->ipv4.sysctl_tcp_ecn_fallback = 1;
 
 	net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
+	net->ipv4.sysctl_tcp_min_snd_mss = TCP_MIN_SND_MSS;
 	net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
 	net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL;
 
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 9ada5e4a28e5..6f7303d51274 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1161,6 +1161,11 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
 	if (nsize < 0)
 		nsize = 0;
 
+	if (unlikely((sk->sk_wmem_queued >> 1) > sk->sk_sndbuf)) {
+		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPWQUEUETOOBIG);
+		return -ENOMEM;
+	}
+
 	if (skb_unclone(skb, gfp))
 		return -ENOMEM;
 
@@ -1327,8 +1332,7 @@ static inline int __tcp_mtu_to_mss(struct sock *sk, int pmtu)
 	mss_now -= icsk->icsk_ext_hdr_len;
 
 	/* Then reserve room for full set of TCP options and 8 bytes of data */
-	if (mss_now < 48)
-		mss_now = 48;
+	mss_now = max(mss_now, sock_net(sk)->ipv4.sysctl_tcp_min_snd_mss);
 	return mss_now;
 }
 
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 2221e3c36bd5..28fed4ade750 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -166,6 +166,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
 		mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1;
 		mss = min(net->ipv4.sysctl_tcp_base_mss, mss);
 		mss = max(mss, 68 - tp->tcp_header_len);
+		mss = max(mss, net->ipv4.sysctl_tcp_min_snd_mss);
 		icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
 		tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
 	}
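
The overflow check added to binder_transaction() above relies on the
standard unsigned wrap-around idiom: after a += b, an unsigned sum is
smaller than b exactly when the addition wrapped. A minimal standalone
sketch of the same idiom (userspace C; the add_overflows() helper name
is hypothetical and not part of the patch):

#include <stdio.h>
#include <stddef.h>

/* Returns 1 iff a + b wraps around for size_t, mirroring the
 * "extra_buffers_size < added_size" test in binder_transaction(). */
static int add_overflows(size_t a, size_t b, size_t *sum)
{
	*sum = a + b;		/* unsigned addition is defined modulo 2^N */
	return *sum < b;	/* smaller than an operand => it wrapped */
}

int main(void)
{
	size_t sum;

	printf("%d\n", add_overflows(100, 200, &sum));		/* prints 0 */
	printf("%d\n", add_overflows((size_t)-8, 16, &sum));	/* prints 1 */
	return 0;
}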
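Likewise, the bounds in the new tcp_skb_shift() follow from fixed TCP
limits: tcp_gso_segs is a 16-bit field, and TCP_MIN_GSO_SIZE is
48 - 40 = 8 bytes, so one skb may never represent more than 65535
segments or 65535 * 8 = 524280 bytes, however small an MSS the peer
forces. A standalone sketch of those two caps (userspace C; the
can_shift() helper name is hypothetical):

#include <stdio.h>

/* Constants mirrored from the patch. */
#define MAX_TCP_OPTION_SPACE	40
#define TCP_MIN_SND_MSS		48
#define TCP_MIN_GSO_SIZE	(TCP_MIN_SND_MSS - MAX_TCP_OPTION_SPACE)

/* Mirrors the two rejection tests in tcp_skb_shift(). */
static int can_shift(unsigned int to_len, unsigned int shiftlen,
		     unsigned int to_pcount, unsigned int pcount)
{
	if (to_len + shiftlen >= 65535 * TCP_MIN_GSO_SIZE)
		return 0;	/* exceeds byte budget of 16-bit gso_segs */
	if (to_pcount + pcount > 65535)
		return 0;	/* segment count no longer fits in 16 bits */
	return 1;
}

int main(void)
{
	printf("byte cap: %d\n", 65535 * TCP_MIN_GSO_SIZE);	/* 524280 */
	printf("%d\n", can_shift(1000, 500, 10, 2));		/* prints 1 */
	printf("%d\n", can_shift(524000, 1000, 60000, 9000));	/* prints 0 */
	return 0;
}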