From 071d5080e33d6f24139e4213c2d9f97a2c21b602 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Thu, 9 Jul 2015 13:16:29 -0700 Subject: [PATCH 1/3] tcp: add tcp_in_slow_start helper Add a helper to test the slow start condition in various congestion control modules and other places. This is to prepare a slight improvement in policy as to exactly when to slow start. Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: Eric Dumazet Signed-off-by: Nandita Dukkipati Signed-off-by: David S. Miller --- include/net/tcp.h | 7 ++++++- net/ipv4/tcp_bic.c | 2 +- net/ipv4/tcp_cong.c | 2 +- net/ipv4/tcp_cubic.c | 4 ++-- net/ipv4/tcp_highspeed.c | 2 +- net/ipv4/tcp_htcp.c | 2 +- net/ipv4/tcp_illinois.c | 2 +- net/ipv4/tcp_metrics.c | 2 +- net/ipv4/tcp_scalable.c | 2 +- net/ipv4/tcp_vegas.c | 6 +++--- net/ipv4/tcp_veno.c | 2 +- 11 files changed, 19 insertions(+), 14 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 950cfecaad3c..dba22fc1b065 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -989,6 +989,11 @@ static inline unsigned int tcp_packets_in_flight(const struct tcp_sock *tp) #define TCP_INFINITE_SSTHRESH 0x7fffffff +static inline bool tcp_in_slow_start(const struct tcp_sock *tp) +{ + return tp->snd_cwnd <= tp->snd_ssthresh; +} + static inline bool tcp_in_initial_slowstart(const struct tcp_sock *tp) { return tp->snd_ssthresh >= TCP_INFINITE_SSTHRESH; @@ -1065,7 +1070,7 @@ static inline bool tcp_is_cwnd_limited(const struct sock *sk) const struct tcp_sock *tp = tcp_sk(sk); /* If in slow start, ensure cwnd grows to twice what was ACKed. */ - if (tp->snd_cwnd <= tp->snd_ssthresh) + if (tcp_in_slow_start(tp)) return tp->snd_cwnd < 2 * tp->max_packets_out; return tp->is_cwnd_limited; diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index c037644eafb7..fd1405d37c14 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c @@ -146,7 +146,7 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) if (!tcp_is_cwnd_limited(sk)) return; - if (tp->snd_cwnd <= tp->snd_ssthresh) + if (tcp_in_slow_start(tp)) tcp_slow_start(tp, acked); else { bictcp_update(ca, tp->snd_cwnd); diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 84be008c945c..654729a8cb23 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -413,7 +413,7 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked) return; /* In "safe" area, increase. */ - if (tp->snd_cwnd <= tp->snd_ssthresh) { + if (tcp_in_slow_start(tp)) { acked = tcp_slow_start(tp, acked); if (!acked) return; diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 06d3d665a9fd..28011fb1f4a2 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -320,7 +320,7 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) if (!tcp_is_cwnd_limited(sk)) return; - if (tp->snd_cwnd <= tp->snd_ssthresh) { + if (tcp_in_slow_start(tp)) { if (hystart && after(ack, ca->end_seq)) bictcp_hystart_reset(sk); acked = tcp_slow_start(tp, acked); @@ -439,7 +439,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us) ca->delay_min = delay; /* hystart triggers when cwnd is larger than some threshold */ - if (hystart && tp->snd_cwnd <= tp->snd_ssthresh && + if (hystart && tcp_in_slow_start(tp) && tp->snd_cwnd >= hystart_low_window) hystart_update(sk, delay); } diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c index 882c08aae2f5..db7842495a64 100644 --- a/net/ipv4/tcp_highspeed.c +++ b/net/ipv4/tcp_highspeed.c @@ -116,7 +116,7 @@ static void hstcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) if (!tcp_is_cwnd_limited(sk)) return; - if (tp->snd_cwnd <= tp->snd_ssthresh) + if (tcp_in_slow_start(tp)) tcp_slow_start(tp, acked); else { /* Update AIMD parameters. diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c index 58469fff6c18..82f0d9ed60f5 100644 --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c @@ -236,7 +236,7 @@ static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) if (!tcp_is_cwnd_limited(sk)) return; - if (tp->snd_cwnd <= tp->snd_ssthresh) + if (tcp_in_slow_start(tp)) tcp_slow_start(tp, acked); else { /* In dangerous area, increase slowly. diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c index f71002e4db0b..2ab9bbb6faff 100644 --- a/net/ipv4/tcp_illinois.c +++ b/net/ipv4/tcp_illinois.c @@ -268,7 +268,7 @@ static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 acked) return; /* In slow start */ - if (tp->snd_cwnd <= tp->snd_ssthresh) + if (tcp_in_slow_start(tp)) tcp_slow_start(tp, acked); else { diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index a51d63a43e33..b3d64f61d922 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -461,7 +461,7 @@ void tcp_update_metrics(struct sock *sk) tcp_metric_set(tm, TCP_METRIC_CWND, tp->snd_cwnd); } - } else if (tp->snd_cwnd > tp->snd_ssthresh && + } else if (!tcp_in_slow_start(tp) && icsk->icsk_ca_state == TCP_CA_Open) { /* Cong. avoidance phase, cwnd is reliable. */ if (!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c index 333bcb2415ff..bf5ea9e9bbc1 100644 --- a/net/ipv4/tcp_scalable.c +++ b/net/ipv4/tcp_scalable.c @@ -22,7 +22,7 @@ static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked) if (!tcp_is_cwnd_limited(sk)) return; - if (tp->snd_cwnd <= tp->snd_ssthresh) + if (tcp_in_slow_start(tp)) tcp_slow_start(tp, acked); else tcp_cong_avoid_ai(tp, min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT), diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index a6cea1d5e20d..13951c4087d4 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -225,7 +225,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked) */ diff = tp->snd_cwnd * (rtt-vegas->baseRTT) / vegas->baseRTT; - if (diff > gamma && tp->snd_cwnd <= tp->snd_ssthresh) { + if (diff > gamma && tcp_in_slow_start(tp)) { /* Going too fast. Time to slow down * and switch to congestion avoidance. */ @@ -240,7 +240,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked) tp->snd_cwnd = min(tp->snd_cwnd, (u32)target_cwnd+1); tp->snd_ssthresh = tcp_vegas_ssthresh(tp); - } else if (tp->snd_cwnd <= tp->snd_ssthresh) { + } else if (tcp_in_slow_start(tp)) { /* Slow start. */ tcp_slow_start(tp, acked); } else { @@ -281,7 +281,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked) vegas->minRTT = 0x7fffffff; } /* Use normal slow start */ - else if (tp->snd_cwnd <= tp->snd_ssthresh) + else if (tcp_in_slow_start(tp)) tcp_slow_start(tp, acked); } diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index 112151eeee45..0d094b995cd9 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c @@ -150,7 +150,7 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked) veno->diff = (tp->snd_cwnd << V_PARAM_SHIFT) - target_cwnd; - if (tp->snd_cwnd <= tp->snd_ssthresh) { + if (tcp_in_slow_start(tp)) { /* Slow start. */ tcp_slow_start(tp, acked); } else { From 76174004a0f19785a328f40388e87e982bbf69b9 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Thu, 9 Jul 2015 13:16:30 -0700 Subject: [PATCH 2/3] tcp: do not slow start when cwnd equals ssthresh In the original design slow start is only used to raise cwnd when cwnd is stricly below ssthresh. It makes little sense to slow start when cwnd == ssthresh: especially when hystart has set ssthresh in the initial ramp, or after recovery when cwnd resets to ssthresh. Not doing so will also help reduce the buffer bloat slightly. Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: Eric Dumazet Signed-off-by: Nandita Dukkipati Signed-off-by: David S. Miller --- include/net/tcp.h | 2 +- net/ipv4/tcp_cdg.c | 2 +- net/ipv4/tcp_cong.c | 4 +--- net/ipv4/tcp_hybla.c | 2 +- 4 files changed, 4 insertions(+), 6 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index dba22fc1b065..364426a2be5a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -991,7 +991,7 @@ static inline unsigned int tcp_packets_in_flight(const struct tcp_sock *tp) static inline bool tcp_in_slow_start(const struct tcp_sock *tp) { - return tp->snd_cwnd <= tp->snd_ssthresh; + return tp->snd_cwnd < tp->snd_ssthresh; } static inline bool tcp_in_initial_slowstart(const struct tcp_sock *tp) diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c index 8c6fd3d5e40f..167b6a3e1b98 100644 --- a/net/ipv4/tcp_cdg.c +++ b/net/ipv4/tcp_cdg.c @@ -264,7 +264,7 @@ static void tcp_cdg_cong_avoid(struct sock *sk, u32 ack, u32 acked) u32 prior_snd_cwnd; u32 incr; - if (tp->snd_cwnd < tp->snd_ssthresh && hystart_detect) + if (tcp_in_slow_start(tp) && hystart_detect) tcp_cdg_hystart_update(sk); if (after(ack, ca->rtt_seq) && ca->rtt.v64) { diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 654729a8cb23..a2ed23c595cf 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -365,10 +365,8 @@ int tcp_set_congestion_control(struct sock *sk, const char *name) */ u32 tcp_slow_start(struct tcp_sock *tp, u32 acked) { - u32 cwnd = tp->snd_cwnd + acked; + u32 cwnd = min(tp->snd_cwnd + acked, tp->snd_ssthresh); - if (cwnd > tp->snd_ssthresh) - cwnd = tp->snd_ssthresh + 1; acked -= cwnd - tp->snd_cwnd; tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp); diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c index f963b274f2b0..083831e359df 100644 --- a/net/ipv4/tcp_hybla.c +++ b/net/ipv4/tcp_hybla.c @@ -112,7 +112,7 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked) rho_fractions = ca->rho_3ls - (ca->rho << 3); - if (tp->snd_cwnd < tp->snd_ssthresh) { + if (tcp_in_slow_start(tp)) { /* * slow start * INC = 2^RHO - 1 From b20a3fa30a281b52b2576b509efbe5cd47a5a79b Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Thu, 9 Jul 2015 13:16:31 -0700 Subject: [PATCH 3/3] tcp: update congestion state first before raising cwnd The congestion state and cwnd can be updated in the wrong order. For example, upon receiving a dubious ACK, we incorrectly raise the cwnd first (tcp_may_raise_cwnd()/tcp_cong_avoid()) because the state is still Open, then enter recovery state to reduce cwnd. For another example, if the ACK indicates spurious timeout or retransmits, we first revert the cwnd reduction and congestion state back to Open state. But we don't raise the cwnd even though the ACK does not indicate any congestion. To fix this problem we should first call tcp_fastretrans_alert() to process the dubious ACK and update the congestion state, then call tcp_may_raise_cwnd() that raises cwnd based on the current state. Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: Eric Dumazet Signed-off-by: Nandita Dukkipati Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 7f4a8d5f6eb0..1578fc2a6f39 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3568,10 +3568,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) &sack_state); acked -= tp->packets_out; - /* Advance cwnd if state allows */ - if (tcp_may_raise_cwnd(sk, flag)) - tcp_cong_avoid(sk, ack, acked); - if (tcp_ack_is_dubious(sk, flag)) { is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); tcp_fastretrans_alert(sk, acked, prior_unsacked, @@ -3580,6 +3576,10 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (tp->tlp_high_seq) tcp_process_tlp_ack(sk, ack, flag); + /* Advance cwnd if state allows */ + if (tcp_may_raise_cwnd(sk, flag)) + tcp_cong_avoid(sk, ack, acked); + if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) { struct dst_entry *dst = __sk_dst_get(sk); if (dst)