/*
 * F-RTO patch for the Linux 2.4.18 TCP stack.
 *
 * Adds a tcp_frto sysctl (NET_TCP_FRTO, initialised to 0), two per-socket
 * fields (frto_counter, frto_highmark), the tcp_use_frto() helper in
 * include/net/tcp.h and tcp_process_frto() in net/ipv4/tcp_input.c, and
 * hooks them into the loss-entry code, the ACK path and the timeout path
 * in net/ipv4/tcp_timer.c.  Both new socket fields are zeroed in
 * tcp_ipv4.c and tcp_minisocks.c.  The top-level Makefile hunk sets
 * EXTRAVERSION to "-ps".
 *
 * NOTE(review): the line breaks of this copy appear to have been collapsed;
 * the one-line-per-diff-line layout presumably has to be restored before
 * the file can be fed to patch(1) - confirm against the original posting.
 */
--- linux-2.4.18/Makefile Mon Feb 25 21:37:52 2002 +++ linux-2.4.18-frto/Makefile Mon Apr 8 15:41:41 2002 @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 4 SUBLEVEL = 18 -EXTRAVERSION = +EXTRAVERSION =-ps KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION) --- linux-2.4.18/include/net/tcp.h Thu Nov 22 21:47:22 2001 +++ linux-2.4.18-frto/include/net/tcp.h Fri Apr 19 18:31:52 2002 @@ -460,6 +460,7 @@ extern int sysctl_tcp_rmem[3]; extern int sysctl_tcp_app_win; extern int sysctl_tcp_adv_win_scale; +extern int sysctl_tcp_frto; extern atomic_t tcp_memory_allocated; extern atomic_t tcp_sockets_allocated; @@ -1826,5 +1827,17 @@ } #define TCP_CHECK_TIMER(sk) do { } while (0); + +static __inline__ int tcp_use_frto(const struct sock *sk) +{ + const struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + + /* F-RTO must be activated in sysctl and there must be some + * unsent new data, and the advertised window should allow + * sending it. + */ + return (sysctl_tcp_frto && tp->send_head && + before(tp->snd_nxt, tp->snd_una + tp->snd_wnd)); +} #endif /* _TCP_H */ --- linux-2.4.18/include/net/sock.h Fri Dec 21 19:42:04 2001 +++ linux-2.4.18-frto/include/net/sock.h Thu Apr 18 17:58:02 2002 @@ -417,6 +417,9 @@ unsigned int keepalive_intvl; /* time interval between keep alive probes */ int linger2; + int frto_counter; /* State of F-RTO recovery */ + __u32 frto_highmark; /* snd_nxt when RTO occurred */ + unsigned long last_synq_overflow; }; --- linux-2.4.18/include/linux/sysctl.h Mon Nov 26 15:29:17 2001 +++ linux-2.4.18-frto/include/linux/sysctl.h Mon Apr 8 15:41:49 2002 @@ -289,7 +289,8 @@ NET_TCP_ADV_WIN_SCALE=87, NET_IPV4_NONLOCAL_BIND=88, NET_IPV4_ICMP_RATELIMIT=89, - NET_IPV4_ICMP_RATEMASK=90 + NET_IPV4_ICMP_RATEMASK=90, + NET_TCP_FRTO=91 }; enum { diff -u --recursive --new-file --exclude-from=diffexclude -I Id: linux-2.4.18/net/ipv4/sysctl_net_ipv4.c linux-2.4.18-frto/net/ipv4/sysctl_net_ipv4.c --- linux-2.4.18/net/ipv4/sysctl_net_ipv4.c Wed Oct 31 01:08:12 2001 +++ 
linux-2.4.18-frto/net/ipv4/sysctl_net_ipv4.c Mon Apr 8 15:54:00 2002 @@ -219,6 +219,8 @@ &sysctl_icmp_ratelimit, sizeof(int), 0644, NULL, &proc_dointvec}, {NET_IPV4_ICMP_RATEMASK, "icmp_ratemask", &sysctl_icmp_ratemask, sizeof(int), 0644, NULL, &proc_dointvec}, + {NET_TCP_FRTO, "tcp_frto", + &sysctl_tcp_frto, sizeof(int), 0644, NULL, &proc_dointvec}, {0} }; diff -u --recursive --new-file --exclude-from=diffexclude -I Id: linux-2.4.18/net/ipv4/tcp_input.c linux-2.4.18-frto/net/ipv4/tcp_input.c --- linux-2.4.18/net/ipv4/tcp_input.c Mon Feb 25 21:38:14 2002 +++ linux-2.4.18-frto/net/ipv4/tcp_input.c Fri Apr 19 17:59:42 2002 @@ -87,6 +87,8 @@ int sysctl_tcp_rfc1337 = 0; int sysctl_tcp_max_orphans = NR_FILE; +int sysctl_tcp_frto = 0; + #define FLAG_DATA 0x01 /* Incoming frame contained data. */ #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ #define FLAG_DATA_ACKED 0x04 /* This ACK acknowledged new data. */ @@ -980,19 +982,40 @@ struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; struct sk_buff *skb; int cnt = 0; + /* PS: Idea of F-RTO is as follows: do not start retransmitting + * immediately after RTO. Instead send two new segments to see + * whether data was really lost. However, the segment that + * triggered RTO is always retransmitted. + * + * When F-RTO is enabled, enter_loss can be called two times for + * a single RTO: On first pass it does not yet mark packets lost, + * but if acknowledgements indicate so, enter_loss is called second + * time for the sender to start retransmitting. + * postpone_loss is true, if F-RTO is enabled and we are on + * the first pass. + */ + int postpone_loss = (tcp_use_frto(sk) && !tp->frto_counter); /* Reduce ssthresh if it has not yet been made inside this window. 
*/ if (tp->ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq || (tp->ca_state == TCP_CA_Loss && !tp->retransmits)) { - tp->prior_ssthresh = tcp_current_ssthresh(tp); - tp->snd_ssthresh = tcp_recalc_ssthresh(tp); + if (!tp->frto_counter) { + tp->prior_ssthresh = tcp_current_ssthresh(tp); + tp->snd_ssthresh = tcp_recalc_ssthresh(tp); + } + } + if (!tcp_use_frto(sk)) { + /* PS: When F-RTO is enabled, cwnd is adjusted + * in tcp_process_frto */ + tp->snd_cwnd = 1; } - tp->snd_cwnd = 1; tp->snd_cwnd_cnt = 0; tp->snd_cwnd_stamp = tcp_time_stamp; - - tcp_clear_retrans(tp); + + /* PS: clear_retrans only once per RTO, if F-RTO is used */ + if (!tp->frto_counter) + tcp_clear_retrans(tp); /* Push undo marker, if it was plain RTO and nothing * was retransmitted. */ @@ -1003,21 +1026,50 @@ cnt++; if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS) tp->undo_marker = 0; - TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED; - if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || how) { - TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED; - TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; - tp->lost_out++; - } else { - tp->sacked_out++; - tp->fackets_out = cnt; + + if (!tp->frto_counter && (tp->frto_highmark == 0 || after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark))) { + tp->frto_highmark = TCP_SKB_CB(skb)->end_seq; } + + /* PS: This clears retrans flags. When F-RTO is used, it should + * be done only once per RTO + */ + if (!tp->frto_counter) { + TCP_SKB_CB(skb)->sacked &= + (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED; + } + + if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || how) { + TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED; + + /* PS: If F-RTO is enabled, mark those packets lost + * which were outstanding when RTO occurred. This is + * done only on the second pass. 
+ */ + if (!tcp_use_frto(sk) || + (tp->frto_counter && + !after(TCP_SKB_CB(skb)->end_seq, + tp->frto_highmark))) { + TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; + tp->lost_out++; + } + } else { + tp->sacked_out++; + tp->fackets_out = cnt; + } } tcp_sync_left_out(tp); - + tp->reordering = min_t(unsigned int, tp->reordering, sysctl_tcp_reordering); - tp->ca_state = TCP_CA_Loss; - tp->high_seq = tp->snd_nxt; + if (!postpone_loss) { + tp->ca_state = TCP_CA_Loss; + if (tp->frto_counter) { + tp->high_seq = tp->frto_highmark; + } else { + tp->high_seq = tp->snd_nxt; + } + tp->frto_counter = 0; + } TCP_ECN_queue_cwr(tp); } @@ -1528,7 +1580,8 @@ /* E. Check state exit conditions. State can be terminated * when high_seq is ACKed. */ if (tp->ca_state == TCP_CA_Open) { - BUG_TRAP(tp->retrans_out == 0); + if (!sysctl_tcp_frto) + BUG_TRAP(tp->retrans_out == 0); tp->retrans_stamp = 0; } else if (!before(tp->snd_una, tp->high_seq)) { switch (tp->ca_state) { @@ -1894,6 +1947,47 @@ return flag; } +static void tcp_process_frto(struct sock *sk, u32 prior_snd_una) +{ + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + + tcp_sync_left_out(tp); + + if (tp->frto_counter == 1) { + if (tp->snd_una == prior_snd_una) { + /* RTO was caused by a loss */ + tp->snd_cwnd = 1; + tcp_enter_loss(sk, 0); + tp->undo_marker = 0; + return; + } + tp->snd_cwnd = tcp_packets_in_flight(tp) + 2; + } + + if (tp->frto_counter == 2) { + if (tp->snd_una == prior_snd_una) { + /* RTO was caused by a loss. Because two round-trips + * have passed after RTO, cwnd is adjusted to let + * three packets out. */ + tcp_enter_loss(sk, 0); + tp->snd_cwnd = tcp_packets_in_flight(tp) + 3; + tp->undo_marker = 0; + } else { + /* A non-retransmitted segment is acknowledged and + * RTO was very likely spurious. However, we'll be + * careful and reduce the congestion window to half + */ + tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); + } + } + + /* F-RTO affects the two ACKs following RTO. 
On the third ACK the TCP + * behavior is back to normal. + */ + if (tp->frto_counter) tp->frto_counter++; + if (tp->frto_counter > 2) tp->frto_counter = 0; +} + /* This routine deals with incoming acks, but not outgoing ones. */ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) { @@ -1950,6 +2044,9 @@ /* See if we can take anything off of the retransmit queue. */ flag |= tcp_clean_rtx_queue(sk); + + if (tp->frto_counter) + tcp_process_frto(sk, prior_snd_una); if (tcp_ack_is_dubious(tp, flag)) { /* Advanve CWND, if state allows this. */ diff -u --recursive --new-file --exclude-from=diffexclude -I Id: linux-2.4.18/net/ipv4/tcp_ipv4.c linux-2.4.18-frto/net/ipv4/tcp_ipv4.c --- linux-2.4.18/net/ipv4/tcp_ipv4.c Mon Feb 25 21:38:14 2002 +++ linux-2.4.18-frto/net/ipv4/tcp_ipv4.c Fri Apr 12 13:49:40 2002 @@ -1907,6 +1907,9 @@ */ tp->snd_cwnd = 2; + tp->frto_counter = 0; + tp->frto_highmark = 0; + /* See draft-stevens-tcpca-spec-01 for discussion of the * initialization of these values. 
*/ diff -u --recursive --new-file --exclude-from=diffexclude -I Id: linux-2.4.18/net/ipv4/tcp_minisocks.c linux-2.4.18-frto/net/ipv4/tcp_minisocks.c --- linux-2.4.18/net/ipv4/tcp_minisocks.c Mon Oct 1 19:19:57 2001 +++ linux-2.4.18-frto/net/ipv4/tcp_minisocks.c Fri Apr 12 13:50:03 2002 @@ -716,6 +716,9 @@ newtp->snd_cwnd = 2; newtp->snd_cwnd_cnt = 0; + newtp->frto_counter = 0; + newtp->frto_highmark = 0; + newtp->ca_state = TCP_CA_Open; tcp_init_xmit_timers(newsk); skb_queue_head_init(&newtp->out_of_order_queue); diff -u --recursive --new-file --exclude-from=diffexclude -I Id: linux-2.4.18/net/ipv4/tcp_timer.c linux-2.4.18-frto/net/ipv4/tcp_timer.c --- linux-2.4.18/net/ipv4/tcp_timer.c Mon Oct 1 19:19:57 2001 +++ linux-2.4.18-frto/net/ipv4/tcp_timer.c Fri Apr 19 17:57:26 2002 @@ -372,7 +372,15 @@ } } + /* Reset F-RTO status on timeout */ + tp->frto_counter = 0; + tcp_enter_loss(sk, 0); + + if (tcp_use_frto(sk)) { + tp->frto_counter = 1; + tp->ca_state = TCP_CA_Open; + } if (tcp_retransmit_skb(sk, skb_peek(&sk->write_queue)) > 0) { /* Retransmission failed because of local congestion,