From 32efcc06d2a15fa87585614d12d6c2308cc2d3f3 Mon Sep 17 00:00:00 2001
From: Abdul Kabbani <akabbani@google.com>
Date: Fri, 24 Jan 2020 16:34:02 -0500
Subject: [PATCH] tcp: export count for rehash attempts

Using IPv6 flow-label to swiftly route around avoid congested or
disconnected network path can greatly improve TCP reliability.

This patch adds SNMP counters and a OPT_STATS counter to track both
host-level and connection-level statistics. Network administrators
can use these counters to evaluate the impact of this new ability better.

Export count for rehash attempts to
1) two SNMP counters: TcpTimeoutRehash (rehash due to timeouts),
   and TcpDuplicateDataRehash (rehash due to receiving duplicate
   packets)
2) Timestamping API SOF_TIMESTAMPING_OPT_STATS.

Signed-off-by: Abdul Kabbani <akabbani@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Kevin(Yudong) Yang <yyd@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h       | 2 ++
 include/uapi/linux/snmp.h | 2 ++
 include/uapi/linux/tcp.h  | 1 +
 net/ipv4/proc.c           | 2 ++
 net/ipv4/tcp.c            | 2 ++
 net/ipv4/tcp_input.c      | 4 +++-
 net/ipv4/tcp_timer.c      | 6 ++++++
 7 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 4e2124607d325c..1cf73e6f85ca83 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -386,6 +386,8 @@ struct tcp_sock {
 #define BPF_SOCK_OPS_TEST_FLAG(TP, ARG) 0
 #endif
 
+	u16 timeout_rehash;	/* Timeout-triggered rehash attempts */
+
 	u32 rcv_ooopack; /* Received out-of-order packets, for tcpinfo */
 
 /* Receiver side RTT estimation */
diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index 7eee233e78d274..7d91f4debc483f 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -285,6 +285,8 @@ enum
 	LINUX_MIB_TCPRCVQDROP,			/* TCPRcvQDrop */
 	LINUX_MIB_TCPWQUEUETOOBIG,		/* TCPWqueueTooBig */
 	LINUX_MIB_TCPFASTOPENPASSIVEALTKEY,	/* TCPFastOpenPassiveAltKey */
+	LINUX_MIB_TCPTIMEOUTREHASH,		/* TCPTimeoutRehash */
+	LINUX_MIB_TCPDUPLICATEDATAREHASH,	/* TCPDuplicateDataRehash */
 	__LINUX_MIB_MAX
 };
 
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index d87184e673ca83..fd9eb8f6bcae37 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -311,6 +311,7 @@ enum {
 	TCP_NLA_DSACK_DUPS,	/* DSACK blocks received */
 	TCP_NLA_REORD_SEEN,	/* reordering events seen */
 	TCP_NLA_SRTT,		/* smoothed RTT in usecs */
+	TCP_NLA_TIMEOUT_REHASH, /* Timeout-triggered rehash attempts */
 };
 
 /* for TCP_MD5SIG socket option */
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index cc90243ccf7678..2580303249e23e 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -289,6 +289,8 @@ static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("TCPRcvQDrop", LINUX_MIB_TCPRCVQDROP),
 	SNMP_MIB_ITEM("TCPWqueueTooBig", LINUX_MIB_TCPWQUEUETOOBIG),
 	SNMP_MIB_ITEM("TCPFastOpenPassiveAltKey", LINUX_MIB_TCPFASTOPENPASSIVEALTKEY),
+	SNMP_MIB_ITEM("TcpTimeoutRehash", LINUX_MIB_TCPTIMEOUTREHASH),
+	SNMP_MIB_ITEM("TcpDuplicateDataRehash", LINUX_MIB_TCPDUPLICATEDATAREHASH),
 	SNMP_MIB_SENTINEL
 };
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 2857c853e2fdc6..484485ae74c26e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3337,6 +3337,7 @@ static size_t tcp_opt_stats_get_size(void)
 		nla_total_size(sizeof(u32)) + /* TCP_NLA_DSACK_DUPS */
 		nla_total_size(sizeof(u32)) + /* TCP_NLA_REORD_SEEN */
 		nla_total_size(sizeof(u32)) + /* TCP_NLA_SRTT */
+		nla_total_size(sizeof(u16)) + /* TCP_NLA_TIMEOUT_REHASH */
 		0;
 }
 
@@ -3391,6 +3392,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk)
 	nla_put_u32(stats, TCP_NLA_DSACK_DUPS, tp->dsack_dups);
 	nla_put_u32(stats, TCP_NLA_REORD_SEEN, tp->reord_seen);
 	nla_put_u32(stats, TCP_NLA_SRTT, tp->srtt_us >> 3);
+	nla_put_u16(stats, TCP_NLA_TIMEOUT_REHASH, tp->timeout_rehash);
 
 	return stats;
 }
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 4915de65d278df..e8b840a4767e14 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4271,8 +4271,10 @@ static void tcp_rcv_spurious_retrans(struct sock *sk, const struct sk_buff *skb)
 	 * The receiver remembers and reflects via DSACKs. Leverage the
 	 * DSACK state and change the txhash to re-route speculatively.
 	 */
-	if (TCP_SKB_CB(skb)->seq == tcp_sk(sk)->duplicate_sack[0].start_seq)
+	if (TCP_SKB_CB(skb)->seq == tcp_sk(sk)->duplicate_sack[0].start_seq) {
 		sk_rethink_txhash(sk);
+		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDUPLICATEDATAREHASH);
+	}
 }
 
 static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 1097b438befe14..c3f26dcd6704ee 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -223,6 +223,9 @@ static int tcp_write_timeout(struct sock *sk)
 			dst_negative_advice(sk);
 		} else {
 			sk_rethink_txhash(sk);
+			tp->timeout_rehash++;
+			__NET_INC_STATS(sock_net(sk),
+					LINUX_MIB_TCPTIMEOUTREHASH);
 		}
 		retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries;
 		expired = icsk->icsk_retransmits >= retry_until;
@@ -234,6 +237,9 @@ static int tcp_write_timeout(struct sock *sk)
 			dst_negative_advice(sk);
 		} else {
 			sk_rethink_txhash(sk);
+			tp->timeout_rehash++;
+			__NET_INC_STATS(sock_net(sk),
+					LINUX_MIB_TCPTIMEOUTREHASH);
 		}
 
 		retry_until = net->ipv4.sysctl_tcp_retries2;
-- 
GitLab