diff --git a/include/linux/udp.h b/include/linux/udp.h index 320d49d85484d4ac2d0fdbbe365ad339bf949c0b..0a9c54e76305abc4241fb2fc769e58c48ecef61e 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -49,7 +49,13 @@ struct udp_sock { unsigned int corkflag; /* Cork is required */ __u8 encap_type; /* Is this an Encapsulation socket? */ unsigned char no_check6_tx:1,/* Send zero UDP6 checksums on TX? */ - no_check6_rx:1;/* Allow zero UDP6 checksums on RX? */ + no_check6_rx:1,/* Allow zero UDP6 checksums on RX? */ + encap_enabled:1, /* This socket enabled encap + * processing; UDP tunnels and + * different encapsulation layer set + * this + */ + gro_enabled:1; /* Can accept GRO packets */ /* * Following member retains the information to create a UDP header * when the socket is uncorked. @@ -115,6 +121,23 @@ static inline bool udp_get_no_check6_rx(struct sock *sk) return udp_sk(sk)->no_check6_rx; } +static inline void udp_cmsg_recv(struct msghdr *msg, struct sock *sk, + struct sk_buff *skb) +{ + int gso_size; + + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) { + gso_size = skb_shinfo(skb)->gso_size; + put_cmsg(msg, SOL_UDP, UDP_GRO, sizeof(gso_size), &gso_size); + } +} + +static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb) +{ + return !udp_sk(sk)->gro_enabled && skb_is_gso(skb) && + skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4; +} + #define udp_portaddr_for_each_entry(__sk, list) \ hlist_for_each_entry(__sk, list, __sk_common.skc_portaddr_node) diff --git a/include/net/ip.h b/include/net/ip.h index 462182f78236caed327c12e5080aefd5f73fe966..8866bfce61214b6af1c3c59b4885c8c39e8e0e90 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -155,6 +155,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, void ip_list_rcv(struct list_head *head, struct packet_type *pt, struct net_device *orig_dev); int ip_local_deliver(struct sk_buff *skb); +void ip_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int proto); int ip_mr_input(struct sk_buff *skb); int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb); int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb); diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 829650540780c9e091dc7c6ea01201d68548ca23..daf80863d3a5037c8defe820908a4fbdfb4817e3 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -975,6 +975,8 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb); int ip6_forward(struct sk_buff *skb); int ip6_input(struct sk_buff *skb); int ip6_mc_input(struct sk_buff *skb); +void ip6_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int nexthdr, + bool have_final); int __ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb); int ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb); diff --git a/include/net/udp.h b/include/net/udp.h index a496e441645e14baacd92826315b09509be0a3b1..eccca2325ee6def47cd4c8f4203ec72627b89a34 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -417,17 +417,24 @@ static inline int copy_linear_skb(struct sk_buff *skb, int len, int off, } while(0) #if IS_ENABLED(CONFIG_IPV6) -#define __UDPX_INC_STATS(sk, field) \ -do { \ - if ((sk)->sk_family == AF_INET) \ - __UDP_INC_STATS(sock_net(sk), field, 0); \ - else \ - __UDP6_INC_STATS(sock_net(sk), field, 0); \ -} while (0) +#define __UDPX_MIB(sk, ipv4) \ +({ \ + ipv4 ? (IS_UDPLITE(sk) ? sock_net(sk)->mib.udplite_statistics : \ + sock_net(sk)->mib.udp_statistics) : \ + (IS_UDPLITE(sk) ? 
sock_net(sk)->mib.udplite_stats_in6 : \ + sock_net(sk)->mib.udp_stats_in6); \ +}) #else -#define __UDPX_INC_STATS(sk, field) __UDP_INC_STATS(sock_net(sk), field, 0) +#define __UDPX_MIB(sk, ipv4) \ +({ \ + IS_UDPLITE(sk) ? sock_net(sk)->mib.udplite_statistics : \ + sock_net(sk)->mib.udp_statistics; \ +}) #endif +#define __UDPX_INC_STATS(sk, field) \ + __SNMP_INC_STATS(__UDPX_MIB(sk, (sk)->sk_family == AF_INET), field) + #ifdef CONFIG_PROC_FS struct udp_seq_afinfo { sa_family_t family; @@ -461,4 +468,26 @@ DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key); void udpv6_encap_enable(void); #endif +static inline struct sk_buff *udp_rcv_segment(struct sock *sk, + struct sk_buff *skb, bool ipv4) +{ + struct sk_buff *segs; + + /* the GSO CB lays after the UDP one, no need to save and restore any + * CB fragment + */ + segs = __skb_gso_segment(skb, NETIF_F_SG, false); + if (unlikely(IS_ERR_OR_NULL(segs))) { + int segs_nr = skb_shinfo(skb)->gso_segs; + + atomic_add(segs_nr, &sk->sk_drops); + SNMP_ADD_STATS(__UDPX_MIB(sk, ipv4), UDP_MIB_INERRORS, segs_nr); + kfree_skb(skb); + return NULL; + } + + consume_skb(skb); + return segs; +} + #endif /* _UDP_H */ diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index fe680ab6b15a18aba378d161d57461176b75d8cb..3fbe56430e3b381749fe97f006e2b66332e18447 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -165,6 +165,12 @@ static inline int udp_tunnel_handle_offloads(struct sk_buff *skb, bool udp_csum) static inline void udp_tunnel_encap_enable(struct socket *sock) { + struct udp_sock *up = udp_sk(sock->sk); + + if (up->encap_enabled) + return; + + up->encap_enabled = 1; #if IS_ENABLED(CONFIG_IPV6) if (sock->sk->sk_family == PF_INET6) ipv6_stub->udpv6_encap_enable(); diff --git a/include/uapi/linux/udp.h b/include/uapi/linux/udp.h index 09502de447f57203db8952d9c197342317da59e9..30baccb6c9c4e25980ffe01dcf425be4fe6a8949 100644 --- a/include/uapi/linux/udp.h +++ b/include/uapi/linux/udp.h @@ -33,6 +33,7 @@ struct udphdr { #define UDP_NO_CHECK6_TX 101 /* Disable sending checksum for UDP6X */ #define UDP_NO_CHECK6_RX 102 /* Disable accpeting checksum for UDP6 */ #define UDP_SEGMENT 103 /* Set GSO segmentation size */ +#define UDP_GRO 104 /* This socket can receive UDP GRO packets */ /* UDP encapsulation types */ #define UDP_ENCAP_ESPINUDP_NON_IKE 1 /* draft-ietf-ipsec-nat-t-ike-00/01 */ diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 35a786c0aaa064888540774863cd08e1f21f12a8..72250b4e466d71556ce45e6e1d7db183f6b36a86 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -188,51 +188,50 @@ bool ip_call_ra_chain(struct sk_buff *skb) return false; } -static int ip_local_deliver_finish(struct net *net, struct sock *sk, struct sk_buff *skb) +void ip_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int protocol) { - __skb_pull(skb, skb_network_header_len(skb)); - - rcu_read_lock(); - { - int protocol = ip_hdr(skb)->protocol; - const struct net_protocol *ipprot; - int raw; + const struct net_protocol *ipprot; + int raw, ret; - resubmit: - raw = raw_local_deliver(skb, protocol); +resubmit: + raw = raw_local_deliver(skb, protocol); - ipprot = rcu_dereference(inet_protos[protocol]); - if (ipprot) { - int ret; - - if (!ipprot->no_policy) { - if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { - kfree_skb(skb); - goto out; - } - nf_reset(skb); + ipprot = rcu_dereference(inet_protos[protocol]); + if (ipprot) { + if (!ipprot->no_policy) { + if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { + kfree_skb(skb); + return; 
} - ret = ipprot->handler(skb); - if (ret < 0) { - protocol = -ret; - goto resubmit; + nf_reset(skb); + } + ret = ipprot->handler(skb); + if (ret < 0) { + protocol = -ret; + goto resubmit; + } + __IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS); + } else { + if (!raw) { + if (xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { + __IP_INC_STATS(net, IPSTATS_MIB_INUNKNOWNPROTOS); + icmp_send(skb, ICMP_DEST_UNREACH, + ICMP_PROT_UNREACH, 0); } - __IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS); + kfree_skb(skb); } else { - if (!raw) { - if (xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { - __IP_INC_STATS(net, IPSTATS_MIB_INUNKNOWNPROTOS); - icmp_send(skb, ICMP_DEST_UNREACH, - ICMP_PROT_UNREACH, 0); - } - kfree_skb(skb); - } else { - __IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS); - consume_skb(skb); - } + __IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS); + consume_skb(skb); } } - out: +} + +static int ip_local_deliver_finish(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + __skb_pull(skb, skb_network_header_len(skb)); + + rcu_read_lock(); + ip_protocol_deliver_rcu(net, skb, ip_hdr(skb)->protocol); rcu_read_unlock(); return 0; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index cf73c9194bb6947632c4cb220ca472d821dc89ed..3488650b90ac3a97467fef6c64a3d42f73887568 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -115,6 +115,7 @@ #include "udp_impl.h" #include <net/sock_reuseport.h> #include <net/addrconf.h> +#include <net/udp_tunnel.h> struct udp_table udp_table __read_mostly; EXPORT_SYMBOL(udp_table); @@ -1710,6 +1711,10 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); *addr_len = sizeof(*sin); } + + if (udp_sk(sk)->gro_enabled) + udp_cmsg_recv(msg, sk, skb); + if (inet->cmsg_flags) ip_cmsg_recv_offset(msg, sk, skb, sizeof(struct udphdr), off); @@ -1901,7 +1906,7 @@ EXPORT_SYMBOL(udp_encap_enable); * Note that in the success and error cases, the skb is assumed to * have either been requeued or freed. */ -static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) { struct udp_sock *up = udp_sk(sk); int is_udplite = IS_UDPLITE(sk); @@ -2004,6 +2009,27 @@ static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) return -1; } +static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +{ + struct sk_buff *next, *segs; + int ret; + + if (likely(!udp_unexpected_gso(sk, skb))) + return udp_queue_rcv_one_skb(sk, skb); + + BUILD_BUG_ON(sizeof(struct udp_skb_cb) > SKB_SGO_CB_OFFSET); + __skb_push(skb, -skb_mac_offset(skb)); + segs = udp_rcv_segment(sk, skb, true); + for (skb = segs; skb; skb = next) { + next = skb->next; + __skb_pull(skb, skb_transport_offset(skb)); + ret = udp_queue_rcv_one_skb(sk, skb); + if (ret > 0) + ip_protocol_deliver_rcu(dev_net(skb->dev), skb, -ret); + } + return 0; +} + /* For TCP sockets, sk_rx_dst is protected by socket lock * For UDP, we use xchg() to guard against concurrent changes. 
*/ @@ -2395,11 +2421,15 @@ void udp_destroy_sock(struct sock *sk) bool slow = lock_sock_fast(sk); udp_flush_pending_frames(sk); unlock_sock_fast(sk, slow); - if (static_branch_unlikely(&udp_encap_needed_key) && up->encap_type) { - void (*encap_destroy)(struct sock *sk); - encap_destroy = READ_ONCE(up->encap_destroy); - if (encap_destroy) - encap_destroy(sk); + if (static_branch_unlikely(&udp_encap_needed_key)) { + if (up->encap_type) { + void (*encap_destroy)(struct sock *sk); + encap_destroy = READ_ONCE(up->encap_destroy); + if (encap_destroy) + encap_destroy(sk); + } + if (up->encap_enabled) + static_branch_disable(&udp_encap_needed_key); } } @@ -2444,7 +2474,9 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, /* FALLTHROUGH */ case UDP_ENCAP_L2TPINUDP: up->encap_type = val; - udp_encap_enable(); + lock_sock(sk); + udp_tunnel_encap_enable(sk->sk_socket); + release_sock(sk); break; default: err = -ENOPROTOOPT; @@ -2466,6 +2498,14 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, up->gso_size = val; break; + case UDP_GRO: + lock_sock(sk); + if (valbool) + udp_tunnel_encap_enable(sk->sk_socket); + up->gro_enabled = valbool; + release_sock(sk); + break; + /* * UDP-Lite's partial checksum coverage (RFC 3828). */ diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 802f2bc00d69751a40856a6f4f82b32bd244617d..0646d61f4fa832d289ee997f839cfc2f9a29a23f 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -343,6 +343,54 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, return segs; } +#define UDP_GRO_CNT_MAX 64 +static struct sk_buff *udp_gro_receive_segment(struct list_head *head, + struct sk_buff *skb) +{ + struct udphdr *uh = udp_hdr(skb); + struct sk_buff *pp = NULL; + struct udphdr *uh2; + struct sk_buff *p; + + /* requires non zero csum, for symmetry with GSO */ + if (!uh->check) { + NAPI_GRO_CB(skb)->flush = 1; + return NULL; + } + + /* pull encapsulating udp header */ + skb_gro_pull(skb, sizeof(struct udphdr)); + skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr)); + + list_for_each_entry(p, head, list) { + if (!NAPI_GRO_CB(p)->same_flow) + continue; + + uh2 = udp_hdr(p); + + /* Match ports only, as csum is always non zero */ + if ((*(u32 *)&uh->source != *(u32 *)&uh2->source)) { + NAPI_GRO_CB(p)->same_flow = 0; + continue; + } + + /* Terminate the flow on len mismatch or if it grows "too much". 
+ * Under small packet flood GRO count could otherwise grow a lot + * leading to excessive truesize values. + */ + if (!skb_gro_receive(p, skb) && + NAPI_GRO_CB(p)->count >= UDP_GRO_CNT_MAX) + pp = p; + else if (uh->len != uh2->len) + pp = p; + + return pp; + } + + /* mismatch, but we never need to flush */ + return NULL; +} + struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, struct udphdr *uh, udp_lookup_t lookup) { @@ -353,23 +401,27 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, int flush = 1; struct sock *sk; + rcu_read_lock(); + sk = (*lookup)(skb, uh->source, uh->dest); + if (!sk) + goto out_unlock; + + if (udp_sk(sk)->gro_enabled) { + pp = call_gro_receive(udp_gro_receive_segment, head, skb); + rcu_read_unlock(); + return pp; + } + if (NAPI_GRO_CB(skb)->encap_mark || (skb->ip_summed != CHECKSUM_PARTIAL && NAPI_GRO_CB(skb)->csum_cnt == 0 && - !NAPI_GRO_CB(skb)->csum_valid)) - goto out; + !NAPI_GRO_CB(skb)->csum_valid) || + !udp_sk(sk)->gro_receive) + goto out_unlock; /* mark that this skb passed once through the tunnel gro layer */ NAPI_GRO_CB(skb)->encap_mark = 1; - rcu_read_lock(); - sk = (*lookup)(skb, uh->source, uh->dest); - - if (sk && udp_sk(sk)->gro_receive) - goto unflush; - goto out_unlock; - -unflush: flush = 0; list_for_each_entry(p, head, list) { @@ -394,7 +446,6 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, out_unlock: rcu_read_unlock(); -out: skb_gro_flush_final(skb, pp, flush); return pp; } @@ -427,6 +478,19 @@ static struct sk_buff *udp4_gro_receive(struct list_head *head, return NULL; } +static int udp_gro_complete_segment(struct sk_buff *skb) +{ + struct udphdr *uh = udp_hdr(skb); + + skb->csum_start = (unsigned char *)uh - skb->head; + skb->csum_offset = offsetof(struct udphdr, check); + skb->ip_summed = CHECKSUM_PARTIAL; + + skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; + skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_L4; + return 0; +} + int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup) { @@ -437,16 +501,21 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff, uh->len = newlen; - /* Set encapsulation before calling into inner gro_complete() functions - * to make them set up the inner offsets. - */ - skb->encapsulation = 1; - rcu_read_lock(); sk = (*lookup)(skb, uh->source, uh->dest); - if (sk && udp_sk(sk)->gro_complete) + if (sk && udp_sk(sk)->gro_enabled) { + err = udp_gro_complete_segment(skb); + } else if (sk && udp_sk(sk)->gro_complete) { + skb_shinfo(skb)->gso_type = uh->check ? SKB_GSO_UDP_TUNNEL_CSUM + : SKB_GSO_UDP_TUNNEL; + + /* Set encapsulation before calling into inner gro_complete() + * functions to make them set up the inner offsets. 
+ */ + skb->encapsulation = 1; err = udp_sk(sk)->gro_complete(sk, skb, nhoff + sizeof(struct udphdr)); + } rcu_read_unlock(); if (skb->remcsum_offload) @@ -461,13 +530,9 @@ static int udp4_gro_complete(struct sk_buff *skb, int nhoff) const struct iphdr *iph = ip_hdr(skb); struct udphdr *uh = (struct udphdr *)(skb->data + nhoff); - if (uh->check) { - skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM; + if (uh->check) uh->check = ~udp_v4_check(skb->len - nhoff, iph->saddr, iph->daddr, 0); - } else { - skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL; - } return udp_gro_complete(skb, nhoff, udp4_lib_lookup_skb); } diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index df58e1100226e26e79726250b30e5e8fa86e3f1e..3c06cc9e9b796e4e549e30404494f4588a6a8f46 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -319,28 +319,26 @@ void ipv6_list_rcv(struct list_head *head, struct packet_type *pt, /* * Deliver the packet to the host */ - - -static int ip6_input_finish(struct net *net, struct sock *sk, struct sk_buff *skb) +void ip6_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int nexthdr, + bool have_final) { const struct inet6_protocol *ipprot; struct inet6_dev *idev; unsigned int nhoff; - int nexthdr; bool raw; - bool have_final = false; /* * Parse extension headers */ - rcu_read_lock(); resubmit: idev = ip6_dst_idev(skb_dst(skb)); - if (!pskb_pull(skb, skb_transport_offset(skb))) - goto discard; nhoff = IP6CB(skb)->nhoff; - nexthdr = skb_network_header(skb)[nhoff]; + if (!have_final) { + if (!pskb_pull(skb, skb_transport_offset(skb))) + goto discard; + nexthdr = skb_network_header(skb)[nhoff]; + } resubmit_final: raw = raw6_local_deliver(skb, nexthdr); @@ -423,13 +421,19 @@ static int ip6_input_finish(struct net *net, struct sock *sk, struct sk_buff *sk consume_skb(skb); } } - rcu_read_unlock(); - return 0; + return; discard: __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); - rcu_read_unlock(); kfree_skb(skb); +} + +static int ip6_input_finish(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + rcu_read_lock(); + ip6_protocol_deliver_rcu(net, skb, 0, false); + rcu_read_unlock(); + return 0; } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index a25571c12a8a2c5f745ce6712de26fcb1906a12c..c55698d19d6847f0dab33d3b222a8e02fc81df95 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -417,6 +417,9 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, *addr_len = sizeof(*sin6); } + if (udp_sk(sk)->gro_enabled) + udp_cmsg_recv(msg, sk, skb); + if (np->rxopt.all) ip6_datagram_recv_common_ctl(sk, msg, skb); @@ -551,7 +554,7 @@ void udpv6_encap_enable(void) } EXPORT_SYMBOL(udpv6_encap_enable); -static int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) { struct udp_sock *up = udp_sk(sk); int is_udplite = IS_UDPLITE(sk); @@ -634,6 +637,28 @@ static int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) return -1; } +static int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +{ + struct sk_buff *next, *segs; + int ret; + + if (likely(!udp_unexpected_gso(sk, skb))) + return udpv6_queue_rcv_one_skb(sk, skb); + + __skb_push(skb, -skb_mac_offset(skb)); + segs = udp_rcv_segment(sk, skb, false); + for (skb = segs; skb; skb = next) { + next = skb->next; + __skb_pull(skb, skb_transport_offset(skb)); + + ret = udpv6_queue_rcv_one_skb(sk, skb); + if (ret > 0) + ip6_protocol_deliver_rcu(dev_net(skb->dev), skb, ret, + true); + } + return 0; +} + static bool 
__udp_v6_is_mcast_sock(struct net *net, struct sock *sk, __be16 loc_port, const struct in6_addr *loc_addr, __be16 rmt_port, const struct in6_addr *rmt_addr, @@ -1456,11 +1481,15 @@ void udpv6_destroy_sock(struct sock *sk) udp_v6_flush_pending_frames(sk); release_sock(sk); - if (static_branch_unlikely(&udpv6_encap_needed_key) && up->encap_type) { - void (*encap_destroy)(struct sock *sk); - encap_destroy = READ_ONCE(up->encap_destroy); - if (encap_destroy) - encap_destroy(sk); + if (static_branch_unlikely(&udpv6_encap_needed_key)) { + if (up->encap_type) { + void (*encap_destroy)(struct sock *sk); + encap_destroy = READ_ONCE(up->encap_destroy); + if (encap_destroy) + encap_destroy(sk); + } + if (up->encap_enabled) + static_branch_disable(&udpv6_encap_needed_key); } inet6_destroy_sock(sk); diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 1b8e161ac527dd1c2f594680ef532cd7cf453378..828b2457f97b289edae92bfa45e99d0976c7127c 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -147,13 +147,9 @@ static int udp6_gro_complete(struct sk_buff *skb, int nhoff) const struct ipv6hdr *ipv6h = ipv6_hdr(skb); struct udphdr *uh = (struct udphdr *)(skb->data + nhoff); - if (uh->check) { - skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM; + if (uh->check) uh->check = ~udp_v6_check(skb->len - nhoff, &ipv6h->saddr, &ipv6h->daddr, 0); - } else { - skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL; - } return udp_gro_complete(skb, nhoff, udp6_lib_lookup_skb); } diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index e39dfb4e7970f73550bd5c8e3f50e71850519de6..4a54236475ae290206e03d9bbc33d28aa6648758 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -37,7 +37,8 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \ get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \ test_skb_cgroup_id_kern.o bpf_flow.o netcnt_prog.o \ - test_sk_lookup_kern.o test_xdp_vlan.o test_queue_map.o test_stack_map.o + test_sk_lookup_kern.o test_xdp_vlan.o test_queue_map.o test_stack_map.o \ + xdp_dummy.o # Order correspond to 'make run_tests' order TEST_PROGS := test_kmod.sh \ diff --git a/tools/testing/selftests/bpf/xdp_dummy.c b/tools/testing/selftests/bpf/xdp_dummy.c new file mode 100644 index 0000000000000000000000000000000000000000..43b0ef1001ede9c415eb43854956ade4987fa317 --- /dev/null +++ b/tools/testing/selftests/bpf/xdp_dummy.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define KBUILD_MODNAME "xdp_dummy" +#include <linux/bpf.h> +#include "bpf_helpers.h" + +SEC("xdp_dummy") +int xdp_dummy_prog(struct xdp_md *ctx) +{ + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 256d82d5fa8751aa400a1eef4f22e5c60b0f4dea..eec359895feb73b36ed1782dd86f94f6a951b54d 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -7,6 +7,7 @@ CFLAGS += -I../../../../usr/include/ TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh ip_defrag.sh TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh +TEST_PROGS += udpgro_bench.sh udpgro.sh TEST_PROGS_EXTENDED := in_netns.sh TEST_GEN_FILES = socket TEST_GEN_FILES += psock_fanout psock_tpacket 
msg_zerocopy diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh new file mode 100755 index 0000000000000000000000000000000000000000..e94ef8067173c6615aaf74a2ac2c370f3c5b2e07 --- /dev/null +++ b/tools/testing/selftests/net/udpgro.sh @@ -0,0 +1,148 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Run a series of udpgro functional tests. + +readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" + +cleanup() { + local -r jobs="$(jobs -p)" + local -r ns="$(ip netns list|grep $PEER_NS)" + + [ -n "${jobs}" ] && kill -1 ${jobs} 2>/dev/null + [ -n "$ns" ] && ip netns del $ns 2>/dev/null +} +trap cleanup EXIT + +cfg_veth() { + ip netns add "${PEER_NS}" + ip -netns "${PEER_NS}" link set lo up + ip link add type veth + ip link set dev veth0 up + ip addr add dev veth0 192.168.1.2/24 + ip addr add dev veth0 2001:db8::2/64 nodad + + ip link set dev veth1 netns "${PEER_NS}" + ip -netns "${PEER_NS}" addr add dev veth1 192.168.1.1/24 + ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad + ip -netns "${PEER_NS}" link set dev veth1 up + ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp_dummy +} + +run_one() { + # use 'rx' as separator between sender args and receiver args + local -r all="$@" + local -r tx_args=${all%rx*} + local -r rx_args=${all#*rx} + + cfg_veth + + ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} && \ + echo "ok" || \ + echo "failed" & + + # Hack: let bg programs complete the startup + sleep 0.1 + ./udpgso_bench_tx ${tx_args} + wait $(jobs -p) +} + +run_test() { + local -r args=$@ + + printf " %-40s" "$1" + ./in_netns.sh $0 __subprocess $2 rx -G -r $3 +} + +run_one_nat() { + # use 'rx' as separator between sender args and receiver args + local addr1 addr2 pid family="" ipt_cmd=ip6tables + local -r all="$@" + local -r tx_args=${all%rx*} + local -r rx_args=${all#*rx} + + if [[ ${tx_args} = *-4* ]]; then + ipt_cmd=iptables + family=-4 + addr1=192.168.1.1 + addr2=192.168.1.3/24 + else + addr1=2001:db8::1 + addr2="2001:db8::3/64 nodad" + fi + + cfg_veth + ip -netns "${PEER_NS}" addr add dev veth1 ${addr2} + + # fool the GRO engine changing the destination address ... + ip netns exec "${PEER_NS}" $ipt_cmd -t nat -I PREROUTING -d ${addr1} -j DNAT --to-destination ${addr2%/*} + + # ... so that GRO will match the UDP_GRO enabled socket, but packets + # will land on the 'plain' one + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -G ${family} -b ${addr1} -n 0 & + pid=$! 
+ ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${family} -b ${addr2%/*} ${rx_args} && \ + echo "ok" || \ + echo "failed"& + + sleep 0.1 + ./udpgso_bench_tx ${tx_args} + kill -INT $pid + wait $(jobs -p) +} + +run_nat_test() { + local -r args=$@ + + printf " %-40s" "$1" + ./in_netns.sh $0 __subprocess_nat $2 rx -r $3 +} + +run_all() { + local -r core_args="-l 4" + local -r ipv4_args="${core_args} -4 -D 192.168.1.1" + local -r ipv6_args="${core_args} -6 -D 2001:db8::1" + + echo "ipv4" + run_test "no GRO" "${ipv4_args} -M 10 -s 1400" "-4 -n 10 -l 1400" + + # explicitly check we are not receiving UDP_SEGMENT cmsg (-S -1) + # when GRO does not take place + run_test "no GRO chk cmsg" "${ipv4_args} -M 10 -s 1400" "-4 -n 10 -l 1400 -S -1" + + # the GSO packets are aggregated because: + # * veth schedule napi after each xmit + # * segmentation happens in BH context, veth napi poll is delayed after + # the transmission of the last segment + run_test "GRO" "${ipv4_args} -M 1 -s 14720 -S 0 " "-4 -n 1 -l 14720" + run_test "GRO chk cmsg" "${ipv4_args} -M 1 -s 14720 -S 0 " "-4 -n 1 -l 14720 -S 1472" + run_test "GRO with custom segment size" "${ipv4_args} -M 1 -s 14720 -S 500 " "-4 -n 1 -l 14720" + run_test "GRO with custom segment size cmsg" "${ipv4_args} -M 1 -s 14720 -S 500 " "-4 -n 1 -l 14720 -S 500" + + run_nat_test "bad GRO lookup" "${ipv4_args} -M 1 -s 14720 -S 0" "-n 10 -l 1472" + + echo "ipv6" + run_test "no GRO" "${ipv6_args} -M 10 -s 1400" "-n 10 -l 1400" + run_test "no GRO chk cmsg" "${ipv6_args} -M 10 -s 1400" "-n 10 -l 1400 -S -1" + run_test "GRO" "${ipv6_args} -M 1 -s 14520 -S 0" "-n 1 -l 14520" + run_test "GRO chk cmsg" "${ipv6_args} -M 1 -s 14520 -S 0" "-n 1 -l 14520 -S 1452" + run_test "GRO with custom segment size" "${ipv6_args} -M 1 -s 14520 -S 500" "-n 1 -l 14520" + run_test "GRO with custom segment size cmsg" "${ipv6_args} -M 1 -s 14520 -S 500" "-n 1 -l 14520 -S 500" + + run_nat_test "bad GRO lookup" "${ipv6_args} -M 1 -s 14520 -S 0" "-n 10 -l 1452" +} + +if [ ! -f ../bpf/xdp_dummy.o ]; then + echo "Missing xdp_dummy helper. 
Build bpf selftest first" exit -1 fi if [[ $# -eq 0 ]]; then run_all elif [[ $1 == "__subprocess" ]]; then shift run_one $@ elif [[ $1 == "__subprocess_nat" ]]; then shift run_one_nat $@ fi diff --git a/tools/testing/selftests/net/udpgro_bench.sh b/tools/testing/selftests/net/udpgro_bench.sh new file mode 100755 index 0000000000000000000000000000000000000000..820bc50f6b6871fdf99a46774e2dbb6ac1e2c1f0 --- /dev/null +++ b/tools/testing/selftests/net/udpgro_bench.sh @@ -0,0 +1,95 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Run a series of udpgro benchmarks + +readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" + +cleanup() { + local -r jobs="$(jobs -p)" + local -r ns="$(ip netns list|grep $PEER_NS)" + + [ -n "${jobs}" ] && kill -INT ${jobs} 2>/dev/null + [ -n "$ns" ] && ip netns del $ns 2>/dev/null +} +trap cleanup EXIT + +run_one() { + # use 'rx' as separator between sender args and receiver args + local -r all="$@" + local -r tx_args=${all%rx*} + local rx_args=${all#*rx} + + [[ "${tx_args}" == *"-4"* ]] && rx_args="${rx_args} -4" + + ip netns add "${PEER_NS}" + ip -netns "${PEER_NS}" link set lo up + ip link add type veth + ip link set dev veth0 up + ip addr add dev veth0 192.168.1.2/24 + ip addr add dev veth0 2001:db8::2/64 nodad + + ip link set dev veth1 netns "${PEER_NS}" + ip -netns "${PEER_NS}" addr add dev veth1 192.168.1.1/24 + ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad + ip -netns "${PEER_NS}" link set dev veth1 up + + ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp_dummy + ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r & + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -t ${rx_args} -r & + + # Hack: let bg programs complete the startup + sleep 0.1 + ./udpgso_bench_tx ${tx_args} +} + +run_in_netns() { + local -r args=$@ + + ./in_netns.sh $0 __subprocess ${args} +} + +run_udp() { + local -r args=$@ + + echo "udp gso - over veth touching data" + run_in_netns ${args} -S 0 rx + + echo "udp gso and gro - over veth touching data" + run_in_netns ${args} -S 0 rx -G +} + +run_tcp() { + local -r args=$@ + + echo "tcp - over veth touching data" + run_in_netns ${args} -t rx +} + +run_all() { + local -r core_args="-l 4" + local -r ipv4_args="${core_args} -4 -D 192.168.1.1" + local -r ipv6_args="${core_args} -6 -D 2001:db8::1" + + echo "ipv4" + run_tcp "${ipv4_args}" + run_udp "${ipv4_args}" + + echo "ipv6" + run_tcp "${ipv6_args}" + run_udp "${ipv6_args}" +} + +if [ ! -f ../bpf/xdp_dummy.o ]; then + echo "Missing xdp_dummy helper. 
Build bpf selftest first" + exit -1 +fi + +if [[ $# -eq 0 ]]; then + run_all +elif [[ $1 == "__subprocess" ]]; then + shift + run_one $@ +else + run_in_netns $@ +fi diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh index 99e537ab5ad9a3418b0350d7e193e71205608b04..0f0628613f818d8bad788683e5ab5e4fb0b7277a 100755 --- a/tools/testing/selftests/net/udpgso_bench.sh +++ b/tools/testing/selftests/net/udpgso_bench.sh @@ -34,7 +34,7 @@ run_udp() { run_in_netns ${args} echo "udp gso" - run_in_netns ${args} -S + run_in_netns ${args} -S 0 } run_tcp() { diff --git a/tools/testing/selftests/net/udpgso_bench_rx.c b/tools/testing/selftests/net/udpgso_bench_rx.c index 727cf67a3f7586921df5ad8666ad86d9b40cc80f..0c960f673324072430a5258da15b3eefa03b1d46 100644 --- a/tools/testing/selftests/net/udpgso_bench_rx.c +++ b/tools/testing/selftests/net/udpgso_bench_rx.c @@ -31,9 +31,21 @@ #include <sys/wait.h> #include <unistd.h> +#ifndef UDP_GRO +#define UDP_GRO 104 +#endif + static int cfg_port = 8000; static bool cfg_tcp; static bool cfg_verify; +static bool cfg_read_all; +static bool cfg_gro_segment; +static int cfg_family = PF_INET6; +static int cfg_alen = sizeof(struct sockaddr_in6); +static int cfg_expected_pkt_nr; +static int cfg_expected_pkt_len; +static int cfg_expected_gso_size; +static struct sockaddr_storage cfg_bind_addr; static bool interrupted; static unsigned long packets, bytes; @@ -44,6 +56,29 @@ static void sigint_handler(int signum) interrupted = true; } +static void setup_sockaddr(int domain, const char *str_addr, void *sockaddr) +{ + struct sockaddr_in6 *addr6 = (void *) sockaddr; + struct sockaddr_in *addr4 = (void *) sockaddr; + + switch (domain) { + case PF_INET: + addr4->sin_family = AF_INET; + addr4->sin_port = htons(cfg_port); + if (inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1) + error(1, 0, "ipv4 parse error: %s", str_addr); + break; + case PF_INET6: + addr6->sin6_family = AF_INET6; + addr6->sin6_port = htons(cfg_port); + if (inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1) + error(1, 0, "ipv6 parse error: %s", str_addr); + break; + default: + error(1, 0, "illegal domain"); + } +} + static unsigned long gettimeofday_ms(void) { struct timeval tv; @@ -63,6 +98,8 @@ static void do_poll(int fd) do { ret = poll(&pfd, 1, 10); + if (interrupted) + break; if (ret == -1) error(1, errno, "poll"); if (ret == 0) @@ -70,15 +107,14 @@ static void do_poll(int fd) if (pfd.revents != POLLIN) error(1, errno, "poll: 0x%x expected 0x%x\n", pfd.revents, POLLIN); - } while (!ret && !interrupted); + } while (!ret); } static int do_socket(bool do_tcp) { - struct sockaddr_in6 addr = {0}; int fd, val; - fd = socket(PF_INET6, cfg_tcp ? SOCK_STREAM : SOCK_DGRAM, 0); + fd = socket(cfg_family, cfg_tcp ? 
SOCK_STREAM : SOCK_DGRAM, 0); if (fd == -1) error(1, errno, "socket"); @@ -89,10 +125,7 @@ static int do_socket(bool do_tcp) if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(val))) error(1, errno, "setsockopt reuseport"); - addr.sin6_family = PF_INET6; - addr.sin6_port = htons(cfg_port); - addr.sin6_addr = in6addr_any; - if (bind(fd, (void *) &addr, sizeof(addr))) + if (bind(fd, (void *)&cfg_bind_addr, cfg_alen)) error(1, errno, "bind"); if (do_tcp) { @@ -102,6 +135,8 @@ static int do_socket(bool do_tcp) error(1, errno, "listen"); do_poll(accept_fd); + if (interrupted) + exit(0); fd = accept(accept_fd, NULL, NULL); if (fd == -1) @@ -164,51 +199,123 @@ static void do_verify_udp(const char *data, int len) } } +static int recv_msg(int fd, char *buf, int len, int *gso_size) +{ + char control[CMSG_SPACE(sizeof(uint16_t))] = {0}; + struct msghdr msg = {0}; + struct iovec iov = {0}; + struct cmsghdr *cmsg; + uint16_t *gsosizeptr; + int ret; + + iov.iov_base = buf; + iov.iov_len = len; + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + *gso_size = -1; + ret = recvmsg(fd, &msg, MSG_TRUNC | MSG_DONTWAIT); + if (ret != -1) { + for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; + cmsg = CMSG_NXTHDR(&msg, cmsg)) { + if (cmsg->cmsg_level == SOL_UDP + && cmsg->cmsg_type == UDP_GRO) { + gsosizeptr = (uint16_t *) CMSG_DATA(cmsg); + *gso_size = *gsosizeptr; + break; + } + } + } + return ret; +} + /* Flush all outstanding datagrams. Verify first few bytes of each. */ static void do_flush_udp(int fd) { - static char rbuf[ETH_DATA_LEN]; - int ret, len, budget = 256; + static char rbuf[ETH_MAX_MTU]; + int ret, len, gso_size, budget = 256; - len = cfg_verify ? sizeof(rbuf) : 0; + len = cfg_read_all ? 
sizeof(rbuf) : 0; while (budget--) { /* MSG_TRUNC will make return value full datagram length */ - ret = recv(fd, rbuf, len, MSG_TRUNC | MSG_DONTWAIT); + if (!cfg_expected_gso_size) + ret = recv(fd, rbuf, len, MSG_TRUNC | MSG_DONTWAIT); + else + ret = recv_msg(fd, rbuf, len, &gso_size); if (ret == -1 && errno == EAGAIN) - return; + break; if (ret == -1) error(1, errno, "recv"); - if (len) { + if (cfg_expected_pkt_len && ret != cfg_expected_pkt_len) + error(1, 0, "recv: bad packet len, got %d," + " expected %d\n", ret, cfg_expected_pkt_len); + if (len && cfg_verify) { if (ret == 0) error(1, errno, "recv: 0 byte datagram\n"); do_verify_udp(rbuf, ret); } + if (cfg_expected_gso_size && cfg_expected_gso_size != gso_size) + error(1, 0, "recv: bad gso size, got %d, expected %d " + "(-1 == no gso cmsg)\n", gso_size, + cfg_expected_gso_size); packets++; bytes += ret; + if (cfg_expected_pkt_nr && packets >= cfg_expected_pkt_nr) + break; } } static void usage(const char *filepath) { - error(1, 0, "Usage: %s [-tv] [-p port]", filepath); + error(1, 0, "Usage: %s [-4Grtv] [-b addr] [-p port] [-l pktlen] [-n packetnr] [-S gsosize]", filepath); } static void parse_opts(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "ptv")) != -1) { + /* bind to any by default */ + setup_sockaddr(PF_INET6, "::", &cfg_bind_addr); + while ((c = getopt(argc, argv, "4b:Gl:n:p:rS:tv")) != -1) { switch (c) { + case '4': + cfg_family = PF_INET; + cfg_alen = sizeof(struct sockaddr_in); + setup_sockaddr(PF_INET, "0.0.0.0", &cfg_bind_addr); + break; + case 'b': + setup_sockaddr(cfg_family, optarg, &cfg_bind_addr); + break; + case 'G': + cfg_gro_segment = true; + break; + case 'l': + cfg_expected_pkt_len = strtoul(optarg, NULL, 0); + break; + case 'n': + cfg_expected_pkt_nr = strtoul(optarg, NULL, 0); + break; case 'p': - cfg_port = htons(strtoul(optarg, NULL, 0)); + cfg_port = strtoul(optarg, NULL, 0); + break; + case 'r': + cfg_read_all = true; + break; + case 'S': + cfg_expected_gso_size = strtol(optarg, NULL, 0); break; case 't': cfg_tcp = true; break; case 'v': cfg_verify = true; + cfg_read_all = true; break; } } @@ -223,12 +330,23 @@ static void parse_opts(int argc, char **argv) static void do_recv(void) { unsigned long tnow, treport; - int fd; + int fd, loop = 0; fd = do_socket(cfg_tcp); + if (cfg_gro_segment && !cfg_tcp) { + int val = 1; + if (setsockopt(fd, IPPROTO_UDP, UDP_GRO, &val, sizeof(val))) + error(1, errno, "setsockopt UDP_GRO"); + } + treport = gettimeofday_ms() + 1000; do { + /* force termination after the second poll(); this copes both + * with a sender slower than the receiver and with missing packet errors + */ + if (cfg_expected_pkt_nr && loop++) + interrupted = true; do_poll(fd); if (cfg_tcp) @@ -249,6 +367,10 @@ static void do_recv(void) } while (!interrupted); + if (cfg_expected_pkt_nr && (packets != cfg_expected_pkt_nr)) + error(1, 0, "wrong packet number! 
got %ld, expected %d\n", + packets, cfg_expected_pkt_nr); + if (close(fd)) error(1, errno, "close"); } diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c index e821564053cfb7d0482bb85800c7d8f0d977232f..4074538b5df594346ab2f88a427472bbaf29659b 100644 --- a/tools/testing/selftests/net/udpgso_bench_tx.c +++ b/tools/testing/selftests/net/udpgso_bench_tx.c @@ -52,6 +52,8 @@ static bool cfg_segment; static bool cfg_sendmmsg; static bool cfg_tcp; static bool cfg_zerocopy; +static int cfg_msg_nr; +static uint16_t cfg_gso_size; static socklen_t cfg_alen; static struct sockaddr_storage cfg_dst_addr; @@ -205,14 +207,14 @@ static void send_udp_segment_cmsg(struct cmsghdr *cm) cm->cmsg_level = SOL_UDP; cm->cmsg_type = UDP_SEGMENT; - cm->cmsg_len = CMSG_LEN(sizeof(cfg_mss)); + cm->cmsg_len = CMSG_LEN(sizeof(cfg_gso_size)); valp = (void *)CMSG_DATA(cm); - *valp = cfg_mss; + *valp = cfg_gso_size; } static int send_udp_segment(int fd, char *data) { - char control[CMSG_SPACE(sizeof(cfg_mss))] = {0}; + char control[CMSG_SPACE(sizeof(cfg_gso_size))] = {0}; struct msghdr msg = {0}; struct iovec iov = {0}; int ret; @@ -241,7 +243,7 @@ static int send_udp_segment(int fd, char *data) static void usage(const char *filepath) { - error(1, 0, "Usage: %s [-46cmStuz] [-C cpu] [-D dst ip] [-l secs] [-p port] [-s sendsize]", + error(1, 0, "Usage: %s [-46cmtuz] [-C cpu] [-D dst ip] [-l secs] [-m messagenr] [-p port] [-s sendsize] [-S gsosize]", filepath); } @@ -250,7 +252,7 @@ static void parse_opts(int argc, char **argv) int max_len, hdrlen; int c; - while ((c = getopt(argc, argv, "46cC:D:l:mp:s:Stuz")) != -1) { + while ((c = getopt(argc, argv, "46cC:D:l:mM:p:s:S:tuz")) != -1) { switch (c) { case '4': if (cfg_family != PF_UNSPEC) @@ -279,6 +281,9 @@ static void parse_opts(int argc, char **argv) case 'm': cfg_sendmmsg = true; break; + case 'M': + cfg_msg_nr = strtoul(optarg, NULL, 10); + break; case 'p': cfg_port = strtoul(optarg, NULL, 0); break; @@ -286,6 +291,7 @@ static void parse_opts(int argc, char **argv) cfg_payload_len = strtoul(optarg, NULL, 0); break; case 'S': + cfg_gso_size = strtoul(optarg, NULL, 0); cfg_segment = true; break; case 't': @@ -317,6 +323,8 @@ static void parse_opts(int argc, char **argv) cfg_mss = ETH_DATA_LEN - hdrlen; max_len = ETH_MAX_MTU - hdrlen; + if (!cfg_gso_size) + cfg_gso_size = cfg_mss; if (cfg_payload_len > max_len) error(1, 0, "payload length %u exceeds max %u", @@ -392,10 +400,12 @@ int main(int argc, char **argv) else num_sends += send_udp(fd, buf[i]); num_msgs++; - if (cfg_zerocopy && ((num_msgs & 0xF) == 0)) flush_zerocopy(fd); + if (cfg_msg_nr && num_msgs >= cfg_msg_nr) + break; + tnow = gettimeofday_ms(); if (tnow > treport) { fprintf(stderr,