Skip to content
Snippets Groups Projects
  • Ahmed Abdelsalam's avatar
    ipv6: sr: Compute flowlabel for outer IPv6 header of seg6 encap mode · b5facfdb
    Ahmed Abdelsalam authored
    
    ECMP (equal-cost multipath) hashes are typically computed on the packets'
    5-tuple (src IP, dst IP, src port, dst port, L4 proto).
    
    For encapsulated packets, the L4 data is not readily available and ECMP
    hashing will often revert to (src IP, dst IP). This will lead to traffic
    polarization on a single ECMP path, causing congestion and waste of network
    capacity.
    
    In IPv6, the 20-bit flow label field is also used as part of the ECMP hash.
    In the absence of L4 data, the hashing will be on (src IP, dst IP, flow
    label). Having a non-zero flow label is thus important for proper traffic
    load balancing when L4 data is unavailable (i.e., when packets are
    encapsulated).
    
    Currently, the seg6_do_srh_encap() function extracts the original packet's
    flow label and sets it as the outer IPv6 flow label. There are two issues
    with this behaviour:
    
    a) There is no guarantee that the inner flow label is set by the source.
    b) If the original packet is not IPv6, the flow label will be set to
    zero (e.g., IPv4 or L2 encap).
    
    This patch adds a function, named seg6_make_flowlabel(), that computes a
    flow label from a given skb. It supports IPv6, IPv4 and L2 payloads, and
    leverages the per-namespace 'seg6_flowlabel' sysctl value.
    
    The currently supported behaviours are as follows:
    -1 set flowlabel to zero.
    0 copy flowlabel from inner packet in case of inner IPv6
    (set flowlabel to 0 in case of IPv4/L2)
    1 compute the flowlabel using seg6_make_flowlabel()
    
    This patch has been tested for IPv6, IPv4, and L2 traffic.
    
    Signed-off-by: Ahmed Abdelsalam <amsalam20@gmail.com>
    Acked-by: David Lebrun <dlebrun@google.com>
    Signed-off-by: David S. Miller <davem@davemloft.net>
    b5facfdb
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
seg6_iptunnel.c 11.53 KiB
/*
 *  SR-IPv6 implementation
 *
 *  Author:
 *  David Lebrun <david.lebrun@uclouvain.be>
 *
 *
 *  This program is free software; you can redistribute it and/or
 *        modify it under the terms of the GNU General Public License
 *        as published by the Free Software Foundation; either version
 *        2 of the License, or (at your option) any later version.
 */

#include <linux/types.h>
#include <linux/skbuff.h>
#include <linux/net.h>
#include <linux/module.h>
#include <net/ip.h>
#include <net/ip_tunnels.h>
#include <net/lwtunnel.h>
#include <net/netevent.h>
#include <net/netns/generic.h>
#include <net/ip6_fib.h>
#include <net/route.h>
#include <net/seg6.h>
#include <linux/seg6.h>
#include <linux/seg6_iptunnel.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/dst_cache.h>
#ifdef CONFIG_IPV6_SEG6_HMAC
#include <net/seg6_hmac.h>
#endif

/*
 * State that this file overlays on the data area of a lwtunnel_state
 * (see seg6_lwt_lwtunnel()).
 *
 * @cache:   dst cache stored at the start of the state.
 * @tuninfo: trailing encap information (see seg6_encap_lwtunnel()).
 *           Declared as a C99 flexible array member rather than a GNU
 *           zero-length array: the layout and sizeof(struct seg6_lwt) are
 *           unchanged, but the compiler now enforces that this stays the
 *           last member and can diagnose misuse such as sizeof() on it.
 */
struct seg6_lwt {
	struct dst_cache cache;
	struct seg6_iptunnel_encap tuninfo[];
};

/*
 * View the data area of @lwt as this file's struct seg6_lwt.
 *
 * No validation is performed here; presumably the caller guarantees that
 * @lwt was set up by this encap type (verify against callers).
 */
static inline struct seg6_lwt *seg6_lwt_lwtunnel(struct lwtunnel_state *lwt)
{
	void *priv = lwt->data;

	return priv;
}

static inline struct seg6_iptunnel_encap *
seg6_encap_lwtunnel(struct lwtunnel_state *lwt)
{
	return seg6_lwt_lwtunnel(lwt)->tuninfo;
}

/*
 * Netlink attribute policy table, indexed by SEG6_IPTUNNEL_* attribute
 * type. Only SEG6_IPTUNNEL_SRH has an entry; it is declared as an opaque
 * binary blob, with no length bound set in this table.
 */
static const struct nla_policy seg6_iptunnel_policy[SEG6_IPTUNNEL_MAX + 1] = {
	[SEG6_IPTUNNEL_SRH] = {
		.type = NLA_BINARY,
	},
};

static int nla_put_srh(struct sk_buff *skb, int attrtype,
		       struct seg6_iptunnel_encap *tuninfo)
{
	struct seg6_iptunnel_encap *data;
	struct nlattr *nla;
	int len;

	len = SEG6_IPTUN_ENCAP_SIZE(tuninfo);

	nla = nla_reserve(skb, attrtype, len);
	if (!nla)
		return -EMSGSIZE;

	data = nla_data(nla);
	memcpy(data, tuninfo, len);