ZONE for connection tracking

Keywords: C++ network iptables Linux sudo

brief introduction

At present, a connection tracking (CT) entry is identified by a 5-tuple: the source and destination IP addresses, the transport-layer protocol, and the source and destination ports. In a multi-tenant environment, the private address ranges of different tenants may overlap. If a CT entry is distinguished by these five elements alone, two tenants using the same addresses and ports would collide, so the 5-tuple cannot meet the needs of multi-tenancy. The concept of a zone is introduced to solve this. A zone is a 16-bit integer; different tenants use different zone IDs to ensure isolation between them.

Implementation

The zone member in the connection tracking control block:

/* Connection tracking entry (excerpt; the remaining members are elided). */
struct nf_conn {
    /* Usage count in here is 1 for hash table, 1 per skb,
     * plus 1 for any connection(s) we are `master' for
     *
     * Hint, SKB address this struct and refcnt via skb->_nfct and
     * helpers nf_conntrack_get() and nf_conntrack_put().
     * Helper nf_ct_put() equals nf_conntrack_put() by dec refcnt,
     * beware nf_ct_get() is different and don't inc refcnt.
     */
    struct nf_conntrack ct_general;

    spinlock_t    lock;
    u16        cpu;
    /* Zone this entry belongs to; only present when zone support is compiled in. */
#ifdef CONFIG_NF_CONNTRACK_ZONES
    struct nf_conntrack_zone zone;
#endif
    ...
};

zone definition

/* Conntrack zone descriptor. */
struct nf_conntrack_zone {
    /* Zone id stored in the descriptor. */
    u16    id;
    /*
     * Flag bits. The flag used in the code shown here is NF_CT_FLAG_MARK:
     * when it is set, nf_ct_zone_tmpl() takes the zone id from skb->mark
     * instead of from the id member.
     */
    u8    flags;
    /*
     * Bitmask of the directions the zone applies to (NF_CT_ZONE_DIR_ORIG,
     * NF_CT_ZONE_DIR_REPL). The default, NF_CT_DEFAULT_ZONE_DIR, sets both
     * bits, so the same zone id is used for the original and the reply
     * direction, which is the most common case.
     */
    u8    dir;
};
#define NF_CT_DEFAULT_ZONE_DIR    (NF_CT_ZONE_DIR_ORIG | NF_CT_ZONE_DIR_REPL)
//See nf_ct_zone_tmpl() below for how the flag and the direction are used:
/*
 * Select the zone to use for a packet that may carry a conntrack template.
 *
 * With zone support compiled in:
 *   - no template           -> the built-in default zone;
 *   - template flagged MARK -> @tmp is filled with skb->mark as the id, the
 *                              template's direction mask and flags 0, and
 *                              @tmp is returned;
 *   - any other template    -> the template's own zone.
 * Without zone support the result is whatever nf_ct_zone() returns.
 *
 * @tmp is caller-provided scratch storage and is only written in the MARK case.
 */
static inline const struct nf_conntrack_zone *
nf_ct_zone_tmpl(const struct nf_conn *tmpl, const struct sk_buff *skb,
        struct nf_conntrack_zone *tmp)
{
#ifdef CONFIG_NF_CONNTRACK_ZONES
    if (tmpl) {
        /* Zone id is derived from the packet mark rather than the template id. */
        if (tmpl->zone.flags & NF_CT_FLAG_MARK)
            return nf_ct_zone_init(tmp, skb->mark, tmpl->zone.dir, 0);

        return nf_ct_zone(tmpl);
    }

    return &nf_ct_zone_dflt;
#else
    return nf_ct_zone(tmpl);
#endif
}
/*
 * Return the zone of a conntrack entry: the entry's embedded zone when zone
 * support is compiled in, otherwise the built-in default zone.
 */
static inline const struct nf_conntrack_zone *
nf_ct_zone(const struct nf_conn *ct)
{
    const struct nf_conntrack_zone *zone;

#ifndef CONFIG_NF_CONNTRACK_ZONES
    zone = &nf_ct_zone_dflt;
#else
    zone = &ct->zone;
#endif

    return zone;
}
/*
 * Fill in a caller-provided zone descriptor with the given id, direction
 * mask and flags, and return a pointer to it so the call can be used
 * directly as a return value.
 */
static inline const struct nf_conntrack_zone *
nf_ct_zone_init(struct nf_conntrack_zone *zone, u16 id, u8 dir, u8 flags)
{
    zone->dir = dir;
    zone->flags = flags;
    zone->id = id;

    return zone;
}

Default connection tracking zone definition

/* Built-in default zone used e.g. by modules. */
const struct nf_conntrack_zone nf_ct_zone_dflt = {
    .id    = NF_CT_DEFAULT_ZONE_ID,
    .dir    = NF_CT_DEFAULT_ZONE_DIR,
};
EXPORT_SYMBOL_GPL(nf_ct_zone_dflt);
/* Zone id of the default zone. */
#define NF_CT_DEFAULT_ZONE_ID    0

/* Direction bits for nf_conntrack_zone.dir, one bit per connection direction. */
#define NF_CT_ZONE_DIR_ORIG    (1 << IP_CT_DIR_ORIGINAL)
#define NF_CT_ZONE_DIR_REPL    (1 << IP_CT_DIR_REPLY)

/* Default direction mask: the zone applies to both directions. */
#define NF_CT_DEFAULT_ZONE_DIR    (NF_CT_ZONE_DIR_ORIG | NF_CT_ZONE_DIR_REPL)

Common Operating Functions of Zones

/*
 * Return the zone of a conntrack entry: the entry's embedded zone when zone
 * support is compiled in, otherwise the built-in default zone.
 */
static inline const struct nf_conntrack_zone *
nf_ct_zone(const struct nf_conn *ct)
{
    const struct nf_conntrack_zone *zone;

#ifndef CONFIG_NF_CONNTRACK_ZONES
    zone = &nf_ct_zone_dflt;
#else
    zone = &ct->zone;
#endif

    return zone;
}
/*
 * Fill in a caller-provided zone descriptor with the given id, direction
 * mask and flags, and return a pointer to it so the call can be used
 * directly as a return value.
 */
static inline const struct nf_conntrack_zone *
nf_ct_zone_init(struct nf_conntrack_zone *zone, u16 id, u8 dir, u8 flags)
{
    zone->dir = dir;
    zone->flags = flags;
    zone->id = id;

    return zone;
}

/*
 * Select the zone to use for a packet that may carry a conntrack template.
 *
 * With zone support compiled in:
 *   - no template           -> the built-in default zone;
 *   - template flagged MARK -> @tmp is filled with skb->mark as the id, the
 *                              template's direction mask and flags 0, and
 *                              @tmp is returned;
 *   - any other template    -> the template's own zone.
 * Without zone support the result is whatever nf_ct_zone() returns.
 *
 * @tmp is caller-provided scratch storage and is only written in the MARK case.
 */
static inline const struct nf_conntrack_zone *
nf_ct_zone_tmpl(const struct nf_conn *tmpl, const struct sk_buff *skb,
        struct nf_conntrack_zone *tmp)
{
#ifdef CONFIG_NF_CONNTRACK_ZONES
    if (tmpl) {
        /* Zone id is derived from the packet mark rather than the template id. */
        if (tmpl->zone.flags & NF_CT_FLAG_MARK)
            return nf_ct_zone_init(tmp, skb->mark, tmpl->zone.dir, 0);

        return nf_ct_zone(tmpl);
    }

    return &nf_ct_zone_dflt;
#else
    return nf_ct_zone(tmpl);
#endif
}
/*
 * Store a copy of @zone in the conntrack entry @ct.
 * Does nothing when zone support is compiled out.
 */
static inline void nf_ct_zone_add(struct nf_conn *ct,
                  const struct nf_conntrack_zone *zone)
{
#ifndef CONFIG_NF_CONNTRACK_ZONES
    /* The entry has no zone member in this configuration. */
    return;
#else
    ct->zone = *zone;
#endif
}

/*
 * Tell whether @zone applies to direction @dir, i.e. whether the bit for
 * @dir is set in the zone's direction mask.
 */
static inline bool nf_ct_zone_matches_dir(const struct nf_conntrack_zone *zone,
                      enum ip_conntrack_dir dir)
{
    const int dir_bit = 1 << dir;

    return (zone->dir & dir_bit) != 0;
}
/*
 * Get the effective zone id of @zone for direction @dir: the zone's own id
 * when the zone applies to that direction, the default zone id otherwise
 * (and always the default id when zone support is compiled out).
 */
static inline u16 nf_ct_zone_id(const struct nf_conntrack_zone *zone,
                enum ip_conntrack_dir dir)
{
#ifdef CONFIG_NF_CONNTRACK_ZONES
    if (nf_ct_zone_matches_dir(zone, dir))
        return zone->id;
#endif
    return NF_CT_DEFAULT_ZONE_ID;
}
/*
 * Check whether conntrack entry @a and zone @b have the same effective
 * zone id in direction @dir. Always true when zone support is compiled out.
 */
static inline bool nf_ct_zone_equal(const struct nf_conn *a,
                    const struct nf_conntrack_zone *b,
                    enum ip_conntrack_dir dir)
{
#ifndef CONFIG_NF_CONNTRACK_ZONES
    return true;
#else
    const struct nf_conntrack_zone *zone_a = nf_ct_zone(a);

    return nf_ct_zone_id(zone_a, dir) == nf_ct_zone_id(b, dir);
#endif
}
/*
 * Check whether conntrack entry @a and zone @b carry the same raw zone id,
 * ignoring the direction masks. Always true when zone support is compiled out.
 */
static inline bool nf_ct_zone_equal_any(const struct nf_conn *a,
                    const struct nf_conntrack_zone *b)
{
#ifndef CONFIG_NF_CONNTRACK_ZONES
    return true;
#else
    const struct nf_conntrack_zone *zone_a = nf_ct_zone(a);

    return zone_a->id == b->id;
#endif
}

Use of ZONE

By mapping devices to different zones, tenant traffic can be mapped to zones; the zone of a flow can also be set by using the marking function of iptables. Linux uses the CT target to set the zone of traffic. A CT rule creates a connection tracking template in the kernel. A packet that hits the rule has this template attached to it, and when the first packet of a flow creates the CT entry, the entry is initialized with the template as a reference. This is how the parameters set in the CT target are passed on to connection tracking.

   CT
       The  CT  target  sets parameters for a packet or its associated connection. The target attaches a "template" connection tracking entry to the packet, which is then used by the conntrack core
       when initializing a new ct entry. This target is thus only valid in the "raw" table.

       --notrack
              Disables connection tracking for this packet.

       --helper name
              Use the helper identified by name for the connection. This is more flexible than loading the conntrack helper modules with preset ports.

       --ctevents event[,...]
              Only generate the specified conntrack events for this connection. Possible event types are: new, related, destroy, reply, assured, protoinfo, helper, mark (this refers to the  ctmark,
              not nfmark), natseqinfo, secmark (ctsecmark).

       --expevents event[,...]
              Only generate the specified expectation events for this connection.  Possible event types are: new.

       --zone-orig {id|mark}
              For  traffic  coming from ORIGINAL direction, assign this packet to zone id and only have lookups done in that zone. If mark is used instead of id, the zone is derived from the packet
              nfmark.

       --zone-reply {id|mark}
              For traffic coming from REPLY direction, assign this packet to zone id and only have lookups done in that zone. If mark is used instead of id, the zone  is  derived  from  the  packet
              nfmark.

       --zone {id|mark}
              Assign  this  packet  to  zone id and only have lookups done in that zone.  If mark is used instead of id, the zone is derived from the packet nfmark. By default, packets have zone 0.
              This option applies to both directions.

       --timeout name
              Use the timeout policy identified by name for the connection. This provides more flexible timeout policy definition than global timeout values  available  at
              /proc/sys/net/netfilter/nf_conntrack_*_timeout_*.

Example

sudo iptables -t raw -A PREROUTING -i ens39 -j CT --zone 2
#This command assigns packets received on ens39 to zone 2, so that connection tracking is isolated between traffic received on different interfaces.

Implementation and Analysis of the CT target

/* Rule data of the CT target (revision 1 layout). */
struct xt_ct_target_info_v1 {
    /* XT_CT_* flag bits, see the enumeration below. */
    __u16 flags;
    /* Zone id given on the command line. */
    __u16 zone;
    /* NOTE(review): presumably the event masks behind --ctevents / --expevents; encoding not visible here. */
    __u32 ct_events;
    __u32 exp_events;
    /* NOTE(review): presumably the helper name given with --helper. */
    char helper[16];
    /* NOTE(review): presumably the name of the timeout policy given with --timeout. */
    char timeout[32];

    /* Used internally by the kernel */
    /* Conntrack template of this rule; read by xt_ct_target_v1(). */
    struct nf_conn    *ct __attribute__((aligned(8)));
};

/* Flag bits for xt_ct_target_info_v1.flags. */
enum {
    /* --notrack was given: do not track the packet. */
    XT_CT_NOTRACK        = 1 << 0,
    /* NOTE(review): the article describes this like XT_CT_NOTRACK; exact meaning of the alias bit is not visible here. */
    XT_CT_NOTRACK_ALIAS    = 1 << 1,
    /* The zone applies to the original (request) direction. */
    XT_CT_ZONE_DIR_ORIG    = 1 << 2,
    /* The zone applies to the reply direction; both direction bits may be set together. */
    XT_CT_ZONE_DIR_REPL    = 1 << 3,
    /* The zone id is taken from the packet mark (nfmark). */
    XT_CT_ZONE_MARK        = 1 << 4,

    /* All flag bits defined above; anything outside this mask is rejected by xt_ct_tg_check_v2(). */
    XT_CT_MASK        = XT_CT_NOTRACK | XT_CT_NOTRACK_ALIAS |
                  XT_CT_ZONE_DIR_ORIG | XT_CT_ZONE_DIR_REPL |
                  XT_CT_ZONE_MARK,
};

Building templates

/*
 * Rule check entry point: reject rules whose flags contain bits outside
 * XT_CT_MASK with -EINVAL, otherwise hand the rule to xt_ct_tg_check()
 * and return its result.
 */
static int xt_ct_tg_check_v2(const struct xt_tgchk_param *par)
{
    struct xt_ct_target_info_v1 *rule = par->targinfo;

    /* Only flag bits covered by XT_CT_MASK are acceptable. */
    if ((rule->flags & ~XT_CT_MASK) == 0)
        return xt_ct_tg_check(par, rule);

    return -EINVAL;
}

/*
 * Validate a CT rule and build its conntrack template from the rule's zone
 * settings. Excerpt: the rest of the function, including the out/err1/err2
 * labels jumped to below, is elided.
 */
static int xt_ct_tg_check(const struct xt_tgchk_param *par,
              struct xt_ct_target_info_v1 *info)
{
    struct nf_conntrack_zone zone;
    struct nf_conn_help *help;
    struct nf_conn *ct;
    int ret = -EOPNOTSUPP;

    /* --notrack: no template is built, ct stays NULL. */
    if (info->flags & XT_CT_NOTRACK) {
        ct = NULL;
        goto out;
    }

#ifndef CONFIG_NF_CONNTRACK_ZONES
    /*
     * Zone support is compiled out: any zone id or zone-related flag is
     * refused; ret is still -EOPNOTSUPP when jumping to err1.
     */
    if (info->zone || info->flags & (XT_CT_ZONE_DIR_ORIG |
                     XT_CT_ZONE_DIR_REPL |
                     XT_CT_ZONE_MARK))
        goto err1;
#endif

    ret = nf_ct_netns_get(par->net, par->family);
    if (ret < 0)
        goto err1;

    /* Build the zone descriptor for the template from the rule. */
    memset(&zone, 0, sizeof(zone));
    zone.id = info->zone;
    /* NOTE(review): xt_ct_flags_to_dir() is not shown; presumably maps the XT_CT_ZONE_DIR_* flags to a direction mask. */
    zone.dir = xt_ct_flags_to_dir(info);
    /* Zone id is to be taken from the packet mark at lookup time. */
    if (info->flags & XT_CT_ZONE_MARK)
        zone.flags |= NF_CT_FLAG_MARK;
    /* Allocate the conntrack template carrying this zone. */
    ct = nf_ct_tmpl_alloc(par->net, &zone, GFP_KERNEL);
    if (!ct) {
        ret = -ENOMEM;
        goto err2;
    }
    ...
        
    return ret;
}

Implementation of target

/*
 * Target handler: fetch the conntrack template stored in the matched rule
 * and apply it to the packet via xt_ct_target().
 */
static unsigned int xt_ct_target_v1(struct sk_buff *skb,
                    const struct xt_action_param *par)
{
    const struct xt_ct_target_info_v1 *rule = par->targinfo;

    /* rule->ct is the rule's template. */
    return xt_ct_target(skb, rule->ct);
}
/*
 * Attach conntrack state to a packet that does not carry any yet:
 *   - with a template @ct, take a reference on it and attach it as IP_CT_NEW;
 *   - without one (@ct is NULL), mark the packet IP_CT_UNTRACKED.
 * A packet that already has conntrack state (previously seen, e.g. on
 * loopback) is left untouched. Always returns XT_CONTINUE.
 */
static inline int xt_ct_target(struct sk_buff *skb, struct nf_conn *ct)
{
    if (skb->_nfct == 0) {
        if (!ct) {
            nf_ct_set(skb, ct, IP_CT_UNTRACKED);
        } else {
            /* The skb holds its own reference on the template. */
            atomic_inc(&ct->ct_general.use);
            nf_ct_set(skb, ct, IP_CT_NEW);
        }
    }

    return XT_CONTINUE;
}

Template Processing by Connection Tracking

/*
 * Conntrack entry point for a packet (excerpt; only the handling of
 * conntrack state already attached to the skb is shown, the rest is elided).
 */
unsigned int
nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
        struct sk_buff *skb)
{
    const struct nf_conntrack_l3proto *l3proto;
    const struct nf_conntrack_l4proto *l4proto;
    struct nf_conn *ct, *tmpl;
    enum ip_conntrack_info ctinfo;
    unsigned int *timeouts;
    unsigned int dataoff;
    u_int8_t protonum;
    int ret;

    /* Fetch whatever conntrack pointer the skb already carries; it may be a template. */
    tmpl = nf_ct_get(skb, &ctinfo);
    if (tmpl || ctinfo == IP_CT_UNTRACKED) {
        /* Previously seen (loopback or untracked)?  Ignore. */
        /*
         * A packet can arrive here already carrying conntrack state:
         *  - a real (non-template) entry, e.g. a locally generated packet
         *    (such as a ping to one of the host's own addresses) that was
         *    tracked on output and re-enters through lo at prerouting;
         *  - IP_CT_UNTRACKED, which the CT target sets for packets that
         *    must not be tracked.
         * Both cases are accepted here without further tracking.
         */
        /* A template attached by the CT target (e.g. to set a zone) does not take this branch. */
        if ((tmpl && !nf_ct_is_template(tmpl)) ||
             ctinfo == IP_CT_UNTRACKED) {
            NF_CT_STAT_INC_ATOMIC(net, ignore);
            return NF_ACCEPT;
        }
        /* Only a template reaches this point: clear it from the skb; tmpl still points to it. */
        skb->_nfct = 0;
    }
    ...

    return ret;
}

/* On success, returns 0, sets skb->_nfct | ctinfo */
/*
 * Excerpt: find the conntrack entry for the packet's tuple in the zone
 * selected by the template, creating a new entry when none exists. The
 * code that follows the lookup is elided.
 */
static int
resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
          struct sk_buff *skb,
          unsigned int dataoff,
          u_int16_t l3num,
          u_int8_t protonum,
          const struct nf_conntrack_l3proto *l3proto,
          const struct nf_conntrack_l4proto *l4proto)
{
    const struct nf_conntrack_zone *zone;
    struct nf_conntrack_tuple tuple;
    struct nf_conntrack_tuple_hash *h;
    enum ip_conntrack_info ctinfo;
    struct nf_conntrack_zone tmp;
    struct nf_conn *ct;
    u32 hash;

    /* Extract the tuple from the packet; return 0 when that fails. */
    if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
                 dataoff, l3num, protonum, net, &tuple, l3proto,
                 l4proto)) {
        pr_debug("Can't get tuple\n");
        return 0;
    }

    /* look for tuple match; the lookup is done in the zone selected by the template */
    zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
    hash = hash_conntrack_raw(&tuple, net);
    h = __nf_conntrack_find_get(net, zone, &tuple, hash);
    if (!h) {
        /* No existing entry for this tuple and zone: create a new conntrack. */
        h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto,
                   skb, dataoff, hash);
        if (!h)
            return 0;
        if (IS_ERR(h))
            return PTR_ERR(h);
    }
    ...
    return 0;
}



/* Allocate a new conntrack: we return -ENOMEM if classification
   failed due to stress.  Otherwise it really is unclassifiable. */
/*
 * Excerpt: only the tuple inversion, zone selection and allocation are
 * shown; the rest of the function is elided.
 */
static noinline struct nf_conntrack_tuple_hash *
init_conntrack(struct net *net, struct nf_conn *tmpl,
           const struct nf_conntrack_tuple *tuple,
           const struct nf_conntrack_l3proto *l3proto,
           const struct nf_conntrack_l4proto *l4proto,
           struct sk_buff *skb,
           unsigned int dataoff, u32 hash)
{
    struct nf_conn *ct;
    struct nf_conn_help *help;
    struct nf_conntrack_tuple repl_tuple;
    struct nf_conntrack_ecache *ecache;
    struct nf_conntrack_expect *exp = NULL;
    const struct nf_conntrack_zone *zone;
    struct nf_conn_timeout *timeout_ext;
    struct nf_conntrack_zone tmp;
    unsigned int *timeouts;

    /* Derive the reply-direction tuple; return NULL when it cannot be inverted. */
    if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) {
        pr_debug("Can't invert tuple.\n");
        return NULL;
    }
    /* The new entry is created in the zone selected by the template, the same selection used for the lookup. */
    zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
    /* Allocate the conntrack entry for both tuples in that zone. */
    ct = __nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC,
                  hash);
    ...
}

Posted by jlp09550 on Sat, 12 Oct 2019 07:05:49 -0700