接続追跡のZONE


概要
現在、接続追跡(CT)は5タプル(送信元IPアドレス、宛先IPアドレス、トランスポート層プロトコル、送信元ポート、宛先ポート)で接続を識別します。マルチテナント環境では、テナントのプライベートアドレス空間が重複する可能性があるため、この5つの要素だけでCTを区別するとマルチテナントの要件を満たせません。そこでzoneという概念が導入されました。zoneは16ビットの整数で、テナントごとに異なるidを割り当てることで、テナント間の分離を保証します。
実装
接続追跡制御ブロック(struct nf_conn)のzoneメンバー:
/* Connection tracking control block (excerpt; further members are elided). */
struct nf_conn {
    /* Usage count in here is 1 for hash table, 1 per skb,
     * plus 1 for any connection(s) we are `master' for
     *
     * Hint, SKB address this struct and refcnt via skb->_nfct and
     * helpers nf_conntrack_get() and nf_conntrack_put().
     * Helper nf_ct_put() equals nf_conntrack_put() by dec refcnt,
     * beware nf_ct_get() is different and don't inc refcnt.
     */
    struct nf_conntrack ct_general;

    spinlock_t    lock;
    u16        cpu;
    /* Zone this entry belongs to. The member only exists when
     * CONFIG_NF_CONNTRACK_ZONES is enabled.
     */
#ifdef CONFIG_NF_CONNTRACK_ZONES
    struct nf_conntrack_zone zone;
#endif
    ...
};

zone定義
/* Conntrack zone descriptor. */
struct nf_conntrack_zone {
    /* Zone identifier. */
    u16    id;
    /* Option bits. When NF_CT_FLAG_MARK is set on a template's zone,
     * nf_ct_zone_tmpl() takes the zone id from skb->mark instead of @id.
     */
    u8    flags;
    /* Bitmask of directions the zone id applies to. For a direction that
     * is not set here nf_ct_zone_id() yields NF_CT_DEFAULT_ZONE_ID.
     * The built-in default zone uses NF_CT_DEFAULT_ZONE_DIR.
     */
    u8    dir;
};
/* Both directions: original and reply. */
#define NF_CT_DEFAULT_ZONE_DIR    (NF_CT_ZONE_DIR_ORIG | NF_CT_ZONE_DIR_REPL)
/*
 * Select the zone to use for a packet, given an optional template conntrack.
 *
 * With zones compiled in:
 *   - no template: the built-in default zone;
 *   - template flagged NF_CT_FLAG_MARK: a zone built in @tmp whose id is
 *     skb->mark and whose direction mask is copied from the template;
 *   - otherwise: the template's own zone.
 * Without zones the result is whatever nf_ct_zone() returns.
 */
static inline const struct nf_conntrack_zone *
nf_ct_zone_tmpl(const struct nf_conn *tmpl, const struct sk_buff *skb,
        struct nf_conntrack_zone *tmp)
{
#ifdef CONFIG_NF_CONNTRACK_ZONES
    if (!tmpl)
        return &nf_ct_zone_dflt;

    /* Fixed zone id: use the zone embedded in the template. */
    if (!(tmpl->zone.flags & NF_CT_FLAG_MARK))
        return nf_ct_zone(tmpl);

    /* Mark-derived zone: id comes from the packet mark, flags are 0. */
    return nf_ct_zone_init(tmp, skb->mark, tmpl->zone.dir, 0);
#else
    return nf_ct_zone(tmpl);
#endif
}
/*
 * Return the zone of conntrack @ct: its embedded zone when zones are
 * compiled in, the built-in default zone otherwise.
 */
static inline const struct nf_conntrack_zone *
nf_ct_zone(const struct nf_conn *ct)
{
    const struct nf_conntrack_zone *zone;

#ifdef CONFIG_NF_CONNTRACK_ZONES
    zone = &ct->zone;
#else
    zone = &nf_ct_zone_dflt;
#endif
    return zone;
}
/*
 * Fill caller-provided storage @zone with the given id, direction mask and
 * flags, and return @zone so the call can be used as an expression.
 */
static inline const struct nf_conntrack_zone *
nf_ct_zone_init(struct nf_conntrack_zone *zone, u16 id, u8 dir, u8 flags)
{
    /* struct nf_conntrack_zone consists of exactly these three members. */
    *zone = (struct nf_conntrack_zone) {
        .id    = id,
        .flags = flags,
        .dir   = dir,
    };

    return zone;
}

デフォルトの接続追跡zoneの定義
/* Built-in default zone used e.g. by modules.
 * It carries the default zone id and applies to both directions; .flags is
 * not listed and is therefore zero-initialized.
 */
const struct nf_conntrack_zone nf_ct_zone_dflt = {
    .id    = NF_CT_DEFAULT_ZONE_ID,
    .dir    = NF_CT_DEFAULT_ZONE_DIR,
};
EXPORT_SYMBOL_GPL(nf_ct_zone_dflt);
/* Id of the built-in default zone (see nf_ct_zone_dflt). */
#define NF_CT_DEFAULT_ZONE_ID    0

/* Direction bits for nf_conntrack_zone.dir; the bit position is the
 * corresponding enum ip_conntrack_dir value.
 */
#define NF_CT_ZONE_DIR_ORIG    (1 << IP_CT_DIR_ORIGINAL)
#define NF_CT_ZONE_DIR_REPL    (1 << IP_CT_DIR_REPLY)

/* Both directions: original and reply. */
#define NF_CT_DEFAULT_ZONE_DIR    (NF_CT_ZONE_DIR_ORIG | NF_CT_ZONE_DIR_REPL)

zoneの一般的な操作関数
/*
 * Return the zone of conntrack @ct: its embedded zone when zones are
 * compiled in, the built-in default zone otherwise.
 */
static inline const struct nf_conntrack_zone *
nf_ct_zone(const struct nf_conn *ct)
{
    const struct nf_conntrack_zone *zone;

#ifdef CONFIG_NF_CONNTRACK_ZONES
    zone = &ct->zone;
#else
    zone = &nf_ct_zone_dflt;
#endif
    return zone;
}
/*
 * Fill caller-provided storage @zone with the given id, direction mask and
 * flags, and return @zone so the call can be used as an expression.
 */
static inline const struct nf_conntrack_zone *
nf_ct_zone_init(struct nf_conntrack_zone *zone, u16 id, u8 dir, u8 flags)
{
    /* struct nf_conntrack_zone consists of exactly these three members. */
    *zone = (struct nf_conntrack_zone) {
        .id    = id,
        .flags = flags,
        .dir   = dir,
    };

    return zone;
}

/*
 * Select the zone to use for a packet, given an optional template conntrack.
 *
 * With zones compiled in:
 *   - no template: the built-in default zone;
 *   - template flagged NF_CT_FLAG_MARK: a zone built in @tmp whose id is
 *     skb->mark and whose direction mask is copied from the template;
 *   - otherwise: the template's own zone.
 * Without zones the result is whatever nf_ct_zone() returns.
 */
static inline const struct nf_conntrack_zone *
nf_ct_zone_tmpl(const struct nf_conn *tmpl, const struct sk_buff *skb,
        struct nf_conntrack_zone *tmp)
{
#ifdef CONFIG_NF_CONNTRACK_ZONES
    if (!tmpl)
        return &nf_ct_zone_dflt;

    /* Fixed zone id: use the zone embedded in the template. */
    if (!(tmpl->zone.flags & NF_CT_FLAG_MARK))
        return nf_ct_zone(tmpl);

    /* Mark-derived zone: id comes from the packet mark, flags are 0. */
    return nf_ct_zone_init(tmp, skb->mark, tmpl->zone.dir, 0);
#else
    return nf_ct_zone(tmpl);
#endif
}
/*
 * Store a copy of @zone in conntrack @ct. Does nothing when zones are
 * not compiled in.
 */
static inline void nf_ct_zone_add(struct nf_conn *ct,
                  const struct nf_conntrack_zone *zone)
{
#ifdef CONFIG_NF_CONNTRACK_ZONES
    struct nf_conntrack_zone *dst = &ct->zone;

    *dst = *zone;
#endif
}

/* Tell whether @zone's direction mask has the bit for @dir set. */
static inline bool nf_ct_zone_matches_dir(const struct nf_conntrack_zone *zone,
                      enum ip_conntrack_dir dir)
{
    const int dir_bit = 1 << dir;

    return (zone->dir & dir_bit) != 0;
}
/*
 * Effective zone id of @zone for direction @dir: the zone's own id when the
 * zone covers that direction, NF_CT_DEFAULT_ZONE_ID otherwise (and always
 * when zones are not compiled in).
 */
static inline u16 nf_ct_zone_id(const struct nf_conntrack_zone *zone,
                enum ip_conntrack_dir dir)
{
#ifdef CONFIG_NF_CONNTRACK_ZONES
    if (nf_ct_zone_matches_dir(zone, dir))
        return zone->id;
#endif
    return NF_CT_DEFAULT_ZONE_ID;
}
/*
 * Compare the effective zone id of conntrack @a with that of zone @b for
 * direction @dir. Always true when zones are not compiled in.
 */
static inline bool nf_ct_zone_equal(const struct nf_conn *a,
                    const struct nf_conntrack_zone *b,
                    enum ip_conntrack_dir dir)
{
#ifdef CONFIG_NF_CONNTRACK_ZONES
    const u16 id_a = nf_ct_zone_id(nf_ct_zone(a), dir);
    const u16 id_b = nf_ct_zone_id(b, dir);

    return id_a == id_b;
#else
    return true;
#endif
}
/*
 * Compare the raw zone id of conntrack @a with that of zone @b, ignoring
 * direction masks. Always true when zones are not compiled in.
 */
static inline bool nf_ct_zone_equal_any(const struct nf_conn *a,
                    const struct nf_conntrack_zone *b)
{
#ifndef CONFIG_NF_CONNTRACK_ZONES
    return true;
#else
    const struct nf_conntrack_zone *zone_a = nf_ct_zone(a);

    return zone_a->id == b->id;
#endif
}

ZONEの使用
デバイスを異なるzoneにマッピングすることで、テナントのトラフィックとzoneの対応付けを実現できます。また、iptablesのmark機能を使用してトラフィックのzoneを設定することもできます。LinuxではCT targetを使ってトラフィックのzoneを設定します。CT targetはカーネル内に接続追跡テンプレートを作成し、このルールにマッチしたパケットにはそのテンプレートCTが設定されます。フローの最初のパケットでCTを作成する際にテンプレートを参照して初期化することで、CT targetで設定したパラメータが接続追跡エントリに引き継がれます。
   CT
       The  CT  target  sets parameters for a packet or its associated connection. The target attaches a "template" connection tracking entry to the packet, which is then used by the conntrack core
       when initializing a new ct entry. This target is thus only valid in the "raw" table.

       --notrack
              Disables connection tracking for this packet.

       --helper name
              Use the helper identified by name for the connection. This is more flexible than loading the conntrack helper modules with preset ports.

       --ctevents event[,...]
              Only generate the specified conntrack events for this connection. Possible event types are: new, related, destroy, reply, assured, protoinfo, helper, mark (this refers to the  ctmark,
              not nfmark), natseqinfo, secmark (ctsecmark).

       --expevents event[,...]
              Only generate the specified expectation events for this connection.  Possible event types are: new.

       --zone-orig {id|mark}
              For  traffic  coming from ORIGINAL direction, assign this packet to zone id and only have lookups done in that zone. If mark is used instead of id, the zone is derived from the packet
              nfmark.

       --zone-reply {id|mark}
              For traffic coming from REPLY direction, assign this packet to zone id and only have lookups done in that zone. If mark is used instead of id, the zone  is  derived  from  the  packet
              nfmark.

       --zone {id|mark}
              Assign  this  packet  to  zone id and only have lookups done in that zone.  If mark is used instead of id, the zone is derived from the packet nfmark. By default, packets have zone 0.
              This option applies to both directions.

       --timeout name
              Use the timeout policy identified by name for the connection. This provides a more flexible timeout policy definition than the global timeout values available at
              /proc/sys/net/netfilter/nf_conntrack_*_timeout_*.

使用例
sudo iptables -t raw -A PREROUTING -i ens39 -j CT --zone 2
# Packets arriving on ens39 are assigned to conntrack zone 2, so their conntrack lookups are done only in that zone.

CT targetの実装分析
/* Parameters of the CT target. */
struct xt_ct_target_info_v1 {
    /* XT_CT_* flag bits; xt_ct_tg_check_v2() rejects bits outside XT_CT_MASK. */
    __u16 flags;
    /* Zone id copied into the template's zone by xt_ct_tg_check(). */
    __u16 zone;
    /* NOTE(review): presumably the event masks for --ctevents / --expevents; confirm. */
    __u32 ct_events;
    __u32 exp_events;
    /* NOTE(review): presumably the helper name given with --helper; confirm. */
    char helper[16];
    /* NOTE(review): presumably the timeout policy name given with --timeout; confirm. */
    char timeout[32];

    /* Used internally by the kernel */
    /* Conntrack template that xt_ct_target_v1() attaches to matching packets. */
    struct nf_conn    *ct __attribute__((aligned(8)));
};

/* Flag bits for xt_ct_target_info_v1.flags. */
enum {
    /* Tracking disabled: xt_ct_tg_check() allocates no template when set. */
    XT_CT_NOTRACK        = 1 << 0,
    /* NOTE(review): alias form of the notrack option; exact semantics are not visible here, confirm. */
    XT_CT_NOTRACK_ALIAS    = 1 << 1,
    /* Zone applies to the original direction (presumably --zone-orig; the
     * mapping is done by xt_ct_flags_to_dir(), which is not shown).
     */
    XT_CT_ZONE_DIR_ORIG    = 1 << 2,
    /* Zone applies to the reply direction (presumably --zone-reply). */
    XT_CT_ZONE_DIR_REPL    = 1 << 3,
    /* Zone id is derived from the packet mark: xt_ct_tg_check() sets
     * NF_CT_FLAG_MARK on the template's zone when this bit is set.
     */
    XT_CT_ZONE_MARK        = 1 << 4,

    /* All flag bits accepted by xt_ct_tg_check_v2(). */
    XT_CT_MASK        = XT_CT_NOTRACK | XT_CT_NOTRACK_ALIAS |
                  XT_CT_ZONE_DIR_ORIG | XT_CT_ZONE_DIR_REPL |
                  XT_CT_ZONE_MARK,
};

テンプレートの構築
/*
 * Check callback: refuse flag bits outside XT_CT_MASK with -EINVAL, then
 * hand the target info to xt_ct_tg_check() and return its result.
 */
static int xt_ct_tg_check_v2(const struct xt_tgchk_param *par)
{
    struct xt_ct_target_info_v1 *info = par->targinfo;
    const unsigned int unknown = info->flags & ~XT_CT_MASK;

    if (unknown != 0)
        return -EINVAL;

    return xt_ct_tg_check(par, info);
}

/*
 * Validate the CT target parameters and allocate the conntrack template
 * that carries the configured zone.
 *
 * Excerpt: the remainder of the function, including the out/err1/err2
 * labels targeted by the gotos below, is elided.
 */
static int xt_ct_tg_check(const struct xt_tgchk_param *par,
              struct xt_ct_target_info_v1 *info)
{
    struct nf_conntrack_zone zone;
    struct nf_conn_help *help;
    struct nf_conn *ct;
    int ret = -EOPNOTSUPP;

    /* XT_CT_NOTRACK: no template is allocated, ct stays NULL. */
    if (info->flags & XT_CT_NOTRACK) {
        ct = NULL;
        goto out;
    }

    /* Without zone support any zone-related request bails out via err1
     * with ret still set to -EOPNOTSUPP.
     */
#ifndef CONFIG_NF_CONNTRACK_ZONES
    if (info->zone || info->flags & (XT_CT_ZONE_DIR_ORIG |
                     XT_CT_ZONE_DIR_REPL |
                     XT_CT_ZONE_MARK))
        goto err1;
#endif

    ret = nf_ct_netns_get(par->net, par->family);
    if (ret < 0)
        goto err1;

    /* Build the zone from the rule: id, direction mask, and the
     * mark-derived flag when XT_CT_ZONE_MARK is requested.
     */
    memset(&zone, 0, sizeof(zone));
    zone.id = info->zone;
    zone.dir = xt_ct_flags_to_dir(info);
    if (info->flags & XT_CT_ZONE_MARK)
        zone.flags |= NF_CT_FLAG_MARK;
    /* Allocate the conntrack template for this zone. */
    ct = nf_ct_tmpl_alloc(par->net, &zone, GFP_KERNEL);
    if (!ct) {
        ret = -ENOMEM;
        goto err2;
    }
    ...
        
    return ret;
}

targetの実行
/*
 * Target callback: fetch the conntrack template stored in the rule's target
 * info and let xt_ct_target() attach it to the packet.
 */
static unsigned int xt_ct_target_v1(struct sk_buff *skb,
                    const struct xt_action_param *par)
{
    const struct xt_ct_target_info_v1 *info = par->targinfo;

    return xt_ct_target(skb, info->ct);
}
/*
 * Attach template @ct to @skb, or mark the packet untracked when @ct is NULL.
 * A packet that already carries conntrack state is left untouched.
 * Always returns XT_CONTINUE.
 */
static inline int xt_ct_target(struct sk_buff *skb, struct nf_conn *ct)
{
    /* Only act on packets not seen before (e.g. not looped back). */
    if (skb->_nfct == 0) {
        if (!ct) {
            /* No template configured: flag the packet as untracked. */
            nf_ct_set(skb, ct, IP_CT_UNTRACKED);
        } else {
            /* The skb takes its own reference on the template. */
            atomic_inc(&ct->ct_general.use);
            nf_ct_set(skb, ct, IP_CT_NEW);
        }
    }

    return XT_CONTINUE;
}

接続追跡によるテンプレートの処理
/*
 * Conntrack entry point for a packet (excerpt; the body after the
 * template handling is elided).
 */
unsigned int
nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
        struct sk_buff *skb)
{
    const struct nf_conntrack_l3proto *l3proto;
    const struct nf_conntrack_l4proto *l4proto;
    struct nf_conn *ct, *tmpl;
    enum ip_conntrack_info ctinfo;
    unsigned int *timeouts;
    unsigned int dataoff;
    u_int8_t protonum;
    int ret;

    /* Fetch whatever conntrack state is already attached to the skb. */
    tmpl = nf_ct_get(skb, &ctinfo);
    if (tmpl || ctinfo == IP_CT_UNTRACKED) {
        /* Previously seen (loopback or untracked)?  Ignore. */
        /* An attached entry that is not a template means the packet was
        ** already handled (for example a locally generated packet looped
        ** back via lo; presumably it was tracked on the output path).
        ** IP_CT_UNTRACKED is the state xt_ct_target() sets when the rule
        ** has no template. In both cases the packet is accepted without
        ** further processing.
        */ 
        /* A template attached by the CT target is expected to satisfy
         * nf_ct_is_template(), so it does not take this branch.
         */
        if ((tmpl && !nf_ct_is_template(tmpl)) ||
             ctinfo == IP_CT_UNTRACKED) {
            NF_CT_STAT_INC_ATOMIC(net, ignore);
            return NF_ACCEPT;
        }
        /* Detach the template from the skb; tmpl keeps pointing at it. */
        skb->_nfct = 0;
    }
    ...

    return ret;
}

/* On success, returns 0, sets skb->_nfct | ctinfo */
static int
resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
          struct sk_buff *skb,
          unsigned int dataoff,
          u_int16_t l3num,
          u_int8_t protonum,
          const struct nf_conntrack_l3proto *l3proto,
          const struct nf_conntrack_l4proto *l4proto)
{
    const struct nf_conntrack_zone *zone;
    struct nf_conntrack_tuple tuple;
    struct nf_conntrack_tuple_hash *h;
    enum ip_conntrack_info ctinfo;
    struct nf_conntrack_zone tmp;
    struct nf_conn *ct;
    u32 hash;

    if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
                 dataoff, l3num, protonum, net, &tuple, l3proto,
                 l4proto)) {
        pr_debug("Can't get tuple
"); return 0; } /* look for tuple match CT , zone */ zone = nf_ct_zone_tmpl(tmpl, skb, &tmp); hash = hash_conntrack_raw(&tuple, net); h = __nf_conntrack_find_get(net, zone, &tuple, hash); if (!h) { // , h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto, skb, dataoff, hash); if (!h) return 0; if (IS_ERR(h)) return PTR_ERR(h); } ... return 0; } /* Allocate a new conntrack: we return -ENOMEM if classification failed due to stress. Otherwise it really is unclassifiable. */ static noinline struct nf_conntrack_tuple_hash * init_conntrack(struct net *net, struct nf_conn *tmpl, const struct nf_conntrack_tuple *tuple, const struct nf_conntrack_l3proto *l3proto, const struct nf_conntrack_l4proto *l4proto, struct sk_buff *skb, unsigned int dataoff, u32 hash) { struct nf_conn *ct; struct nf_conn_help *help; struct nf_conntrack_tuple repl_tuple; struct nf_conntrack_ecache *ecache; struct nf_conntrack_expect *exp = NULL; const struct nf_conntrack_zone *zone; struct nf_conn_timeout *timeout_ext; struct nf_conntrack_zone tmp; unsigned int *timeouts; if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) { pr_debug("Can't invert tuple.
"); return NULL; } // ct zone。 zone = nf_ct_zone_tmpl(tmpl, skb, &tmp); // ct = __nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC, hash); ... }