Linuxカーネル中の流量制御(23)


本文書の著作権 (Copyright) はすべて yfydz に帰属します。GPL に基づいて公開します。自由にコピー・転載できますが、転載する際は文書の完全性を維持してください。商業目的での利用は厳禁です。
msn:[email protected]
ソース:http://yfydz.cublog.cn

8.7 ipt アクション

ipt アクションは netfilter のターゲット (target) を TC のアクションとして利用するものです。netfilter の target をそのままアクションとして呼び出すため、target が提供する処理をパケットに適用できます。コードは net/sched/act_ipt.c にあります。

8.7.1 データ構造とアクション操作構造
/* include/net/tc_act/tc_ipt.h */
// Private state of one ipt action instance
struct tcf_ipt {
// generic action state; to_ipt() maps a pointer to this member back to the tcf_ipt
 struct tcf_common common;
// hook number handed to the target (tcf_ipt_init reads it from TCA_IPT_HOOK)
 u32   tcfi_hook;
// kmalloc'ed iptables table name (tcf_ipt_init falls back to "mangle")
 char   *tcfi_tname;
// kmalloc'ed copy of the target entry supplied in TCA_IPT_TARG
 struct xt_entry_target *tcfi_t;
};
// Convert a pointer to the embedded tcf_common into its enclosing struct tcf_ipt
#define to_ipt(pc) \
 container_of(pc, struct tcf_ipt, common)

/* net/sched/act_ipt.c */

/* Hash descriptor for ipt actions: table tcf_ipt_ht, mask IPT_TAB_MASK, lock ipt_lock. */
static struct tcf_hashinfo ipt_hash_info = {
	.lock	= &ipt_lock,
	.hmask	= IPT_TAB_MASK,
	.htab	= tcf_ipt_ht,
};

/* Operations table for the "ipt" action. */
static struct tc_action_ops act_ipt_ops = {
	/* identification */
	.kind		= "ipt",
	.type		= TCA_ACT_IPT,
	.capab		= TCA_CAP_NONE,
	.owner		= THIS_MODULE,

	/* hash descriptor used by the hash helpers for this action kind */
	.hinfo		= &ipt_hash_info,

	/* handlers implemented by act_ipt itself */
	.init		= tcf_ipt_init,
	.act		= tcf_ipt,
	.dump		= tcf_ipt_dump,
	.cleanup	= tcf_ipt_cleanup,

	/* handlers not defined in act_ipt (by their names, shared tc helpers) */
	.lookup		= tcf_hash_search,
	.walk		= tcf_generic_walker,
};
 

8.7.2 初期化
 
/*
 * tcf_ipt_init - create a new ipt action or update an existing one.
 * @rta:  nested attribute carrying the TCA_IPT_* options
 * @est:  estimator attribute, passed through to tcf_hash_create()
 * @a:    generic action, passed through to tcf_hash_check()/tcf_hash_create()
 * @ovr:  non-zero if an already existing action may be overwritten
 * @bind: bind flag, passed through to the hash helpers and tcf_ipt_release()
 *
 * Returns ACT_P_CREATED when a new action was created, 0 when an existing
 * action was updated, or a negative errno on failure.
 */
static int tcf_ipt_init(struct rtattr *rta, struct rtattr *est,
			struct tc_action *a, int ovr, int bind)
{
	struct rtattr *tb[TCA_IPT_MAX];
	struct tcf_ipt *ipt;
	struct tcf_common *pc;
	struct ipt_entry_target *td, *t;
	char *tname;
	int ret = 0, err;
	u32 hook = 0;
	u32 index = 0;

	/* split the nested attribute into tb[], which is indexed by type - 1 */
	if (rta == NULL || rtattr_parse_nested(tb, TCA_IPT_MAX, rta) < 0)
		return -EINVAL;

	/* the hook number is mandatory */
	if (tb[TCA_IPT_HOOK-1] == NULL ||
	    RTA_PAYLOAD(tb[TCA_IPT_HOOK-1]) < sizeof(u32))
		return -EINVAL;

	/* the target is mandatory and must at least hold the entry header */
	if (tb[TCA_IPT_TARG-1] == NULL ||
	    RTA_PAYLOAD(tb[TCA_IPT_TARG-1]) < sizeof(*t))
		return -EINVAL;

	td = (struct ipt_entry_target *)RTA_DATA(tb[TCA_IPT_TARG-1]);

	/*
	 * target_size comes from user space and sizes the copy made below:
	 * it must cover the header that is dereferenced later and must not
	 * exceed the attribute payload.
	 */
	if (td->u.target_size < sizeof(*td) ||
	    RTA_PAYLOAD(tb[TCA_IPT_TARG-1]) < td->u.target_size)
		return -EINVAL;

	/* optional action index */
	if (tb[TCA_IPT_INDEX-1] != NULL &&
	    RTA_PAYLOAD(tb[TCA_IPT_INDEX-1]) >= sizeof(u32))
		index = *(u32 *)RTA_DATA(tb[TCA_IPT_INDEX-1]);

	/* look for an existing action with this index, else create one */
	pc = tcf_hash_check(index, a, bind, &ipt_hash_info);
	if (!pc) {
		pc = tcf_hash_create(index, est, a, sizeof(*ipt), bind,
				     &ipt_idx_gen, &ipt_hash_info);
		if (unlikely(!pc))
			return -ENOMEM;
		ret = ACT_P_CREATED;
	} else {
		/* the action exists: without ovr it must not be modified */
		if (!ovr) {
			tcf_ipt_release(to_ipt(pc), bind);
			return -EEXIST;
		}
	}

	ipt = to_ipt(pc);
	hook = *(u32 *)RTA_DATA(tb[TCA_IPT_HOOK-1]);

	err = -ENOMEM;
	tname = kmalloc(IFNAMSIZ, GFP_KERNEL);
	if (unlikely(!tname))
		goto err1;
	/*
	 * tcf_ipt_dump() exports all IFNAMSIZ bytes of this buffer, so do not
	 * leave anything behind the terminating NUL uninitialised.
	 */
	memset(tname, 0, IFNAMSIZ);

	/* table name from the attribute; "mangle" if absent or too long */
	if (tb[TCA_IPT_TABLE - 1] == NULL ||
	    rtattr_strlcpy(tname, tb[TCA_IPT_TABLE-1], IFNAMSIZ) >= IFNAMSIZ)
		strcpy(tname, "mangle");

	/* private copy of the target entry */
	t = kmalloc(td->u.target_size, GFP_KERNEL);
	if (unlikely(!t))
		goto err2;
	memcpy(t, td, td->u.target_size);

	/* resolve and validate the target */
	if ((err = ipt_init_target(t, tname, hook)) < 0)
		goto err3;

	spin_lock_bh(&ipt->tcf_lock);
	if (ret != ACT_P_CREATED) {
		/* updating: tear down the previously installed target */
		ipt_destroy_target(ipt->tcfi_t);
		kfree(ipt->tcfi_tname);
		kfree(ipt->tcfi_t);
	}
	ipt->tcfi_tname = tname;
	ipt->tcfi_t     = t;
	ipt->tcfi_hook  = hook;
	spin_unlock_bh(&ipt->tcf_lock);

	/* only a newly created action still has to be inserted into the hash */
	if (ret == ACT_P_CREATED)
		tcf_hash_insert(pc, &ipt_hash_info);
	return ret;

	/* error exits: undo the allocations in reverse order */
err3:
	kfree(t);
err2:
	kfree(tname);
err1:
	/*
	 * An action returned by tcf_hash_check() existed before this call
	 * and was not allocated here, so only free what we created.
	 */
	if (ret == ACT_P_CREATED)
		kfree(pc);
	return err;
}

//      
static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook)
{
 struct ipt_target *target;
 int ret = 0;
//       target
 target = xt_find_target(AF_INET, t->u.user.name, t->u.user.revision);
//       
 if (!target)
  return -ENOENT;
 t->u.kernel.target = target;
// target    ,        ,   , hook,      
 ret = xt_check_target(target, AF_INET, t->u.target_size - sizeof(*t),
         table, hook, 0, 0);
 if (ret)
  return ret;
//   target       
 if (t->u.kernel.target->checkentry
     && !t->u.kernel.target->checkentry(table, NULL,
                t->u.kernel.target, t->data,
            hook)) {
  module_put(t->u.kernel.target->me);
  ret = -EINVAL;
 }
 return ret;
}
 
8.7.3 アクション処理

static int tcf_ipt(struct sk_buff *skb, struct tc_action *a,
     struct tcf_result *res)
{
 int ret = 0, result = 0;
//     
 struct tcf_ipt *ipt = a->priv;
 if (skb_cloned(skb)) {
//       ,                   
  if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
   return TC_ACT_UNSPEC;
 }
 spin_lock(&ipt->tcf_lock);
//       
 ipt->tcf_tm.lastuse = jiffies;
 ipt->tcf_bstats.bytes += skb->len;
 ipt->tcf_bstats.packets++;
 /* yes, we have to worry about both in and out dev
  worry later - danger - this API seems to have changed
  from earlier kernels */
 /* iptables targets take a double skb pointer in case the skb
  * needs to be replaced. We don't own the skb, so this must not
  * happen. The pskb_expand_head above should make sure of this */
//   target  
 ret = ipt->tcfi_t->u.kernel.target->target(&skb, skb->dev, NULL,
         ipt->tcfi_hook,
         ipt->tcfi_t->u.kernel.target,
         ipt->tcfi_t->data);
 switch (ret) {
 case NF_ACCEPT:
//   
  result = TC_ACT_OK;
  break;
 case NF_DROP:
//   
  result = TC_ACT_SHOT;
  ipt->tcf_qstats.drops++;
  break;
 case IPT_CONTINUE:
//   
  result = TC_ACT_PIPE;
  break;
 default:
//       
  if (net_ratelimit())
   printk("Bogus netfilter code %d assume ACCEPT
", ret); result = TC_POLICE_OK; break; } spin_unlock(&ipt->tcf_lock); return result; } 8.7.4 static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { // unsigned char *b = skb->tail; // ipt struct tcf_ipt *ipt = a->priv; struct ipt_entry_target *t; struct tcf_t tm; struct tc_cnt c; /* for simple targets kernel size == user size ** user name = target name ** for foolproof you need to not assume this */ // target , t = kmalloc(ipt->tcfi_t->u.user.target_size, GFP_ATOMIC); if (unlikely(!t)) goto rtattr_failure; // c.bindcnt = ipt->tcf_bindcnt - bind; c.refcnt = ipt->tcf_refcnt - ref; // target memcpy(t, ipt->tcfi_t, ipt->tcfi_t->u.user.target_size); // target strcpy(t->u.user.name, ipt->tcfi_t->u.kernel.target->name); // target RTA_PUT(skb, TCA_IPT_TARG, ipt->tcfi_t->u.user.target_size, t); // RTA_PUT(skb, TCA_IPT_INDEX, 4, &ipt->tcf_index); // hook RTA_PUT(skb, TCA_IPT_HOOK, 4, &ipt->tcfi_hook); // RTA_PUT(skb, TCA_IPT_CNT, sizeof(struct tc_cnt), &c); // RTA_PUT(skb, TCA_IPT_TABLE, IFNAMSIZ, ipt->tcfi_tname); // tm.install = jiffies_to_clock_t(jiffies - ipt->tcf_tm.install); tm.lastuse = jiffies_to_clock_t(jiffies - ipt->tcf_tm.lastuse); tm.expires = jiffies_to_clock_t(ipt->tcf_tm.expires); RTA_PUT(skb, TCA_IPT_TM, sizeof (tm), &tm); // target kfree(t); return skb->len; rtattr_failure: skb_trim(skb, b - skb->data); kfree(t); return -1; } 8.7.5 // tcf_ipt_release static int tcf_ipt_cleanup(struct tc_action *a, int bind) { // ipt struct tcf_ipt *ipt = a->priv; return tcf_ipt_release(ipt, bind); } // ipt static int tcf_ipt_release(struct tcf_ipt *ipt, int bind) { int ret = 0; if (ipt) { // if (bind) ipt->tcf_bindcnt--; // ipt->tcf_refcnt--; // 0 ipt if (ipt->tcf_bindcnt <= 0 && ipt->tcf_refcnt <= 0) { // ipt_destroy_target(ipt->tcfi_t); // target kfree(ipt->tcfi_tname); // target kfree(ipt->tcfi_t); // tcf_hash_destroy(&ipt->common, &ipt_hash_info); ret = ACT_P_DELETED; } } return ret; } static void ipt_destroy_target(struct 
ipt_entry_target *t) { // target destroy , target if (t->u.kernel.target->destroy) t->u.kernel.target->destroy(t->u.kernel.target, t->data); // module module_put(t->u.kernel.target->me); } 8.8 gact(Generic actions) gact TC , net/sched/act_gact.c . 8.8.1 /* include/net/tc_act/tc_gact.h */ // GACT struct tcf_gact { struct tcf_common common; #ifdef CONFIG_GACT_PROB u16 tcfg_ptype; u16 tcfg_pval; int tcfg_paction; #endif }; #define to_gact(pc) \ container_of(pc, struct tcf_gact, common) /* include/linux/tc_act/tc_gact.h */ #define TCA_ACT_GACT 5 struct tc_gact { // TC tc_gen; }; #define tc_gen \ __u32 index; \ __u32 capab; \ int action; \ int refcnt; \ int bindcnt struct tc_gact_p { #define PGACT_NONE 0 #define PGACT_NETRAND 1 #define PGACT_DETERM 2 #define MAX_RAND (PGACT_DETERM + 1 ) __u16 ptype; __u16 pval; int paction; }; /* net/sched/act_gact.c */ // GACT static struct tcf_hashinfo gact_hash_info = { .htab = tcf_gact_ht, .hmask = GACT_TAB_MASK, .lock = &gact_lock, }; // gact static struct tc_action_ops act_gact_ops = { .kind = "gact", // .hinfo = &gact_hash_info, .type = TCA_ACT_GACT, .capab = TCA_CAP_NONE, .owner = THIS_MODULE, .act = tcf_gact, .dump = tcf_gact_dump, .cleanup = tcf_gact_cleanup, // .lookup = tcf_hash_search, .init = tcf_gact_init, // .walk = tcf_generic_walker }; 8.8.2 static int tcf_gact_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a, int ovr, int bind) { struct rtattr *tb[TCA_GACT_MAX]; struct tc_gact *parm; struct tcf_gact *gact; struct tcf_common *pc; int ret = 0; // , tb , if (rta == NULL || rtattr_parse_nested(tb, TCA_GACT_MAX, rta) < 0) return -EINVAL; // if (tb[TCA_GACT_PARMS - 1] == NULL || RTA_PAYLOAD(tb[TCA_GACT_PARMS - 1]) < sizeof(*parm)) return -EINVAL; parm = RTA_DATA(tb[TCA_GACT_PARMS - 1]); // PROB if (tb[TCA_GACT_PROB-1] != NULL) #ifdef CONFIG_GACT_PROB if (RTA_PAYLOAD(tb[TCA_GACT_PROB-1]) < sizeof(struct tc_gact_p)) return -EINVAL; #else return -EOPNOTSUPP; #endif // common pc = tcf_hash_check(parm->index, a, 
bind, &gact_hash_info); if (!pc) { // pc = tcf_hash_create(parm->index, est, a, sizeof(*gact), bind, &gact_idx_gen, &gact_hash_info); if (unlikely(!pc)) return -ENOMEM; ret = ACT_P_CREATED; } else { // , , if (!ovr) { tcf_hash_release(pc, bind, &gact_hash_info); return -EEXIST; } } // GACT gact = to_gact(pc); spin_lock_bh(&gact->tcf_lock); // GACT // gact->tcf_action = parm->action; #ifdef CONFIG_GACT_PROB if (tb[TCA_GACT_PROB-1] != NULL) { struct tc_gact_p *p_parm = RTA_DATA(tb[TCA_GACT_PROB-1]); gact->tcfg_paction = p_parm->paction; gact->tcfg_pval = p_parm->pval; gact->tcfg_ptype = p_parm->ptype; } #endif spin_unlock_bh(&gact->tcf_lock); // , if (ret == ACT_P_CREATED) tcf_hash_insert(pc, &gact_hash_info); return ret; } 8.8.3 static int tcf_gact(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) { // GACT a struct tcf_gact *gact = a->priv; // int action = TC_ACT_SHOT; spin_lock(&gact->tcf_lock); #ifdef CONFIG_GACT_PROB // GACT_PROB // gact_rand , , if (gact->tcfg_ptype && gact_rand[gact->tcfg_ptype] != NULL) action = gact_rand[gact->tcfg_ptype](gact); else action = gact->tcf_action; #else // TC action = gact->tcf_action; #endif // gact->tcf_bstats.bytes += skb->len; gact->tcf_bstats.packets++; // , if (action == TC_ACT_SHOT) gact->tcf_qstats.drops++; // gact->tcf_tm.lastuse = jiffies; spin_unlock(&gact->tcf_lock); return action; } gact_rand : typedef int (*g_rand)(struct tcf_gact *gact); static g_rand gact_rand[MAX_RAND]= { NULL, gact_net_rand, gact_determ }; // static int gact_net_rand(struct tcf_gact *gact) { // pval , 0 tcfg_paction , // tcfg_action if (!gact->tcfg_pval || net_random() % gact->tcfg_pval) return gact->tcf_action; return gact->tcfg_paction; } // static int gact_determ(struct tcf_gact *gact) { // pval , 0 tcfg_paction , // tcfg_action if (!gact->tcfg_pval || gact->tcf_bstats.packets % gact->tcfg_pval) return gact->tcf_action; return gact->tcfg_paction; } 8.8.4 static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int 
bind, int ref) { // unsigned char *b = skb->tail; // GACT struct tc_gact opt; // GACT struct tcf_gact *gact = a->priv; // struct tcf_t t; // GACT opt.index = gact->tcf_index; opt.refcnt = gact->tcf_refcnt - ref; opt.bindcnt = gact->tcf_bindcnt - bind; opt.action = gact->tcf_action; // skb RTA_PUT(skb, TCA_GACT_PARMS, sizeof(opt), &opt); #ifdef CONFIG_GACT_PROB // GACT_PROB if (gact->tcfg_ptype) { struct tc_gact_p p_opt; p_opt.paction = gact->tcfg_paction; p_opt.pval = gact->tcfg_pval; p_opt.ptype = gact->tcfg_ptype; RTA_PUT(skb, TCA_GACT_PROB, sizeof(p_opt), &p_opt); } #endif // t.install = jiffies_to_clock_t(jiffies - gact->tcf_tm.install); t.lastuse = jiffies_to_clock_t(jiffies - gact->tcf_tm.lastuse); t.expires = jiffies_to_clock_t(gact->tcf_tm.expires); // skb RTA_PUT(skb, TCA_GACT_TM, sizeof(t), &t); // , netlink return skb->len; rtattr_failure: skb_trim(skb, b - skb->data); return -1; } 8.8.5 // tcf_hash_release static int tcf_gact_cleanup(struct tc_action *a, int bind) { struct tcf_gact *gact = a->priv; if (gact) return tcf_hash_release(&gact->common, bind, &gact_hash_info); return 0; } 8.9 simple simple TC , net/sched/act_simple.c . 
8.9.1 /* net/sched/act_simple.c */ // simple static struct tcf_hashinfo simp_hash_info = { .htab = tcf_simp_ht, .hmask = SIMP_TAB_MASK, .lock = &simp_lock, }; // simple // lookup static struct tc_action_ops act_simp_ops = { .kind = "simple", .hinfo = &simp_hash_info, .type = TCA_ACT_SIMP, .capab = TCA_CAP_NONE, .owner = THIS_MODULE, .act = tcf_simp, .dump = tcf_simp_dump, .cleanup = tcf_simp_cleanup, .init = tcf_simp_init, // .walk = tcf_generic_walker, }; 8.9.2 static int tcf_simp_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a, int ovr, int bind) { struct rtattr *tb[TCA_DEF_MAX]; // , simple , , struct tc_defact *parm; struct tcf_defact *d; struct tcf_common *pc; void *defdata; u32 datalen = 0; int ret = 0; // , tb , if (rta == NULL || rtattr_parse_nested(tb, TCA_DEF_MAX, rta) < 0) return -EINVAL; // if (tb[TCA_DEF_PARMS - 1] == NULL || RTA_PAYLOAD(tb[TCA_DEF_PARMS - 1]) < sizeof(*parm)) return -EINVAL; // parm = RTA_DATA(tb[TCA_DEF_PARMS - 1]); defdata = RTA_DATA(tb[TCA_DEF_DATA - 1]); if (defdata == NULL) return -EINVAL; // datalen = RTA_PAYLOAD(tb[TCA_DEF_DATA - 1]); if (datalen <= 0) return -EINVAL; // common pc = tcf_hash_check(parm->index, a, bind, &simp_hash_info); if (!pc) { // pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind, &simp_idx_gen, &simp_hash_info); if (unlikely(!pc)) return -ENOMEM; d = to_defact(pc); // , defdata ret = alloc_defdata(d, datalen, defdata); if (ret < 0) { kfree(pc); return ret; } // ret = ACT_P_CREATED; } else { // d = to_defact(pc); // , , if (!ovr) { // simple tcf_simp_release(d, bind); return -EEXIST; } // , realloc_defdata(d, datalen, defdata); } spin_lock_bh(&d->tcf_lock); // d->tcf_action = parm->action; spin_unlock_bh(&d->tcf_lock); // , if (ret == ACT_P_CREATED) tcf_hash_insert(pc, &simp_hash_info); return ret; } // static int alloc_defdata(struct tcf_defact *d, u32 datalen, void *defdata) { // d->tcfd_defdata = kmalloc(datalen, GFP_KERNEL); if (unlikely(!d->tcfd_defdata)) return -ENOMEM; // 
d->tcfd_datalen = datalen; // memcpy(d->tcfd_defdata, defdata, datalen); return 0; } // static int realloc_defdata(struct tcf_defact *d, u32 datalen, void *defdata) { // defdata kfree(d->tcfd_defdata); // defdata return alloc_defdata(d, datalen, defdata); } 8.9.3 static int tcf_simp(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) { // simple , a struct tcf_defact *d = a->priv; spin_lock(&d->tcf_lock); // d->tcf_tm.lastuse = jiffies; // d->tcf_bstats.bytes += skb->len; d->tcf_bstats.packets++; /* print policy string followed by _ then packet count * Example if this was the 3rd packet and the string was "hello" * then it would look like "hello_3" (without quotes) **/ printk("simple: %s_%d
", (char *)d->tcfd_defdata, d->tcf_bstats.packets); spin_unlock(&d->tcf_lock); // return d->tcf_action; } 8.9.4 static inline int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { unsigned char *b = skb->tail; struct tcf_defact *d = a->priv; struct tc_defact opt; struct tcf_t t; // // opt.index = d->tcf_index; // opt.refcnt = d->tcf_refcnt - ref; // opt.bindcnt = d->tcf_bindcnt - bind; // opt.action = d->tcf_action; RTA_PUT(skb, TCA_DEF_PARMS, sizeof(opt), &opt); // defdata RTA_PUT(skb, TCA_DEF_DATA, d->tcfd_datalen, d->tcfd_defdata); // // t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install); // t.lastuse = jiffies_to_clock_t(jiffies - d->tcf_tm.lastuse); // t.expires = jiffies_to_clock_t(d->tcf_tm.expires); RTA_PUT(skb, TCA_DEF_TM, sizeof(t), &t); return skb->len; rtattr_failure: skb_trim(skb, b - skb->data); return -1; } 8.9.5 // tcf_simp_release static inline int tcf_simp_cleanup(struct tc_action *a, int bind) { struct tcf_defact *d = a->priv; if (d) return tcf_simp_release(d, bind); return 0; } // simple static int tcf_simp_release(struct tcf_defact *d, int bind) { int ret = 0; if (d) { // if (bind) d->tcf_bindcnt--; // d->tcf_refcnt--; // 0 if (d->tcf_bindcnt <= 0 && d->tcf_refcnt <= 0) { // defdata kfree(d->tcfd_defdata); // tcf_hash_destroy(&d->common, &simp_hash_info); ret = 1; } } return ret; } ...... ......