Linuxカーネル中の流量制御(23)
本文書のCopyrightはすべてyfydzに帰属します.GPLを使って発表します.自由にコピー、転載できます.転載する時は文書の完全性を維持してください.商業用途に使用することは厳禁です.
msn:[email protected]
ソース:http://yfydz.cublog.cn
msn:[email protected]
ソース:http://yfydz.cublog.cn
8.7 ipt
ipt アクションは、netfilter の target を流量制御(TC)のアクションとして実行できるようにするものである。コードは net/sched/act_ipt.c にある。
8.7.1
/* include/net/tc_act/tc_ipt.h */
// Per-instance state of the "ipt" action: wraps a netfilter target so
// it can be executed from the traffic-control action framework.
struct tcf_ipt {
// generic tc action state (index, refcounts, stats, lock, ...)
struct tcf_common common;
// netfilter hook number the target was validated against
u32 tcfi_hook;
// iptables table name (e.g. "mangle")
char *tcfi_tname;
// kernel copy of the target entry to execute
struct xt_entry_target *tcfi_t;
};
// map the embedded tcf_common back to its tcf_ipt container
#define to_ipt(pc) \
container_of(pc, struct tcf_ipt, common)
/* net/sched/act_ipt.c */
// Hash-table bookkeeping shared by all ipt action instances:
// the bucket array, its mask, and the protecting lock.
static struct tcf_hashinfo ipt_hash_info = {
.htab = tcf_ipt_ht,
.hmask = IPT_TAB_MASK,
.lock = &ipt_lock,
};
// Operations vector registered with the tc action framework for "ipt"
static struct tc_action_ops act_ipt_ops = {
// action name as matched from userspace
.kind = "ipt",
.hinfo = &ipt_hash_info,
// numeric action type identifier
.type = TCA_ACT_IPT,
.capab = TCA_CAP_NONE,
.owner = THIS_MODULE,
.act = tcf_ipt,
.dump = tcf_ipt_dump,
.cleanup = tcf_ipt_cleanup,
// lookup by index uses the generic hash search helper
.lookup = tcf_hash_search,
.init = tcf_ipt_init,
// enumeration of all instances uses the generic walker
.walk = tcf_generic_walker
};
8.7.2
/*
 * tcf_ipt_init - create or update an "ipt" action instance from
 * netlink attributes.
 *
 * Returns ACT_P_CREATED when a new instance was created, 0 when an
 * existing one was overridden, or a negative errno.
 */
static int tcf_ipt_init(struct rtattr *rta, struct rtattr *est,
			struct tc_action *a, int ovr, int bind)
{
	struct rtattr *tb[TCA_IPT_MAX];
	struct tcf_ipt *ipt;
	struct tcf_common *pc;
	struct ipt_entry_target *td, *t;
	char *tname;
	int ret = 0, err;
	u32 hook = 0;
	u32 index = 0;

	/* parse the nested attribute block */
	if (rta == NULL || rtattr_parse_nested(tb, TCA_IPT_MAX, rta) < 0)
		return -EINVAL;

	/* the netfilter hook number is mandatory */
	if (tb[TCA_IPT_HOOK-1] == NULL ||
	    RTA_PAYLOAD(tb[TCA_IPT_HOOK-1]) < sizeof(u32))
		return -EINVAL;

	/* the target blob is mandatory as well */
	if (tb[TCA_IPT_TARG-1] == NULL ||
	    RTA_PAYLOAD(tb[TCA_IPT_TARG-1]) < sizeof(*t))
		return -EINVAL;

	td = (struct ipt_entry_target *)RTA_DATA(tb[TCA_IPT_TARG-1]);

	/* the attribute must cover the target's self-declared size */
	if (RTA_PAYLOAD(tb[TCA_IPT_TARG-1]) < td->u.target_size)
		return -EINVAL;

	/* optional explicit index for the action instance */
	if (tb[TCA_IPT_INDEX-1] != NULL &&
	    RTA_PAYLOAD(tb[TCA_IPT_INDEX-1]) >= sizeof(u32))
		index = *(u32 *)RTA_DATA(tb[TCA_IPT_INDEX-1]);

	/* look the instance up; on success a->priv is bound to it */
	pc = tcf_hash_check(index, a, bind, &ipt_hash_info);
	if (!pc) {
		/* no such instance yet: allocate the common part */
		pc = tcf_hash_create(index, est, a, sizeof(*ipt), bind,
				     &ipt_idx_gen, &ipt_hash_info);
		if (unlikely(!pc))
			return -ENOMEM;
		ret = ACT_P_CREATED;
	} else {
		/* instance exists: only touch it when override was asked */
		if (!ovr) {
			tcf_ipt_release(to_ipt(pc), bind);
			return -EEXIST;
		}
	}
	ipt = to_ipt(pc);

	hook = *(u32 *)RTA_DATA(tb[TCA_IPT_HOOK-1]);

	err = -ENOMEM;
	tname = kmalloc(IFNAMSIZ, GFP_KERNEL);
	if (unlikely(!tname))
		goto err1;
	/* table name defaults to "mangle" when absent or over-long */
	if (tb[TCA_IPT_TABLE - 1] == NULL ||
	    rtattr_strlcpy(tname, tb[TCA_IPT_TABLE-1], IFNAMSIZ) >= IFNAMSIZ)
		strcpy(tname, "mangle");

	/* take a private copy of the userspace target blob */
	t = kmalloc(td->u.target_size, GFP_KERNEL);
	if (unlikely(!t))
		goto err2;
	memcpy(t, td, td->u.target_size);

	/* resolve and validate the netfilter target */
	if ((err = ipt_init_target(t, tname, hook)) < 0)
		goto err3;

	spin_lock_bh(&ipt->tcf_lock);
	if (ret != ACT_P_CREATED) {
		/* overriding an existing instance: drop the old target */
		ipt_destroy_target(ipt->tcfi_t);
		kfree(ipt->tcfi_tname);
		kfree(ipt->tcfi_t);
	}
	ipt->tcfi_tname = tname;
	ipt->tcfi_t = t;
	ipt->tcfi_hook = hook;
	spin_unlock_bh(&ipt->tcf_lock);

	/* a freshly created instance still has to enter the hash */
	if (ret == ACT_P_CREATED)
		tcf_hash_insert(pc, &ipt_hash_info);
	return ret;

	/* error unwind */
err3:
	kfree(t);
err2:
	kfree(tname);
err1:
	/* only free pc when it was allocated above; a pre-existing
	 * action must not be freed out from under its other users */
	if (ret == ACT_P_CREATED)
		kfree(pc);
	return err;
}
// Resolve and validate the netfilter target named in 't' so it can be
// invoked later from tcf_ipt(). Returns 0 on success, negative errno
// on failure.
static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook)
{
struct ipt_target *target;
int ret = 0;
// look the target up by name and revision
target = xt_find_target(AF_INET, t->u.user.name, t->u.user.revision);
// target not available
if (!target)
return -ENOENT;
t->u.kernel.target = target;
// generic x_tables sanity checks: payload size, table name, hook
ret = xt_check_target(target, AF_INET, t->u.target_size - sizeof(*t),
table, hook, 0, 0);
if (ret)
return ret;
// run the target's own validation hook, if it provides one
if (t->u.kernel.target->checkentry
&& !t->u.kernel.target->checkentry(table, NULL,
t->u.kernel.target, t->data,
hook)) {
// validation failed: drop the module reference again
module_put(t->u.kernel.target->me);
ret = -EINVAL;
}
return ret;
}
8.7.3
/*
 * tcf_ipt - execute the configured netfilter target on a packet and
 * map its verdict onto a tc action result.
 */
static int tcf_ipt(struct sk_buff *skb, struct tc_action *a,
		   struct tcf_result *res)
{
	int ret = 0, result = 0;
	struct tcf_ipt *ipt = a->priv;

	/* the target may modify the packet, so unshare cloned skbs */
	if (skb_cloned(skb)) {
		if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
			return TC_ACT_UNSPEC;
	}

	spin_lock(&ipt->tcf_lock);

	/* account the packet before running the target */
	ipt->tcf_tm.lastuse = jiffies;
	ipt->tcf_bstats.bytes += skb->len;
	ipt->tcf_bstats.packets++;

	/* yes, we have to worry about both in and out dev
	   worry later - danger - this API seems to have changed
	   from earlier kernels */

	/* iptables targets take a double skb pointer in case the skb
	 * needs to be replaced. We don't own the skb, so this must not
	 * happen. The pskb_expand_head above should make sure of this */
	ret = ipt->tcfi_t->u.kernel.target->target(&skb, skb->dev, NULL,
						   ipt->tcfi_hook,
						   ipt->tcfi_t->u.kernel.target,
						   ipt->tcfi_t->data);
	/* translate the netfilter verdict into a tc result */
	switch (ret) {
	case NF_ACCEPT:
		result = TC_ACT_OK;
		break;
	case NF_DROP:
		result = TC_ACT_SHOT;
		ipt->tcf_qstats.drops++;
		break;
	case IPT_CONTINUE:
		result = TC_ACT_PIPE;
		break;
	default:
		/* unknown verdict: warn (rate-limited) and accept */
		if (net_ratelimit())
			printk("Bogus netfilter code %d assume ACCEPT\n", ret);
		result = TC_POLICE_OK;
		break;
	}
	spin_unlock(&ipt->tcf_lock);
	return result;
}
8.7.4
// Dump the ipt action configuration into a netlink message.
static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
{
// remember the current tail so a failed dump can be rolled back
unsigned char *b = skb->tail;
// ipt-specific state hangs off a->priv
struct tcf_ipt *ipt = a->priv;
struct ipt_entry_target *t;
struct tcf_t tm;
struct tc_cnt c;
/* for simple targets kernel size == user size
** user name = target name
** for foolproof you need to not assume this
*/
// work on a copy so the user-visible name can be written into the
// union without disturbing the live entry
t = kmalloc(ipt->tcfi_t->u.user.target_size, GFP_ATOMIC);
if (unlikely(!t))
goto rtattr_failure;
// report counts minus the caller's own reference/binding
c.bindcnt = ipt->tcf_bindcnt - bind;
c.refcnt = ipt->tcf_refcnt - ref;
memcpy(t, ipt->tcfi_t, ipt->tcfi_t->u.user.target_size);
// expose the resolved kernel target name to userspace
strcpy(t->u.user.name, ipt->tcfi_t->u.kernel.target->name);
// emit target blob, index, hook, counters and table name
RTA_PUT(skb, TCA_IPT_TARG, ipt->tcfi_t->u.user.target_size, t);
RTA_PUT(skb, TCA_IPT_INDEX, 4, &ipt->tcf_index);
RTA_PUT(skb, TCA_IPT_HOOK, 4, &ipt->tcfi_hook);
RTA_PUT(skb, TCA_IPT_CNT, sizeof(struct tc_cnt), &c);
RTA_PUT(skb, TCA_IPT_TABLE, IFNAMSIZ, ipt->tcfi_tname);
// timestamps are reported relative to now, in clock_t units
tm.install = jiffies_to_clock_t(jiffies - ipt->tcf_tm.install);
tm.lastuse = jiffies_to_clock_t(jiffies - ipt->tcf_tm.lastuse);
tm.expires = jiffies_to_clock_t(ipt->tcf_tm.expires);
RTA_PUT(skb, TCA_IPT_TM, sizeof (tm), &tm);
kfree(t);
return skb->len;
rtattr_failure:
// roll back partially written attributes; kfree(NULL) is a no-op
// when the kmalloc above failed
skb_trim(skb, b - skb->data);
kfree(t);
return -1;
}
8.7.5
/* Framework cleanup hook for "ipt": just drop the caller's
 * reference via tcf_ipt_release(). */
static int tcf_ipt_cleanup(struct tc_action *a, int bind)
{
	return tcf_ipt_release(a->priv, bind);
}
// Drop one reference (and one binding when @bind is set) on @ipt;
// when both counts fall to zero the instance and its target are
// destroyed. Returns ACT_P_DELETED if the instance was freed.
static int tcf_ipt_release(struct tcf_ipt *ipt, int bind)
{
int ret = 0;
if (ipt) {
// bindings are counted separately from plain references
if (bind)
ipt->tcf_bindcnt--;
ipt->tcf_refcnt--;
// last user gone: tear the instance down
if (ipt->tcf_bindcnt <= 0 && ipt->tcf_refcnt <= 0) {
// let the netfilter target clean up and drop its module ref
ipt_destroy_target(ipt->tcfi_t);
// free the table name copy
kfree(ipt->tcfi_tname);
// free the target blob copy
kfree(ipt->tcfi_t);
// unlink from the hash and free the common part
tcf_hash_destroy(&ipt->common, &ipt_hash_info);
ret = ACT_P_DELETED;
}
}
return ret;
}
// Release a resolved target entry: run its destructor (if any) and
// drop the module reference taken when it was looked up.
static void ipt_destroy_target(struct ipt_entry_target *t)
{
// give the target a chance to free its private state
if (t->u.kernel.target->destroy)
t->u.kernel.target->destroy(t->u.kernel.target, t->data);
// balance the reference taken by xt_find_target()
module_put(t->u.kernel.target->me);
}
8.8 gact(Generic actions)
gact は TC の汎用アクション(Generic Action)であり、コードは net/sched/act_gact.c にある。
8.8.1
/* include/net/tc_act/tc_gact.h */
// Per-instance state of the generic action ("gact")
struct tcf_gact {
struct tcf_common common;
#ifdef CONFIG_GACT_PROB
// picker type: PGACT_NETRAND or PGACT_DETERM (0 = disabled)
u16 tcfg_ptype;
// modulus used by the random/deterministic pickers
u16 tcfg_pval;
// alternative action returned when the picker fires
int tcfg_paction;
#endif
};
// map the embedded tcf_common back to its tcf_gact container
#define to_gact(pc) \
container_of(pc, struct tcf_gact, common)
/* include/linux/tc_act/tc_gact.h */
#define TCA_ACT_GACT 5
// netlink parameter block for gact
struct tc_gact
{
// common action parameters (expanded from tc_gen below)
tc_gen;
};
// fields shared by all tc action parameter structures
#define tc_gen \
__u32 index; \
__u32 capab; \
int action; \
int refcnt; \
int bindcnt
// optional probability parameters for gact
struct tc_gact_p
{
#define PGACT_NONE 0
#define PGACT_NETRAND 1
#define PGACT_DETERM 2
#define MAX_RAND (PGACT_DETERM + 1 )
__u16 ptype;
__u16 pval;
int paction;
};
/* net/sched/act_gact.c */
// Hash-table bookkeeping shared by all gact instances
static struct tcf_hashinfo gact_hash_info = {
.htab = tcf_gact_ht,
.hmask = GACT_TAB_MASK,
.lock = &gact_lock,
};
// Operations vector registered with the tc action framework for "gact"
static struct tc_action_ops act_gact_ops = {
.kind = "gact",
// hash table descriptor
.hinfo = &gact_hash_info,
.type = TCA_ACT_GACT,
.capab = TCA_CAP_NONE,
.owner = THIS_MODULE,
.act = tcf_gact,
.dump = tcf_gact_dump,
.cleanup = tcf_gact_cleanup,
// lookup by index uses the generic hash search helper
.lookup = tcf_hash_search,
.init = tcf_gact_init,
// enumeration of all instances uses the generic walker
.walk = tcf_generic_walker
};
8.8.2
/*
 * tcf_gact_init - create or update a "gact" instance from netlink
 * attributes.  Returns ACT_P_CREATED for a new instance, 0 when an
 * existing one was overridden, or a negative errno.
 */
static int tcf_gact_init(struct rtattr *rta, struct rtattr *est,
			 struct tc_action *a, int ovr, int bind)
{
	struct rtattr *tb[TCA_GACT_MAX];
	struct tc_gact *parm;
	struct tcf_gact *gact;
	struct tcf_common *pc;
	int ret = 0;

	/* parse the nested attribute block */
	if (rta == NULL || rtattr_parse_nested(tb, TCA_GACT_MAX, rta) < 0)
		return -EINVAL;

	/* generic parameters are mandatory */
	if (tb[TCA_GACT_PARMS - 1] == NULL ||
	    RTA_PAYLOAD(tb[TCA_GACT_PARMS - 1]) < sizeof(*parm))
		return -EINVAL;
	parm = RTA_DATA(tb[TCA_GACT_PARMS - 1]);

	/* probability parameters are optional and only supported when
	 * built with CONFIG_GACT_PROB; braces make the conditionally
	 * compiled body unambiguous */
	if (tb[TCA_GACT_PROB-1] != NULL) {
#ifdef CONFIG_GACT_PROB
		if (RTA_PAYLOAD(tb[TCA_GACT_PROB-1]) < sizeof(struct tc_gact_p))
			return -EINVAL;
#else
		return -EOPNOTSUPP;
#endif
	}

	/* look the instance up; on success a->priv is bound to it */
	pc = tcf_hash_check(parm->index, a, bind, &gact_hash_info);
	if (!pc) {
		/* no such instance yet: allocate the common part */
		pc = tcf_hash_create(parm->index, est, a, sizeof(*gact),
				     bind, &gact_idx_gen, &gact_hash_info);
		if (unlikely(!pc))
			return -ENOMEM;
		ret = ACT_P_CREATED;
	} else {
		/* instance exists: only touch it when override was asked */
		if (!ovr) {
			tcf_hash_release(pc, bind, &gact_hash_info);
			return -EEXIST;
		}
	}

	gact = to_gact(pc);

	/* update the parameters under the instance lock */
	spin_lock_bh(&gact->tcf_lock);
	gact->tcf_action = parm->action;
#ifdef CONFIG_GACT_PROB
	if (tb[TCA_GACT_PROB-1] != NULL) {
		struct tc_gact_p *p_parm = RTA_DATA(tb[TCA_GACT_PROB-1]);
		gact->tcfg_paction = p_parm->paction;
		gact->tcfg_pval = p_parm->pval;
		gact->tcfg_ptype = p_parm->ptype;
	}
#endif
	spin_unlock_bh(&gact->tcf_lock);

	/* a freshly created instance still has to enter the hash */
	if (ret == ACT_P_CREATED)
		tcf_hash_insert(pc, &gact_hash_info);
	return ret;
}
8.8.3
// Execute the generic action on a packet: pick a verdict (optionally
// via a probability picker) and update statistics.
static int tcf_gact(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res)
{
// gact state hangs off a->priv
struct tcf_gact *gact = a->priv;
// default verdict if nothing else is chosen
int action = TC_ACT_SHOT;
spin_lock(&gact->tcf_lock);
#ifdef CONFIG_GACT_PROB
// probability mode: let the configured picker (random or
// deterministic) choose between the normal and alternative action
if (gact->tcfg_ptype && gact_rand[gact->tcfg_ptype] != NULL)
action = gact_rand[gact->tcfg_ptype](gact);
else
action = gact->tcf_action;
#else
// no probability support built in: always the configured action
action = gact->tcf_action;
#endif
// update byte/packet statistics
gact->tcf_bstats.bytes += skb->len;
gact->tcf_bstats.packets++;
// dropped packets are counted separately
if (action == TC_ACT_SHOT)
gact->tcf_qstats.drops++;
// remember when the action last ran
gact->tcf_tm.lastuse = jiffies;
spin_unlock(&gact->tcf_lock);
return action;
}
gact_rand 配列は以下のように定義される:
// picker function type and dispatch table, indexed by tcfg_ptype
typedef int (*g_rand)(struct tcf_gact *gact);
static g_rand gact_rand[MAX_RAND]= { NULL, gact_net_rand, gact_determ };
/* Random picker: roughly one in tcfg_pval packets gets the
 * alternative action; all others get the configured one.
 * A zero modulus disables the picker entirely. */
static int gact_net_rand(struct tcf_gact *gact)
{
	if (gact->tcfg_pval && net_random() % gact->tcfg_pval == 0)
		return gact->tcfg_paction;
	return gact->tcf_action;
}
/* Deterministic picker: every tcfg_pval-th packet (by the byte/packet
 * counter) gets the alternative action; all others get the configured
 * one.  A zero modulus disables the picker entirely. */
static int gact_determ(struct tcf_gact *gact)
{
	if (gact->tcfg_pval && gact->tcf_bstats.packets % gact->tcfg_pval == 0)
		return gact->tcfg_paction;
	return gact->tcf_action;
}
8.8.4
// Dump the gact configuration and timestamps into a netlink message.
static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
{
// remember the tail so a failed dump can be rolled back
unsigned char *b = skb->tail;
// parameter block sent to userspace
struct tc_gact opt;
// gact state hangs off a->priv
struct tcf_gact *gact = a->priv;
// timestamp block sent to userspace
struct tcf_t t;
// fill in the generic parameters, discounting the caller's own
// reference/binding
opt.index = gact->tcf_index;
opt.refcnt = gact->tcf_refcnt - ref;
opt.bindcnt = gact->tcf_bindcnt - bind;
opt.action = gact->tcf_action;
// emit the parameter block
RTA_PUT(skb, TCA_GACT_PARMS, sizeof(opt), &opt);
#ifdef CONFIG_GACT_PROB
// add probability parameters only when a picker is configured
if (gact->tcfg_ptype) {
struct tc_gact_p p_opt;
p_opt.paction = gact->tcfg_paction;
p_opt.pval = gact->tcfg_pval;
p_opt.ptype = gact->tcfg_ptype;
RTA_PUT(skb, TCA_GACT_PROB, sizeof(p_opt), &p_opt);
}
#endif
// timestamps are reported relative to now, in clock_t units
t.install = jiffies_to_clock_t(jiffies - gact->tcf_tm.install);
t.lastuse = jiffies_to_clock_t(jiffies - gact->tcf_tm.lastuse);
t.expires = jiffies_to_clock_t(gact->tcf_tm.expires);
// emit the timestamp block
RTA_PUT(skb, TCA_GACT_TM, sizeof(t), &t);
// success: return the total message length
return skb->len;
rtattr_failure:
// roll back partially written attributes
skb_trim(skb, b - skb->data);
return -1;
}
8.8.5
/* Framework cleanup hook for "gact": release the caller's reference
 * on the instance, if there is one. */
static int tcf_gact_cleanup(struct tc_action *a, int bind)
{
	struct tcf_gact *gact = a->priv;

	if (!gact)
		return 0;
	return tcf_hash_release(&gact->common, bind, &gact_hash_info);
}
8.9 simple
simple は TC のアクションの簡単な例であり、コードは net/sched/act_simple.c にある。
8.9.1
/* net/sched/act_simple.c */
// Hash-table bookkeeping shared by all simple action instances
static struct tcf_hashinfo simp_hash_info = {
.htab = tcf_simp_ht,
.hmask = SIMP_TAB_MASK,
.lock = &simp_lock,
};
// Operations vector for the "simple" action.
// Note: no .lookup is provided here, unlike ipt/gact.
static struct tc_action_ops act_simp_ops = {
.kind = "simple",
.hinfo = &simp_hash_info,
.type = TCA_ACT_SIMP,
.capab = TCA_CAP_NONE,
.owner = THIS_MODULE,
.act = tcf_simp,
.dump = tcf_simp_dump,
.cleanup = tcf_simp_cleanup,
.init = tcf_simp_init,
// enumeration of all instances uses the generic walker
.walk = tcf_generic_walker,
};
8.9.2
/*
 * tcf_simp_init - create or update a "simple" action instance from
 * netlink attributes.
 *
 * Returns ACT_P_CREATED when a new instance was created, 0 when an
 * existing one was overridden, or a negative errno.
 */
static int tcf_simp_init(struct rtattr *rta, struct rtattr *est,
			 struct tc_action *a, int ovr, int bind)
{
	struct rtattr *tb[TCA_DEF_MAX];
	struct tc_defact *parm;
	struct tcf_defact *d;
	struct tcf_common *pc;
	void *defdata;
	u32 datalen = 0;
	int ret = 0;

	/* parse the nested attribute block */
	if (rta == NULL || rtattr_parse_nested(tb, TCA_DEF_MAX, rta) < 0)
		return -EINVAL;

	/* generic parameters are mandatory */
	if (tb[TCA_DEF_PARMS - 1] == NULL ||
	    RTA_PAYLOAD(tb[TCA_DEF_PARMS - 1]) < sizeof(*parm))
		return -EINVAL;

	/* the policy data is mandatory too; test the attribute pointer
	 * itself: RTA_DATA() applied to a missing (NULL) attribute
	 * yields a small non-NULL offset, so the previous
	 * `defdata == NULL' check could never detect an absent
	 * attribute */
	if (tb[TCA_DEF_DATA - 1] == NULL)
		return -EINVAL;

	parm = RTA_DATA(tb[TCA_DEF_PARMS - 1]);
	defdata = RTA_DATA(tb[TCA_DEF_DATA - 1]);
	/* reject empty policy data (datalen is unsigned) */
	datalen = RTA_PAYLOAD(tb[TCA_DEF_DATA - 1]);
	if (datalen == 0)
		return -EINVAL;

	/* look the instance up; on success a->priv is bound to it */
	pc = tcf_hash_check(parm->index, a, bind, &simp_hash_info);
	if (!pc) {
		/* no such instance yet: allocate the common part */
		pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind,
				     &simp_idx_gen, &simp_hash_info);
		if (unlikely(!pc))
			return -ENOMEM;
		d = to_defact(pc);
		/* attach a private copy of the policy data */
		ret = alloc_defdata(d, datalen, defdata);
		if (ret < 0) {
			kfree(pc);
			return ret;
		}
		ret = ACT_P_CREATED;
	} else {
		d = to_defact(pc);
		/* instance exists: only touch it when override was asked */
		if (!ovr) {
			tcf_simp_release(d, bind);
			return -EEXIST;
		}
		/* override: replace the policy data */
		realloc_defdata(d, datalen, defdata);
	}

	/* update the verdict under the instance lock */
	spin_lock_bh(&d->tcf_lock);
	d->tcf_action = parm->action;
	spin_unlock_bh(&d->tcf_lock);

	/* a freshly created instance still has to enter the hash */
	if (ret == ACT_P_CREATED)
		tcf_hash_insert(pc, &simp_hash_info);
	return ret;
}
// Allocate and fill a private copy of the policy data for @d.
// Returns 0 on success, -ENOMEM when the allocation fails (in which
// case d->tcfd_defdata has been set to NULL by the failed kmalloc).
static int alloc_defdata(struct tcf_defact *d, u32 datalen, void *defdata)
{
// allocate the buffer directly into the instance field
d->tcfd_defdata = kmalloc(datalen, GFP_KERNEL);
if (unlikely(!d->tcfd_defdata))
return -ENOMEM;
// record the length
d->tcfd_datalen = datalen;
// copy the caller's data
memcpy(d->tcfd_defdata, defdata, datalen);
return 0;
}
// Replace the existing policy data with a fresh copy of @defdata.
static int realloc_defdata(struct tcf_defact *d, u32 datalen, void *defdata)
{
// free the old copy first
kfree(d->tcfd_defdata);
// then allocate and fill the new one
return alloc_defdata(d, datalen, defdata);
}
8.9.3
/*
 * tcf_simp - execute the "simple" action: log the configured policy
 * string with the running packet count, then return the configured
 * control verdict.
 */
static int tcf_simp(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res)
{
	struct tcf_defact *d = a->priv;

	spin_lock(&d->tcf_lock);
	/* account the packet */
	d->tcf_tm.lastuse = jiffies;
	d->tcf_bstats.bytes += skb->len;
	d->tcf_bstats.packets++;

	/* print policy string followed by _ then packet count
	 * Example if this was the 3rd packet and the string was "hello"
	 * then it would look like "hello_3" (without quotes)
	 **/
	printk("simple: %s_%d\n",
	       (char *)d->tcfd_defdata, d->tcf_bstats.packets);
	spin_unlock(&d->tcf_lock);

	/* the verdict is whatever was configured at init time */
	return d->tcf_action;
}
8.9.4
// Dump the simple action configuration into a netlink message.
static inline int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a,
int bind, int ref)
{
// remember the tail so a failed dump can be rolled back
unsigned char *b = skb->tail;
struct tcf_defact *d = a->priv;
struct tc_defact opt;
struct tcf_t t;
// fill in the generic parameters
opt.index = d->tcf_index;
// discount the caller's own reference
opt.refcnt = d->tcf_refcnt - ref;
// and the caller's own binding
opt.bindcnt = d->tcf_bindcnt - bind;
// configured verdict
opt.action = d->tcf_action;
RTA_PUT(skb, TCA_DEF_PARMS, sizeof(opt), &opt);
// emit the raw policy data
RTA_PUT(skb, TCA_DEF_DATA, d->tcfd_datalen, d->tcfd_defdata);
// timestamps are reported relative to now, in clock_t units
t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install);
// time of last use
t.lastuse = jiffies_to_clock_t(jiffies - d->tcf_tm.lastuse);
// expiry time
t.expires = jiffies_to_clock_t(d->tcf_tm.expires);
RTA_PUT(skb, TCA_DEF_TM, sizeof(t), &t);
return skb->len;
rtattr_failure:
// roll back partially written attributes
skb_trim(skb, b - skb->data);
return -1;
}
8.9.5
/* Framework cleanup hook for "simple": drop the caller's reference
 * via tcf_simp_release(). */
static inline int tcf_simp_cleanup(struct tc_action *a, int bind)
{
	struct tcf_defact *d = a->priv;

	return d ? tcf_simp_release(d, bind) : 0;
}
// Drop one reference (and one binding when @bind is set) on the
// simple action; destroy it once both counts reach zero.
// Returns 1 when the instance was freed, 0 otherwise.
static int tcf_simp_release(struct tcf_defact *d, int bind)
{
int ret = 0;
if (d) {
// bindings are counted separately from plain references
if (bind)
d->tcf_bindcnt--;
d->tcf_refcnt--;
// last user gone: free the instance
if (d->tcf_bindcnt <= 0 && d->tcf_refcnt <= 0) {
// free the private copy of the policy data
kfree(d->tcfd_defdata);
// unlink from the hash and free the common part
tcf_hash_destroy(&d->common, &simp_hash_info);
ret = 1;
}
}
return ret;
}
...... ......