Linux-2.6.12-rc2
Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.
Let it rip!
diff --git a/net/sched/police.c b/net/sched/police.c
new file mode 100644
index 0000000..c03545f
--- /dev/null
+++ b/net/sched/police.c
@@ -0,0 +1,612 @@
+/*
+ * net/sched/police.c Input police filter.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ * J Hadi Salim (action changes)
+ */
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <linux/bitops.h>
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/module.h>
+#include <linux/rtnetlink.h>
+#include <linux/init.h>
+#include <net/sock.h>
+#include <net/act_api.h>
+
+#define L2T(p,L) ((p)->R_tab->data[(L)>>(p)->R_tab->rate.cell_log])
+#define L2T_P(p,L) ((p)->P_tab->data[(L)>>(p)->P_tab->rate.cell_log])
+#define PRIV(a) ((struct tcf_police *) (a)->priv)
+
+/* use generic hash table */
+#define MY_TAB_SIZE 16
+#define MY_TAB_MASK 15
+static u32 idx_gen;
+static struct tcf_police *tcf_police_ht[MY_TAB_SIZE];
+/* Policer hash table lock */
+static DEFINE_RWLOCK(police_lock);
+
+/* Each policer is serialized by its individual spinlock */
+
+static __inline__ unsigned tcf_police_hash(u32 index)
+{
+ return index&0xF;
+}
+
+static __inline__ struct tcf_police * tcf_police_lookup(u32 index)
+{
+ struct tcf_police *p;
+
+ read_lock(&police_lock);
+ for (p = tcf_police_ht[tcf_police_hash(index)]; p; p = p->next) {
+ if (p->index == index)
+ break;
+ }
+ read_unlock(&police_lock);
+ return p;
+}
+
+#ifdef CONFIG_NET_CLS_ACT
+static int tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb,
+ int type, struct tc_action *a)
+{
+ struct tcf_police *p;
+ int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
+ struct rtattr *r;
+
+ read_lock(&police_lock);
+
+ s_i = cb->args[0];
+
+ for (i = 0; i < MY_TAB_SIZE; i++) {
+ p = tcf_police_ht[tcf_police_hash(i)];
+
+ for (; p; p = p->next) {
+ index++;
+ if (index < s_i)
+ continue;
+ a->priv = p;
+ a->order = index;
+ r = (struct rtattr*) skb->tail;
+ RTA_PUT(skb, a->order, 0, NULL);
+ if (type == RTM_DELACTION)
+ err = tcf_action_dump_1(skb, a, 0, 1);
+ else
+ err = tcf_action_dump_1(skb, a, 0, 0);
+ if (err < 0) {
+ index--;
+ skb_trim(skb, (u8*)r - skb->data);
+ goto done;
+ }
+ r->rta_len = skb->tail - (u8*)r;
+ n_i++;
+ }
+ }
+done:
+ read_unlock(&police_lock);
+ if (n_i)
+ cb->args[0] += n_i;
+ return n_i;
+
+rtattr_failure:
+ skb_trim(skb, (u8*)r - skb->data);
+ goto done;
+}
+
+static inline int
+tcf_hash_search(struct tc_action *a, u32 index)
+{
+ struct tcf_police *p = tcf_police_lookup(index);
+
+ if (p != NULL) {
+ a->priv = p;
+ return 1;
+ } else {
+ return 0;
+ }
+}
+#endif
+
+static inline u32 tcf_police_new_index(void)
+{
+ do {
+ if (++idx_gen == 0)
+ idx_gen = 1;
+ } while (tcf_police_lookup(idx_gen));
+
+ return idx_gen;
+}
+
+void tcf_police_destroy(struct tcf_police *p)
+{
+ unsigned h = tcf_police_hash(p->index);
+ struct tcf_police **p1p;
+
+ for (p1p = &tcf_police_ht[h]; *p1p; p1p = &(*p1p)->next) {
+ if (*p1p == p) {
+ write_lock_bh(&police_lock);
+ *p1p = p->next;
+ write_unlock_bh(&police_lock);
+#ifdef CONFIG_NET_ESTIMATOR
+ gen_kill_estimator(&p->bstats, &p->rate_est);
+#endif
+ if (p->R_tab)
+ qdisc_put_rtab(p->R_tab);
+ if (p->P_tab)
+ qdisc_put_rtab(p->P_tab);
+ kfree(p);
+ return;
+ }
+ }
+ BUG_TRAP(0);
+}
+
+#ifdef CONFIG_NET_CLS_ACT
+static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est,
+ struct tc_action *a, int ovr, int bind)
+{
+ unsigned h;
+ int ret = 0, err;
+ struct rtattr *tb[TCA_POLICE_MAX];
+ struct tc_police *parm;
+ struct tcf_police *p;
+ struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL;
+
+ if (rta == NULL || rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0)
+ return -EINVAL;
+
+ if (tb[TCA_POLICE_TBF-1] == NULL ||
+ RTA_PAYLOAD(tb[TCA_POLICE_TBF-1]) != sizeof(*parm))
+ return -EINVAL;
+ parm = RTA_DATA(tb[TCA_POLICE_TBF-1]);
+
+ if (tb[TCA_POLICE_RESULT-1] != NULL &&
+ RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32))
+ return -EINVAL;
+ if (tb[TCA_POLICE_RESULT-1] != NULL &&
+ RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32))
+ return -EINVAL;
+
+ if (parm->index && (p = tcf_police_lookup(parm->index)) != NULL) {
+ a->priv = p;
+ if (bind) {
+ p->bindcnt += 1;
+ p->refcnt += 1;
+ }
+ if (ovr)
+ goto override;
+ return ret;
+ }
+
+ p = kmalloc(sizeof(*p), GFP_KERNEL);
+ if (p == NULL)
+ return -ENOMEM;
+ memset(p, 0, sizeof(*p));
+
+ ret = ACT_P_CREATED;
+ p->refcnt = 1;
+ spin_lock_init(&p->lock);
+ p->stats_lock = &p->lock;
+ if (bind)
+ p->bindcnt = 1;
+override:
+ if (parm->rate.rate) {
+ err = -ENOMEM;
+ R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]);
+ if (R_tab == NULL)
+ goto failure;
+ if (parm->peakrate.rate) {
+ P_tab = qdisc_get_rtab(&parm->peakrate,
+ tb[TCA_POLICE_PEAKRATE-1]);
+ if (p->P_tab == NULL) {
+ qdisc_put_rtab(R_tab);
+ goto failure;
+ }
+ }
+ }
+ /* No failure allowed after this point */
+ spin_lock_bh(&p->lock);
+ if (R_tab != NULL) {
+ qdisc_put_rtab(p->R_tab);
+ p->R_tab = R_tab;
+ }
+ if (P_tab != NULL) {
+ qdisc_put_rtab(p->P_tab);
+ p->P_tab = P_tab;
+ }
+
+ if (tb[TCA_POLICE_RESULT-1])
+ p->result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]);
+ p->toks = p->burst = parm->burst;
+ p->mtu = parm->mtu;
+ if (p->mtu == 0) {
+ p->mtu = ~0;
+ if (p->R_tab)
+ p->mtu = 255<<p->R_tab->rate.cell_log;
+ }
+ if (p->P_tab)
+ p->ptoks = L2T_P(p, p->mtu);
+ p->action = parm->action;
+
+#ifdef CONFIG_NET_ESTIMATOR
+ if (tb[TCA_POLICE_AVRATE-1])
+ p->ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]);
+ if (est)
+ gen_replace_estimator(&p->bstats, &p->rate_est, p->stats_lock, est);
+#endif
+
+ spin_unlock_bh(&p->lock);
+ if (ret != ACT_P_CREATED)
+ return ret;
+
+ PSCHED_GET_TIME(p->t_c);
+ p->index = parm->index ? : tcf_police_new_index();
+ h = tcf_police_hash(p->index);
+ write_lock_bh(&police_lock);
+ p->next = tcf_police_ht[h];
+ tcf_police_ht[h] = p;
+ write_unlock_bh(&police_lock);
+
+ a->priv = p;
+ return ret;
+
+failure:
+ if (ret == ACT_P_CREATED)
+ kfree(p);
+ return err;
+}
+
+static int tcf_act_police_cleanup(struct tc_action *a, int bind)
+{
+ struct tcf_police *p = PRIV(a);
+
+ if (p != NULL)
+ return tcf_police_release(p, bind);
+ return 0;
+}
+
+static int tcf_act_police(struct sk_buff **pskb, struct tc_action *a)
+{
+ psched_time_t now;
+ struct sk_buff *skb = *pskb;
+ struct tcf_police *p = PRIV(a);
+ long toks;
+ long ptoks = 0;
+
+ spin_lock(&p->lock);
+
+ p->bstats.bytes += skb->len;
+ p->bstats.packets++;
+
+#ifdef CONFIG_NET_ESTIMATOR
+ if (p->ewma_rate && p->rate_est.bps >= p->ewma_rate) {
+ p->qstats.overlimits++;
+ spin_unlock(&p->lock);
+ return p->action;
+ }
+#endif
+
+ if (skb->len <= p->mtu) {
+ if (p->R_tab == NULL) {
+ spin_unlock(&p->lock);
+ return p->result;
+ }
+
+ PSCHED_GET_TIME(now);
+
+ toks = PSCHED_TDIFF_SAFE(now, p->t_c, p->burst);
+
+ if (p->P_tab) {
+ ptoks = toks + p->ptoks;
+ if (ptoks > (long)L2T_P(p, p->mtu))
+ ptoks = (long)L2T_P(p, p->mtu);
+ ptoks -= L2T_P(p, skb->len);
+ }
+ toks += p->toks;
+ if (toks > (long)p->burst)
+ toks = p->burst;
+ toks -= L2T(p, skb->len);
+
+ if ((toks|ptoks) >= 0) {
+ p->t_c = now;
+ p->toks = toks;
+ p->ptoks = ptoks;
+ spin_unlock(&p->lock);
+ return p->result;
+ }
+ }
+
+ p->qstats.overlimits++;
+ spin_unlock(&p->lock);
+ return p->action;
+}
+
+static int
+tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
+{
+ unsigned char *b = skb->tail;
+ struct tc_police opt;
+ struct tcf_police *p = PRIV(a);
+
+ opt.index = p->index;
+ opt.action = p->action;
+ opt.mtu = p->mtu;
+ opt.burst = p->burst;
+ opt.refcnt = p->refcnt - ref;
+ opt.bindcnt = p->bindcnt - bind;
+ if (p->R_tab)
+ opt.rate = p->R_tab->rate;
+ else
+ memset(&opt.rate, 0, sizeof(opt.rate));
+ if (p->P_tab)
+ opt.peakrate = p->P_tab->rate;
+ else
+ memset(&opt.peakrate, 0, sizeof(opt.peakrate));
+ RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt);
+ if (p->result)
+ RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &p->result);
+#ifdef CONFIG_NET_ESTIMATOR
+ if (p->ewma_rate)
+ RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &p->ewma_rate);
+#endif
+ return skb->len;
+
+rtattr_failure:
+ skb_trim(skb, b - skb->data);
+ return -1;
+}
+
+MODULE_AUTHOR("Alexey Kuznetsov");
+MODULE_DESCRIPTION("Policing actions");
+MODULE_LICENSE("GPL");
+
+static struct tc_action_ops act_police_ops = {
+ .kind = "police",
+ .type = TCA_ID_POLICE,
+ .capab = TCA_CAP_NONE,
+ .owner = THIS_MODULE,
+ .act = tcf_act_police,
+ .dump = tcf_act_police_dump,
+ .cleanup = tcf_act_police_cleanup,
+ .lookup = tcf_hash_search,
+ .init = tcf_act_police_locate,
+ .walk = tcf_generic_walker
+};
+
+static int __init
+police_init_module(void)
+{
+ return tcf_register_action(&act_police_ops);
+}
+
+static void __exit
+police_cleanup_module(void)
+{
+ tcf_unregister_action(&act_police_ops);
+}
+
+module_init(police_init_module);
+module_exit(police_cleanup_module);
+
+#endif
+
+struct tcf_police * tcf_police_locate(struct rtattr *rta, struct rtattr *est)
+{
+ unsigned h;
+ struct tcf_police *p;
+ struct rtattr *tb[TCA_POLICE_MAX];
+ struct tc_police *parm;
+
+ if (rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0)
+ return NULL;
+
+ if (tb[TCA_POLICE_TBF-1] == NULL ||
+ RTA_PAYLOAD(tb[TCA_POLICE_TBF-1]) != sizeof(*parm))
+ return NULL;
+
+ parm = RTA_DATA(tb[TCA_POLICE_TBF-1]);
+
+ if (parm->index && (p = tcf_police_lookup(parm->index)) != NULL) {
+ p->refcnt++;
+ return p;
+ }
+
+ p = kmalloc(sizeof(*p), GFP_KERNEL);
+ if (p == NULL)
+ return NULL;
+
+ memset(p, 0, sizeof(*p));
+ p->refcnt = 1;
+ spin_lock_init(&p->lock);
+ p->stats_lock = &p->lock;
+ if (parm->rate.rate) {
+ p->R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]);
+ if (p->R_tab == NULL)
+ goto failure;
+ if (parm->peakrate.rate) {
+ p->P_tab = qdisc_get_rtab(&parm->peakrate,
+ tb[TCA_POLICE_PEAKRATE-1]);
+ if (p->P_tab == NULL)
+ goto failure;
+ }
+ }
+ if (tb[TCA_POLICE_RESULT-1]) {
+ if (RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32))
+ goto failure;
+ p->result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]);
+ }
+#ifdef CONFIG_NET_ESTIMATOR
+ if (tb[TCA_POLICE_AVRATE-1]) {
+ if (RTA_PAYLOAD(tb[TCA_POLICE_AVRATE-1]) != sizeof(u32))
+ goto failure;
+ p->ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]);
+ }
+#endif
+ p->toks = p->burst = parm->burst;
+ p->mtu = parm->mtu;
+ if (p->mtu == 0) {
+ p->mtu = ~0;
+ if (p->R_tab)
+ p->mtu = 255<<p->R_tab->rate.cell_log;
+ }
+ if (p->P_tab)
+ p->ptoks = L2T_P(p, p->mtu);
+ PSCHED_GET_TIME(p->t_c);
+ p->index = parm->index ? : tcf_police_new_index();
+ p->action = parm->action;
+#ifdef CONFIG_NET_ESTIMATOR
+ if (est)
+ gen_new_estimator(&p->bstats, &p->rate_est, p->stats_lock, est);
+#endif
+ h = tcf_police_hash(p->index);
+ write_lock_bh(&police_lock);
+ p->next = tcf_police_ht[h];
+ tcf_police_ht[h] = p;
+ write_unlock_bh(&police_lock);
+ return p;
+
+failure:
+ if (p->R_tab)
+ qdisc_put_rtab(p->R_tab);
+ kfree(p);
+ return NULL;
+}
+
+int tcf_police(struct sk_buff *skb, struct tcf_police *p)
+{
+ psched_time_t now;
+ long toks;
+ long ptoks = 0;
+
+ spin_lock(&p->lock);
+
+ p->bstats.bytes += skb->len;
+ p->bstats.packets++;
+
+#ifdef CONFIG_NET_ESTIMATOR
+ if (p->ewma_rate && p->rate_est.bps >= p->ewma_rate) {
+ p->qstats.overlimits++;
+ spin_unlock(&p->lock);
+ return p->action;
+ }
+#endif
+
+ if (skb->len <= p->mtu) {
+ if (p->R_tab == NULL) {
+ spin_unlock(&p->lock);
+ return p->result;
+ }
+
+ PSCHED_GET_TIME(now);
+
+ toks = PSCHED_TDIFF_SAFE(now, p->t_c, p->burst);
+
+ if (p->P_tab) {
+ ptoks = toks + p->ptoks;
+ if (ptoks > (long)L2T_P(p, p->mtu))
+ ptoks = (long)L2T_P(p, p->mtu);
+ ptoks -= L2T_P(p, skb->len);
+ }
+ toks += p->toks;
+ if (toks > (long)p->burst)
+ toks = p->burst;
+ toks -= L2T(p, skb->len);
+
+ if ((toks|ptoks) >= 0) {
+ p->t_c = now;
+ p->toks = toks;
+ p->ptoks = ptoks;
+ spin_unlock(&p->lock);
+ return p->result;
+ }
+ }
+
+ p->qstats.overlimits++;
+ spin_unlock(&p->lock);
+ return p->action;
+}
+
+int tcf_police_dump(struct sk_buff *skb, struct tcf_police *p)
+{
+ unsigned char *b = skb->tail;
+ struct tc_police opt;
+
+ opt.index = p->index;
+ opt.action = p->action;
+ opt.mtu = p->mtu;
+ opt.burst = p->burst;
+ if (p->R_tab)
+ opt.rate = p->R_tab->rate;
+ else
+ memset(&opt.rate, 0, sizeof(opt.rate));
+ if (p->P_tab)
+ opt.peakrate = p->P_tab->rate;
+ else
+ memset(&opt.peakrate, 0, sizeof(opt.peakrate));
+ RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt);
+ if (p->result)
+ RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &p->result);
+#ifdef CONFIG_NET_ESTIMATOR
+ if (p->ewma_rate)
+ RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &p->ewma_rate);
+#endif
+ return skb->len;
+
+rtattr_failure:
+ skb_trim(skb, b - skb->data);
+ return -1;
+}
+
+int tcf_police_dump_stats(struct sk_buff *skb, struct tcf_police *p)
+{
+ struct gnet_dump d;
+
+ if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
+ TCA_XSTATS, p->stats_lock, &d) < 0)
+ goto errout;
+
+ if (gnet_stats_copy_basic(&d, &p->bstats) < 0 ||
+#ifdef CONFIG_NET_ESTIMATOR
+ gnet_stats_copy_rate_est(&d, &p->rate_est) < 0 ||
+#endif
+ gnet_stats_copy_queue(&d, &p->qstats) < 0)
+ goto errout;
+
+ if (gnet_stats_finish_copy(&d) < 0)
+ goto errout;
+
+ return 0;
+
+errout:
+ return -1;
+}
+
+
+EXPORT_SYMBOL(tcf_police);
+EXPORT_SYMBOL(tcf_police_destroy);
+EXPORT_SYMBOL(tcf_police_dump);
+EXPORT_SYMBOL(tcf_police_dump_stats);
+EXPORT_SYMBOL(tcf_police_hash);
+EXPORT_SYMBOL(tcf_police_ht);
+EXPORT_SYMBOL(tcf_police_locate);
+EXPORT_SYMBOL(tcf_police_lookup);
+EXPORT_SYMBOL(tcf_police_new_index);