blob: 76c92e710a33486926c196e4f16b5c0ad4450ba2 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * net/sched/sch_cbq.c Class-Based Queueing discipline.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 *
11 */
12
Linus Torvalds1da177e2005-04-16 15:20:36 -070013#include <linux/module.h>
14#include <asm/uaccess.h>
15#include <asm/system.h>
16#include <linux/bitops.h>
17#include <linux/types.h>
18#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070019#include <linux/string.h>
20#include <linux/mm.h>
21#include <linux/socket.h>
22#include <linux/sockios.h>
23#include <linux/in.h>
24#include <linux/errno.h>
25#include <linux/interrupt.h>
26#include <linux/if_ether.h>
27#include <linux/inet.h>
28#include <linux/netdevice.h>
29#include <linux/etherdevice.h>
30#include <linux/notifier.h>
31#include <net/ip.h>
32#include <net/route.h>
33#include <linux/skbuff.h>
34#include <net/sock.h>
35#include <net/pkt_sched.h>
36
37
38/* Class-Based Queueing (CBQ) algorithm.
39 =======================================
40
41 Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +090042 Management Models for Packet Networks",
Linus Torvalds1da177e2005-04-16 15:20:36 -070043 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
44
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +090045 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
Linus Torvalds1da177e2005-04-16 15:20:36 -070046
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +090047 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
Linus Torvalds1da177e2005-04-16 15:20:36 -070048 Parameters", 1996
49
50 [4] Sally Floyd and Michael Speer, "Experimental Results
51 for Class-Based Queueing", 1998, not published.
52
53 -----------------------------------------------------------------------
54
55 Algorithm skeleton was taken from NS simulator cbq.cc.
56 If someone wants to check this code against the LBL version,
57 he should take into account that ONLY the skeleton was borrowed,
58 the implementation is different. Particularly:
59
60 --- The WRR algorithm is different. Our version looks more
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +090061 reasonable (I hope) and works when quanta are allowed to be
62 less than MTU, which is always the case when real time classes
63 have small rates. Note, that the statement of [3] is
64 incomplete, delay may actually be estimated even if class
65 per-round allotment is less than MTU. Namely, if per-round
66 allotment is W*r_i, and r_1+...+r_k = r < 1
Linus Torvalds1da177e2005-04-16 15:20:36 -070067
68 delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
69
70 In the worst case we have IntServ estimate with D = W*r+k*MTU
71 and C = MTU*r. The proof (if correct at all) is trivial.
72
73
74 --- It seems that cbq-2.0 is not very accurate. At least, I cannot
75 interpret some places, which look like wrong translations
76 from NS. Anyone is advised to find these differences
77 and explain to me, why I am wrong 8).
78
79 --- Linux has no EOI event, so that we cannot estimate true class
80 idle time. Workaround is to consider the next dequeue event
81 as sign that previous packet is finished. This is wrong because of
82 internal device queueing, but on a permanently loaded link it is true.
83 Moreover, combined with clock integrator, this scheme looks
84 very close to an ideal solution. */
85
86struct cbq_sched_data;
87
88
89struct cbq_class
90{
91 struct cbq_class *next; /* hash table link */
92 struct cbq_class *next_alive; /* next class with backlog in this priority band */
93
94/* Parameters */
95 u32 classid;
96 unsigned char priority; /* class priority */
97 unsigned char priority2; /* priority to be used after overlimit */
98 unsigned char ewma_log; /* time constant for idle time calculation */
99 unsigned char ovl_strategy;
100#ifdef CONFIG_NET_CLS_POLICE
101 unsigned char police;
102#endif
103
104 u32 defmap;
105
106 /* Link-sharing scheduler parameters */
107 long maxidle; /* Class parameters: see below. */
108 long offtime;
109 long minidle;
110 u32 avpkt;
111 struct qdisc_rate_table *R_tab;
112
113 /* Overlimit strategy parameters */
114 void (*overlimit)(struct cbq_class *cl);
115 long penalty;
116
117 /* General scheduler (WRR) parameters */
118 long allot;
119 long quantum; /* Allotment per WRR round */
120 long weight; /* Relative allotment: see below */
121
122 struct Qdisc *qdisc; /* Ptr to CBQ discipline */
123 struct cbq_class *split; /* Ptr to split node */
124 struct cbq_class *share; /* Ptr to LS parent in the class tree */
125 struct cbq_class *tparent; /* Ptr to tree parent in the class tree */
126 struct cbq_class *borrow; /* NULL if class is bandwidth limited;
127 parent otherwise */
128 struct cbq_class *sibling; /* Sibling chain */
129 struct cbq_class *children; /* Pointer to children chain */
130
131 struct Qdisc *q; /* Elementary queueing discipline */
132
133
134/* Variables */
135 unsigned char cpriority; /* Effective priority */
136 unsigned char delayed;
137 unsigned char level; /* level of the class in hierarchy:
138 0 for leaf classes, and maximal
139 level of children + 1 for nodes.
140 */
141
142 psched_time_t last; /* Last end of service */
143 psched_time_t undertime;
144 long avgidle;
145 long deficit; /* Saved deficit for WRR */
146 unsigned long penalized;
147 struct gnet_stats_basic bstats;
148 struct gnet_stats_queue qstats;
149 struct gnet_stats_rate_est rate_est;
150 spinlock_t *stats_lock;
151 struct tc_cbq_xstats xstats;
152
153 struct tcf_proto *filter_list;
154
155 int refcnt;
156 int filters;
157
158 struct cbq_class *defaults[TC_PRIO_MAX+1];
159};
160
161struct cbq_sched_data
162{
163 struct cbq_class *classes[16]; /* Hash table of all classes */
164 int nclasses[TC_CBQ_MAXPRIO+1];
165 unsigned quanta[TC_CBQ_MAXPRIO+1];
166
167 struct cbq_class link;
168
169 unsigned activemask;
170 struct cbq_class *active[TC_CBQ_MAXPRIO+1]; /* List of all classes
171 with backlog */
172
173#ifdef CONFIG_NET_CLS_POLICE
174 struct cbq_class *rx_class;
175#endif
176 struct cbq_class *tx_class;
177 struct cbq_class *tx_borrowed;
178 int tx_len;
179 psched_time_t now; /* Cached timestamp */
180 psched_time_t now_rt; /* Cached real time */
181 unsigned pmask;
182
183 struct timer_list delay_timer;
184 struct timer_list wd_timer; /* Watchdog timer,
185 started when CBQ has
186 backlog, but cannot
187 transmit just now */
188 long wd_expires;
189 int toplevel;
190 u32 hgenerator;
191};
192
193
194#define L2T(cl,len) ((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log])
195
196
197static __inline__ unsigned cbq_hash(u32 h)
198{
199 h ^= h>>8;
200 h ^= h>>4;
201 return h&0xF;
202}
203
204static __inline__ struct cbq_class *
205cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
206{
207 struct cbq_class *cl;
208
209 for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next)
210 if (cl->classid == classid)
211 return cl;
212 return NULL;
213}
214
215#ifdef CONFIG_NET_CLS_POLICE
216
217static struct cbq_class *
218cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
219{
220 struct cbq_class *cl, *new;
221
222 for (cl = this->tparent; cl; cl = cl->tparent)
223 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
224 return new;
225
226 return NULL;
227}
228
229#endif
230
231/* Classify packet. The procedure is pretty complicated, but
232 it allows us to combine link sharing and priority scheduling
233 transparently.
234
235 Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
236 so that it resolves to split nodes. Then packets are classified
237 by logical priority, or a more specific classifier may be attached
238 to the split node.
239 */
240
241static struct cbq_class *
242cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
243{
244 struct cbq_sched_data *q = qdisc_priv(sch);
245 struct cbq_class *head = &q->link;
246 struct cbq_class **defmap;
247 struct cbq_class *cl = NULL;
248 u32 prio = skb->priority;
249 struct tcf_result res;
250
251 /*
252 * Step 1. If skb->priority points to one of our classes, use it.
253 */
254 if (TC_H_MAJ(prio^sch->handle) == 0 &&
255 (cl = cbq_class_lookup(q, prio)) != NULL)
256 return cl;
257
Jamal Hadi Salim29f1df62006-01-08 22:35:55 -0800258 *qerr = NET_XMIT_BYPASS;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700259 for (;;) {
260 int result = 0;
261 defmap = head->defaults;
262
263 /*
264 * Step 2+n. Apply classifier.
265 */
266 if (!head->filter_list || (result = tc_classify(skb, head->filter_list, &res)) < 0)
267 goto fallback;
268
269 if ((cl = (void*)res.class) == NULL) {
270 if (TC_H_MAJ(res.classid))
271 cl = cbq_class_lookup(q, res.classid);
272 else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
273 cl = defmap[TC_PRIO_BESTEFFORT];
274
275 if (cl == NULL || cl->level >= head->level)
276 goto fallback;
277 }
278
279#ifdef CONFIG_NET_CLS_ACT
280 switch (result) {
281 case TC_ACT_QUEUED:
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900282 case TC_ACT_STOLEN:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700283 *qerr = NET_XMIT_SUCCESS;
284 case TC_ACT_SHOT:
285 return NULL;
286 }
287#elif defined(CONFIG_NET_CLS_POLICE)
288 switch (result) {
289 case TC_POLICE_RECLASSIFY:
290 return cbq_reclassify(skb, cl);
291 case TC_POLICE_SHOT:
292 return NULL;
293 default:
294 break;
295 }
296#endif
297 if (cl->level == 0)
298 return cl;
299
300 /*
301 * Step 3+n. If classifier selected a link sharing class,
302 * apply agency specific classifier.
303 * Repeat this procdure until we hit a leaf node.
304 */
305 head = cl;
306 }
307
308fallback:
309 cl = head;
310
311 /*
312 * Step 4. No success...
313 */
314 if (TC_H_MAJ(prio) == 0 &&
315 !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
316 !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
317 return head;
318
319 return cl;
320}
321
322/*
323 A packet has just been enqueued on the empty class.
324 cbq_activate_class adds it to the tail of active class list
325 of its priority band.
326 */
327
328static __inline__ void cbq_activate_class(struct cbq_class *cl)
329{
330 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
331 int prio = cl->cpriority;
332 struct cbq_class *cl_tail;
333
334 cl_tail = q->active[prio];
335 q->active[prio] = cl;
336
337 if (cl_tail != NULL) {
338 cl->next_alive = cl_tail->next_alive;
339 cl_tail->next_alive = cl;
340 } else {
341 cl->next_alive = cl;
342 q->activemask |= (1<<prio);
343 }
344}
345
346/*
347 Unlink class from active chain.
348 Note that this same procedure is done directly in cbq_dequeue*
349 during round-robin procedure.
350 */
351
352static void cbq_deactivate_class(struct cbq_class *this)
353{
354 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
355 int prio = this->cpriority;
356 struct cbq_class *cl;
357 struct cbq_class *cl_prev = q->active[prio];
358
359 do {
360 cl = cl_prev->next_alive;
361 if (cl == this) {
362 cl_prev->next_alive = cl->next_alive;
363 cl->next_alive = NULL;
364
365 if (cl == q->active[prio]) {
366 q->active[prio] = cl_prev;
367 if (cl == q->active[prio]) {
368 q->active[prio] = NULL;
369 q->activemask &= ~(1<<prio);
370 return;
371 }
372 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700373 return;
374 }
375 } while ((cl_prev = cl) != q->active[prio]);
376}
377
378static void
379cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
380{
381 int toplevel = q->toplevel;
382
383 if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
384 psched_time_t now;
385 psched_tdiff_t incr;
386
387 PSCHED_GET_TIME(now);
388 incr = PSCHED_TDIFF(now, q->now_rt);
389 PSCHED_TADD2(q->now, incr, now);
390
391 do {
392 if (PSCHED_TLESS(cl->undertime, now)) {
393 q->toplevel = cl->level;
394 return;
395 }
396 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
397 }
398}
399
400static int
401cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
402{
403 struct cbq_sched_data *q = qdisc_priv(sch);
404 int len = skb->len;
405 int ret;
406 struct cbq_class *cl = cbq_classify(skb, sch, &ret);
407
408#ifdef CONFIG_NET_CLS_POLICE
409 q->rx_class = cl;
410#endif
411 if (cl == NULL) {
Jamal Hadi Salim29f1df62006-01-08 22:35:55 -0800412 if (ret == NET_XMIT_BYPASS)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700413 sch->qstats.drops++;
414 kfree_skb(skb);
415 return ret;
416 }
417
418#ifdef CONFIG_NET_CLS_POLICE
419 cl->q->__parent = sch;
420#endif
421 if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) {
422 sch->q.qlen++;
423 sch->bstats.packets++;
424 sch->bstats.bytes+=len;
425 cbq_mark_toplevel(q, cl);
426 if (!cl->next_alive)
427 cbq_activate_class(cl);
428 return ret;
429 }
430
431 sch->qstats.drops++;
432 cbq_mark_toplevel(q, cl);
433 cl->qstats.drops++;
434 return ret;
435}
436
437static int
438cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
439{
440 struct cbq_sched_data *q = qdisc_priv(sch);
441 struct cbq_class *cl;
442 int ret;
443
444 if ((cl = q->tx_class) == NULL) {
445 kfree_skb(skb);
446 sch->qstats.drops++;
447 return NET_XMIT_CN;
448 }
449 q->tx_class = NULL;
450
451 cbq_mark_toplevel(q, cl);
452
453#ifdef CONFIG_NET_CLS_POLICE
454 q->rx_class = cl;
455 cl->q->__parent = sch;
456#endif
457 if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
458 sch->q.qlen++;
459 sch->qstats.requeues++;
460 if (!cl->next_alive)
461 cbq_activate_class(cl);
462 return 0;
463 }
464 sch->qstats.drops++;
465 cl->qstats.drops++;
466 return ret;
467}
468
469/* Overlimit actions */
470
471/* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
472
473static void cbq_ovl_classic(struct cbq_class *cl)
474{
475 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
476 psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
477
478 if (!cl->delayed) {
479 delay += cl->offtime;
480
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900481 /*
Linus Torvalds1da177e2005-04-16 15:20:36 -0700482 Class goes to sleep, so that it will have no
483 chance to work avgidle. Let's forgive it 8)
484
485 BTW cbq-2.0 has a crap in this
486 place, apparently they forgot to shift it by cl->ewma_log.
487 */
488 if (cl->avgidle < 0)
489 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
490 if (cl->avgidle < cl->minidle)
491 cl->avgidle = cl->minidle;
492 if (delay <= 0)
493 delay = 1;
494 PSCHED_TADD2(q->now, delay, cl->undertime);
495
496 cl->xstats.overactions++;
497 cl->delayed = 1;
498 }
499 if (q->wd_expires == 0 || q->wd_expires > delay)
500 q->wd_expires = delay;
501
502 /* Dirty work! We must schedule wakeups based on
503 real available rate, rather than leaf rate,
504 which may be tiny (even zero).
505 */
506 if (q->toplevel == TC_CBQ_MAXLEVEL) {
507 struct cbq_class *b;
508 psched_tdiff_t base_delay = q->wd_expires;
509
510 for (b = cl->borrow; b; b = b->borrow) {
511 delay = PSCHED_TDIFF(b->undertime, q->now);
512 if (delay < base_delay) {
513 if (delay <= 0)
514 delay = 1;
515 base_delay = delay;
516 }
517 }
518
519 q->wd_expires = base_delay;
520 }
521}
522
523/* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
524 they go overlimit
525 */
526
527static void cbq_ovl_rclassic(struct cbq_class *cl)
528{
529 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
530 struct cbq_class *this = cl;
531
532 do {
533 if (cl->level > q->toplevel) {
534 cl = NULL;
535 break;
536 }
537 } while ((cl = cl->borrow) != NULL);
538
539 if (cl == NULL)
540 cl = this;
541 cbq_ovl_classic(cl);
542}
543
544/* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */
545
546static void cbq_ovl_delay(struct cbq_class *cl)
547{
548 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
549 psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
550
551 if (!cl->delayed) {
552 unsigned long sched = jiffies;
553
554 delay += cl->offtime;
555 if (cl->avgidle < 0)
556 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
557 if (cl->avgidle < cl->minidle)
558 cl->avgidle = cl->minidle;
559 PSCHED_TADD2(q->now, delay, cl->undertime);
560
561 if (delay > 0) {
562 sched += PSCHED_US2JIFFIE(delay) + cl->penalty;
563 cl->penalized = sched;
564 cl->cpriority = TC_CBQ_MAXPRIO;
565 q->pmask |= (1<<TC_CBQ_MAXPRIO);
566 if (del_timer(&q->delay_timer) &&
567 (long)(q->delay_timer.expires - sched) > 0)
568 q->delay_timer.expires = sched;
569 add_timer(&q->delay_timer);
570 cl->delayed = 1;
571 cl->xstats.overactions++;
572 return;
573 }
574 delay = 1;
575 }
576 if (q->wd_expires == 0 || q->wd_expires > delay)
577 q->wd_expires = delay;
578}
579
580/* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
581
582static void cbq_ovl_lowprio(struct cbq_class *cl)
583{
584 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
585
586 cl->penalized = jiffies + cl->penalty;
587
588 if (cl->cpriority != cl->priority2) {
589 cl->cpriority = cl->priority2;
590 q->pmask |= (1<<cl->cpriority);
591 cl->xstats.overactions++;
592 }
593 cbq_ovl_classic(cl);
594}
595
596/* TC_CBQ_OVL_DROP: penalize class by dropping */
597
598static void cbq_ovl_drop(struct cbq_class *cl)
599{
600 if (cl->q->ops->drop)
601 if (cl->q->ops->drop(cl->q))
602 cl->qdisc->q.qlen--;
603 cl->xstats.overactions++;
604 cbq_ovl_classic(cl);
605}
606
607static void cbq_watchdog(unsigned long arg)
608{
609 struct Qdisc *sch = (struct Qdisc*)arg;
610
611 sch->flags &= ~TCQ_F_THROTTLED;
612 netif_schedule(sch->dev);
613}
614
615static unsigned long cbq_undelay_prio(struct cbq_sched_data *q, int prio)
616{
617 struct cbq_class *cl;
618 struct cbq_class *cl_prev = q->active[prio];
619 unsigned long now = jiffies;
620 unsigned long sched = now;
621
622 if (cl_prev == NULL)
623 return now;
624
625 do {
626 cl = cl_prev->next_alive;
627 if ((long)(now - cl->penalized) > 0) {
628 cl_prev->next_alive = cl->next_alive;
629 cl->next_alive = NULL;
630 cl->cpriority = cl->priority;
631 cl->delayed = 0;
632 cbq_activate_class(cl);
633
634 if (cl == q->active[prio]) {
635 q->active[prio] = cl_prev;
636 if (cl == q->active[prio]) {
637 q->active[prio] = NULL;
638 return 0;
639 }
640 }
641
642 cl = cl_prev->next_alive;
643 } else if ((long)(sched - cl->penalized) > 0)
644 sched = cl->penalized;
645 } while ((cl_prev = cl) != q->active[prio]);
646
647 return (long)(sched - now);
648}
649
650static void cbq_undelay(unsigned long arg)
651{
652 struct Qdisc *sch = (struct Qdisc*)arg;
653 struct cbq_sched_data *q = qdisc_priv(sch);
654 long delay = 0;
655 unsigned pmask;
656
657 pmask = q->pmask;
658 q->pmask = 0;
659
660 while (pmask) {
661 int prio = ffz(~pmask);
662 long tmp;
663
664 pmask &= ~(1<<prio);
665
666 tmp = cbq_undelay_prio(q, prio);
667 if (tmp > 0) {
668 q->pmask |= 1<<prio;
669 if (tmp < delay || delay == 0)
670 delay = tmp;
671 }
672 }
673
674 if (delay) {
675 q->delay_timer.expires = jiffies + delay;
676 add_timer(&q->delay_timer);
677 }
678
679 sch->flags &= ~TCQ_F_THROTTLED;
680 netif_schedule(sch->dev);
681}
682
683
684#ifdef CONFIG_NET_CLS_POLICE
685
686static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
687{
688 int len = skb->len;
689 struct Qdisc *sch = child->__parent;
690 struct cbq_sched_data *q = qdisc_priv(sch);
691 struct cbq_class *cl = q->rx_class;
692
693 q->rx_class = NULL;
694
695 if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
696
697 cbq_mark_toplevel(q, cl);
698
699 q->rx_class = cl;
700 cl->q->__parent = sch;
701
702 if (cl->q->enqueue(skb, cl->q) == 0) {
703 sch->q.qlen++;
704 sch->bstats.packets++;
705 sch->bstats.bytes+=len;
706 if (!cl->next_alive)
707 cbq_activate_class(cl);
708 return 0;
709 }
710 sch->qstats.drops++;
711 return 0;
712 }
713
714 sch->qstats.drops++;
715 return -1;
716}
717#endif
718
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900719/*
Linus Torvalds1da177e2005-04-16 15:20:36 -0700720 It is mission critical procedure.
721
722 We "regenerate" toplevel cutoff, if transmitting class
723 has backlog and it is not regulated. It is not part of
724 original CBQ description, but looks more reasonable.
725 Probably, it is wrong. This question needs further investigation.
726*/
727
728static __inline__ void
729cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
730 struct cbq_class *borrowed)
731{
732 if (cl && q->toplevel >= borrowed->level) {
733 if (cl->q->q.qlen > 1) {
734 do {
735 if (PSCHED_IS_PASTPERFECT(borrowed->undertime)) {
736 q->toplevel = borrowed->level;
737 return;
738 }
739 } while ((borrowed=borrowed->borrow) != NULL);
740 }
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900741#if 0
Linus Torvalds1da177e2005-04-16 15:20:36 -0700742 /* It is not necessary now. Uncommenting it
743 will save CPU cycles, but decrease fairness.
744 */
745 q->toplevel = TC_CBQ_MAXLEVEL;
746#endif
747 }
748}
749
750static void
751cbq_update(struct cbq_sched_data *q)
752{
753 struct cbq_class *this = q->tx_class;
754 struct cbq_class *cl = this;
755 int len = q->tx_len;
756
757 q->tx_class = NULL;
758
759 for ( ; cl; cl = cl->share) {
760 long avgidle = cl->avgidle;
761 long idle;
762
763 cl->bstats.packets++;
764 cl->bstats.bytes += len;
765
766 /*
767 (now - last) is total time between packet right edges.
768 (last_pktlen/rate) is "virtual" busy time, so that
769
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900770 idle = (now - last) - last_pktlen/rate
Linus Torvalds1da177e2005-04-16 15:20:36 -0700771 */
772
773 idle = PSCHED_TDIFF(q->now, cl->last);
774 if ((unsigned long)idle > 128*1024*1024) {
775 avgidle = cl->maxidle;
776 } else {
777 idle -= L2T(cl, len);
778
779 /* true_avgidle := (1-W)*true_avgidle + W*idle,
780 where W=2^{-ewma_log}. But cl->avgidle is scaled:
781 cl->avgidle == true_avgidle/W,
782 hence:
783 */
784 avgidle += idle - (avgidle>>cl->ewma_log);
785 }
786
787 if (avgidle <= 0) {
788 /* Overlimit or at-limit */
789
790 if (avgidle < cl->minidle)
791 avgidle = cl->minidle;
792
793 cl->avgidle = avgidle;
794
795 /* Calculate expected time, when this class
796 will be allowed to send.
797 It will occur, when:
798 (1-W)*true_avgidle + W*delay = 0, i.e.
799 idle = (1/W - 1)*(-true_avgidle)
800 or
801 idle = (1 - W)*(-cl->avgidle);
802 */
803 idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
804
805 /*
806 That is not all.
807 To maintain the rate allocated to the class,
808 we add to undertime virtual clock,
809 necessary to complete transmitted packet.
810 (len/phys_bandwidth has been already passed
811 to the moment of cbq_update)
812 */
813
814 idle -= L2T(&q->link, len);
815 idle += L2T(cl, len);
816
817 PSCHED_AUDIT_TDIFF(idle);
818
819 PSCHED_TADD2(q->now, idle, cl->undertime);
820 } else {
821 /* Underlimit */
822
823 PSCHED_SET_PASTPERFECT(cl->undertime);
824 if (avgidle > cl->maxidle)
825 cl->avgidle = cl->maxidle;
826 else
827 cl->avgidle = avgidle;
828 }
829 cl->last = q->now;
830 }
831
832 cbq_update_toplevel(q, this, q->tx_borrowed);
833}
834
835static __inline__ struct cbq_class *
836cbq_under_limit(struct cbq_class *cl)
837{
838 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
839 struct cbq_class *this_cl = cl;
840
841 if (cl->tparent == NULL)
842 return cl;
843
844 if (PSCHED_IS_PASTPERFECT(cl->undertime) ||
845 !PSCHED_TLESS(q->now, cl->undertime)) {
846 cl->delayed = 0;
847 return cl;
848 }
849
850 do {
851 /* It is very suspicious place. Now overlimit
852 action is generated for not bounded classes
853 only if link is completely congested.
854 Though it is in agree with ancestor-only paradigm,
855 it looks very stupid. Particularly,
856 it means that this chunk of code will either
857 never be called or result in strong amplification
858 of burstiness. Dangerous, silly, and, however,
859 no another solution exists.
860 */
861 if ((cl = cl->borrow) == NULL) {
862 this_cl->qstats.overlimits++;
863 this_cl->overlimit(this_cl);
864 return NULL;
865 }
866 if (cl->level > q->toplevel)
867 return NULL;
868 } while (!PSCHED_IS_PASTPERFECT(cl->undertime) &&
869 PSCHED_TLESS(q->now, cl->undertime));
870
871 cl->delayed = 0;
872 return cl;
873}
874
875static __inline__ struct sk_buff *
876cbq_dequeue_prio(struct Qdisc *sch, int prio)
877{
878 struct cbq_sched_data *q = qdisc_priv(sch);
879 struct cbq_class *cl_tail, *cl_prev, *cl;
880 struct sk_buff *skb;
881 int deficit;
882
883 cl_tail = cl_prev = q->active[prio];
884 cl = cl_prev->next_alive;
885
886 do {
887 deficit = 0;
888
889 /* Start round */
890 do {
891 struct cbq_class *borrow = cl;
892
893 if (cl->q->q.qlen &&
894 (borrow = cbq_under_limit(cl)) == NULL)
895 goto skip_class;
896
897 if (cl->deficit <= 0) {
898 /* Class exhausted its allotment per
899 this round. Switch to the next one.
900 */
901 deficit = 1;
902 cl->deficit += cl->quantum;
903 goto next_class;
904 }
905
906 skb = cl->q->dequeue(cl->q);
907
908 /* Class did not give us any skb :-(
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900909 It could occur even if cl->q->q.qlen != 0
Linus Torvalds1da177e2005-04-16 15:20:36 -0700910 f.e. if cl->q == "tbf"
911 */
912 if (skb == NULL)
913 goto skip_class;
914
915 cl->deficit -= skb->len;
916 q->tx_class = cl;
917 q->tx_borrowed = borrow;
918 if (borrow != cl) {
919#ifndef CBQ_XSTATS_BORROWS_BYTES
920 borrow->xstats.borrows++;
921 cl->xstats.borrows++;
922#else
923 borrow->xstats.borrows += skb->len;
924 cl->xstats.borrows += skb->len;
925#endif
926 }
927 q->tx_len = skb->len;
928
929 if (cl->deficit <= 0) {
930 q->active[prio] = cl;
931 cl = cl->next_alive;
932 cl->deficit += cl->quantum;
933 }
934 return skb;
935
936skip_class:
937 if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
938 /* Class is empty or penalized.
939 Unlink it from active chain.
940 */
941 cl_prev->next_alive = cl->next_alive;
942 cl->next_alive = NULL;
943
944 /* Did cl_tail point to it? */
945 if (cl == cl_tail) {
946 /* Repair it! */
947 cl_tail = cl_prev;
948
949 /* Was it the last class in this band? */
950 if (cl == cl_tail) {
951 /* Kill the band! */
952 q->active[prio] = NULL;
953 q->activemask &= ~(1<<prio);
954 if (cl->q->q.qlen)
955 cbq_activate_class(cl);
956 return NULL;
957 }
958
959 q->active[prio] = cl_tail;
960 }
961 if (cl->q->q.qlen)
962 cbq_activate_class(cl);
963
964 cl = cl_prev;
965 }
966
967next_class:
968 cl_prev = cl;
969 cl = cl->next_alive;
970 } while (cl_prev != cl_tail);
971 } while (deficit);
972
973 q->active[prio] = cl_prev;
974
975 return NULL;
976}
977
978static __inline__ struct sk_buff *
979cbq_dequeue_1(struct Qdisc *sch)
980{
981 struct cbq_sched_data *q = qdisc_priv(sch);
982 struct sk_buff *skb;
983 unsigned activemask;
984
985 activemask = q->activemask&0xFF;
986 while (activemask) {
987 int prio = ffz(~activemask);
988 activemask &= ~(1<<prio);
989 skb = cbq_dequeue_prio(sch, prio);
990 if (skb)
991 return skb;
992 }
993 return NULL;
994}
995
996static struct sk_buff *
997cbq_dequeue(struct Qdisc *sch)
998{
999 struct sk_buff *skb;
1000 struct cbq_sched_data *q = qdisc_priv(sch);
1001 psched_time_t now;
1002 psched_tdiff_t incr;
1003
1004 PSCHED_GET_TIME(now);
1005 incr = PSCHED_TDIFF(now, q->now_rt);
1006
1007 if (q->tx_class) {
1008 psched_tdiff_t incr2;
1009 /* Time integrator. We calculate EOS time
1010 by adding expected packet transmission time.
1011 If real time is greater, we warp artificial clock,
1012 so that:
1013
1014 cbq_time = max(real_time, work);
1015 */
1016 incr2 = L2T(&q->link, q->tx_len);
1017 PSCHED_TADD(q->now, incr2);
1018 cbq_update(q);
1019 if ((incr -= incr2) < 0)
1020 incr = 0;
1021 }
1022 PSCHED_TADD(q->now, incr);
1023 q->now_rt = now;
1024
1025 for (;;) {
1026 q->wd_expires = 0;
1027
1028 skb = cbq_dequeue_1(sch);
1029 if (skb) {
1030 sch->q.qlen--;
1031 sch->flags &= ~TCQ_F_THROTTLED;
1032 return skb;
1033 }
1034
1035 /* All the classes are overlimit.
1036
1037 It is possible, if:
1038
1039 1. Scheduler is empty.
1040 2. Toplevel cutoff inhibited borrowing.
1041 3. Root class is overlimit.
1042
1043 Reset 2d and 3d conditions and retry.
1044
1045 Note, that NS and cbq-2.0 are buggy, peeking
1046 an arbitrary class is appropriate for ancestor-only
1047 sharing, but not for toplevel algorithm.
1048
1049 Our version is better, but slower, because it requires
1050 two passes, but it is unavoidable with top-level sharing.
1051 */
1052
1053 if (q->toplevel == TC_CBQ_MAXLEVEL &&
1054 PSCHED_IS_PASTPERFECT(q->link.undertime))
1055 break;
1056
1057 q->toplevel = TC_CBQ_MAXLEVEL;
1058 PSCHED_SET_PASTPERFECT(q->link.undertime);
1059 }
1060
1061 /* No packets in scheduler or nobody wants to give them to us :-(
1062 Sigh... start watchdog timer in the last case. */
1063
1064 if (sch->q.qlen) {
1065 sch->qstats.overlimits++;
1066 if (q->wd_expires) {
1067 long delay = PSCHED_US2JIFFIE(q->wd_expires);
1068 if (delay <= 0)
1069 delay = 1;
1070 mod_timer(&q->wd_timer, jiffies + delay);
1071 sch->flags |= TCQ_F_THROTTLED;
1072 }
1073 }
1074 return NULL;
1075}
1076
1077/* CBQ class maintanance routines */
1078
1079static void cbq_adjust_levels(struct cbq_class *this)
1080{
1081 if (this == NULL)
1082 return;
1083
1084 do {
1085 int level = 0;
1086 struct cbq_class *cl;
1087
1088 if ((cl = this->children) != NULL) {
1089 do {
1090 if (cl->level > level)
1091 level = cl->level;
1092 } while ((cl = cl->sibling) != this->children);
1093 }
1094 this->level = level+1;
1095 } while ((this = this->tparent) != NULL);
1096}
1097
1098static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1099{
1100 struct cbq_class *cl;
1101 unsigned h;
1102
1103 if (q->quanta[prio] == 0)
1104 return;
1105
1106 for (h=0; h<16; h++) {
1107 for (cl = q->classes[h]; cl; cl = cl->next) {
1108 /* BUGGGG... Beware! This expression suffer of
1109 arithmetic overflows!
1110 */
1111 if (cl->priority == prio) {
1112 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1113 q->quanta[prio];
1114 }
1115 if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) {
1116 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
1117 cl->quantum = cl->qdisc->dev->mtu/2 + 1;
1118 }
1119 }
1120 }
1121}
1122
1123static void cbq_sync_defmap(struct cbq_class *cl)
1124{
1125 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1126 struct cbq_class *split = cl->split;
1127 unsigned h;
1128 int i;
1129
1130 if (split == NULL)
1131 return;
1132
1133 for (i=0; i<=TC_PRIO_MAX; i++) {
1134 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1135 split->defaults[i] = NULL;
1136 }
1137
1138 for (i=0; i<=TC_PRIO_MAX; i++) {
1139 int level = split->level;
1140
1141 if (split->defaults[i])
1142 continue;
1143
1144 for (h=0; h<16; h++) {
1145 struct cbq_class *c;
1146
1147 for (c = q->classes[h]; c; c = c->next) {
1148 if (c->split == split && c->level < level &&
1149 c->defmap&(1<<i)) {
1150 split->defaults[i] = c;
1151 level = c->level;
1152 }
1153 }
1154 }
1155 }
1156}
1157
1158static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1159{
1160 struct cbq_class *split = NULL;
1161
1162 if (splitid == 0) {
1163 if ((split = cl->split) == NULL)
1164 return;
1165 splitid = split->classid;
1166 }
1167
1168 if (split == NULL || split->classid != splitid) {
1169 for (split = cl->tparent; split; split = split->tparent)
1170 if (split->classid == splitid)
1171 break;
1172 }
1173
1174 if (split == NULL)
1175 return;
1176
1177 if (cl->split != split) {
1178 cl->defmap = 0;
1179 cbq_sync_defmap(cl);
1180 cl->split = split;
1181 cl->defmap = def&mask;
1182 } else
1183 cl->defmap = (cl->defmap&~mask)|(def&mask);
1184
1185 cbq_sync_defmap(cl);
1186}
1187
1188static void cbq_unlink_class(struct cbq_class *this)
1189{
1190 struct cbq_class *cl, **clp;
1191 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1192
1193 for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) {
1194 if (cl == this) {
1195 *clp = cl->next;
1196 cl->next = NULL;
1197 break;
1198 }
1199 }
1200
1201 if (this->tparent) {
1202 clp=&this->sibling;
1203 cl = *clp;
1204 do {
1205 if (cl == this) {
1206 *clp = cl->sibling;
1207 break;
1208 }
1209 clp = &cl->sibling;
1210 } while ((cl = *clp) != this->sibling);
1211
1212 if (this->tparent->children == this) {
1213 this->tparent->children = this->sibling;
1214 if (this->sibling == this)
1215 this->tparent->children = NULL;
1216 }
1217 } else {
1218 BUG_TRAP(this->sibling == this);
1219 }
1220}
1221
1222static void cbq_link_class(struct cbq_class *this)
1223{
1224 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1225 unsigned h = cbq_hash(this->classid);
1226 struct cbq_class *parent = this->tparent;
1227
1228 this->sibling = this;
1229 this->next = q->classes[h];
1230 q->classes[h] = this;
1231
1232 if (parent == NULL)
1233 return;
1234
1235 if (parent->children == NULL) {
1236 parent->children = this;
1237 } else {
1238 this->sibling = parent->children->sibling;
1239 parent->children->sibling = this;
1240 }
1241}
1242
1243static unsigned int cbq_drop(struct Qdisc* sch)
1244{
1245 struct cbq_sched_data *q = qdisc_priv(sch);
1246 struct cbq_class *cl, *cl_head;
1247 int prio;
1248 unsigned int len;
1249
1250 for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1251 if ((cl_head = q->active[prio]) == NULL)
1252 continue;
1253
1254 cl = cl_head;
1255 do {
1256 if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1257 sch->q.qlen--;
Jarek Poplawskia37ef2e2006-12-08 00:25:55 -08001258 if (!cl->q->q.qlen)
1259 cbq_deactivate_class(cl);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001260 return len;
1261 }
1262 } while ((cl = cl->next_alive) != cl_head);
1263 }
1264 return 0;
1265}
1266
1267static void
1268cbq_reset(struct Qdisc* sch)
1269{
1270 struct cbq_sched_data *q = qdisc_priv(sch);
1271 struct cbq_class *cl;
1272 int prio;
1273 unsigned h;
1274
1275 q->activemask = 0;
1276 q->pmask = 0;
1277 q->tx_class = NULL;
1278 q->tx_borrowed = NULL;
1279 del_timer(&q->wd_timer);
1280 del_timer(&q->delay_timer);
1281 q->toplevel = TC_CBQ_MAXLEVEL;
1282 PSCHED_GET_TIME(q->now);
1283 q->now_rt = q->now;
1284
1285 for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1286 q->active[prio] = NULL;
1287
1288 for (h = 0; h < 16; h++) {
1289 for (cl = q->classes[h]; cl; cl = cl->next) {
1290 qdisc_reset(cl->q);
1291
1292 cl->next_alive = NULL;
1293 PSCHED_SET_PASTPERFECT(cl->undertime);
1294 cl->avgidle = cl->maxidle;
1295 cl->deficit = cl->quantum;
1296 cl->cpriority = cl->priority;
1297 }
1298 }
1299 sch->q.qlen = 0;
1300}
1301
1302
1303static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1304{
1305 if (lss->change&TCF_CBQ_LSS_FLAGS) {
1306 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1307 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1308 }
1309 if (lss->change&TCF_CBQ_LSS_EWMA)
1310 cl->ewma_log = lss->ewma_log;
1311 if (lss->change&TCF_CBQ_LSS_AVPKT)
1312 cl->avpkt = lss->avpkt;
1313 if (lss->change&TCF_CBQ_LSS_MINIDLE)
1314 cl->minidle = -(long)lss->minidle;
1315 if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1316 cl->maxidle = lss->maxidle;
1317 cl->avgidle = lss->maxidle;
1318 }
1319 if (lss->change&TCF_CBQ_LSS_OFFTIME)
1320 cl->offtime = lss->offtime;
1321 return 0;
1322}
1323
1324static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1325{
1326 q->nclasses[cl->priority]--;
1327 q->quanta[cl->priority] -= cl->weight;
1328 cbq_normalize_quanta(q, cl->priority);
1329}
1330
1331static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1332{
1333 q->nclasses[cl->priority]++;
1334 q->quanta[cl->priority] += cl->weight;
1335 cbq_normalize_quanta(q, cl->priority);
1336}
1337
1338static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1339{
1340 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1341
1342 if (wrr->allot)
1343 cl->allot = wrr->allot;
1344 if (wrr->weight)
1345 cl->weight = wrr->weight;
1346 if (wrr->priority) {
1347 cl->priority = wrr->priority-1;
1348 cl->cpriority = cl->priority;
1349 if (cl->priority >= cl->priority2)
1350 cl->priority2 = TC_CBQ_MAXPRIO-1;
1351 }
1352
1353 cbq_addprio(q, cl);
1354 return 0;
1355}
1356
1357static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1358{
1359 switch (ovl->strategy) {
1360 case TC_CBQ_OVL_CLASSIC:
1361 cl->overlimit = cbq_ovl_classic;
1362 break;
1363 case TC_CBQ_OVL_DELAY:
1364 cl->overlimit = cbq_ovl_delay;
1365 break;
1366 case TC_CBQ_OVL_LOWPRIO:
1367 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1368 ovl->priority2-1 <= cl->priority)
1369 return -EINVAL;
1370 cl->priority2 = ovl->priority2-1;
1371 cl->overlimit = cbq_ovl_lowprio;
1372 break;
1373 case TC_CBQ_OVL_DROP:
1374 cl->overlimit = cbq_ovl_drop;
1375 break;
1376 case TC_CBQ_OVL_RCLASSIC:
1377 cl->overlimit = cbq_ovl_rclassic;
1378 break;
1379 default:
1380 return -EINVAL;
1381 }
1382 cl->penalty = (ovl->penalty*HZ)/1000;
1383 return 0;
1384}
1385
1386#ifdef CONFIG_NET_CLS_POLICE
1387static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1388{
1389 cl->police = p->police;
1390
1391 if (cl->q->handle) {
1392 if (p->police == TC_POLICE_RECLASSIFY)
1393 cl->q->reshape_fail = cbq_reshape_fail;
1394 else
1395 cl->q->reshape_fail = NULL;
1396 }
1397 return 0;
1398}
1399#endif
1400
1401static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1402{
1403 cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1404 return 0;
1405}
1406
1407static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
1408{
1409 struct cbq_sched_data *q = qdisc_priv(sch);
1410 struct rtattr *tb[TCA_CBQ_MAX];
1411 struct tc_ratespec *r;
1412
1413 if (rtattr_parse_nested(tb, TCA_CBQ_MAX, opt) < 0 ||
1414 tb[TCA_CBQ_RTAB-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1415 RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1416 return -EINVAL;
1417
1418 if (tb[TCA_CBQ_LSSOPT-1] &&
1419 RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1420 return -EINVAL;
1421
1422 r = RTA_DATA(tb[TCA_CBQ_RATE-1]);
1423
1424 if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB-1])) == NULL)
1425 return -EINVAL;
1426
1427 q->link.refcnt = 1;
1428 q->link.sibling = &q->link;
1429 q->link.classid = sch->handle;
1430 q->link.qdisc = sch;
Patrick McHardy9f9afec2006-11-29 17:35:18 -08001431 if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1432 sch->handle)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001433 q->link.q = &noop_qdisc;
1434
1435 q->link.priority = TC_CBQ_MAXPRIO-1;
1436 q->link.priority2 = TC_CBQ_MAXPRIO-1;
1437 q->link.cpriority = TC_CBQ_MAXPRIO-1;
1438 q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1439 q->link.overlimit = cbq_ovl_classic;
1440 q->link.allot = psched_mtu(sch->dev);
1441 q->link.quantum = q->link.allot;
1442 q->link.weight = q->link.R_tab->rate.rate;
1443
1444 q->link.ewma_log = TC_CBQ_DEF_EWMA;
1445 q->link.avpkt = q->link.allot/2;
1446 q->link.minidle = -0x7FFFFFFF;
1447 q->link.stats_lock = &sch->dev->queue_lock;
1448
1449 init_timer(&q->wd_timer);
1450 q->wd_timer.data = (unsigned long)sch;
1451 q->wd_timer.function = cbq_watchdog;
1452 init_timer(&q->delay_timer);
1453 q->delay_timer.data = (unsigned long)sch;
1454 q->delay_timer.function = cbq_undelay;
1455 q->toplevel = TC_CBQ_MAXLEVEL;
1456 PSCHED_GET_TIME(q->now);
1457 q->now_rt = q->now;
1458
1459 cbq_link_class(&q->link);
1460
1461 if (tb[TCA_CBQ_LSSOPT-1])
1462 cbq_set_lss(&q->link, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1463
1464 cbq_addprio(q, &q->link);
1465 return 0;
1466}
1467
1468static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1469{
1470 unsigned char *b = skb->tail;
1471
1472 RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1473 return skb->len;
1474
1475rtattr_failure:
1476 skb_trim(skb, b - skb->data);
1477 return -1;
1478}
1479
1480static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1481{
1482 unsigned char *b = skb->tail;
1483 struct tc_cbq_lssopt opt;
1484
1485 opt.flags = 0;
1486 if (cl->borrow == NULL)
1487 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1488 if (cl->share == NULL)
1489 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1490 opt.ewma_log = cl->ewma_log;
1491 opt.level = cl->level;
1492 opt.avpkt = cl->avpkt;
1493 opt.maxidle = cl->maxidle;
1494 opt.minidle = (u32)(-cl->minidle);
1495 opt.offtime = cl->offtime;
1496 opt.change = ~0;
1497 RTA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1498 return skb->len;
1499
1500rtattr_failure:
1501 skb_trim(skb, b - skb->data);
1502 return -1;
1503}
1504
1505static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1506{
1507 unsigned char *b = skb->tail;
1508 struct tc_cbq_wrropt opt;
1509
1510 opt.flags = 0;
1511 opt.allot = cl->allot;
1512 opt.priority = cl->priority+1;
1513 opt.cpriority = cl->cpriority+1;
1514 opt.weight = cl->weight;
1515 RTA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1516 return skb->len;
1517
1518rtattr_failure:
1519 skb_trim(skb, b - skb->data);
1520 return -1;
1521}
1522
1523static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1524{
1525 unsigned char *b = skb->tail;
1526 struct tc_cbq_ovl opt;
1527
1528 opt.strategy = cl->ovl_strategy;
1529 opt.priority2 = cl->priority2+1;
Patrick McHardy8a470772005-06-28 12:56:45 -07001530 opt.pad = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001531 opt.penalty = (cl->penalty*1000)/HZ;
1532 RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1533 return skb->len;
1534
1535rtattr_failure:
1536 skb_trim(skb, b - skb->data);
1537 return -1;
1538}
1539
1540static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1541{
1542 unsigned char *b = skb->tail;
1543 struct tc_cbq_fopt opt;
1544
1545 if (cl->split || cl->defmap) {
1546 opt.split = cl->split ? cl->split->classid : 0;
1547 opt.defmap = cl->defmap;
1548 opt.defchange = ~0;
1549 RTA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1550 }
1551 return skb->len;
1552
1553rtattr_failure:
1554 skb_trim(skb, b - skb->data);
1555 return -1;
1556}
1557
1558#ifdef CONFIG_NET_CLS_POLICE
1559static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1560{
1561 unsigned char *b = skb->tail;
1562 struct tc_cbq_police opt;
1563
1564 if (cl->police) {
1565 opt.police = cl->police;
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -07001566 opt.__res1 = 0;
1567 opt.__res2 = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001568 RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1569 }
1570 return skb->len;
1571
1572rtattr_failure:
1573 skb_trim(skb, b - skb->data);
1574 return -1;
1575}
1576#endif
1577
1578static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1579{
1580 if (cbq_dump_lss(skb, cl) < 0 ||
1581 cbq_dump_rate(skb, cl) < 0 ||
1582 cbq_dump_wrr(skb, cl) < 0 ||
1583 cbq_dump_ovl(skb, cl) < 0 ||
1584#ifdef CONFIG_NET_CLS_POLICE
1585 cbq_dump_police(skb, cl) < 0 ||
1586#endif
1587 cbq_dump_fopt(skb, cl) < 0)
1588 return -1;
1589 return 0;
1590}
1591
1592static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1593{
1594 struct cbq_sched_data *q = qdisc_priv(sch);
1595 unsigned char *b = skb->tail;
1596 struct rtattr *rta;
1597
1598 rta = (struct rtattr*)b;
1599 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1600 if (cbq_dump_attr(skb, &q->link) < 0)
1601 goto rtattr_failure;
1602 rta->rta_len = skb->tail - b;
1603 return skb->len;
1604
1605rtattr_failure:
1606 skb_trim(skb, b - skb->data);
1607 return -1;
1608}
1609
1610static int
1611cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1612{
1613 struct cbq_sched_data *q = qdisc_priv(sch);
1614
1615 q->link.xstats.avgidle = q->link.avgidle;
1616 return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1617}
1618
1619static int
1620cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1621 struct sk_buff *skb, struct tcmsg *tcm)
1622{
1623 struct cbq_class *cl = (struct cbq_class*)arg;
1624 unsigned char *b = skb->tail;
1625 struct rtattr *rta;
1626
1627 if (cl->tparent)
1628 tcm->tcm_parent = cl->tparent->classid;
1629 else
1630 tcm->tcm_parent = TC_H_ROOT;
1631 tcm->tcm_handle = cl->classid;
1632 tcm->tcm_info = cl->q->handle;
1633
1634 rta = (struct rtattr*)b;
1635 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1636 if (cbq_dump_attr(skb, cl) < 0)
1637 goto rtattr_failure;
1638 rta->rta_len = skb->tail - b;
1639 return skb->len;
1640
1641rtattr_failure:
1642 skb_trim(skb, b - skb->data);
1643 return -1;
1644}
1645
1646static int
1647cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1648 struct gnet_dump *d)
1649{
1650 struct cbq_sched_data *q = qdisc_priv(sch);
1651 struct cbq_class *cl = (struct cbq_class*)arg;
1652
1653 cl->qstats.qlen = cl->q->q.qlen;
1654 cl->xstats.avgidle = cl->avgidle;
1655 cl->xstats.undertime = 0;
1656
1657 if (!PSCHED_IS_PASTPERFECT(cl->undertime))
1658 cl->xstats.undertime = PSCHED_TDIFF(cl->undertime, q->now);
1659
1660 if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1661#ifdef CONFIG_NET_ESTIMATOR
1662 gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1663#endif
1664 gnet_stats_copy_queue(d, &cl->qstats) < 0)
1665 return -1;
1666
1667 return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1668}
1669
1670static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1671 struct Qdisc **old)
1672{
1673 struct cbq_class *cl = (struct cbq_class*)arg;
1674
1675 if (cl) {
1676 if (new == NULL) {
Patrick McHardy9f9afec2006-11-29 17:35:18 -08001677 if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1678 cl->classid)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001679 return -ENOBUFS;
1680 } else {
1681#ifdef CONFIG_NET_CLS_POLICE
1682 if (cl->police == TC_POLICE_RECLASSIFY)
1683 new->reshape_fail = cbq_reshape_fail;
1684#endif
1685 }
1686 sch_tree_lock(sch);
Jarek Poplawskia37ef2e2006-12-08 00:25:55 -08001687 *old = xchg(&cl->q, new);
Patrick McHardy5e50da02006-11-29 17:36:20 -08001688 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001689 qdisc_reset(*old);
1690 sch_tree_unlock(sch);
1691
1692 return 0;
1693 }
1694 return -ENOENT;
1695}
1696
1697static struct Qdisc *
1698cbq_leaf(struct Qdisc *sch, unsigned long arg)
1699{
1700 struct cbq_class *cl = (struct cbq_class*)arg;
1701
1702 return cl ? cl->q : NULL;
1703}
1704
Jarek Poplawskia37ef2e2006-12-08 00:25:55 -08001705static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1706{
1707 struct cbq_class *cl = (struct cbq_class *)arg;
1708
1709 if (cl->q->q.qlen == 0)
1710 cbq_deactivate_class(cl);
1711}
1712
Linus Torvalds1da177e2005-04-16 15:20:36 -07001713static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1714{
1715 struct cbq_sched_data *q = qdisc_priv(sch);
1716 struct cbq_class *cl = cbq_class_lookup(q, classid);
1717
1718 if (cl) {
1719 cl->refcnt++;
1720 return (unsigned long)cl;
1721 }
1722 return 0;
1723}
1724
1725static void cbq_destroy_filters(struct cbq_class *cl)
1726{
1727 struct tcf_proto *tp;
1728
1729 while ((tp = cl->filter_list) != NULL) {
1730 cl->filter_list = tp->next;
1731 tcf_destroy(tp);
1732 }
1733}
1734
1735static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1736{
1737 struct cbq_sched_data *q = qdisc_priv(sch);
1738
1739 BUG_TRAP(!cl->filters);
1740
1741 cbq_destroy_filters(cl);
1742 qdisc_destroy(cl->q);
1743 qdisc_put_rtab(cl->R_tab);
1744#ifdef CONFIG_NET_ESTIMATOR
1745 gen_kill_estimator(&cl->bstats, &cl->rate_est);
1746#endif
1747 if (cl != &q->link)
1748 kfree(cl);
1749}
1750
1751static void
1752cbq_destroy(struct Qdisc* sch)
1753{
1754 struct cbq_sched_data *q = qdisc_priv(sch);
1755 struct cbq_class *cl;
1756 unsigned h;
1757
1758#ifdef CONFIG_NET_CLS_POLICE
1759 q->rx_class = NULL;
1760#endif
1761 /*
1762 * Filters must be destroyed first because we don't destroy the
1763 * classes from root to leafs which means that filters can still
1764 * be bound to classes which have been destroyed already. --TGR '04
1765 */
1766 for (h = 0; h < 16; h++)
1767 for (cl = q->classes[h]; cl; cl = cl->next)
1768 cbq_destroy_filters(cl);
1769
1770 for (h = 0; h < 16; h++) {
1771 struct cbq_class *next;
1772
1773 for (cl = q->classes[h]; cl; cl = next) {
1774 next = cl->next;
1775 cbq_destroy_class(sch, cl);
1776 }
1777 }
1778}
1779
1780static void cbq_put(struct Qdisc *sch, unsigned long arg)
1781{
1782 struct cbq_class *cl = (struct cbq_class*)arg;
1783
1784 if (--cl->refcnt == 0) {
1785#ifdef CONFIG_NET_CLS_POLICE
1786 struct cbq_sched_data *q = qdisc_priv(sch);
1787
1788 spin_lock_bh(&sch->dev->queue_lock);
1789 if (q->rx_class == cl)
1790 q->rx_class = NULL;
1791 spin_unlock_bh(&sch->dev->queue_lock);
1792#endif
1793
1794 cbq_destroy_class(sch, cl);
1795 }
1796}
1797
1798static int
1799cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
1800 unsigned long *arg)
1801{
1802 int err;
1803 struct cbq_sched_data *q = qdisc_priv(sch);
1804 struct cbq_class *cl = (struct cbq_class*)*arg;
1805 struct rtattr *opt = tca[TCA_OPTIONS-1];
1806 struct rtattr *tb[TCA_CBQ_MAX];
1807 struct cbq_class *parent;
1808 struct qdisc_rate_table *rtab = NULL;
1809
1810 if (opt==NULL || rtattr_parse_nested(tb, TCA_CBQ_MAX, opt))
1811 return -EINVAL;
1812
1813 if (tb[TCA_CBQ_OVL_STRATEGY-1] &&
1814 RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY-1]) < sizeof(struct tc_cbq_ovl))
1815 return -EINVAL;
1816
1817 if (tb[TCA_CBQ_FOPT-1] &&
1818 RTA_PAYLOAD(tb[TCA_CBQ_FOPT-1]) < sizeof(struct tc_cbq_fopt))
1819 return -EINVAL;
1820
1821 if (tb[TCA_CBQ_RATE-1] &&
1822 RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1823 return -EINVAL;
1824
1825 if (tb[TCA_CBQ_LSSOPT-1] &&
1826 RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1827 return -EINVAL;
1828
1829 if (tb[TCA_CBQ_WRROPT-1] &&
1830 RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt))
1831 return -EINVAL;
1832
1833#ifdef CONFIG_NET_CLS_POLICE
1834 if (tb[TCA_CBQ_POLICE-1] &&
1835 RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police))
1836 return -EINVAL;
1837#endif
1838
1839 if (cl) {
1840 /* Check parent */
1841 if (parentid) {
1842 if (cl->tparent && cl->tparent->classid != parentid)
1843 return -EINVAL;
1844 if (!cl->tparent && parentid != TC_H_ROOT)
1845 return -EINVAL;
1846 }
1847
1848 if (tb[TCA_CBQ_RATE-1]) {
1849 rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1850 if (rtab == NULL)
1851 return -EINVAL;
1852 }
1853
1854 /* Change class parameters */
1855 sch_tree_lock(sch);
1856
1857 if (cl->next_alive != NULL)
1858 cbq_deactivate_class(cl);
1859
1860 if (rtab) {
1861 rtab = xchg(&cl->R_tab, rtab);
1862 qdisc_put_rtab(rtab);
1863 }
1864
1865 if (tb[TCA_CBQ_LSSOPT-1])
1866 cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1867
1868 if (tb[TCA_CBQ_WRROPT-1]) {
1869 cbq_rmprio(q, cl);
1870 cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1871 }
1872
1873 if (tb[TCA_CBQ_OVL_STRATEGY-1])
1874 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1875
1876#ifdef CONFIG_NET_CLS_POLICE
1877 if (tb[TCA_CBQ_POLICE-1])
1878 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1879#endif
1880
1881 if (tb[TCA_CBQ_FOPT-1])
1882 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1883
1884 if (cl->q->q.qlen)
1885 cbq_activate_class(cl);
1886
1887 sch_tree_unlock(sch);
1888
1889#ifdef CONFIG_NET_ESTIMATOR
1890 if (tca[TCA_RATE-1])
1891 gen_replace_estimator(&cl->bstats, &cl->rate_est,
1892 cl->stats_lock, tca[TCA_RATE-1]);
1893#endif
1894 return 0;
1895 }
1896
1897 if (parentid == TC_H_ROOT)
1898 return -EINVAL;
1899
1900 if (tb[TCA_CBQ_WRROPT-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1901 tb[TCA_CBQ_LSSOPT-1] == NULL)
1902 return -EINVAL;
1903
1904 rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1905 if (rtab == NULL)
1906 return -EINVAL;
1907
1908 if (classid) {
1909 err = -EINVAL;
1910 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1911 goto failure;
1912 } else {
1913 int i;
1914 classid = TC_H_MAKE(sch->handle,0x8000);
1915
1916 for (i=0; i<0x8000; i++) {
1917 if (++q->hgenerator >= 0x8000)
1918 q->hgenerator = 1;
1919 if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1920 break;
1921 }
1922 err = -ENOSR;
1923 if (i >= 0x8000)
1924 goto failure;
1925 classid = classid|q->hgenerator;
1926 }
1927
1928 parent = &q->link;
1929 if (parentid) {
1930 parent = cbq_class_lookup(q, parentid);
1931 err = -EINVAL;
1932 if (parent == NULL)
1933 goto failure;
1934 }
1935
1936 err = -ENOBUFS;
Panagiotis Issaris0da974f2006-07-21 14:51:30 -07001937 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001938 if (cl == NULL)
1939 goto failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001940 cl->R_tab = rtab;
1941 rtab = NULL;
1942 cl->refcnt = 1;
Patrick McHardy9f9afec2006-11-29 17:35:18 -08001943 if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001944 cl->q = &noop_qdisc;
1945 cl->classid = classid;
1946 cl->tparent = parent;
1947 cl->qdisc = sch;
1948 cl->allot = parent->allot;
1949 cl->quantum = cl->allot;
1950 cl->weight = cl->R_tab->rate.rate;
1951 cl->stats_lock = &sch->dev->queue_lock;
1952
1953 sch_tree_lock(sch);
1954 cbq_link_class(cl);
1955 cl->borrow = cl->tparent;
1956 if (cl->tparent != &q->link)
1957 cl->share = cl->tparent;
1958 cbq_adjust_levels(parent);
1959 cl->minidle = -0x7FFFFFFF;
1960 cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1961 cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1962 if (cl->ewma_log==0)
1963 cl->ewma_log = q->link.ewma_log;
1964 if (cl->maxidle==0)
1965 cl->maxidle = q->link.maxidle;
1966 if (cl->avpkt==0)
1967 cl->avpkt = q->link.avpkt;
1968 cl->overlimit = cbq_ovl_classic;
1969 if (tb[TCA_CBQ_OVL_STRATEGY-1])
1970 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1971#ifdef CONFIG_NET_CLS_POLICE
1972 if (tb[TCA_CBQ_POLICE-1])
1973 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1974#endif
1975 if (tb[TCA_CBQ_FOPT-1])
1976 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1977 sch_tree_unlock(sch);
1978
1979#ifdef CONFIG_NET_ESTIMATOR
1980 if (tca[TCA_RATE-1])
1981 gen_new_estimator(&cl->bstats, &cl->rate_est,
1982 cl->stats_lock, tca[TCA_RATE-1]);
1983#endif
1984
1985 *arg = (unsigned long)cl;
1986 return 0;
1987
1988failure:
1989 qdisc_put_rtab(rtab);
1990 return err;
1991}
1992
1993static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1994{
1995 struct cbq_sched_data *q = qdisc_priv(sch);
1996 struct cbq_class *cl = (struct cbq_class*)arg;
Jarek Poplawskia37ef2e2006-12-08 00:25:55 -08001997 unsigned int qlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001998
1999 if (cl->filters || cl->children || cl == &q->link)
2000 return -EBUSY;
2001
2002 sch_tree_lock(sch);
2003
Jarek Poplawskia37ef2e2006-12-08 00:25:55 -08002004 qlen = cl->q->q.qlen;
2005 qdisc_reset(cl->q);
2006 qdisc_tree_decrease_qlen(cl->q, qlen);
2007
Linus Torvalds1da177e2005-04-16 15:20:36 -07002008 if (cl->next_alive)
2009 cbq_deactivate_class(cl);
2010
2011 if (q->tx_borrowed == cl)
2012 q->tx_borrowed = q->tx_class;
2013 if (q->tx_class == cl) {
2014 q->tx_class = NULL;
2015 q->tx_borrowed = NULL;
2016 }
2017#ifdef CONFIG_NET_CLS_POLICE
2018 if (q->rx_class == cl)
2019 q->rx_class = NULL;
2020#endif
2021
2022 cbq_unlink_class(cl);
2023 cbq_adjust_levels(cl->tparent);
2024 cl->defmap = 0;
2025 cbq_sync_defmap(cl);
2026
2027 cbq_rmprio(q, cl);
2028 sch_tree_unlock(sch);
2029
2030 if (--cl->refcnt == 0)
2031 cbq_destroy_class(sch, cl);
2032
2033 return 0;
2034}
2035
2036static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
2037{
2038 struct cbq_sched_data *q = qdisc_priv(sch);
2039 struct cbq_class *cl = (struct cbq_class *)arg;
2040
2041 if (cl == NULL)
2042 cl = &q->link;
2043
2044 return &cl->filter_list;
2045}
2046
2047static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
2048 u32 classid)
2049{
2050 struct cbq_sched_data *q = qdisc_priv(sch);
2051 struct cbq_class *p = (struct cbq_class*)parent;
2052 struct cbq_class *cl = cbq_class_lookup(q, classid);
2053
2054 if (cl) {
2055 if (p && p->level <= cl->level)
2056 return 0;
2057 cl->filters++;
2058 return (unsigned long)cl;
2059 }
2060 return 0;
2061}
2062
2063static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2064{
2065 struct cbq_class *cl = (struct cbq_class*)arg;
2066
2067 cl->filters--;
2068}
2069
2070static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2071{
2072 struct cbq_sched_data *q = qdisc_priv(sch);
2073 unsigned h;
2074
2075 if (arg->stop)
2076 return;
2077
2078 for (h = 0; h < 16; h++) {
2079 struct cbq_class *cl;
2080
2081 for (cl = q->classes[h]; cl; cl = cl->next) {
2082 if (arg->count < arg->skip) {
2083 arg->count++;
2084 continue;
2085 }
2086 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2087 arg->stop = 1;
2088 return;
2089 }
2090 arg->count++;
2091 }
2092 }
2093}
2094
2095static struct Qdisc_class_ops cbq_class_ops = {
2096 .graft = cbq_graft,
2097 .leaf = cbq_leaf,
Jarek Poplawskia37ef2e2006-12-08 00:25:55 -08002098 .qlen_notify = cbq_qlen_notify,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002099 .get = cbq_get,
2100 .put = cbq_put,
2101 .change = cbq_change_class,
2102 .delete = cbq_delete,
2103 .walk = cbq_walk,
2104 .tcf_chain = cbq_find_tcf,
2105 .bind_tcf = cbq_bind_filter,
2106 .unbind_tcf = cbq_unbind_filter,
2107 .dump = cbq_dump_class,
2108 .dump_stats = cbq_dump_class_stats,
2109};
2110
2111static struct Qdisc_ops cbq_qdisc_ops = {
2112 .next = NULL,
2113 .cl_ops = &cbq_class_ops,
2114 .id = "cbq",
2115 .priv_size = sizeof(struct cbq_sched_data),
2116 .enqueue = cbq_enqueue,
2117 .dequeue = cbq_dequeue,
2118 .requeue = cbq_requeue,
2119 .drop = cbq_drop,
2120 .init = cbq_init,
2121 .reset = cbq_reset,
2122 .destroy = cbq_destroy,
2123 .change = NULL,
2124 .dump = cbq_dump,
2125 .dump_stats = cbq_dump_stats,
2126 .owner = THIS_MODULE,
2127};
2128
2129static int __init cbq_module_init(void)
2130{
2131 return register_qdisc(&cbq_qdisc_ops);
2132}
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +09002133static void __exit cbq_module_exit(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002134{
2135 unregister_qdisc(&cbq_qdisc_ops);
2136}
2137module_init(cbq_module_init)
2138module_exit(cbq_module_exit)
2139MODULE_LICENSE("GPL");