blob: 9e6cdab6af3befdfd17fd66cab23d784e9b97c97 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * net/sched/sch_cbq.c Class-Based Queueing discipline.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 *
11 */
12
Linus Torvalds1da177e2005-04-16 15:20:36 -070013#include <linux/module.h>
14#include <asm/uaccess.h>
15#include <asm/system.h>
16#include <linux/bitops.h>
17#include <linux/types.h>
18#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070019#include <linux/string.h>
20#include <linux/mm.h>
21#include <linux/socket.h>
22#include <linux/sockios.h>
23#include <linux/in.h>
24#include <linux/errno.h>
25#include <linux/interrupt.h>
26#include <linux/if_ether.h>
27#include <linux/inet.h>
28#include <linux/netdevice.h>
29#include <linux/etherdevice.h>
30#include <linux/notifier.h>
31#include <net/ip.h>
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -070032#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070033#include <net/route.h>
34#include <linux/skbuff.h>
35#include <net/sock.h>
36#include <net/pkt_sched.h>
37
38
39/* Class-Based Queueing (CBQ) algorithm.
40 =======================================
41
42 Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +090043 Management Models for Packet Networks",
Linus Torvalds1da177e2005-04-16 15:20:36 -070044 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
45
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +090046 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
Linus Torvalds1da177e2005-04-16 15:20:36 -070047
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +090048 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
Linus Torvalds1da177e2005-04-16 15:20:36 -070049 Parameters", 1996
50
51 [4] Sally Floyd and Michael Speer, "Experimental Results
52 for Class-Based Queueing", 1998, not published.
53
54 -----------------------------------------------------------------------
55
56 Algorithm skeleton was taken from NS simulator cbq.cc.
57 If someone wants to check this code against the LBL version,
58 he should take into account that ONLY the skeleton was borrowed,
59 the implementation is different. Particularly:
60
61 --- The WRR algorithm is different. Our version looks more
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +090062 reasonable (I hope) and works when quanta are allowed to be
63 less than MTU, which is always the case when real time classes
64 have small rates. Note, that the statement of [3] is
65 incomplete, delay may actually be estimated even if class
66 per-round allotment is less than MTU. Namely, if per-round
67 allotment is W*r_i, and r_1+...+r_k = r < 1
Linus Torvalds1da177e2005-04-16 15:20:36 -070068
69 delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
70
71 In the worst case we have IntServ estimate with D = W*r+k*MTU
72 and C = MTU*r. The proof (if correct at all) is trivial.
73
74
75 --- It seems that cbq-2.0 is not very accurate. At least, I cannot
76 interpret some places, which look like wrong translations
77 from NS. Anyone is advised to find these differences
78 and explain to me, why I am wrong 8).
79
80 --- Linux has no EOI event, so that we cannot estimate true class
81 idle time. Workaround is to consider the next dequeue event
82 as sign that previous packet is finished. This is wrong because of
83 internal device queueing, but on a permanently loaded link it is true.
84 Moreover, combined with clock integrator, this scheme looks
85 very close to an ideal solution. */
86
87struct cbq_sched_data;
88
89
90struct cbq_class
91{
92 struct cbq_class *next; /* hash table link */
93 struct cbq_class *next_alive; /* next class with backlog in this priority band */
94
95/* Parameters */
96 u32 classid;
97 unsigned char priority; /* class priority */
98 unsigned char priority2; /* priority to be used after overlimit */
99 unsigned char ewma_log; /* time constant for idle time calculation */
100 unsigned char ovl_strategy;
101#ifdef CONFIG_NET_CLS_POLICE
102 unsigned char police;
103#endif
104
105 u32 defmap;
106
107 /* Link-sharing scheduler parameters */
108 long maxidle; /* Class parameters: see below. */
109 long offtime;
110 long minidle;
111 u32 avpkt;
112 struct qdisc_rate_table *R_tab;
113
114 /* Overlimit strategy parameters */
115 void (*overlimit)(struct cbq_class *cl);
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700116 psched_tdiff_t penalty;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700117
118 /* General scheduler (WRR) parameters */
119 long allot;
120 long quantum; /* Allotment per WRR round */
121 long weight; /* Relative allotment: see below */
122
123 struct Qdisc *qdisc; /* Ptr to CBQ discipline */
124 struct cbq_class *split; /* Ptr to split node */
125 struct cbq_class *share; /* Ptr to LS parent in the class tree */
126 struct cbq_class *tparent; /* Ptr to tree parent in the class tree */
127 struct cbq_class *borrow; /* NULL if class is bandwidth limited;
128 parent otherwise */
129 struct cbq_class *sibling; /* Sibling chain */
130 struct cbq_class *children; /* Pointer to children chain */
131
132 struct Qdisc *q; /* Elementary queueing discipline */
133
134
135/* Variables */
136 unsigned char cpriority; /* Effective priority */
137 unsigned char delayed;
138 unsigned char level; /* level of the class in hierarchy:
139 0 for leaf classes, and maximal
140 level of children + 1 for nodes.
141 */
142
143 psched_time_t last; /* Last end of service */
144 psched_time_t undertime;
145 long avgidle;
146 long deficit; /* Saved deficit for WRR */
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700147 psched_time_t penalized;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700148 struct gnet_stats_basic bstats;
149 struct gnet_stats_queue qstats;
150 struct gnet_stats_rate_est rate_est;
151 spinlock_t *stats_lock;
152 struct tc_cbq_xstats xstats;
153
154 struct tcf_proto *filter_list;
155
156 int refcnt;
157 int filters;
158
159 struct cbq_class *defaults[TC_PRIO_MAX+1];
160};
161
162struct cbq_sched_data
163{
164 struct cbq_class *classes[16]; /* Hash table of all classes */
165 int nclasses[TC_CBQ_MAXPRIO+1];
166 unsigned quanta[TC_CBQ_MAXPRIO+1];
167
168 struct cbq_class link;
169
170 unsigned activemask;
171 struct cbq_class *active[TC_CBQ_MAXPRIO+1]; /* List of all classes
172 with backlog */
173
174#ifdef CONFIG_NET_CLS_POLICE
175 struct cbq_class *rx_class;
176#endif
177 struct cbq_class *tx_class;
178 struct cbq_class *tx_borrowed;
179 int tx_len;
180 psched_time_t now; /* Cached timestamp */
181 psched_time_t now_rt; /* Cached real time */
182 unsigned pmask;
183
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700184 struct hrtimer delay_timer;
Patrick McHardy88a99352007-03-16 01:21:11 -0700185 struct qdisc_watchdog watchdog; /* Watchdog timer,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700186 started when CBQ has
187 backlog, but cannot
188 transmit just now */
Patrick McHardy88a99352007-03-16 01:21:11 -0700189 psched_tdiff_t wd_expires;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700190 int toplevel;
191 u32 hgenerator;
192};
193
194
195#define L2T(cl,len) ((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log])
196
197
198static __inline__ unsigned cbq_hash(u32 h)
199{
200 h ^= h>>8;
201 h ^= h>>4;
202 return h&0xF;
203}
204
205static __inline__ struct cbq_class *
206cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
207{
208 struct cbq_class *cl;
209
210 for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next)
211 if (cl->classid == classid)
212 return cl;
213 return NULL;
214}
215
216#ifdef CONFIG_NET_CLS_POLICE
217
218static struct cbq_class *
219cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
220{
221 struct cbq_class *cl, *new;
222
223 for (cl = this->tparent; cl; cl = cl->tparent)
224 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
225 return new;
226
227 return NULL;
228}
229
230#endif
231
232/* Classify packet. The procedure is pretty complicated, but
233 it allows us to combine link sharing and priority scheduling
234 transparently.
235
236 Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
237 so that it resolves to split nodes. Then packets are classified
238 by logical priority, or a more specific classifier may be attached
239 to the split node.
240 */
241
242static struct cbq_class *
243cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
244{
245 struct cbq_sched_data *q = qdisc_priv(sch);
246 struct cbq_class *head = &q->link;
247 struct cbq_class **defmap;
248 struct cbq_class *cl = NULL;
249 u32 prio = skb->priority;
250 struct tcf_result res;
251
252 /*
253 * Step 1. If skb->priority points to one of our classes, use it.
254 */
255 if (TC_H_MAJ(prio^sch->handle) == 0 &&
256 (cl = cbq_class_lookup(q, prio)) != NULL)
257 return cl;
258
Jamal Hadi Salim29f1df62006-01-08 22:35:55 -0800259 *qerr = NET_XMIT_BYPASS;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700260 for (;;) {
261 int result = 0;
262 defmap = head->defaults;
263
264 /*
265 * Step 2+n. Apply classifier.
266 */
267 if (!head->filter_list || (result = tc_classify(skb, head->filter_list, &res)) < 0)
268 goto fallback;
269
270 if ((cl = (void*)res.class) == NULL) {
271 if (TC_H_MAJ(res.classid))
272 cl = cbq_class_lookup(q, res.classid);
273 else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
274 cl = defmap[TC_PRIO_BESTEFFORT];
275
276 if (cl == NULL || cl->level >= head->level)
277 goto fallback;
278 }
279
280#ifdef CONFIG_NET_CLS_ACT
281 switch (result) {
282 case TC_ACT_QUEUED:
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900283 case TC_ACT_STOLEN:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700284 *qerr = NET_XMIT_SUCCESS;
285 case TC_ACT_SHOT:
286 return NULL;
287 }
288#elif defined(CONFIG_NET_CLS_POLICE)
289 switch (result) {
290 case TC_POLICE_RECLASSIFY:
291 return cbq_reclassify(skb, cl);
292 case TC_POLICE_SHOT:
293 return NULL;
294 default:
295 break;
296 }
297#endif
298 if (cl->level == 0)
299 return cl;
300
301 /*
302 * Step 3+n. If classifier selected a link sharing class,
303 * apply agency specific classifier.
304 * Repeat this procdure until we hit a leaf node.
305 */
306 head = cl;
307 }
308
309fallback:
310 cl = head;
311
312 /*
313 * Step 4. No success...
314 */
315 if (TC_H_MAJ(prio) == 0 &&
316 !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
317 !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
318 return head;
319
320 return cl;
321}
322
323/*
324 A packet has just been enqueued on the empty class.
325 cbq_activate_class adds it to the tail of active class list
326 of its priority band.
327 */
328
329static __inline__ void cbq_activate_class(struct cbq_class *cl)
330{
331 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
332 int prio = cl->cpriority;
333 struct cbq_class *cl_tail;
334
335 cl_tail = q->active[prio];
336 q->active[prio] = cl;
337
338 if (cl_tail != NULL) {
339 cl->next_alive = cl_tail->next_alive;
340 cl_tail->next_alive = cl;
341 } else {
342 cl->next_alive = cl;
343 q->activemask |= (1<<prio);
344 }
345}
346
347/*
348 Unlink class from active chain.
349 Note that this same procedure is done directly in cbq_dequeue*
350 during round-robin procedure.
351 */
352
353static void cbq_deactivate_class(struct cbq_class *this)
354{
355 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
356 int prio = this->cpriority;
357 struct cbq_class *cl;
358 struct cbq_class *cl_prev = q->active[prio];
359
360 do {
361 cl = cl_prev->next_alive;
362 if (cl == this) {
363 cl_prev->next_alive = cl->next_alive;
364 cl->next_alive = NULL;
365
366 if (cl == q->active[prio]) {
367 q->active[prio] = cl_prev;
368 if (cl == q->active[prio]) {
369 q->active[prio] = NULL;
370 q->activemask &= ~(1<<prio);
371 return;
372 }
373 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700374 return;
375 }
376 } while ((cl_prev = cl) != q->active[prio]);
377}
378
379static void
380cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
381{
382 int toplevel = q->toplevel;
383
384 if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
385 psched_time_t now;
386 psched_tdiff_t incr;
387
388 PSCHED_GET_TIME(now);
389 incr = PSCHED_TDIFF(now, q->now_rt);
Patrick McHardy7c59e252007-03-23 11:27:45 -0700390 now = q->now + incr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700391
392 do {
Patrick McHardy104e0872007-03-23 11:28:07 -0700393 if (cl->undertime < now) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700394 q->toplevel = cl->level;
395 return;
396 }
397 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
398 }
399}
400
401static int
402cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
403{
404 struct cbq_sched_data *q = qdisc_priv(sch);
405 int len = skb->len;
406 int ret;
407 struct cbq_class *cl = cbq_classify(skb, sch, &ret);
408
409#ifdef CONFIG_NET_CLS_POLICE
410 q->rx_class = cl;
411#endif
412 if (cl == NULL) {
Jamal Hadi Salim29f1df62006-01-08 22:35:55 -0800413 if (ret == NET_XMIT_BYPASS)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700414 sch->qstats.drops++;
415 kfree_skb(skb);
416 return ret;
417 }
418
419#ifdef CONFIG_NET_CLS_POLICE
420 cl->q->__parent = sch;
421#endif
422 if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) {
423 sch->q.qlen++;
424 sch->bstats.packets++;
425 sch->bstats.bytes+=len;
426 cbq_mark_toplevel(q, cl);
427 if (!cl->next_alive)
428 cbq_activate_class(cl);
429 return ret;
430 }
431
432 sch->qstats.drops++;
433 cbq_mark_toplevel(q, cl);
434 cl->qstats.drops++;
435 return ret;
436}
437
438static int
439cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
440{
441 struct cbq_sched_data *q = qdisc_priv(sch);
442 struct cbq_class *cl;
443 int ret;
444
445 if ((cl = q->tx_class) == NULL) {
446 kfree_skb(skb);
447 sch->qstats.drops++;
448 return NET_XMIT_CN;
449 }
450 q->tx_class = NULL;
451
452 cbq_mark_toplevel(q, cl);
453
454#ifdef CONFIG_NET_CLS_POLICE
455 q->rx_class = cl;
456 cl->q->__parent = sch;
457#endif
458 if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
459 sch->q.qlen++;
460 sch->qstats.requeues++;
461 if (!cl->next_alive)
462 cbq_activate_class(cl);
463 return 0;
464 }
465 sch->qstats.drops++;
466 cl->qstats.drops++;
467 return ret;
468}
469
470/* Overlimit actions */
471
472/* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
473
474static void cbq_ovl_classic(struct cbq_class *cl)
475{
476 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
477 psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
478
479 if (!cl->delayed) {
480 delay += cl->offtime;
481
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900482 /*
Linus Torvalds1da177e2005-04-16 15:20:36 -0700483 Class goes to sleep, so that it will have no
484 chance to work avgidle. Let's forgive it 8)
485
486 BTW cbq-2.0 has a crap in this
487 place, apparently they forgot to shift it by cl->ewma_log.
488 */
489 if (cl->avgidle < 0)
490 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
491 if (cl->avgidle < cl->minidle)
492 cl->avgidle = cl->minidle;
493 if (delay <= 0)
494 delay = 1;
Patrick McHardy7c59e252007-03-23 11:27:45 -0700495 cl->undertime = q->now + delay;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700496
497 cl->xstats.overactions++;
498 cl->delayed = 1;
499 }
500 if (q->wd_expires == 0 || q->wd_expires > delay)
501 q->wd_expires = delay;
502
503 /* Dirty work! We must schedule wakeups based on
504 real available rate, rather than leaf rate,
505 which may be tiny (even zero).
506 */
507 if (q->toplevel == TC_CBQ_MAXLEVEL) {
508 struct cbq_class *b;
509 psched_tdiff_t base_delay = q->wd_expires;
510
511 for (b = cl->borrow; b; b = b->borrow) {
512 delay = PSCHED_TDIFF(b->undertime, q->now);
513 if (delay < base_delay) {
514 if (delay <= 0)
515 delay = 1;
516 base_delay = delay;
517 }
518 }
519
520 q->wd_expires = base_delay;
521 }
522}
523
524/* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
525 they go overlimit
526 */
527
528static void cbq_ovl_rclassic(struct cbq_class *cl)
529{
530 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
531 struct cbq_class *this = cl;
532
533 do {
534 if (cl->level > q->toplevel) {
535 cl = NULL;
536 break;
537 }
538 } while ((cl = cl->borrow) != NULL);
539
540 if (cl == NULL)
541 cl = this;
542 cbq_ovl_classic(cl);
543}
544
545/* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */
546
547static void cbq_ovl_delay(struct cbq_class *cl)
548{
549 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
550 psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
551
552 if (!cl->delayed) {
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700553 psched_time_t sched = q->now;
554 ktime_t expires;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700555
556 delay += cl->offtime;
557 if (cl->avgidle < 0)
558 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
559 if (cl->avgidle < cl->minidle)
560 cl->avgidle = cl->minidle;
Patrick McHardy7c59e252007-03-23 11:27:45 -0700561 cl->undertime = q->now + delay;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700562
563 if (delay > 0) {
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700564 sched += delay + cl->penalty;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700565 cl->penalized = sched;
566 cl->cpriority = TC_CBQ_MAXPRIO;
567 q->pmask |= (1<<TC_CBQ_MAXPRIO);
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700568
569 expires = ktime_set(0, 0);
570 expires = ktime_add_ns(expires, PSCHED_US2NS(sched));
571 if (hrtimer_try_to_cancel(&q->delay_timer) &&
572 ktime_to_ns(ktime_sub(q->delay_timer.expires,
573 expires)) > 0)
574 q->delay_timer.expires = expires;
575 hrtimer_restart(&q->delay_timer);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700576 cl->delayed = 1;
577 cl->xstats.overactions++;
578 return;
579 }
580 delay = 1;
581 }
582 if (q->wd_expires == 0 || q->wd_expires > delay)
583 q->wd_expires = delay;
584}
585
586/* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
587
588static void cbq_ovl_lowprio(struct cbq_class *cl)
589{
590 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
591
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700592 cl->penalized = q->now + cl->penalty;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700593
594 if (cl->cpriority != cl->priority2) {
595 cl->cpriority = cl->priority2;
596 q->pmask |= (1<<cl->cpriority);
597 cl->xstats.overactions++;
598 }
599 cbq_ovl_classic(cl);
600}
601
602/* TC_CBQ_OVL_DROP: penalize class by dropping */
603
604static void cbq_ovl_drop(struct cbq_class *cl)
605{
606 if (cl->q->ops->drop)
607 if (cl->q->ops->drop(cl->q))
608 cl->qdisc->q.qlen--;
609 cl->xstats.overactions++;
610 cbq_ovl_classic(cl);
611}
612
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700613static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
614 psched_time_t now)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700615{
616 struct cbq_class *cl;
617 struct cbq_class *cl_prev = q->active[prio];
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700618 psched_time_t sched = now;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700619
620 if (cl_prev == NULL)
Patrick McHardye9054a32007-03-16 01:21:40 -0700621 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700622
623 do {
624 cl = cl_prev->next_alive;
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700625 if (now - cl->penalized > 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700626 cl_prev->next_alive = cl->next_alive;
627 cl->next_alive = NULL;
628 cl->cpriority = cl->priority;
629 cl->delayed = 0;
630 cbq_activate_class(cl);
631
632 if (cl == q->active[prio]) {
633 q->active[prio] = cl_prev;
634 if (cl == q->active[prio]) {
635 q->active[prio] = NULL;
636 return 0;
637 }
638 }
639
640 cl = cl_prev->next_alive;
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700641 } else if (sched - cl->penalized > 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700642 sched = cl->penalized;
643 } while ((cl_prev = cl) != q->active[prio]);
644
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700645 return sched - now;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700646}
647
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700648static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700649{
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700650 struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
651 delay_timer);
652 struct Qdisc *sch = q->watchdog.qdisc;
653 psched_time_t now;
654 psched_tdiff_t delay = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700655 unsigned pmask;
656
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700657 PSCHED_GET_TIME(now);
658
Linus Torvalds1da177e2005-04-16 15:20:36 -0700659 pmask = q->pmask;
660 q->pmask = 0;
661
662 while (pmask) {
663 int prio = ffz(~pmask);
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700664 psched_tdiff_t tmp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700665
666 pmask &= ~(1<<prio);
667
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700668 tmp = cbq_undelay_prio(q, prio, now);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700669 if (tmp > 0) {
670 q->pmask |= 1<<prio;
671 if (tmp < delay || delay == 0)
672 delay = tmp;
673 }
674 }
675
676 if (delay) {
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700677 ktime_t time;
678
679 time = ktime_set(0, 0);
680 time = ktime_add_ns(time, PSCHED_US2NS(now + delay));
681 hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700682 }
683
684 sch->flags &= ~TCQ_F_THROTTLED;
685 netif_schedule(sch->dev);
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700686 return HRTIMER_NORESTART;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700687}
688
689
690#ifdef CONFIG_NET_CLS_POLICE
691
692static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
693{
694 int len = skb->len;
695 struct Qdisc *sch = child->__parent;
696 struct cbq_sched_data *q = qdisc_priv(sch);
697 struct cbq_class *cl = q->rx_class;
698
699 q->rx_class = NULL;
700
701 if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
702
703 cbq_mark_toplevel(q, cl);
704
705 q->rx_class = cl;
706 cl->q->__parent = sch;
707
708 if (cl->q->enqueue(skb, cl->q) == 0) {
709 sch->q.qlen++;
710 sch->bstats.packets++;
711 sch->bstats.bytes+=len;
712 if (!cl->next_alive)
713 cbq_activate_class(cl);
714 return 0;
715 }
716 sch->qstats.drops++;
717 return 0;
718 }
719
720 sch->qstats.drops++;
721 return -1;
722}
723#endif
724
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900725/*
Linus Torvalds1da177e2005-04-16 15:20:36 -0700726 It is mission critical procedure.
727
728 We "regenerate" toplevel cutoff, if transmitting class
729 has backlog and it is not regulated. It is not part of
730 original CBQ description, but looks more reasonable.
731 Probably, it is wrong. This question needs further investigation.
732*/
733
734static __inline__ void
735cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
736 struct cbq_class *borrowed)
737{
738 if (cl && q->toplevel >= borrowed->level) {
739 if (cl->q->q.qlen > 1) {
740 do {
741 if (PSCHED_IS_PASTPERFECT(borrowed->undertime)) {
742 q->toplevel = borrowed->level;
743 return;
744 }
745 } while ((borrowed=borrowed->borrow) != NULL);
746 }
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900747#if 0
Linus Torvalds1da177e2005-04-16 15:20:36 -0700748 /* It is not necessary now. Uncommenting it
749 will save CPU cycles, but decrease fairness.
750 */
751 q->toplevel = TC_CBQ_MAXLEVEL;
752#endif
753 }
754}
755
756static void
757cbq_update(struct cbq_sched_data *q)
758{
759 struct cbq_class *this = q->tx_class;
760 struct cbq_class *cl = this;
761 int len = q->tx_len;
762
763 q->tx_class = NULL;
764
765 for ( ; cl; cl = cl->share) {
766 long avgidle = cl->avgidle;
767 long idle;
768
769 cl->bstats.packets++;
770 cl->bstats.bytes += len;
771
772 /*
773 (now - last) is total time between packet right edges.
774 (last_pktlen/rate) is "virtual" busy time, so that
775
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900776 idle = (now - last) - last_pktlen/rate
Linus Torvalds1da177e2005-04-16 15:20:36 -0700777 */
778
779 idle = PSCHED_TDIFF(q->now, cl->last);
780 if ((unsigned long)idle > 128*1024*1024) {
781 avgidle = cl->maxidle;
782 } else {
783 idle -= L2T(cl, len);
784
785 /* true_avgidle := (1-W)*true_avgidle + W*idle,
786 where W=2^{-ewma_log}. But cl->avgidle is scaled:
787 cl->avgidle == true_avgidle/W,
788 hence:
789 */
790 avgidle += idle - (avgidle>>cl->ewma_log);
791 }
792
793 if (avgidle <= 0) {
794 /* Overlimit or at-limit */
795
796 if (avgidle < cl->minidle)
797 avgidle = cl->minidle;
798
799 cl->avgidle = avgidle;
800
801 /* Calculate expected time, when this class
802 will be allowed to send.
803 It will occur, when:
804 (1-W)*true_avgidle + W*delay = 0, i.e.
805 idle = (1/W - 1)*(-true_avgidle)
806 or
807 idle = (1 - W)*(-cl->avgidle);
808 */
809 idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
810
811 /*
812 That is not all.
813 To maintain the rate allocated to the class,
814 we add to undertime virtual clock,
815 necessary to complete transmitted packet.
816 (len/phys_bandwidth has been already passed
817 to the moment of cbq_update)
818 */
819
820 idle -= L2T(&q->link, len);
821 idle += L2T(cl, len);
822
Patrick McHardy7c59e252007-03-23 11:27:45 -0700823 cl->undertime = q->now + idle;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700824 } else {
825 /* Underlimit */
826
827 PSCHED_SET_PASTPERFECT(cl->undertime);
828 if (avgidle > cl->maxidle)
829 cl->avgidle = cl->maxidle;
830 else
831 cl->avgidle = avgidle;
832 }
833 cl->last = q->now;
834 }
835
836 cbq_update_toplevel(q, this, q->tx_borrowed);
837}
838
839static __inline__ struct cbq_class *
840cbq_under_limit(struct cbq_class *cl)
841{
842 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
843 struct cbq_class *this_cl = cl;
844
845 if (cl->tparent == NULL)
846 return cl;
847
Patrick McHardy104e0872007-03-23 11:28:07 -0700848 if (PSCHED_IS_PASTPERFECT(cl->undertime) || q->now >= cl->undertime) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700849 cl->delayed = 0;
850 return cl;
851 }
852
853 do {
854 /* It is very suspicious place. Now overlimit
855 action is generated for not bounded classes
856 only if link is completely congested.
857 Though it is in agree with ancestor-only paradigm,
858 it looks very stupid. Particularly,
859 it means that this chunk of code will either
860 never be called or result in strong amplification
861 of burstiness. Dangerous, silly, and, however,
862 no another solution exists.
863 */
864 if ((cl = cl->borrow) == NULL) {
865 this_cl->qstats.overlimits++;
866 this_cl->overlimit(this_cl);
867 return NULL;
868 }
869 if (cl->level > q->toplevel)
870 return NULL;
871 } while (!PSCHED_IS_PASTPERFECT(cl->undertime) &&
Patrick McHardy104e0872007-03-23 11:28:07 -0700872 q->now < cl->undertime);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700873
874 cl->delayed = 0;
875 return cl;
876}
877
878static __inline__ struct sk_buff *
879cbq_dequeue_prio(struct Qdisc *sch, int prio)
880{
881 struct cbq_sched_data *q = qdisc_priv(sch);
882 struct cbq_class *cl_tail, *cl_prev, *cl;
883 struct sk_buff *skb;
884 int deficit;
885
886 cl_tail = cl_prev = q->active[prio];
887 cl = cl_prev->next_alive;
888
889 do {
890 deficit = 0;
891
892 /* Start round */
893 do {
894 struct cbq_class *borrow = cl;
895
896 if (cl->q->q.qlen &&
897 (borrow = cbq_under_limit(cl)) == NULL)
898 goto skip_class;
899
900 if (cl->deficit <= 0) {
901 /* Class exhausted its allotment per
902 this round. Switch to the next one.
903 */
904 deficit = 1;
905 cl->deficit += cl->quantum;
906 goto next_class;
907 }
908
909 skb = cl->q->dequeue(cl->q);
910
911 /* Class did not give us any skb :-(
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900912 It could occur even if cl->q->q.qlen != 0
Linus Torvalds1da177e2005-04-16 15:20:36 -0700913 f.e. if cl->q == "tbf"
914 */
915 if (skb == NULL)
916 goto skip_class;
917
918 cl->deficit -= skb->len;
919 q->tx_class = cl;
920 q->tx_borrowed = borrow;
921 if (borrow != cl) {
922#ifndef CBQ_XSTATS_BORROWS_BYTES
923 borrow->xstats.borrows++;
924 cl->xstats.borrows++;
925#else
926 borrow->xstats.borrows += skb->len;
927 cl->xstats.borrows += skb->len;
928#endif
929 }
930 q->tx_len = skb->len;
931
932 if (cl->deficit <= 0) {
933 q->active[prio] = cl;
934 cl = cl->next_alive;
935 cl->deficit += cl->quantum;
936 }
937 return skb;
938
939skip_class:
940 if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
941 /* Class is empty or penalized.
942 Unlink it from active chain.
943 */
944 cl_prev->next_alive = cl->next_alive;
945 cl->next_alive = NULL;
946
947 /* Did cl_tail point to it? */
948 if (cl == cl_tail) {
949 /* Repair it! */
950 cl_tail = cl_prev;
951
952 /* Was it the last class in this band? */
953 if (cl == cl_tail) {
954 /* Kill the band! */
955 q->active[prio] = NULL;
956 q->activemask &= ~(1<<prio);
957 if (cl->q->q.qlen)
958 cbq_activate_class(cl);
959 return NULL;
960 }
961
962 q->active[prio] = cl_tail;
963 }
964 if (cl->q->q.qlen)
965 cbq_activate_class(cl);
966
967 cl = cl_prev;
968 }
969
970next_class:
971 cl_prev = cl;
972 cl = cl->next_alive;
973 } while (cl_prev != cl_tail);
974 } while (deficit);
975
976 q->active[prio] = cl_prev;
977
978 return NULL;
979}
980
981static __inline__ struct sk_buff *
982cbq_dequeue_1(struct Qdisc *sch)
983{
984 struct cbq_sched_data *q = qdisc_priv(sch);
985 struct sk_buff *skb;
986 unsigned activemask;
987
988 activemask = q->activemask&0xFF;
989 while (activemask) {
990 int prio = ffz(~activemask);
991 activemask &= ~(1<<prio);
992 skb = cbq_dequeue_prio(sch, prio);
993 if (skb)
994 return skb;
995 }
996 return NULL;
997}
998
999static struct sk_buff *
1000cbq_dequeue(struct Qdisc *sch)
1001{
1002 struct sk_buff *skb;
1003 struct cbq_sched_data *q = qdisc_priv(sch);
1004 psched_time_t now;
1005 psched_tdiff_t incr;
1006
1007 PSCHED_GET_TIME(now);
1008 incr = PSCHED_TDIFF(now, q->now_rt);
1009
1010 if (q->tx_class) {
1011 psched_tdiff_t incr2;
1012 /* Time integrator. We calculate EOS time
1013 by adding expected packet transmission time.
1014 If real time is greater, we warp artificial clock,
1015 so that:
1016
1017 cbq_time = max(real_time, work);
1018 */
1019 incr2 = L2T(&q->link, q->tx_len);
Patrick McHardy7c59e252007-03-23 11:27:45 -07001020 q->now += incr2;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001021 cbq_update(q);
1022 if ((incr -= incr2) < 0)
1023 incr = 0;
1024 }
Patrick McHardy7c59e252007-03-23 11:27:45 -07001025 q->now += incr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001026 q->now_rt = now;
1027
1028 for (;;) {
1029 q->wd_expires = 0;
1030
1031 skb = cbq_dequeue_1(sch);
1032 if (skb) {
1033 sch->q.qlen--;
1034 sch->flags &= ~TCQ_F_THROTTLED;
1035 return skb;
1036 }
1037
1038 /* All the classes are overlimit.
1039
1040 It is possible, if:
1041
1042 1. Scheduler is empty.
1043 2. Toplevel cutoff inhibited borrowing.
1044 3. Root class is overlimit.
1045
1046 Reset 2d and 3d conditions and retry.
1047
1048 Note, that NS and cbq-2.0 are buggy, peeking
1049 an arbitrary class is appropriate for ancestor-only
1050 sharing, but not for toplevel algorithm.
1051
1052 Our version is better, but slower, because it requires
1053 two passes, but it is unavoidable with top-level sharing.
1054 */
1055
1056 if (q->toplevel == TC_CBQ_MAXLEVEL &&
1057 PSCHED_IS_PASTPERFECT(q->link.undertime))
1058 break;
1059
1060 q->toplevel = TC_CBQ_MAXLEVEL;
1061 PSCHED_SET_PASTPERFECT(q->link.undertime);
1062 }
1063
1064 /* No packets in scheduler or nobody wants to give them to us :-(
1065 Sigh... start watchdog timer in the last case. */
1066
1067 if (sch->q.qlen) {
1068 sch->qstats.overlimits++;
Patrick McHardy88a99352007-03-16 01:21:11 -07001069 if (q->wd_expires)
1070 qdisc_watchdog_schedule(&q->watchdog,
Patrick McHardybb239ac2007-03-16 12:31:28 -07001071 now + q->wd_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001072 }
1073 return NULL;
1074}
1075
1076/* CBQ class maintanance routines */
1077
1078static void cbq_adjust_levels(struct cbq_class *this)
1079{
1080 if (this == NULL)
1081 return;
1082
1083 do {
1084 int level = 0;
1085 struct cbq_class *cl;
1086
1087 if ((cl = this->children) != NULL) {
1088 do {
1089 if (cl->level > level)
1090 level = cl->level;
1091 } while ((cl = cl->sibling) != this->children);
1092 }
1093 this->level = level+1;
1094 } while ((this = this->tparent) != NULL);
1095}
1096
1097static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1098{
1099 struct cbq_class *cl;
1100 unsigned h;
1101
1102 if (q->quanta[prio] == 0)
1103 return;
1104
1105 for (h=0; h<16; h++) {
1106 for (cl = q->classes[h]; cl; cl = cl->next) {
1107 /* BUGGGG... Beware! This expression suffer of
1108 arithmetic overflows!
1109 */
1110 if (cl->priority == prio) {
1111 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1112 q->quanta[prio];
1113 }
1114 if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) {
1115 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
1116 cl->quantum = cl->qdisc->dev->mtu/2 + 1;
1117 }
1118 }
1119 }
1120}
1121
1122static void cbq_sync_defmap(struct cbq_class *cl)
1123{
1124 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1125 struct cbq_class *split = cl->split;
1126 unsigned h;
1127 int i;
1128
1129 if (split == NULL)
1130 return;
1131
1132 for (i=0; i<=TC_PRIO_MAX; i++) {
1133 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1134 split->defaults[i] = NULL;
1135 }
1136
1137 for (i=0; i<=TC_PRIO_MAX; i++) {
1138 int level = split->level;
1139
1140 if (split->defaults[i])
1141 continue;
1142
1143 for (h=0; h<16; h++) {
1144 struct cbq_class *c;
1145
1146 for (c = q->classes[h]; c; c = c->next) {
1147 if (c->split == split && c->level < level &&
1148 c->defmap&(1<<i)) {
1149 split->defaults[i] = c;
1150 level = c->level;
1151 }
1152 }
1153 }
1154 }
1155}
1156
1157static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1158{
1159 struct cbq_class *split = NULL;
1160
1161 if (splitid == 0) {
1162 if ((split = cl->split) == NULL)
1163 return;
1164 splitid = split->classid;
1165 }
1166
1167 if (split == NULL || split->classid != splitid) {
1168 for (split = cl->tparent; split; split = split->tparent)
1169 if (split->classid == splitid)
1170 break;
1171 }
1172
1173 if (split == NULL)
1174 return;
1175
1176 if (cl->split != split) {
1177 cl->defmap = 0;
1178 cbq_sync_defmap(cl);
1179 cl->split = split;
1180 cl->defmap = def&mask;
1181 } else
1182 cl->defmap = (cl->defmap&~mask)|(def&mask);
1183
1184 cbq_sync_defmap(cl);
1185}
1186
1187static void cbq_unlink_class(struct cbq_class *this)
1188{
1189 struct cbq_class *cl, **clp;
1190 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1191
1192 for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) {
1193 if (cl == this) {
1194 *clp = cl->next;
1195 cl->next = NULL;
1196 break;
1197 }
1198 }
1199
1200 if (this->tparent) {
1201 clp=&this->sibling;
1202 cl = *clp;
1203 do {
1204 if (cl == this) {
1205 *clp = cl->sibling;
1206 break;
1207 }
1208 clp = &cl->sibling;
1209 } while ((cl = *clp) != this->sibling);
1210
1211 if (this->tparent->children == this) {
1212 this->tparent->children = this->sibling;
1213 if (this->sibling == this)
1214 this->tparent->children = NULL;
1215 }
1216 } else {
1217 BUG_TRAP(this->sibling == this);
1218 }
1219}
1220
1221static void cbq_link_class(struct cbq_class *this)
1222{
1223 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1224 unsigned h = cbq_hash(this->classid);
1225 struct cbq_class *parent = this->tparent;
1226
1227 this->sibling = this;
1228 this->next = q->classes[h];
1229 q->classes[h] = this;
1230
1231 if (parent == NULL)
1232 return;
1233
1234 if (parent->children == NULL) {
1235 parent->children = this;
1236 } else {
1237 this->sibling = parent->children->sibling;
1238 parent->children->sibling = this;
1239 }
1240}
1241
1242static unsigned int cbq_drop(struct Qdisc* sch)
1243{
1244 struct cbq_sched_data *q = qdisc_priv(sch);
1245 struct cbq_class *cl, *cl_head;
1246 int prio;
1247 unsigned int len;
1248
1249 for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1250 if ((cl_head = q->active[prio]) == NULL)
1251 continue;
1252
1253 cl = cl_head;
1254 do {
1255 if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1256 sch->q.qlen--;
Jarek Poplawskia37ef2e2006-12-08 00:25:55 -08001257 if (!cl->q->q.qlen)
1258 cbq_deactivate_class(cl);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001259 return len;
1260 }
1261 } while ((cl = cl->next_alive) != cl_head);
1262 }
1263 return 0;
1264}
1265
1266static void
1267cbq_reset(struct Qdisc* sch)
1268{
1269 struct cbq_sched_data *q = qdisc_priv(sch);
1270 struct cbq_class *cl;
1271 int prio;
1272 unsigned h;
1273
1274 q->activemask = 0;
1275 q->pmask = 0;
1276 q->tx_class = NULL;
1277 q->tx_borrowed = NULL;
Patrick McHardy88a99352007-03-16 01:21:11 -07001278 qdisc_watchdog_cancel(&q->watchdog);
Patrick McHardy1a13cb62007-03-16 01:22:20 -07001279 hrtimer_cancel(&q->delay_timer);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001280 q->toplevel = TC_CBQ_MAXLEVEL;
1281 PSCHED_GET_TIME(q->now);
1282 q->now_rt = q->now;
1283
1284 for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1285 q->active[prio] = NULL;
1286
1287 for (h = 0; h < 16; h++) {
1288 for (cl = q->classes[h]; cl; cl = cl->next) {
1289 qdisc_reset(cl->q);
1290
1291 cl->next_alive = NULL;
1292 PSCHED_SET_PASTPERFECT(cl->undertime);
1293 cl->avgidle = cl->maxidle;
1294 cl->deficit = cl->quantum;
1295 cl->cpriority = cl->priority;
1296 }
1297 }
1298 sch->q.qlen = 0;
1299}
1300
1301
1302static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1303{
1304 if (lss->change&TCF_CBQ_LSS_FLAGS) {
1305 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1306 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1307 }
1308 if (lss->change&TCF_CBQ_LSS_EWMA)
1309 cl->ewma_log = lss->ewma_log;
1310 if (lss->change&TCF_CBQ_LSS_AVPKT)
1311 cl->avpkt = lss->avpkt;
1312 if (lss->change&TCF_CBQ_LSS_MINIDLE)
1313 cl->minidle = -(long)lss->minidle;
1314 if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1315 cl->maxidle = lss->maxidle;
1316 cl->avgidle = lss->maxidle;
1317 }
1318 if (lss->change&TCF_CBQ_LSS_OFFTIME)
1319 cl->offtime = lss->offtime;
1320 return 0;
1321}
1322
1323static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1324{
1325 q->nclasses[cl->priority]--;
1326 q->quanta[cl->priority] -= cl->weight;
1327 cbq_normalize_quanta(q, cl->priority);
1328}
1329
1330static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1331{
1332 q->nclasses[cl->priority]++;
1333 q->quanta[cl->priority] += cl->weight;
1334 cbq_normalize_quanta(q, cl->priority);
1335}
1336
1337static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1338{
1339 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1340
1341 if (wrr->allot)
1342 cl->allot = wrr->allot;
1343 if (wrr->weight)
1344 cl->weight = wrr->weight;
1345 if (wrr->priority) {
1346 cl->priority = wrr->priority-1;
1347 cl->cpriority = cl->priority;
1348 if (cl->priority >= cl->priority2)
1349 cl->priority2 = TC_CBQ_MAXPRIO-1;
1350 }
1351
1352 cbq_addprio(q, cl);
1353 return 0;
1354}
1355
1356static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1357{
1358 switch (ovl->strategy) {
1359 case TC_CBQ_OVL_CLASSIC:
1360 cl->overlimit = cbq_ovl_classic;
1361 break;
1362 case TC_CBQ_OVL_DELAY:
1363 cl->overlimit = cbq_ovl_delay;
1364 break;
1365 case TC_CBQ_OVL_LOWPRIO:
1366 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1367 ovl->priority2-1 <= cl->priority)
1368 return -EINVAL;
1369 cl->priority2 = ovl->priority2-1;
1370 cl->overlimit = cbq_ovl_lowprio;
1371 break;
1372 case TC_CBQ_OVL_DROP:
1373 cl->overlimit = cbq_ovl_drop;
1374 break;
1375 case TC_CBQ_OVL_RCLASSIC:
1376 cl->overlimit = cbq_ovl_rclassic;
1377 break;
1378 default:
1379 return -EINVAL;
1380 }
Patrick McHardy1a13cb62007-03-16 01:22:20 -07001381 cl->penalty = ovl->penalty;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001382 return 0;
1383}
1384
1385#ifdef CONFIG_NET_CLS_POLICE
1386static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1387{
1388 cl->police = p->police;
1389
1390 if (cl->q->handle) {
1391 if (p->police == TC_POLICE_RECLASSIFY)
1392 cl->q->reshape_fail = cbq_reshape_fail;
1393 else
1394 cl->q->reshape_fail = NULL;
1395 }
1396 return 0;
1397}
1398#endif
1399
1400static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1401{
1402 cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1403 return 0;
1404}
1405
1406static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
1407{
1408 struct cbq_sched_data *q = qdisc_priv(sch);
1409 struct rtattr *tb[TCA_CBQ_MAX];
1410 struct tc_ratespec *r;
1411
1412 if (rtattr_parse_nested(tb, TCA_CBQ_MAX, opt) < 0 ||
1413 tb[TCA_CBQ_RTAB-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1414 RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1415 return -EINVAL;
1416
1417 if (tb[TCA_CBQ_LSSOPT-1] &&
1418 RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1419 return -EINVAL;
1420
1421 r = RTA_DATA(tb[TCA_CBQ_RATE-1]);
1422
1423 if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB-1])) == NULL)
1424 return -EINVAL;
1425
1426 q->link.refcnt = 1;
1427 q->link.sibling = &q->link;
1428 q->link.classid = sch->handle;
1429 q->link.qdisc = sch;
Patrick McHardy9f9afec2006-11-29 17:35:18 -08001430 if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1431 sch->handle)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001432 q->link.q = &noop_qdisc;
1433
1434 q->link.priority = TC_CBQ_MAXPRIO-1;
1435 q->link.priority2 = TC_CBQ_MAXPRIO-1;
1436 q->link.cpriority = TC_CBQ_MAXPRIO-1;
1437 q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1438 q->link.overlimit = cbq_ovl_classic;
1439 q->link.allot = psched_mtu(sch->dev);
1440 q->link.quantum = q->link.allot;
1441 q->link.weight = q->link.R_tab->rate.rate;
1442
1443 q->link.ewma_log = TC_CBQ_DEF_EWMA;
1444 q->link.avpkt = q->link.allot/2;
1445 q->link.minidle = -0x7FFFFFFF;
1446 q->link.stats_lock = &sch->dev->queue_lock;
1447
Patrick McHardy88a99352007-03-16 01:21:11 -07001448 qdisc_watchdog_init(&q->watchdog, sch);
Patrick McHardy1a13cb62007-03-16 01:22:20 -07001449 hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001450 q->delay_timer.function = cbq_undelay;
1451 q->toplevel = TC_CBQ_MAXLEVEL;
1452 PSCHED_GET_TIME(q->now);
1453 q->now_rt = q->now;
1454
1455 cbq_link_class(&q->link);
1456
1457 if (tb[TCA_CBQ_LSSOPT-1])
1458 cbq_set_lss(&q->link, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1459
1460 cbq_addprio(q, &q->link);
1461 return 0;
1462}
1463
1464static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1465{
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001466 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001467
1468 RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1469 return skb->len;
1470
1471rtattr_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001472 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001473 return -1;
1474}
1475
1476static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1477{
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001478 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001479 struct tc_cbq_lssopt opt;
1480
1481 opt.flags = 0;
1482 if (cl->borrow == NULL)
1483 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1484 if (cl->share == NULL)
1485 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1486 opt.ewma_log = cl->ewma_log;
1487 opt.level = cl->level;
1488 opt.avpkt = cl->avpkt;
1489 opt.maxidle = cl->maxidle;
1490 opt.minidle = (u32)(-cl->minidle);
1491 opt.offtime = cl->offtime;
1492 opt.change = ~0;
1493 RTA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1494 return skb->len;
1495
1496rtattr_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001497 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001498 return -1;
1499}
1500
1501static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1502{
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001503 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001504 struct tc_cbq_wrropt opt;
1505
1506 opt.flags = 0;
1507 opt.allot = cl->allot;
1508 opt.priority = cl->priority+1;
1509 opt.cpriority = cl->cpriority+1;
1510 opt.weight = cl->weight;
1511 RTA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1512 return skb->len;
1513
1514rtattr_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001515 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001516 return -1;
1517}
1518
1519static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1520{
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001521 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001522 struct tc_cbq_ovl opt;
1523
1524 opt.strategy = cl->ovl_strategy;
1525 opt.priority2 = cl->priority2+1;
Patrick McHardy8a470772005-06-28 12:56:45 -07001526 opt.pad = 0;
Patrick McHardy1a13cb62007-03-16 01:22:20 -07001527 opt.penalty = cl->penalty;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001528 RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1529 return skb->len;
1530
1531rtattr_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001532 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001533 return -1;
1534}
1535
1536static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1537{
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001538 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001539 struct tc_cbq_fopt opt;
1540
1541 if (cl->split || cl->defmap) {
1542 opt.split = cl->split ? cl->split->classid : 0;
1543 opt.defmap = cl->defmap;
1544 opt.defchange = ~0;
1545 RTA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1546 }
1547 return skb->len;
1548
1549rtattr_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001550 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001551 return -1;
1552}
1553
1554#ifdef CONFIG_NET_CLS_POLICE
1555static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1556{
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001557 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001558 struct tc_cbq_police opt;
1559
1560 if (cl->police) {
1561 opt.police = cl->police;
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -07001562 opt.__res1 = 0;
1563 opt.__res2 = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001564 RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1565 }
1566 return skb->len;
1567
1568rtattr_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001569 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001570 return -1;
1571}
1572#endif
1573
1574static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1575{
1576 if (cbq_dump_lss(skb, cl) < 0 ||
1577 cbq_dump_rate(skb, cl) < 0 ||
1578 cbq_dump_wrr(skb, cl) < 0 ||
1579 cbq_dump_ovl(skb, cl) < 0 ||
1580#ifdef CONFIG_NET_CLS_POLICE
1581 cbq_dump_police(skb, cl) < 0 ||
1582#endif
1583 cbq_dump_fopt(skb, cl) < 0)
1584 return -1;
1585 return 0;
1586}
1587
1588static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1589{
1590 struct cbq_sched_data *q = qdisc_priv(sch);
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001591 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001592 struct rtattr *rta;
1593
1594 rta = (struct rtattr*)b;
1595 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1596 if (cbq_dump_attr(skb, &q->link) < 0)
1597 goto rtattr_failure;
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001598 rta->rta_len = skb_tail_pointer(skb) - b;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001599 return skb->len;
1600
1601rtattr_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001602 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001603 return -1;
1604}
1605
1606static int
1607cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1608{
1609 struct cbq_sched_data *q = qdisc_priv(sch);
1610
1611 q->link.xstats.avgidle = q->link.avgidle;
1612 return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1613}
1614
1615static int
1616cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1617 struct sk_buff *skb, struct tcmsg *tcm)
1618{
1619 struct cbq_class *cl = (struct cbq_class*)arg;
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001620 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001621 struct rtattr *rta;
1622
1623 if (cl->tparent)
1624 tcm->tcm_parent = cl->tparent->classid;
1625 else
1626 tcm->tcm_parent = TC_H_ROOT;
1627 tcm->tcm_handle = cl->classid;
1628 tcm->tcm_info = cl->q->handle;
1629
1630 rta = (struct rtattr*)b;
1631 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1632 if (cbq_dump_attr(skb, cl) < 0)
1633 goto rtattr_failure;
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001634 rta->rta_len = skb_tail_pointer(skb) - b;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001635 return skb->len;
1636
1637rtattr_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001638 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001639 return -1;
1640}
1641
1642static int
1643cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1644 struct gnet_dump *d)
1645{
1646 struct cbq_sched_data *q = qdisc_priv(sch);
1647 struct cbq_class *cl = (struct cbq_class*)arg;
1648
1649 cl->qstats.qlen = cl->q->q.qlen;
1650 cl->xstats.avgidle = cl->avgidle;
1651 cl->xstats.undertime = 0;
1652
1653 if (!PSCHED_IS_PASTPERFECT(cl->undertime))
1654 cl->xstats.undertime = PSCHED_TDIFF(cl->undertime, q->now);
1655
1656 if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1657#ifdef CONFIG_NET_ESTIMATOR
1658 gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1659#endif
1660 gnet_stats_copy_queue(d, &cl->qstats) < 0)
1661 return -1;
1662
1663 return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1664}
1665
1666static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1667 struct Qdisc **old)
1668{
1669 struct cbq_class *cl = (struct cbq_class*)arg;
1670
1671 if (cl) {
1672 if (new == NULL) {
Patrick McHardy9f9afec2006-11-29 17:35:18 -08001673 if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1674 cl->classid)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001675 return -ENOBUFS;
1676 } else {
1677#ifdef CONFIG_NET_CLS_POLICE
1678 if (cl->police == TC_POLICE_RECLASSIFY)
1679 new->reshape_fail = cbq_reshape_fail;
1680#endif
1681 }
1682 sch_tree_lock(sch);
Jarek Poplawskia37ef2e2006-12-08 00:25:55 -08001683 *old = xchg(&cl->q, new);
Patrick McHardy5e50da02006-11-29 17:36:20 -08001684 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001685 qdisc_reset(*old);
1686 sch_tree_unlock(sch);
1687
1688 return 0;
1689 }
1690 return -ENOENT;
1691}
1692
1693static struct Qdisc *
1694cbq_leaf(struct Qdisc *sch, unsigned long arg)
1695{
1696 struct cbq_class *cl = (struct cbq_class*)arg;
1697
1698 return cl ? cl->q : NULL;
1699}
1700
Jarek Poplawskia37ef2e2006-12-08 00:25:55 -08001701static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1702{
1703 struct cbq_class *cl = (struct cbq_class *)arg;
1704
1705 if (cl->q->q.qlen == 0)
1706 cbq_deactivate_class(cl);
1707}
1708
Linus Torvalds1da177e2005-04-16 15:20:36 -07001709static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1710{
1711 struct cbq_sched_data *q = qdisc_priv(sch);
1712 struct cbq_class *cl = cbq_class_lookup(q, classid);
1713
1714 if (cl) {
1715 cl->refcnt++;
1716 return (unsigned long)cl;
1717 }
1718 return 0;
1719}
1720
1721static void cbq_destroy_filters(struct cbq_class *cl)
1722{
1723 struct tcf_proto *tp;
1724
1725 while ((tp = cl->filter_list) != NULL) {
1726 cl->filter_list = tp->next;
1727 tcf_destroy(tp);
1728 }
1729}
1730
1731static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1732{
1733 struct cbq_sched_data *q = qdisc_priv(sch);
1734
1735 BUG_TRAP(!cl->filters);
1736
1737 cbq_destroy_filters(cl);
1738 qdisc_destroy(cl->q);
1739 qdisc_put_rtab(cl->R_tab);
1740#ifdef CONFIG_NET_ESTIMATOR
1741 gen_kill_estimator(&cl->bstats, &cl->rate_est);
1742#endif
1743 if (cl != &q->link)
1744 kfree(cl);
1745}
1746
1747static void
1748cbq_destroy(struct Qdisc* sch)
1749{
1750 struct cbq_sched_data *q = qdisc_priv(sch);
1751 struct cbq_class *cl;
1752 unsigned h;
1753
1754#ifdef CONFIG_NET_CLS_POLICE
1755 q->rx_class = NULL;
1756#endif
1757 /*
1758 * Filters must be destroyed first because we don't destroy the
1759 * classes from root to leafs which means that filters can still
1760 * be bound to classes which have been destroyed already. --TGR '04
1761 */
1762 for (h = 0; h < 16; h++)
1763 for (cl = q->classes[h]; cl; cl = cl->next)
1764 cbq_destroy_filters(cl);
1765
1766 for (h = 0; h < 16; h++) {
1767 struct cbq_class *next;
1768
1769 for (cl = q->classes[h]; cl; cl = next) {
1770 next = cl->next;
1771 cbq_destroy_class(sch, cl);
1772 }
1773 }
1774}
1775
1776static void cbq_put(struct Qdisc *sch, unsigned long arg)
1777{
1778 struct cbq_class *cl = (struct cbq_class*)arg;
1779
1780 if (--cl->refcnt == 0) {
1781#ifdef CONFIG_NET_CLS_POLICE
1782 struct cbq_sched_data *q = qdisc_priv(sch);
1783
1784 spin_lock_bh(&sch->dev->queue_lock);
1785 if (q->rx_class == cl)
1786 q->rx_class = NULL;
1787 spin_unlock_bh(&sch->dev->queue_lock);
1788#endif
1789
1790 cbq_destroy_class(sch, cl);
1791 }
1792}
1793
1794static int
1795cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
1796 unsigned long *arg)
1797{
1798 int err;
1799 struct cbq_sched_data *q = qdisc_priv(sch);
1800 struct cbq_class *cl = (struct cbq_class*)*arg;
1801 struct rtattr *opt = tca[TCA_OPTIONS-1];
1802 struct rtattr *tb[TCA_CBQ_MAX];
1803 struct cbq_class *parent;
1804 struct qdisc_rate_table *rtab = NULL;
1805
1806 if (opt==NULL || rtattr_parse_nested(tb, TCA_CBQ_MAX, opt))
1807 return -EINVAL;
1808
1809 if (tb[TCA_CBQ_OVL_STRATEGY-1] &&
1810 RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY-1]) < sizeof(struct tc_cbq_ovl))
1811 return -EINVAL;
1812
1813 if (tb[TCA_CBQ_FOPT-1] &&
1814 RTA_PAYLOAD(tb[TCA_CBQ_FOPT-1]) < sizeof(struct tc_cbq_fopt))
1815 return -EINVAL;
1816
1817 if (tb[TCA_CBQ_RATE-1] &&
1818 RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1819 return -EINVAL;
1820
1821 if (tb[TCA_CBQ_LSSOPT-1] &&
1822 RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1823 return -EINVAL;
1824
1825 if (tb[TCA_CBQ_WRROPT-1] &&
1826 RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt))
1827 return -EINVAL;
1828
1829#ifdef CONFIG_NET_CLS_POLICE
1830 if (tb[TCA_CBQ_POLICE-1] &&
1831 RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police))
1832 return -EINVAL;
1833#endif
1834
1835 if (cl) {
1836 /* Check parent */
1837 if (parentid) {
1838 if (cl->tparent && cl->tparent->classid != parentid)
1839 return -EINVAL;
1840 if (!cl->tparent && parentid != TC_H_ROOT)
1841 return -EINVAL;
1842 }
1843
1844 if (tb[TCA_CBQ_RATE-1]) {
1845 rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1846 if (rtab == NULL)
1847 return -EINVAL;
1848 }
1849
1850 /* Change class parameters */
1851 sch_tree_lock(sch);
1852
1853 if (cl->next_alive != NULL)
1854 cbq_deactivate_class(cl);
1855
1856 if (rtab) {
1857 rtab = xchg(&cl->R_tab, rtab);
1858 qdisc_put_rtab(rtab);
1859 }
1860
1861 if (tb[TCA_CBQ_LSSOPT-1])
1862 cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1863
1864 if (tb[TCA_CBQ_WRROPT-1]) {
1865 cbq_rmprio(q, cl);
1866 cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1867 }
1868
1869 if (tb[TCA_CBQ_OVL_STRATEGY-1])
1870 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1871
1872#ifdef CONFIG_NET_CLS_POLICE
1873 if (tb[TCA_CBQ_POLICE-1])
1874 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1875#endif
1876
1877 if (tb[TCA_CBQ_FOPT-1])
1878 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1879
1880 if (cl->q->q.qlen)
1881 cbq_activate_class(cl);
1882
1883 sch_tree_unlock(sch);
1884
1885#ifdef CONFIG_NET_ESTIMATOR
1886 if (tca[TCA_RATE-1])
1887 gen_replace_estimator(&cl->bstats, &cl->rate_est,
1888 cl->stats_lock, tca[TCA_RATE-1]);
1889#endif
1890 return 0;
1891 }
1892
1893 if (parentid == TC_H_ROOT)
1894 return -EINVAL;
1895
1896 if (tb[TCA_CBQ_WRROPT-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1897 tb[TCA_CBQ_LSSOPT-1] == NULL)
1898 return -EINVAL;
1899
1900 rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1901 if (rtab == NULL)
1902 return -EINVAL;
1903
1904 if (classid) {
1905 err = -EINVAL;
1906 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1907 goto failure;
1908 } else {
1909 int i;
1910 classid = TC_H_MAKE(sch->handle,0x8000);
1911
1912 for (i=0; i<0x8000; i++) {
1913 if (++q->hgenerator >= 0x8000)
1914 q->hgenerator = 1;
1915 if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1916 break;
1917 }
1918 err = -ENOSR;
1919 if (i >= 0x8000)
1920 goto failure;
1921 classid = classid|q->hgenerator;
1922 }
1923
1924 parent = &q->link;
1925 if (parentid) {
1926 parent = cbq_class_lookup(q, parentid);
1927 err = -EINVAL;
1928 if (parent == NULL)
1929 goto failure;
1930 }
1931
1932 err = -ENOBUFS;
Panagiotis Issaris0da974f2006-07-21 14:51:30 -07001933 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001934 if (cl == NULL)
1935 goto failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001936 cl->R_tab = rtab;
1937 rtab = NULL;
1938 cl->refcnt = 1;
Patrick McHardy9f9afec2006-11-29 17:35:18 -08001939 if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001940 cl->q = &noop_qdisc;
1941 cl->classid = classid;
1942 cl->tparent = parent;
1943 cl->qdisc = sch;
1944 cl->allot = parent->allot;
1945 cl->quantum = cl->allot;
1946 cl->weight = cl->R_tab->rate.rate;
1947 cl->stats_lock = &sch->dev->queue_lock;
1948
1949 sch_tree_lock(sch);
1950 cbq_link_class(cl);
1951 cl->borrow = cl->tparent;
1952 if (cl->tparent != &q->link)
1953 cl->share = cl->tparent;
1954 cbq_adjust_levels(parent);
1955 cl->minidle = -0x7FFFFFFF;
1956 cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1957 cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1958 if (cl->ewma_log==0)
1959 cl->ewma_log = q->link.ewma_log;
1960 if (cl->maxidle==0)
1961 cl->maxidle = q->link.maxidle;
1962 if (cl->avpkt==0)
1963 cl->avpkt = q->link.avpkt;
1964 cl->overlimit = cbq_ovl_classic;
1965 if (tb[TCA_CBQ_OVL_STRATEGY-1])
1966 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1967#ifdef CONFIG_NET_CLS_POLICE
1968 if (tb[TCA_CBQ_POLICE-1])
1969 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1970#endif
1971 if (tb[TCA_CBQ_FOPT-1])
1972 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1973 sch_tree_unlock(sch);
1974
1975#ifdef CONFIG_NET_ESTIMATOR
1976 if (tca[TCA_RATE-1])
1977 gen_new_estimator(&cl->bstats, &cl->rate_est,
1978 cl->stats_lock, tca[TCA_RATE-1]);
1979#endif
1980
1981 *arg = (unsigned long)cl;
1982 return 0;
1983
1984failure:
1985 qdisc_put_rtab(rtab);
1986 return err;
1987}
1988
1989static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1990{
1991 struct cbq_sched_data *q = qdisc_priv(sch);
1992 struct cbq_class *cl = (struct cbq_class*)arg;
Jarek Poplawskia37ef2e2006-12-08 00:25:55 -08001993 unsigned int qlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001994
1995 if (cl->filters || cl->children || cl == &q->link)
1996 return -EBUSY;
1997
1998 sch_tree_lock(sch);
1999
Jarek Poplawskia37ef2e2006-12-08 00:25:55 -08002000 qlen = cl->q->q.qlen;
2001 qdisc_reset(cl->q);
2002 qdisc_tree_decrease_qlen(cl->q, qlen);
2003
Linus Torvalds1da177e2005-04-16 15:20:36 -07002004 if (cl->next_alive)
2005 cbq_deactivate_class(cl);
2006
2007 if (q->tx_borrowed == cl)
2008 q->tx_borrowed = q->tx_class;
2009 if (q->tx_class == cl) {
2010 q->tx_class = NULL;
2011 q->tx_borrowed = NULL;
2012 }
2013#ifdef CONFIG_NET_CLS_POLICE
2014 if (q->rx_class == cl)
2015 q->rx_class = NULL;
2016#endif
2017
2018 cbq_unlink_class(cl);
2019 cbq_adjust_levels(cl->tparent);
2020 cl->defmap = 0;
2021 cbq_sync_defmap(cl);
2022
2023 cbq_rmprio(q, cl);
2024 sch_tree_unlock(sch);
2025
2026 if (--cl->refcnt == 0)
2027 cbq_destroy_class(sch, cl);
2028
2029 return 0;
2030}
2031
2032static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
2033{
2034 struct cbq_sched_data *q = qdisc_priv(sch);
2035 struct cbq_class *cl = (struct cbq_class *)arg;
2036
2037 if (cl == NULL)
2038 cl = &q->link;
2039
2040 return &cl->filter_list;
2041}
2042
2043static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
2044 u32 classid)
2045{
2046 struct cbq_sched_data *q = qdisc_priv(sch);
2047 struct cbq_class *p = (struct cbq_class*)parent;
2048 struct cbq_class *cl = cbq_class_lookup(q, classid);
2049
2050 if (cl) {
2051 if (p && p->level <= cl->level)
2052 return 0;
2053 cl->filters++;
2054 return (unsigned long)cl;
2055 }
2056 return 0;
2057}
2058
2059static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2060{
2061 struct cbq_class *cl = (struct cbq_class*)arg;
2062
2063 cl->filters--;
2064}
2065
2066static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2067{
2068 struct cbq_sched_data *q = qdisc_priv(sch);
2069 unsigned h;
2070
2071 if (arg->stop)
2072 return;
2073
2074 for (h = 0; h < 16; h++) {
2075 struct cbq_class *cl;
2076
2077 for (cl = q->classes[h]; cl; cl = cl->next) {
2078 if (arg->count < arg->skip) {
2079 arg->count++;
2080 continue;
2081 }
2082 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2083 arg->stop = 1;
2084 return;
2085 }
2086 arg->count++;
2087 }
2088 }
2089}
2090
2091static struct Qdisc_class_ops cbq_class_ops = {
2092 .graft = cbq_graft,
2093 .leaf = cbq_leaf,
Jarek Poplawskia37ef2e2006-12-08 00:25:55 -08002094 .qlen_notify = cbq_qlen_notify,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002095 .get = cbq_get,
2096 .put = cbq_put,
2097 .change = cbq_change_class,
2098 .delete = cbq_delete,
2099 .walk = cbq_walk,
2100 .tcf_chain = cbq_find_tcf,
2101 .bind_tcf = cbq_bind_filter,
2102 .unbind_tcf = cbq_unbind_filter,
2103 .dump = cbq_dump_class,
2104 .dump_stats = cbq_dump_class_stats,
2105};
2106
2107static struct Qdisc_ops cbq_qdisc_ops = {
2108 .next = NULL,
2109 .cl_ops = &cbq_class_ops,
2110 .id = "cbq",
2111 .priv_size = sizeof(struct cbq_sched_data),
2112 .enqueue = cbq_enqueue,
2113 .dequeue = cbq_dequeue,
2114 .requeue = cbq_requeue,
2115 .drop = cbq_drop,
2116 .init = cbq_init,
2117 .reset = cbq_reset,
2118 .destroy = cbq_destroy,
2119 .change = NULL,
2120 .dump = cbq_dump,
2121 .dump_stats = cbq_dump_stats,
2122 .owner = THIS_MODULE,
2123};
2124
2125static int __init cbq_module_init(void)
2126{
2127 return register_qdisc(&cbq_qdisc_ops);
2128}
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +09002129static void __exit cbq_module_exit(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002130{
2131 unregister_qdisc(&cbq_qdisc_ops);
2132}
2133module_init(cbq_module_init)
2134module_exit(cbq_module_exit)
2135MODULE_LICENSE("GPL");