blob: 585f8a6ec7ec2315ea861f3c135a27585e9e8450 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * net/sched/sch_cbq.c Class-Based Queueing discipline.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 *
11 */
12
Linus Torvalds1da177e2005-04-16 15:20:36 -070013#include <linux/module.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070014#include <linux/types.h>
15#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070016#include <linux/string.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070017#include <linux/errno.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <linux/skbuff.h>
Patrick McHardy0ba48052007-07-02 22:49:07 -070019#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070020#include <net/pkt_sched.h>
21
22
23/* Class-Based Queueing (CBQ) algorithm.
24 =======================================
25
26 Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +090027 Management Models for Packet Networks",
Linus Torvalds1da177e2005-04-16 15:20:36 -070028 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
29
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +090030 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
Linus Torvalds1da177e2005-04-16 15:20:36 -070031
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +090032 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
Linus Torvalds1da177e2005-04-16 15:20:36 -070033 Parameters", 1996
34
35 [4] Sally Floyd and Michael Speer, "Experimental Results
36 for Class-Based Queueing", 1998, not published.
37
38 -----------------------------------------------------------------------
39
40 Algorithm skeleton was taken from NS simulator cbq.cc.
41 If someone wants to check this code against the LBL version,
42 he should take into account that ONLY the skeleton was borrowed,
43 the implementation is different. Particularly:
44
45 --- The WRR algorithm is different. Our version looks more
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +090046 reasonable (I hope) and works when quanta are allowed to be
47 less than MTU, which is always the case when real time classes
48 have small rates. Note, that the statement of [3] is
49 incomplete, delay may actually be estimated even if class
50 per-round allotment is less than MTU. Namely, if per-round
51 allotment is W*r_i, and r_1+...+r_k = r < 1
Linus Torvalds1da177e2005-04-16 15:20:36 -070052
53 delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
54
55 In the worst case we have IntServ estimate with D = W*r+k*MTU
56 and C = MTU*r. The proof (if correct at all) is trivial.
57
58
59 --- It seems that cbq-2.0 is not very accurate. At least, I cannot
60 interpret some places, which look like wrong translations
61 from NS. Anyone is advised to find these differences
62 and explain to me, why I am wrong 8).
63
64 --- Linux has no EOI event, so that we cannot estimate true class
65 idle time. Workaround is to consider the next dequeue event
66 as sign that previous packet is finished. This is wrong because of
67 internal device queueing, but on a permanently loaded link it is true.
68 Moreover, combined with clock integrator, this scheme looks
69 very close to an ideal solution. */
70
71struct cbq_sched_data;
72
73
74struct cbq_class
75{
76 struct cbq_class *next; /* hash table link */
77 struct cbq_class *next_alive; /* next class with backlog in this priority band */
78
79/* Parameters */
80 u32 classid;
81 unsigned char priority; /* class priority */
82 unsigned char priority2; /* priority to be used after overlimit */
83 unsigned char ewma_log; /* time constant for idle time calculation */
84 unsigned char ovl_strategy;
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -070085#ifdef CONFIG_NET_CLS_ACT
Linus Torvalds1da177e2005-04-16 15:20:36 -070086 unsigned char police;
87#endif
88
89 u32 defmap;
90
91 /* Link-sharing scheduler parameters */
92 long maxidle; /* Class parameters: see below. */
93 long offtime;
94 long minidle;
95 u32 avpkt;
96 struct qdisc_rate_table *R_tab;
97
98 /* Overlimit strategy parameters */
99 void (*overlimit)(struct cbq_class *cl);
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700100 psched_tdiff_t penalty;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700101
102 /* General scheduler (WRR) parameters */
103 long allot;
104 long quantum; /* Allotment per WRR round */
105 long weight; /* Relative allotment: see below */
106
107 struct Qdisc *qdisc; /* Ptr to CBQ discipline */
108 struct cbq_class *split; /* Ptr to split node */
109 struct cbq_class *share; /* Ptr to LS parent in the class tree */
110 struct cbq_class *tparent; /* Ptr to tree parent in the class tree */
111 struct cbq_class *borrow; /* NULL if class is bandwidth limited;
112 parent otherwise */
113 struct cbq_class *sibling; /* Sibling chain */
114 struct cbq_class *children; /* Pointer to children chain */
115
116 struct Qdisc *q; /* Elementary queueing discipline */
117
118
119/* Variables */
120 unsigned char cpriority; /* Effective priority */
121 unsigned char delayed;
122 unsigned char level; /* level of the class in hierarchy:
123 0 for leaf classes, and maximal
124 level of children + 1 for nodes.
125 */
126
127 psched_time_t last; /* Last end of service */
128 psched_time_t undertime;
129 long avgidle;
130 long deficit; /* Saved deficit for WRR */
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700131 psched_time_t penalized;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700132 struct gnet_stats_basic bstats;
133 struct gnet_stats_queue qstats;
134 struct gnet_stats_rate_est rate_est;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700135 struct tc_cbq_xstats xstats;
136
137 struct tcf_proto *filter_list;
138
139 int refcnt;
140 int filters;
141
142 struct cbq_class *defaults[TC_PRIO_MAX+1];
143};
144
145struct cbq_sched_data
146{
147 struct cbq_class *classes[16]; /* Hash table of all classes */
148 int nclasses[TC_CBQ_MAXPRIO+1];
149 unsigned quanta[TC_CBQ_MAXPRIO+1];
150
151 struct cbq_class link;
152
153 unsigned activemask;
154 struct cbq_class *active[TC_CBQ_MAXPRIO+1]; /* List of all classes
155 with backlog */
156
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -0700157#ifdef CONFIG_NET_CLS_ACT
Linus Torvalds1da177e2005-04-16 15:20:36 -0700158 struct cbq_class *rx_class;
159#endif
160 struct cbq_class *tx_class;
161 struct cbq_class *tx_borrowed;
162 int tx_len;
163 psched_time_t now; /* Cached timestamp */
164 psched_time_t now_rt; /* Cached real time */
165 unsigned pmask;
166
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700167 struct hrtimer delay_timer;
Patrick McHardy88a99352007-03-16 01:21:11 -0700168 struct qdisc_watchdog watchdog; /* Watchdog timer,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700169 started when CBQ has
170 backlog, but cannot
171 transmit just now */
Patrick McHardy88a99352007-03-16 01:21:11 -0700172 psched_tdiff_t wd_expires;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700173 int toplevel;
174 u32 hgenerator;
175};
176
177
Jesper Dangaard Brouere9bef552007-09-12 16:35:24 +0200178#define L2T(cl,len) qdisc_l2t((cl)->R_tab,len)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700179
180
181static __inline__ unsigned cbq_hash(u32 h)
182{
183 h ^= h>>8;
184 h ^= h>>4;
185 return h&0xF;
186}
187
188static __inline__ struct cbq_class *
189cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
190{
191 struct cbq_class *cl;
192
193 for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next)
194 if (cl->classid == classid)
195 return cl;
196 return NULL;
197}
198
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -0700199#ifdef CONFIG_NET_CLS_ACT
Linus Torvalds1da177e2005-04-16 15:20:36 -0700200
201static struct cbq_class *
202cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
203{
204 struct cbq_class *cl, *new;
205
206 for (cl = this->tparent; cl; cl = cl->tparent)
207 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
208 return new;
209
210 return NULL;
211}
212
213#endif
214
215/* Classify packet. The procedure is pretty complicated, but
216 it allows us to combine link sharing and priority scheduling
217 transparently.
218
219 Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
220 so that it resolves to split nodes. Then packets are classified
221 by logical priority, or a more specific classifier may be attached
222 to the split node.
223 */
224
225static struct cbq_class *
226cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
227{
228 struct cbq_sched_data *q = qdisc_priv(sch);
229 struct cbq_class *head = &q->link;
230 struct cbq_class **defmap;
231 struct cbq_class *cl = NULL;
232 u32 prio = skb->priority;
233 struct tcf_result res;
234
235 /*
236 * Step 1. If skb->priority points to one of our classes, use it.
237 */
238 if (TC_H_MAJ(prio^sch->handle) == 0 &&
239 (cl = cbq_class_lookup(q, prio)) != NULL)
240 return cl;
241
Jamal Hadi Salim29f1df62006-01-08 22:35:55 -0800242 *qerr = NET_XMIT_BYPASS;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700243 for (;;) {
244 int result = 0;
245 defmap = head->defaults;
246
247 /*
248 * Step 2+n. Apply classifier.
249 */
Patrick McHardy73ca4912007-07-15 00:02:31 -0700250 if (!head->filter_list ||
251 (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700252 goto fallback;
253
254 if ((cl = (void*)res.class) == NULL) {
255 if (TC_H_MAJ(res.classid))
256 cl = cbq_class_lookup(q, res.classid);
257 else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
258 cl = defmap[TC_PRIO_BESTEFFORT];
259
260 if (cl == NULL || cl->level >= head->level)
261 goto fallback;
262 }
263
264#ifdef CONFIG_NET_CLS_ACT
265 switch (result) {
266 case TC_ACT_QUEUED:
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900267 case TC_ACT_STOLEN:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700268 *qerr = NET_XMIT_SUCCESS;
269 case TC_ACT_SHOT:
270 return NULL;
Patrick McHardy73ca4912007-07-15 00:02:31 -0700271 case TC_ACT_RECLASSIFY:
272 return cbq_reclassify(skb, cl);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700273 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700274#endif
275 if (cl->level == 0)
276 return cl;
277
278 /*
279 * Step 3+n. If classifier selected a link sharing class,
280 * apply agency specific classifier.
281 * Repeat this procdure until we hit a leaf node.
282 */
283 head = cl;
284 }
285
286fallback:
287 cl = head;
288
289 /*
290 * Step 4. No success...
291 */
292 if (TC_H_MAJ(prio) == 0 &&
293 !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
294 !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
295 return head;
296
297 return cl;
298}
299
300/*
301 A packet has just been enqueued on the empty class.
302 cbq_activate_class adds it to the tail of active class list
303 of its priority band.
304 */
305
306static __inline__ void cbq_activate_class(struct cbq_class *cl)
307{
308 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
309 int prio = cl->cpriority;
310 struct cbq_class *cl_tail;
311
312 cl_tail = q->active[prio];
313 q->active[prio] = cl;
314
315 if (cl_tail != NULL) {
316 cl->next_alive = cl_tail->next_alive;
317 cl_tail->next_alive = cl;
318 } else {
319 cl->next_alive = cl;
320 q->activemask |= (1<<prio);
321 }
322}
323
324/*
325 Unlink class from active chain.
326 Note that this same procedure is done directly in cbq_dequeue*
327 during round-robin procedure.
328 */
329
330static void cbq_deactivate_class(struct cbq_class *this)
331{
332 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
333 int prio = this->cpriority;
334 struct cbq_class *cl;
335 struct cbq_class *cl_prev = q->active[prio];
336
337 do {
338 cl = cl_prev->next_alive;
339 if (cl == this) {
340 cl_prev->next_alive = cl->next_alive;
341 cl->next_alive = NULL;
342
343 if (cl == q->active[prio]) {
344 q->active[prio] = cl_prev;
345 if (cl == q->active[prio]) {
346 q->active[prio] = NULL;
347 q->activemask &= ~(1<<prio);
348 return;
349 }
350 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700351 return;
352 }
353 } while ((cl_prev = cl) != q->active[prio]);
354}
355
356static void
357cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
358{
359 int toplevel = q->toplevel;
360
361 if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
362 psched_time_t now;
363 psched_tdiff_t incr;
364
Patrick McHardy3bebcda2007-03-23 11:29:25 -0700365 now = psched_get_time();
Patrick McHardy8edc0c32007-03-23 11:28:55 -0700366 incr = now - q->now_rt;
Patrick McHardy7c59e252007-03-23 11:27:45 -0700367 now = q->now + incr;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700368
369 do {
Patrick McHardy104e0872007-03-23 11:28:07 -0700370 if (cl->undertime < now) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700371 q->toplevel = cl->level;
372 return;
373 }
374 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
375 }
376}
377
378static int
379cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
380{
381 struct cbq_sched_data *q = qdisc_priv(sch);
382 int len = skb->len;
Satyam Sharmaddeee3c2007-09-16 14:54:05 -0700383 int uninitialized_var(ret);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700384 struct cbq_class *cl = cbq_classify(skb, sch, &ret);
385
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -0700386#ifdef CONFIG_NET_CLS_ACT
Linus Torvalds1da177e2005-04-16 15:20:36 -0700387 q->rx_class = cl;
388#endif
389 if (cl == NULL) {
Jamal Hadi Salim29f1df62006-01-08 22:35:55 -0800390 if (ret == NET_XMIT_BYPASS)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700391 sch->qstats.drops++;
392 kfree_skb(skb);
393 return ret;
394 }
395
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -0700396#ifdef CONFIG_NET_CLS_ACT
Linus Torvalds1da177e2005-04-16 15:20:36 -0700397 cl->q->__parent = sch;
398#endif
399 if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) {
400 sch->q.qlen++;
401 sch->bstats.packets++;
402 sch->bstats.bytes+=len;
403 cbq_mark_toplevel(q, cl);
404 if (!cl->next_alive)
405 cbq_activate_class(cl);
406 return ret;
407 }
408
409 sch->qstats.drops++;
410 cbq_mark_toplevel(q, cl);
411 cl->qstats.drops++;
412 return ret;
413}
414
415static int
416cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
417{
418 struct cbq_sched_data *q = qdisc_priv(sch);
419 struct cbq_class *cl;
420 int ret;
421
422 if ((cl = q->tx_class) == NULL) {
423 kfree_skb(skb);
424 sch->qstats.drops++;
425 return NET_XMIT_CN;
426 }
427 q->tx_class = NULL;
428
429 cbq_mark_toplevel(q, cl);
430
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -0700431#ifdef CONFIG_NET_CLS_ACT
Linus Torvalds1da177e2005-04-16 15:20:36 -0700432 q->rx_class = cl;
433 cl->q->__parent = sch;
434#endif
435 if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
436 sch->q.qlen++;
437 sch->qstats.requeues++;
438 if (!cl->next_alive)
439 cbq_activate_class(cl);
440 return 0;
441 }
442 sch->qstats.drops++;
443 cl->qstats.drops++;
444 return ret;
445}
446
447/* Overlimit actions */
448
449/* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
450
451static void cbq_ovl_classic(struct cbq_class *cl)
452{
453 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
Patrick McHardy8edc0c32007-03-23 11:28:55 -0700454 psched_tdiff_t delay = cl->undertime - q->now;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700455
456 if (!cl->delayed) {
457 delay += cl->offtime;
458
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900459 /*
Linus Torvalds1da177e2005-04-16 15:20:36 -0700460 Class goes to sleep, so that it will have no
461 chance to work avgidle. Let's forgive it 8)
462
463 BTW cbq-2.0 has a crap in this
464 place, apparently they forgot to shift it by cl->ewma_log.
465 */
466 if (cl->avgidle < 0)
467 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
468 if (cl->avgidle < cl->minidle)
469 cl->avgidle = cl->minidle;
470 if (delay <= 0)
471 delay = 1;
Patrick McHardy7c59e252007-03-23 11:27:45 -0700472 cl->undertime = q->now + delay;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700473
474 cl->xstats.overactions++;
475 cl->delayed = 1;
476 }
477 if (q->wd_expires == 0 || q->wd_expires > delay)
478 q->wd_expires = delay;
479
480 /* Dirty work! We must schedule wakeups based on
481 real available rate, rather than leaf rate,
482 which may be tiny (even zero).
483 */
484 if (q->toplevel == TC_CBQ_MAXLEVEL) {
485 struct cbq_class *b;
486 psched_tdiff_t base_delay = q->wd_expires;
487
488 for (b = cl->borrow; b; b = b->borrow) {
Patrick McHardy8edc0c32007-03-23 11:28:55 -0700489 delay = b->undertime - q->now;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700490 if (delay < base_delay) {
491 if (delay <= 0)
492 delay = 1;
493 base_delay = delay;
494 }
495 }
496
497 q->wd_expires = base_delay;
498 }
499}
500
501/* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
502 they go overlimit
503 */
504
505static void cbq_ovl_rclassic(struct cbq_class *cl)
506{
507 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
508 struct cbq_class *this = cl;
509
510 do {
511 if (cl->level > q->toplevel) {
512 cl = NULL;
513 break;
514 }
515 } while ((cl = cl->borrow) != NULL);
516
517 if (cl == NULL)
518 cl = this;
519 cbq_ovl_classic(cl);
520}
521
522/* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */
523
524static void cbq_ovl_delay(struct cbq_class *cl)
525{
526 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
Patrick McHardy8edc0c32007-03-23 11:28:55 -0700527 psched_tdiff_t delay = cl->undertime - q->now;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700528
529 if (!cl->delayed) {
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700530 psched_time_t sched = q->now;
531 ktime_t expires;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700532
533 delay += cl->offtime;
534 if (cl->avgidle < 0)
535 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
536 if (cl->avgidle < cl->minidle)
537 cl->avgidle = cl->minidle;
Patrick McHardy7c59e252007-03-23 11:27:45 -0700538 cl->undertime = q->now + delay;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700539
540 if (delay > 0) {
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700541 sched += delay + cl->penalty;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700542 cl->penalized = sched;
543 cl->cpriority = TC_CBQ_MAXPRIO;
544 q->pmask |= (1<<TC_CBQ_MAXPRIO);
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700545
546 expires = ktime_set(0, 0);
547 expires = ktime_add_ns(expires, PSCHED_US2NS(sched));
548 if (hrtimer_try_to_cancel(&q->delay_timer) &&
549 ktime_to_ns(ktime_sub(q->delay_timer.expires,
550 expires)) > 0)
551 q->delay_timer.expires = expires;
552 hrtimer_restart(&q->delay_timer);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700553 cl->delayed = 1;
554 cl->xstats.overactions++;
555 return;
556 }
557 delay = 1;
558 }
559 if (q->wd_expires == 0 || q->wd_expires > delay)
560 q->wd_expires = delay;
561}
562
563/* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
564
565static void cbq_ovl_lowprio(struct cbq_class *cl)
566{
567 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
568
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700569 cl->penalized = q->now + cl->penalty;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700570
571 if (cl->cpriority != cl->priority2) {
572 cl->cpriority = cl->priority2;
573 q->pmask |= (1<<cl->cpriority);
574 cl->xstats.overactions++;
575 }
576 cbq_ovl_classic(cl);
577}
578
579/* TC_CBQ_OVL_DROP: penalize class by dropping */
580
581static void cbq_ovl_drop(struct cbq_class *cl)
582{
583 if (cl->q->ops->drop)
584 if (cl->q->ops->drop(cl->q))
585 cl->qdisc->q.qlen--;
586 cl->xstats.overactions++;
587 cbq_ovl_classic(cl);
588}
589
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700590static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
591 psched_time_t now)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700592{
593 struct cbq_class *cl;
594 struct cbq_class *cl_prev = q->active[prio];
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700595 psched_time_t sched = now;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700596
597 if (cl_prev == NULL)
Patrick McHardye9054a32007-03-16 01:21:40 -0700598 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700599
600 do {
601 cl = cl_prev->next_alive;
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700602 if (now - cl->penalized > 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700603 cl_prev->next_alive = cl->next_alive;
604 cl->next_alive = NULL;
605 cl->cpriority = cl->priority;
606 cl->delayed = 0;
607 cbq_activate_class(cl);
608
609 if (cl == q->active[prio]) {
610 q->active[prio] = cl_prev;
611 if (cl == q->active[prio]) {
612 q->active[prio] = NULL;
613 return 0;
614 }
615 }
616
617 cl = cl_prev->next_alive;
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700618 } else if (sched - cl->penalized > 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700619 sched = cl->penalized;
620 } while ((cl_prev = cl) != q->active[prio]);
621
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700622 return sched - now;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700623}
624
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700625static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700626{
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700627 struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
628 delay_timer);
629 struct Qdisc *sch = q->watchdog.qdisc;
630 psched_time_t now;
631 psched_tdiff_t delay = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700632 unsigned pmask;
633
Patrick McHardy3bebcda2007-03-23 11:29:25 -0700634 now = psched_get_time();
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700635
Linus Torvalds1da177e2005-04-16 15:20:36 -0700636 pmask = q->pmask;
637 q->pmask = 0;
638
639 while (pmask) {
640 int prio = ffz(~pmask);
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700641 psched_tdiff_t tmp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700642
643 pmask &= ~(1<<prio);
644
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700645 tmp = cbq_undelay_prio(q, prio, now);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700646 if (tmp > 0) {
647 q->pmask |= 1<<prio;
648 if (tmp < delay || delay == 0)
649 delay = tmp;
650 }
651 }
652
653 if (delay) {
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700654 ktime_t time;
655
656 time = ktime_set(0, 0);
657 time = ktime_add_ns(time, PSCHED_US2NS(now + delay));
658 hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700659 }
660
661 sch->flags &= ~TCQ_F_THROTTLED;
662 netif_schedule(sch->dev);
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700663 return HRTIMER_NORESTART;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700664}
665
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -0700666#ifdef CONFIG_NET_CLS_ACT
Linus Torvalds1da177e2005-04-16 15:20:36 -0700667static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
668{
669 int len = skb->len;
670 struct Qdisc *sch = child->__parent;
671 struct cbq_sched_data *q = qdisc_priv(sch);
672 struct cbq_class *cl = q->rx_class;
673
674 q->rx_class = NULL;
675
676 if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
677
678 cbq_mark_toplevel(q, cl);
679
680 q->rx_class = cl;
681 cl->q->__parent = sch;
682
683 if (cl->q->enqueue(skb, cl->q) == 0) {
684 sch->q.qlen++;
685 sch->bstats.packets++;
686 sch->bstats.bytes+=len;
687 if (!cl->next_alive)
688 cbq_activate_class(cl);
689 return 0;
690 }
691 sch->qstats.drops++;
692 return 0;
693 }
694
695 sch->qstats.drops++;
696 return -1;
697}
698#endif
699
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900700/*
Linus Torvalds1da177e2005-04-16 15:20:36 -0700701 It is mission critical procedure.
702
703 We "regenerate" toplevel cutoff, if transmitting class
704 has backlog and it is not regulated. It is not part of
705 original CBQ description, but looks more reasonable.
706 Probably, it is wrong. This question needs further investigation.
707*/
708
709static __inline__ void
710cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
711 struct cbq_class *borrowed)
712{
713 if (cl && q->toplevel >= borrowed->level) {
714 if (cl->q->q.qlen > 1) {
715 do {
Patrick McHardya0849802007-03-23 11:28:30 -0700716 if (borrowed->undertime == PSCHED_PASTPERFECT) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700717 q->toplevel = borrowed->level;
718 return;
719 }
720 } while ((borrowed=borrowed->borrow) != NULL);
721 }
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900722#if 0
Linus Torvalds1da177e2005-04-16 15:20:36 -0700723 /* It is not necessary now. Uncommenting it
724 will save CPU cycles, but decrease fairness.
725 */
726 q->toplevel = TC_CBQ_MAXLEVEL;
727#endif
728 }
729}
730
731static void
732cbq_update(struct cbq_sched_data *q)
733{
734 struct cbq_class *this = q->tx_class;
735 struct cbq_class *cl = this;
736 int len = q->tx_len;
737
738 q->tx_class = NULL;
739
740 for ( ; cl; cl = cl->share) {
741 long avgidle = cl->avgidle;
742 long idle;
743
744 cl->bstats.packets++;
745 cl->bstats.bytes += len;
746
747 /*
748 (now - last) is total time between packet right edges.
749 (last_pktlen/rate) is "virtual" busy time, so that
750
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900751 idle = (now - last) - last_pktlen/rate
Linus Torvalds1da177e2005-04-16 15:20:36 -0700752 */
753
Patrick McHardy8edc0c32007-03-23 11:28:55 -0700754 idle = q->now - cl->last;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700755 if ((unsigned long)idle > 128*1024*1024) {
756 avgidle = cl->maxidle;
757 } else {
758 idle -= L2T(cl, len);
759
760 /* true_avgidle := (1-W)*true_avgidle + W*idle,
761 where W=2^{-ewma_log}. But cl->avgidle is scaled:
762 cl->avgidle == true_avgidle/W,
763 hence:
764 */
765 avgidle += idle - (avgidle>>cl->ewma_log);
766 }
767
768 if (avgidle <= 0) {
769 /* Overlimit or at-limit */
770
771 if (avgidle < cl->minidle)
772 avgidle = cl->minidle;
773
774 cl->avgidle = avgidle;
775
776 /* Calculate expected time, when this class
777 will be allowed to send.
778 It will occur, when:
779 (1-W)*true_avgidle + W*delay = 0, i.e.
780 idle = (1/W - 1)*(-true_avgidle)
781 or
782 idle = (1 - W)*(-cl->avgidle);
783 */
784 idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
785
786 /*
787 That is not all.
788 To maintain the rate allocated to the class,
789 we add to undertime virtual clock,
790 necessary to complete transmitted packet.
791 (len/phys_bandwidth has been already passed
792 to the moment of cbq_update)
793 */
794
795 idle -= L2T(&q->link, len);
796 idle += L2T(cl, len);
797
Patrick McHardy7c59e252007-03-23 11:27:45 -0700798 cl->undertime = q->now + idle;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700799 } else {
800 /* Underlimit */
801
Patrick McHardya0849802007-03-23 11:28:30 -0700802 cl->undertime = PSCHED_PASTPERFECT;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700803 if (avgidle > cl->maxidle)
804 cl->avgidle = cl->maxidle;
805 else
806 cl->avgidle = avgidle;
807 }
808 cl->last = q->now;
809 }
810
811 cbq_update_toplevel(q, this, q->tx_borrowed);
812}
813
814static __inline__ struct cbq_class *
815cbq_under_limit(struct cbq_class *cl)
816{
817 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
818 struct cbq_class *this_cl = cl;
819
820 if (cl->tparent == NULL)
821 return cl;
822
Patrick McHardya0849802007-03-23 11:28:30 -0700823 if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700824 cl->delayed = 0;
825 return cl;
826 }
827
828 do {
829 /* It is very suspicious place. Now overlimit
830 action is generated for not bounded classes
831 only if link is completely congested.
832 Though it is in agree with ancestor-only paradigm,
833 it looks very stupid. Particularly,
834 it means that this chunk of code will either
835 never be called or result in strong amplification
836 of burstiness. Dangerous, silly, and, however,
837 no another solution exists.
838 */
839 if ((cl = cl->borrow) == NULL) {
840 this_cl->qstats.overlimits++;
841 this_cl->overlimit(this_cl);
842 return NULL;
843 }
844 if (cl->level > q->toplevel)
845 return NULL;
Patrick McHardya0849802007-03-23 11:28:30 -0700846 } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700847
848 cl->delayed = 0;
849 return cl;
850}
851
852static __inline__ struct sk_buff *
853cbq_dequeue_prio(struct Qdisc *sch, int prio)
854{
855 struct cbq_sched_data *q = qdisc_priv(sch);
856 struct cbq_class *cl_tail, *cl_prev, *cl;
857 struct sk_buff *skb;
858 int deficit;
859
860 cl_tail = cl_prev = q->active[prio];
861 cl = cl_prev->next_alive;
862
863 do {
864 deficit = 0;
865
866 /* Start round */
867 do {
868 struct cbq_class *borrow = cl;
869
870 if (cl->q->q.qlen &&
871 (borrow = cbq_under_limit(cl)) == NULL)
872 goto skip_class;
873
874 if (cl->deficit <= 0) {
875 /* Class exhausted its allotment per
876 this round. Switch to the next one.
877 */
878 deficit = 1;
879 cl->deficit += cl->quantum;
880 goto next_class;
881 }
882
883 skb = cl->q->dequeue(cl->q);
884
885 /* Class did not give us any skb :-(
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900886 It could occur even if cl->q->q.qlen != 0
Linus Torvalds1da177e2005-04-16 15:20:36 -0700887 f.e. if cl->q == "tbf"
888 */
889 if (skb == NULL)
890 goto skip_class;
891
892 cl->deficit -= skb->len;
893 q->tx_class = cl;
894 q->tx_borrowed = borrow;
895 if (borrow != cl) {
896#ifndef CBQ_XSTATS_BORROWS_BYTES
897 borrow->xstats.borrows++;
898 cl->xstats.borrows++;
899#else
900 borrow->xstats.borrows += skb->len;
901 cl->xstats.borrows += skb->len;
902#endif
903 }
904 q->tx_len = skb->len;
905
906 if (cl->deficit <= 0) {
907 q->active[prio] = cl;
908 cl = cl->next_alive;
909 cl->deficit += cl->quantum;
910 }
911 return skb;
912
913skip_class:
914 if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
915 /* Class is empty or penalized.
916 Unlink it from active chain.
917 */
918 cl_prev->next_alive = cl->next_alive;
919 cl->next_alive = NULL;
920
921 /* Did cl_tail point to it? */
922 if (cl == cl_tail) {
923 /* Repair it! */
924 cl_tail = cl_prev;
925
926 /* Was it the last class in this band? */
927 if (cl == cl_tail) {
928 /* Kill the band! */
929 q->active[prio] = NULL;
930 q->activemask &= ~(1<<prio);
931 if (cl->q->q.qlen)
932 cbq_activate_class(cl);
933 return NULL;
934 }
935
936 q->active[prio] = cl_tail;
937 }
938 if (cl->q->q.qlen)
939 cbq_activate_class(cl);
940
941 cl = cl_prev;
942 }
943
944next_class:
945 cl_prev = cl;
946 cl = cl->next_alive;
947 } while (cl_prev != cl_tail);
948 } while (deficit);
949
950 q->active[prio] = cl_prev;
951
952 return NULL;
953}
954
955static __inline__ struct sk_buff *
956cbq_dequeue_1(struct Qdisc *sch)
957{
958 struct cbq_sched_data *q = qdisc_priv(sch);
959 struct sk_buff *skb;
960 unsigned activemask;
961
962 activemask = q->activemask&0xFF;
963 while (activemask) {
964 int prio = ffz(~activemask);
965 activemask &= ~(1<<prio);
966 skb = cbq_dequeue_prio(sch, prio);
967 if (skb)
968 return skb;
969 }
970 return NULL;
971}
972
973static struct sk_buff *
974cbq_dequeue(struct Qdisc *sch)
975{
976 struct sk_buff *skb;
977 struct cbq_sched_data *q = qdisc_priv(sch);
978 psched_time_t now;
979 psched_tdiff_t incr;
980
Patrick McHardy3bebcda2007-03-23 11:29:25 -0700981 now = psched_get_time();
Patrick McHardy8edc0c32007-03-23 11:28:55 -0700982 incr = now - q->now_rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700983
984 if (q->tx_class) {
985 psched_tdiff_t incr2;
986 /* Time integrator. We calculate EOS time
987 by adding expected packet transmission time.
988 If real time is greater, we warp artificial clock,
989 so that:
990
991 cbq_time = max(real_time, work);
992 */
993 incr2 = L2T(&q->link, q->tx_len);
Patrick McHardy7c59e252007-03-23 11:27:45 -0700994 q->now += incr2;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700995 cbq_update(q);
996 if ((incr -= incr2) < 0)
997 incr = 0;
998 }
Patrick McHardy7c59e252007-03-23 11:27:45 -0700999 q->now += incr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001000 q->now_rt = now;
1001
1002 for (;;) {
1003 q->wd_expires = 0;
1004
1005 skb = cbq_dequeue_1(sch);
1006 if (skb) {
1007 sch->q.qlen--;
1008 sch->flags &= ~TCQ_F_THROTTLED;
1009 return skb;
1010 }
1011
1012 /* All the classes are overlimit.
1013
1014 It is possible, if:
1015
1016 1. Scheduler is empty.
1017 2. Toplevel cutoff inhibited borrowing.
1018 3. Root class is overlimit.
1019
1020 Reset 2d and 3d conditions and retry.
1021
1022 Note, that NS and cbq-2.0 are buggy, peeking
1023 an arbitrary class is appropriate for ancestor-only
1024 sharing, but not for toplevel algorithm.
1025
1026 Our version is better, but slower, because it requires
1027 two passes, but it is unavoidable with top-level sharing.
1028 */
1029
1030 if (q->toplevel == TC_CBQ_MAXLEVEL &&
Patrick McHardya0849802007-03-23 11:28:30 -07001031 q->link.undertime == PSCHED_PASTPERFECT)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001032 break;
1033
1034 q->toplevel = TC_CBQ_MAXLEVEL;
Patrick McHardya0849802007-03-23 11:28:30 -07001035 q->link.undertime = PSCHED_PASTPERFECT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001036 }
1037
1038 /* No packets in scheduler or nobody wants to give them to us :-(
1039 Sigh... start watchdog timer in the last case. */
1040
1041 if (sch->q.qlen) {
1042 sch->qstats.overlimits++;
Patrick McHardy88a99352007-03-16 01:21:11 -07001043 if (q->wd_expires)
1044 qdisc_watchdog_schedule(&q->watchdog,
Patrick McHardybb239ac2007-03-16 12:31:28 -07001045 now + q->wd_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001046 }
1047 return NULL;
1048}
1049
1050/* CBQ class maintanance routines */
1051
1052static void cbq_adjust_levels(struct cbq_class *this)
1053{
1054 if (this == NULL)
1055 return;
1056
1057 do {
1058 int level = 0;
1059 struct cbq_class *cl;
1060
1061 if ((cl = this->children) != NULL) {
1062 do {
1063 if (cl->level > level)
1064 level = cl->level;
1065 } while ((cl = cl->sibling) != this->children);
1066 }
1067 this->level = level+1;
1068 } while ((this = this->tparent) != NULL);
1069}
1070
1071static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1072{
1073 struct cbq_class *cl;
1074 unsigned h;
1075
1076 if (q->quanta[prio] == 0)
1077 return;
1078
1079 for (h=0; h<16; h++) {
1080 for (cl = q->classes[h]; cl; cl = cl->next) {
1081 /* BUGGGG... Beware! This expression suffer of
1082 arithmetic overflows!
1083 */
1084 if (cl->priority == prio) {
1085 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1086 q->quanta[prio];
1087 }
1088 if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) {
1089 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
1090 cl->quantum = cl->qdisc->dev->mtu/2 + 1;
1091 }
1092 }
1093 }
1094}
1095
1096static void cbq_sync_defmap(struct cbq_class *cl)
1097{
1098 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1099 struct cbq_class *split = cl->split;
1100 unsigned h;
1101 int i;
1102
1103 if (split == NULL)
1104 return;
1105
1106 for (i=0; i<=TC_PRIO_MAX; i++) {
1107 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1108 split->defaults[i] = NULL;
1109 }
1110
1111 for (i=0; i<=TC_PRIO_MAX; i++) {
1112 int level = split->level;
1113
1114 if (split->defaults[i])
1115 continue;
1116
1117 for (h=0; h<16; h++) {
1118 struct cbq_class *c;
1119
1120 for (c = q->classes[h]; c; c = c->next) {
1121 if (c->split == split && c->level < level &&
1122 c->defmap&(1<<i)) {
1123 split->defaults[i] = c;
1124 level = c->level;
1125 }
1126 }
1127 }
1128 }
1129}
1130
1131static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1132{
1133 struct cbq_class *split = NULL;
1134
1135 if (splitid == 0) {
1136 if ((split = cl->split) == NULL)
1137 return;
1138 splitid = split->classid;
1139 }
1140
1141 if (split == NULL || split->classid != splitid) {
1142 for (split = cl->tparent; split; split = split->tparent)
1143 if (split->classid == splitid)
1144 break;
1145 }
1146
1147 if (split == NULL)
1148 return;
1149
1150 if (cl->split != split) {
1151 cl->defmap = 0;
1152 cbq_sync_defmap(cl);
1153 cl->split = split;
1154 cl->defmap = def&mask;
1155 } else
1156 cl->defmap = (cl->defmap&~mask)|(def&mask);
1157
1158 cbq_sync_defmap(cl);
1159}
1160
1161static void cbq_unlink_class(struct cbq_class *this)
1162{
1163 struct cbq_class *cl, **clp;
1164 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1165
1166 for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) {
1167 if (cl == this) {
1168 *clp = cl->next;
1169 cl->next = NULL;
1170 break;
1171 }
1172 }
1173
1174 if (this->tparent) {
1175 clp=&this->sibling;
1176 cl = *clp;
1177 do {
1178 if (cl == this) {
1179 *clp = cl->sibling;
1180 break;
1181 }
1182 clp = &cl->sibling;
1183 } while ((cl = *clp) != this->sibling);
1184
1185 if (this->tparent->children == this) {
1186 this->tparent->children = this->sibling;
1187 if (this->sibling == this)
1188 this->tparent->children = NULL;
1189 }
1190 } else {
1191 BUG_TRAP(this->sibling == this);
1192 }
1193}
1194
1195static void cbq_link_class(struct cbq_class *this)
1196{
1197 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1198 unsigned h = cbq_hash(this->classid);
1199 struct cbq_class *parent = this->tparent;
1200
1201 this->sibling = this;
1202 this->next = q->classes[h];
1203 q->classes[h] = this;
1204
1205 if (parent == NULL)
1206 return;
1207
1208 if (parent->children == NULL) {
1209 parent->children = this;
1210 } else {
1211 this->sibling = parent->children->sibling;
1212 parent->children->sibling = this;
1213 }
1214}
1215
1216static unsigned int cbq_drop(struct Qdisc* sch)
1217{
1218 struct cbq_sched_data *q = qdisc_priv(sch);
1219 struct cbq_class *cl, *cl_head;
1220 int prio;
1221 unsigned int len;
1222
1223 for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1224 if ((cl_head = q->active[prio]) == NULL)
1225 continue;
1226
1227 cl = cl_head;
1228 do {
1229 if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1230 sch->q.qlen--;
Jarek Poplawskia37ef2e2006-12-08 00:25:55 -08001231 if (!cl->q->q.qlen)
1232 cbq_deactivate_class(cl);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001233 return len;
1234 }
1235 } while ((cl = cl->next_alive) != cl_head);
1236 }
1237 return 0;
1238}
1239
1240static void
1241cbq_reset(struct Qdisc* sch)
1242{
1243 struct cbq_sched_data *q = qdisc_priv(sch);
1244 struct cbq_class *cl;
1245 int prio;
1246 unsigned h;
1247
1248 q->activemask = 0;
1249 q->pmask = 0;
1250 q->tx_class = NULL;
1251 q->tx_borrowed = NULL;
Patrick McHardy88a99352007-03-16 01:21:11 -07001252 qdisc_watchdog_cancel(&q->watchdog);
Patrick McHardy1a13cb62007-03-16 01:22:20 -07001253 hrtimer_cancel(&q->delay_timer);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001254 q->toplevel = TC_CBQ_MAXLEVEL;
Patrick McHardy3bebcda2007-03-23 11:29:25 -07001255 q->now = psched_get_time();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001256 q->now_rt = q->now;
1257
1258 for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1259 q->active[prio] = NULL;
1260
1261 for (h = 0; h < 16; h++) {
1262 for (cl = q->classes[h]; cl; cl = cl->next) {
1263 qdisc_reset(cl->q);
1264
1265 cl->next_alive = NULL;
Patrick McHardya0849802007-03-23 11:28:30 -07001266 cl->undertime = PSCHED_PASTPERFECT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001267 cl->avgidle = cl->maxidle;
1268 cl->deficit = cl->quantum;
1269 cl->cpriority = cl->priority;
1270 }
1271 }
1272 sch->q.qlen = 0;
1273}
1274
1275
1276static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1277{
1278 if (lss->change&TCF_CBQ_LSS_FLAGS) {
1279 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1280 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1281 }
1282 if (lss->change&TCF_CBQ_LSS_EWMA)
1283 cl->ewma_log = lss->ewma_log;
1284 if (lss->change&TCF_CBQ_LSS_AVPKT)
1285 cl->avpkt = lss->avpkt;
1286 if (lss->change&TCF_CBQ_LSS_MINIDLE)
1287 cl->minidle = -(long)lss->minidle;
1288 if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1289 cl->maxidle = lss->maxidle;
1290 cl->avgidle = lss->maxidle;
1291 }
1292 if (lss->change&TCF_CBQ_LSS_OFFTIME)
1293 cl->offtime = lss->offtime;
1294 return 0;
1295}
1296
1297static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1298{
1299 q->nclasses[cl->priority]--;
1300 q->quanta[cl->priority] -= cl->weight;
1301 cbq_normalize_quanta(q, cl->priority);
1302}
1303
1304static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1305{
1306 q->nclasses[cl->priority]++;
1307 q->quanta[cl->priority] += cl->weight;
1308 cbq_normalize_quanta(q, cl->priority);
1309}
1310
1311static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1312{
1313 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1314
1315 if (wrr->allot)
1316 cl->allot = wrr->allot;
1317 if (wrr->weight)
1318 cl->weight = wrr->weight;
1319 if (wrr->priority) {
1320 cl->priority = wrr->priority-1;
1321 cl->cpriority = cl->priority;
1322 if (cl->priority >= cl->priority2)
1323 cl->priority2 = TC_CBQ_MAXPRIO-1;
1324 }
1325
1326 cbq_addprio(q, cl);
1327 return 0;
1328}
1329
1330static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1331{
1332 switch (ovl->strategy) {
1333 case TC_CBQ_OVL_CLASSIC:
1334 cl->overlimit = cbq_ovl_classic;
1335 break;
1336 case TC_CBQ_OVL_DELAY:
1337 cl->overlimit = cbq_ovl_delay;
1338 break;
1339 case TC_CBQ_OVL_LOWPRIO:
1340 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1341 ovl->priority2-1 <= cl->priority)
1342 return -EINVAL;
1343 cl->priority2 = ovl->priority2-1;
1344 cl->overlimit = cbq_ovl_lowprio;
1345 break;
1346 case TC_CBQ_OVL_DROP:
1347 cl->overlimit = cbq_ovl_drop;
1348 break;
1349 case TC_CBQ_OVL_RCLASSIC:
1350 cl->overlimit = cbq_ovl_rclassic;
1351 break;
1352 default:
1353 return -EINVAL;
1354 }
Patrick McHardy1a13cb62007-03-16 01:22:20 -07001355 cl->penalty = ovl->penalty;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001356 return 0;
1357}
1358
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -07001359#ifdef CONFIG_NET_CLS_ACT
Linus Torvalds1da177e2005-04-16 15:20:36 -07001360static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1361{
1362 cl->police = p->police;
1363
1364 if (cl->q->handle) {
1365 if (p->police == TC_POLICE_RECLASSIFY)
1366 cl->q->reshape_fail = cbq_reshape_fail;
1367 else
1368 cl->q->reshape_fail = NULL;
1369 }
1370 return 0;
1371}
1372#endif
1373
1374static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1375{
1376 cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1377 return 0;
1378}
1379
Patrick McHardy1e904742008-01-22 22:11:17 -08001380static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001381{
1382 struct cbq_sched_data *q = qdisc_priv(sch);
Patrick McHardy1e904742008-01-22 22:11:17 -08001383 struct nlattr *tb[TCA_CBQ_MAX + 1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001384 struct tc_ratespec *r;
Patrick McHardycee63722008-01-23 20:33:32 -08001385 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001386
Patrick McHardycee63722008-01-23 20:33:32 -08001387 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, NULL);
1388 if (err < 0)
1389 return err;
1390
1391 if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL ||
Patrick McHardy1e904742008-01-22 22:11:17 -08001392 nla_len(tb[TCA_CBQ_RATE]) < sizeof(struct tc_ratespec))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001393 return -EINVAL;
1394
Patrick McHardy1e904742008-01-22 22:11:17 -08001395 if (tb[TCA_CBQ_LSSOPT] &&
1396 nla_len(tb[TCA_CBQ_LSSOPT]) < sizeof(struct tc_cbq_lssopt))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001397 return -EINVAL;
1398
Patrick McHardy1e904742008-01-22 22:11:17 -08001399 r = nla_data(tb[TCA_CBQ_RATE]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001400
Patrick McHardy1e904742008-01-22 22:11:17 -08001401 if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001402 return -EINVAL;
1403
1404 q->link.refcnt = 1;
1405 q->link.sibling = &q->link;
1406 q->link.classid = sch->handle;
1407 q->link.qdisc = sch;
Patrick McHardy9f9afec2006-11-29 17:35:18 -08001408 if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1409 sch->handle)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001410 q->link.q = &noop_qdisc;
1411
1412 q->link.priority = TC_CBQ_MAXPRIO-1;
1413 q->link.priority2 = TC_CBQ_MAXPRIO-1;
1414 q->link.cpriority = TC_CBQ_MAXPRIO-1;
1415 q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1416 q->link.overlimit = cbq_ovl_classic;
1417 q->link.allot = psched_mtu(sch->dev);
1418 q->link.quantum = q->link.allot;
1419 q->link.weight = q->link.R_tab->rate.rate;
1420
1421 q->link.ewma_log = TC_CBQ_DEF_EWMA;
1422 q->link.avpkt = q->link.allot/2;
1423 q->link.minidle = -0x7FFFFFFF;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001424
Patrick McHardy88a99352007-03-16 01:21:11 -07001425 qdisc_watchdog_init(&q->watchdog, sch);
Patrick McHardy1a13cb62007-03-16 01:22:20 -07001426 hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001427 q->delay_timer.function = cbq_undelay;
1428 q->toplevel = TC_CBQ_MAXLEVEL;
Patrick McHardy3bebcda2007-03-23 11:29:25 -07001429 q->now = psched_get_time();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001430 q->now_rt = q->now;
1431
1432 cbq_link_class(&q->link);
1433
Patrick McHardy1e904742008-01-22 22:11:17 -08001434 if (tb[TCA_CBQ_LSSOPT])
1435 cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001436
1437 cbq_addprio(q, &q->link);
1438 return 0;
1439}
1440
1441static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1442{
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001443 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001444
Patrick McHardy1e904742008-01-22 22:11:17 -08001445 NLA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001446 return skb->len;
1447
Patrick McHardy1e904742008-01-22 22:11:17 -08001448nla_put_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001449 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001450 return -1;
1451}
1452
1453static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1454{
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001455 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001456 struct tc_cbq_lssopt opt;
1457
1458 opt.flags = 0;
1459 if (cl->borrow == NULL)
1460 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1461 if (cl->share == NULL)
1462 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1463 opt.ewma_log = cl->ewma_log;
1464 opt.level = cl->level;
1465 opt.avpkt = cl->avpkt;
1466 opt.maxidle = cl->maxidle;
1467 opt.minidle = (u32)(-cl->minidle);
1468 opt.offtime = cl->offtime;
1469 opt.change = ~0;
Patrick McHardy1e904742008-01-22 22:11:17 -08001470 NLA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001471 return skb->len;
1472
Patrick McHardy1e904742008-01-22 22:11:17 -08001473nla_put_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001474 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001475 return -1;
1476}
1477
1478static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1479{
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001480 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001481 struct tc_cbq_wrropt opt;
1482
1483 opt.flags = 0;
1484 opt.allot = cl->allot;
1485 opt.priority = cl->priority+1;
1486 opt.cpriority = cl->cpriority+1;
1487 opt.weight = cl->weight;
Patrick McHardy1e904742008-01-22 22:11:17 -08001488 NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001489 return skb->len;
1490
Patrick McHardy1e904742008-01-22 22:11:17 -08001491nla_put_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001492 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001493 return -1;
1494}
1495
1496static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1497{
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001498 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001499 struct tc_cbq_ovl opt;
1500
1501 opt.strategy = cl->ovl_strategy;
1502 opt.priority2 = cl->priority2+1;
Patrick McHardy8a470772005-06-28 12:56:45 -07001503 opt.pad = 0;
Patrick McHardy1a13cb62007-03-16 01:22:20 -07001504 opt.penalty = cl->penalty;
Patrick McHardy1e904742008-01-22 22:11:17 -08001505 NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001506 return skb->len;
1507
Patrick McHardy1e904742008-01-22 22:11:17 -08001508nla_put_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001509 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001510 return -1;
1511}
1512
1513static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1514{
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001515 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001516 struct tc_cbq_fopt opt;
1517
1518 if (cl->split || cl->defmap) {
1519 opt.split = cl->split ? cl->split->classid : 0;
1520 opt.defmap = cl->defmap;
1521 opt.defchange = ~0;
Patrick McHardy1e904742008-01-22 22:11:17 -08001522 NLA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001523 }
1524 return skb->len;
1525
Patrick McHardy1e904742008-01-22 22:11:17 -08001526nla_put_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001527 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001528 return -1;
1529}
1530
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -07001531#ifdef CONFIG_NET_CLS_ACT
Linus Torvalds1da177e2005-04-16 15:20:36 -07001532static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1533{
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001534 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001535 struct tc_cbq_police opt;
1536
1537 if (cl->police) {
1538 opt.police = cl->police;
Patrick McHardy9ef1d4c2005-06-28 12:55:30 -07001539 opt.__res1 = 0;
1540 opt.__res2 = 0;
Patrick McHardy1e904742008-01-22 22:11:17 -08001541 NLA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001542 }
1543 return skb->len;
1544
Patrick McHardy1e904742008-01-22 22:11:17 -08001545nla_put_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001546 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001547 return -1;
1548}
1549#endif
1550
1551static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1552{
1553 if (cbq_dump_lss(skb, cl) < 0 ||
1554 cbq_dump_rate(skb, cl) < 0 ||
1555 cbq_dump_wrr(skb, cl) < 0 ||
1556 cbq_dump_ovl(skb, cl) < 0 ||
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -07001557#ifdef CONFIG_NET_CLS_ACT
Linus Torvalds1da177e2005-04-16 15:20:36 -07001558 cbq_dump_police(skb, cl) < 0 ||
1559#endif
1560 cbq_dump_fopt(skb, cl) < 0)
1561 return -1;
1562 return 0;
1563}
1564
1565static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1566{
1567 struct cbq_sched_data *q = qdisc_priv(sch);
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001568 unsigned char *b = skb_tail_pointer(skb);
Patrick McHardy1e904742008-01-22 22:11:17 -08001569 struct nlattr *nla;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001570
Patrick McHardy1e904742008-01-22 22:11:17 -08001571 nla = (struct nlattr*)b;
1572 NLA_PUT(skb, TCA_OPTIONS, 0, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001573 if (cbq_dump_attr(skb, &q->link) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001574 goto nla_put_failure;
1575 nla->nla_len = skb_tail_pointer(skb) - b;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001576 return skb->len;
1577
Patrick McHardy1e904742008-01-22 22:11:17 -08001578nla_put_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001579 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001580 return -1;
1581}
1582
1583static int
1584cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1585{
1586 struct cbq_sched_data *q = qdisc_priv(sch);
1587
1588 q->link.xstats.avgidle = q->link.avgidle;
1589 return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1590}
1591
1592static int
1593cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1594 struct sk_buff *skb, struct tcmsg *tcm)
1595{
1596 struct cbq_class *cl = (struct cbq_class*)arg;
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001597 unsigned char *b = skb_tail_pointer(skb);
Patrick McHardy1e904742008-01-22 22:11:17 -08001598 struct nlattr *nla;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001599
1600 if (cl->tparent)
1601 tcm->tcm_parent = cl->tparent->classid;
1602 else
1603 tcm->tcm_parent = TC_H_ROOT;
1604 tcm->tcm_handle = cl->classid;
1605 tcm->tcm_info = cl->q->handle;
1606
Patrick McHardy1e904742008-01-22 22:11:17 -08001607 nla = (struct nlattr*)b;
1608 NLA_PUT(skb, TCA_OPTIONS, 0, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001609 if (cbq_dump_attr(skb, cl) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001610 goto nla_put_failure;
1611 nla->nla_len = skb_tail_pointer(skb) - b;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001612 return skb->len;
1613
Patrick McHardy1e904742008-01-22 22:11:17 -08001614nla_put_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001615 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001616 return -1;
1617}
1618
1619static int
1620cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1621 struct gnet_dump *d)
1622{
1623 struct cbq_sched_data *q = qdisc_priv(sch);
1624 struct cbq_class *cl = (struct cbq_class*)arg;
1625
1626 cl->qstats.qlen = cl->q->q.qlen;
1627 cl->xstats.avgidle = cl->avgidle;
1628 cl->xstats.undertime = 0;
1629
Patrick McHardya0849802007-03-23 11:28:30 -07001630 if (cl->undertime != PSCHED_PASTPERFECT)
Patrick McHardy8edc0c32007-03-23 11:28:55 -07001631 cl->xstats.undertime = cl->undertime - q->now;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001632
1633 if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001634 gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001635 gnet_stats_copy_queue(d, &cl->qstats) < 0)
1636 return -1;
1637
1638 return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1639}
1640
1641static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1642 struct Qdisc **old)
1643{
1644 struct cbq_class *cl = (struct cbq_class*)arg;
1645
1646 if (cl) {
1647 if (new == NULL) {
Patrick McHardy9f9afec2006-11-29 17:35:18 -08001648 if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1649 cl->classid)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001650 return -ENOBUFS;
1651 } else {
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -07001652#ifdef CONFIG_NET_CLS_ACT
Linus Torvalds1da177e2005-04-16 15:20:36 -07001653 if (cl->police == TC_POLICE_RECLASSIFY)
1654 new->reshape_fail = cbq_reshape_fail;
1655#endif
1656 }
1657 sch_tree_lock(sch);
Jarek Poplawskia37ef2e2006-12-08 00:25:55 -08001658 *old = xchg(&cl->q, new);
Patrick McHardy5e50da02006-11-29 17:36:20 -08001659 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001660 qdisc_reset(*old);
1661 sch_tree_unlock(sch);
1662
1663 return 0;
1664 }
1665 return -ENOENT;
1666}
1667
1668static struct Qdisc *
1669cbq_leaf(struct Qdisc *sch, unsigned long arg)
1670{
1671 struct cbq_class *cl = (struct cbq_class*)arg;
1672
1673 return cl ? cl->q : NULL;
1674}
1675
Jarek Poplawskia37ef2e2006-12-08 00:25:55 -08001676static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1677{
1678 struct cbq_class *cl = (struct cbq_class *)arg;
1679
1680 if (cl->q->q.qlen == 0)
1681 cbq_deactivate_class(cl);
1682}
1683
Linus Torvalds1da177e2005-04-16 15:20:36 -07001684static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1685{
1686 struct cbq_sched_data *q = qdisc_priv(sch);
1687 struct cbq_class *cl = cbq_class_lookup(q, classid);
1688
1689 if (cl) {
1690 cl->refcnt++;
1691 return (unsigned long)cl;
1692 }
1693 return 0;
1694}
1695
Linus Torvalds1da177e2005-04-16 15:20:36 -07001696static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1697{
1698 struct cbq_sched_data *q = qdisc_priv(sch);
1699
1700 BUG_TRAP(!cl->filters);
1701
Patrick McHardya48b5a62007-03-23 11:29:43 -07001702 tcf_destroy_chain(cl->filter_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001703 qdisc_destroy(cl->q);
1704 qdisc_put_rtab(cl->R_tab);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001705 gen_kill_estimator(&cl->bstats, &cl->rate_est);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001706 if (cl != &q->link)
1707 kfree(cl);
1708}
1709
1710static void
1711cbq_destroy(struct Qdisc* sch)
1712{
1713 struct cbq_sched_data *q = qdisc_priv(sch);
1714 struct cbq_class *cl;
1715 unsigned h;
1716
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -07001717#ifdef CONFIG_NET_CLS_ACT
Linus Torvalds1da177e2005-04-16 15:20:36 -07001718 q->rx_class = NULL;
1719#endif
1720 /*
1721 * Filters must be destroyed first because we don't destroy the
1722 * classes from root to leafs which means that filters can still
1723 * be bound to classes which have been destroyed already. --TGR '04
1724 */
Patrick McHardyb00b4bf2007-06-05 16:06:59 -07001725 for (h = 0; h < 16; h++) {
1726 for (cl = q->classes[h]; cl; cl = cl->next) {
Patrick McHardya48b5a62007-03-23 11:29:43 -07001727 tcf_destroy_chain(cl->filter_list);
Patrick McHardyb00b4bf2007-06-05 16:06:59 -07001728 cl->filter_list = NULL;
1729 }
1730 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001731 for (h = 0; h < 16; h++) {
1732 struct cbq_class *next;
1733
1734 for (cl = q->classes[h]; cl; cl = next) {
1735 next = cl->next;
1736 cbq_destroy_class(sch, cl);
1737 }
1738 }
1739}
1740
1741static void cbq_put(struct Qdisc *sch, unsigned long arg)
1742{
1743 struct cbq_class *cl = (struct cbq_class*)arg;
1744
1745 if (--cl->refcnt == 0) {
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -07001746#ifdef CONFIG_NET_CLS_ACT
Linus Torvalds1da177e2005-04-16 15:20:36 -07001747 struct cbq_sched_data *q = qdisc_priv(sch);
1748
1749 spin_lock_bh(&sch->dev->queue_lock);
1750 if (q->rx_class == cl)
1751 q->rx_class = NULL;
1752 spin_unlock_bh(&sch->dev->queue_lock);
1753#endif
1754
1755 cbq_destroy_class(sch, cl);
1756 }
1757}
1758
1759static int
Patrick McHardy1e904742008-01-22 22:11:17 -08001760cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001761 unsigned long *arg)
1762{
1763 int err;
1764 struct cbq_sched_data *q = qdisc_priv(sch);
1765 struct cbq_class *cl = (struct cbq_class*)*arg;
Patrick McHardy1e904742008-01-22 22:11:17 -08001766 struct nlattr *opt = tca[TCA_OPTIONS];
1767 struct nlattr *tb[TCA_CBQ_MAX + 1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001768 struct cbq_class *parent;
1769 struct qdisc_rate_table *rtab = NULL;
1770
Patrick McHardycee63722008-01-23 20:33:32 -08001771 if (opt == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001772 return -EINVAL;
1773
Patrick McHardycee63722008-01-23 20:33:32 -08001774 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, NULL);
1775 if (err < 0)
1776 return err;
1777
Patrick McHardy1e904742008-01-22 22:11:17 -08001778 if (tb[TCA_CBQ_OVL_STRATEGY] &&
1779 nla_len(tb[TCA_CBQ_OVL_STRATEGY]) < sizeof(struct tc_cbq_ovl))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001780 return -EINVAL;
1781
Patrick McHardy1e904742008-01-22 22:11:17 -08001782 if (tb[TCA_CBQ_FOPT] &&
1783 nla_len(tb[TCA_CBQ_FOPT]) < sizeof(struct tc_cbq_fopt))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001784 return -EINVAL;
1785
Patrick McHardy1e904742008-01-22 22:11:17 -08001786 if (tb[TCA_CBQ_RATE] &&
1787 nla_len(tb[TCA_CBQ_RATE]) < sizeof(struct tc_ratespec))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001788 return -EINVAL;
1789
Patrick McHardy1e904742008-01-22 22:11:17 -08001790 if (tb[TCA_CBQ_LSSOPT] &&
1791 nla_len(tb[TCA_CBQ_LSSOPT]) < sizeof(struct tc_cbq_lssopt))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001792 return -EINVAL;
1793
Patrick McHardy1e904742008-01-22 22:11:17 -08001794 if (tb[TCA_CBQ_WRROPT] &&
1795 nla_len(tb[TCA_CBQ_WRROPT]) < sizeof(struct tc_cbq_wrropt))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001796 return -EINVAL;
1797
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -07001798#ifdef CONFIG_NET_CLS_ACT
Patrick McHardy1e904742008-01-22 22:11:17 -08001799 if (tb[TCA_CBQ_POLICE] &&
1800 nla_len(tb[TCA_CBQ_POLICE]) < sizeof(struct tc_cbq_police))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001801 return -EINVAL;
1802#endif
1803
1804 if (cl) {
1805 /* Check parent */
1806 if (parentid) {
1807 if (cl->tparent && cl->tparent->classid != parentid)
1808 return -EINVAL;
1809 if (!cl->tparent && parentid != TC_H_ROOT)
1810 return -EINVAL;
1811 }
1812
Patrick McHardy1e904742008-01-22 22:11:17 -08001813 if (tb[TCA_CBQ_RATE]) {
1814 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001815 if (rtab == NULL)
1816 return -EINVAL;
1817 }
1818
1819 /* Change class parameters */
1820 sch_tree_lock(sch);
1821
1822 if (cl->next_alive != NULL)
1823 cbq_deactivate_class(cl);
1824
1825 if (rtab) {
1826 rtab = xchg(&cl->R_tab, rtab);
1827 qdisc_put_rtab(rtab);
1828 }
1829
Patrick McHardy1e904742008-01-22 22:11:17 -08001830 if (tb[TCA_CBQ_LSSOPT])
1831 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001832
Patrick McHardy1e904742008-01-22 22:11:17 -08001833 if (tb[TCA_CBQ_WRROPT]) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001834 cbq_rmprio(q, cl);
Patrick McHardy1e904742008-01-22 22:11:17 -08001835 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001836 }
1837
Patrick McHardy1e904742008-01-22 22:11:17 -08001838 if (tb[TCA_CBQ_OVL_STRATEGY])
1839 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001840
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -07001841#ifdef CONFIG_NET_CLS_ACT
Patrick McHardy1e904742008-01-22 22:11:17 -08001842 if (tb[TCA_CBQ_POLICE])
1843 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001844#endif
1845
Patrick McHardy1e904742008-01-22 22:11:17 -08001846 if (tb[TCA_CBQ_FOPT])
1847 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001848
1849 if (cl->q->q.qlen)
1850 cbq_activate_class(cl);
1851
1852 sch_tree_unlock(sch);
1853
Patrick McHardy1e904742008-01-22 22:11:17 -08001854 if (tca[TCA_RATE])
Linus Torvalds1da177e2005-04-16 15:20:36 -07001855 gen_replace_estimator(&cl->bstats, &cl->rate_est,
Patrick McHardy4bdf3992007-07-02 22:47:37 -07001856 &sch->dev->queue_lock,
Patrick McHardy1e904742008-01-22 22:11:17 -08001857 tca[TCA_RATE]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001858 return 0;
1859 }
1860
1861 if (parentid == TC_H_ROOT)
1862 return -EINVAL;
1863
Patrick McHardy1e904742008-01-22 22:11:17 -08001864 if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
1865 tb[TCA_CBQ_LSSOPT] == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001866 return -EINVAL;
1867
Patrick McHardy1e904742008-01-22 22:11:17 -08001868 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001869 if (rtab == NULL)
1870 return -EINVAL;
1871
1872 if (classid) {
1873 err = -EINVAL;
1874 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1875 goto failure;
1876 } else {
1877 int i;
1878 classid = TC_H_MAKE(sch->handle,0x8000);
1879
1880 for (i=0; i<0x8000; i++) {
1881 if (++q->hgenerator >= 0x8000)
1882 q->hgenerator = 1;
1883 if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1884 break;
1885 }
1886 err = -ENOSR;
1887 if (i >= 0x8000)
1888 goto failure;
1889 classid = classid|q->hgenerator;
1890 }
1891
1892 parent = &q->link;
1893 if (parentid) {
1894 parent = cbq_class_lookup(q, parentid);
1895 err = -EINVAL;
1896 if (parent == NULL)
1897 goto failure;
1898 }
1899
1900 err = -ENOBUFS;
Panagiotis Issaris0da974f2006-07-21 14:51:30 -07001901 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001902 if (cl == NULL)
1903 goto failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001904 cl->R_tab = rtab;
1905 rtab = NULL;
1906 cl->refcnt = 1;
Patrick McHardy9f9afec2006-11-29 17:35:18 -08001907 if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001908 cl->q = &noop_qdisc;
1909 cl->classid = classid;
1910 cl->tparent = parent;
1911 cl->qdisc = sch;
1912 cl->allot = parent->allot;
1913 cl->quantum = cl->allot;
1914 cl->weight = cl->R_tab->rate.rate;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001915
1916 sch_tree_lock(sch);
1917 cbq_link_class(cl);
1918 cl->borrow = cl->tparent;
1919 if (cl->tparent != &q->link)
1920 cl->share = cl->tparent;
1921 cbq_adjust_levels(parent);
1922 cl->minidle = -0x7FFFFFFF;
Patrick McHardy1e904742008-01-22 22:11:17 -08001923 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1924 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001925 if (cl->ewma_log==0)
1926 cl->ewma_log = q->link.ewma_log;
1927 if (cl->maxidle==0)
1928 cl->maxidle = q->link.maxidle;
1929 if (cl->avpkt==0)
1930 cl->avpkt = q->link.avpkt;
1931 cl->overlimit = cbq_ovl_classic;
Patrick McHardy1e904742008-01-22 22:11:17 -08001932 if (tb[TCA_CBQ_OVL_STRATEGY])
1933 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -07001934#ifdef CONFIG_NET_CLS_ACT
Patrick McHardy1e904742008-01-22 22:11:17 -08001935 if (tb[TCA_CBQ_POLICE])
1936 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001937#endif
Patrick McHardy1e904742008-01-22 22:11:17 -08001938 if (tb[TCA_CBQ_FOPT])
1939 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001940 sch_tree_unlock(sch);
1941
Patrick McHardy1e904742008-01-22 22:11:17 -08001942 if (tca[TCA_RATE])
Linus Torvalds1da177e2005-04-16 15:20:36 -07001943 gen_new_estimator(&cl->bstats, &cl->rate_est,
Patrick McHardy1e904742008-01-22 22:11:17 -08001944 &sch->dev->queue_lock, tca[TCA_RATE]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001945
1946 *arg = (unsigned long)cl;
1947 return 0;
1948
1949failure:
1950 qdisc_put_rtab(rtab);
1951 return err;
1952}
1953
1954static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1955{
1956 struct cbq_sched_data *q = qdisc_priv(sch);
1957 struct cbq_class *cl = (struct cbq_class*)arg;
Jarek Poplawskia37ef2e2006-12-08 00:25:55 -08001958 unsigned int qlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001959
1960 if (cl->filters || cl->children || cl == &q->link)
1961 return -EBUSY;
1962
1963 sch_tree_lock(sch);
1964
Jarek Poplawskia37ef2e2006-12-08 00:25:55 -08001965 qlen = cl->q->q.qlen;
1966 qdisc_reset(cl->q);
1967 qdisc_tree_decrease_qlen(cl->q, qlen);
1968
Linus Torvalds1da177e2005-04-16 15:20:36 -07001969 if (cl->next_alive)
1970 cbq_deactivate_class(cl);
1971
1972 if (q->tx_borrowed == cl)
1973 q->tx_borrowed = q->tx_class;
1974 if (q->tx_class == cl) {
1975 q->tx_class = NULL;
1976 q->tx_borrowed = NULL;
1977 }
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -07001978#ifdef CONFIG_NET_CLS_ACT
Linus Torvalds1da177e2005-04-16 15:20:36 -07001979 if (q->rx_class == cl)
1980 q->rx_class = NULL;
1981#endif
1982
1983 cbq_unlink_class(cl);
1984 cbq_adjust_levels(cl->tparent);
1985 cl->defmap = 0;
1986 cbq_sync_defmap(cl);
1987
1988 cbq_rmprio(q, cl);
1989 sch_tree_unlock(sch);
1990
1991 if (--cl->refcnt == 0)
1992 cbq_destroy_class(sch, cl);
1993
1994 return 0;
1995}
1996
1997static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
1998{
1999 struct cbq_sched_data *q = qdisc_priv(sch);
2000 struct cbq_class *cl = (struct cbq_class *)arg;
2001
2002 if (cl == NULL)
2003 cl = &q->link;
2004
2005 return &cl->filter_list;
2006}
2007
2008static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
2009 u32 classid)
2010{
2011 struct cbq_sched_data *q = qdisc_priv(sch);
2012 struct cbq_class *p = (struct cbq_class*)parent;
2013 struct cbq_class *cl = cbq_class_lookup(q, classid);
2014
2015 if (cl) {
2016 if (p && p->level <= cl->level)
2017 return 0;
2018 cl->filters++;
2019 return (unsigned long)cl;
2020 }
2021 return 0;
2022}
2023
2024static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2025{
2026 struct cbq_class *cl = (struct cbq_class*)arg;
2027
2028 cl->filters--;
2029}
2030
2031static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2032{
2033 struct cbq_sched_data *q = qdisc_priv(sch);
2034 unsigned h;
2035
2036 if (arg->stop)
2037 return;
2038
2039 for (h = 0; h < 16; h++) {
2040 struct cbq_class *cl;
2041
2042 for (cl = q->classes[h]; cl; cl = cl->next) {
2043 if (arg->count < arg->skip) {
2044 arg->count++;
2045 continue;
2046 }
2047 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2048 arg->stop = 1;
2049 return;
2050 }
2051 arg->count++;
2052 }
2053 }
2054}
2055
Eric Dumazet20fea082007-11-14 01:44:41 -08002056static const struct Qdisc_class_ops cbq_class_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002057 .graft = cbq_graft,
2058 .leaf = cbq_leaf,
Jarek Poplawskia37ef2e2006-12-08 00:25:55 -08002059 .qlen_notify = cbq_qlen_notify,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002060 .get = cbq_get,
2061 .put = cbq_put,
2062 .change = cbq_change_class,
2063 .delete = cbq_delete,
2064 .walk = cbq_walk,
2065 .tcf_chain = cbq_find_tcf,
2066 .bind_tcf = cbq_bind_filter,
2067 .unbind_tcf = cbq_unbind_filter,
2068 .dump = cbq_dump_class,
2069 .dump_stats = cbq_dump_class_stats,
2070};
2071
Eric Dumazet20fea082007-11-14 01:44:41 -08002072static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002073 .next = NULL,
2074 .cl_ops = &cbq_class_ops,
2075 .id = "cbq",
2076 .priv_size = sizeof(struct cbq_sched_data),
2077 .enqueue = cbq_enqueue,
2078 .dequeue = cbq_dequeue,
2079 .requeue = cbq_requeue,
2080 .drop = cbq_drop,
2081 .init = cbq_init,
2082 .reset = cbq_reset,
2083 .destroy = cbq_destroy,
2084 .change = NULL,
2085 .dump = cbq_dump,
2086 .dump_stats = cbq_dump_stats,
2087 .owner = THIS_MODULE,
2088};
2089
2090static int __init cbq_module_init(void)
2091{
2092 return register_qdisc(&cbq_qdisc_ops);
2093}
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +09002094static void __exit cbq_module_exit(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002095{
2096 unregister_qdisc(&cbq_qdisc_ops);
2097}
2098module_init(cbq_module_init)
2099module_exit(cbq_module_exit)
2100MODULE_LICENSE("GPL");