blob: 1b8128fb845d49462c13a2c826abacff96d62450 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * net/sched/sch_cbq.c Class-Based Queueing discipline.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 *
11 */
12
Linus Torvalds1da177e2005-04-16 15:20:36 -070013#include <linux/module.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090014#include <linux/slab.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070015#include <linux/types.h>
16#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070017#include <linux/string.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <linux/errno.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070019#include <linux/skbuff.h>
Patrick McHardy0ba48052007-07-02 22:49:07 -070020#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070021#include <net/pkt_sched.h>
22
23
24/* Class-Based Queueing (CBQ) algorithm.
25 =======================================
26
27 Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +090028 Management Models for Packet Networks",
Linus Torvalds1da177e2005-04-16 15:20:36 -070029 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
30
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +090031 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
Linus Torvalds1da177e2005-04-16 15:20:36 -070032
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +090033 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
Linus Torvalds1da177e2005-04-16 15:20:36 -070034 Parameters", 1996
35
36 [4] Sally Floyd and Michael Speer, "Experimental Results
37 for Class-Based Queueing", 1998, not published.
38
39 -----------------------------------------------------------------------
40
41 Algorithm skeleton was taken from NS simulator cbq.cc.
42 If someone wants to check this code against the LBL version,
43 he should take into account that ONLY the skeleton was borrowed,
44 the implementation is different. Particularly:
45
46 --- The WRR algorithm is different. Our version looks more
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +090047 reasonable (I hope) and works when quanta are allowed to be
48 less than MTU, which is always the case when real time classes
49 have small rates. Note, that the statement of [3] is
50 incomplete, delay may actually be estimated even if class
51 per-round allotment is less than MTU. Namely, if per-round
52 allotment is W*r_i, and r_1+...+r_k = r < 1
Linus Torvalds1da177e2005-04-16 15:20:36 -070053
54 delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
55
56 In the worst case we have IntServ estimate with D = W*r+k*MTU
57 and C = MTU*r. The proof (if correct at all) is trivial.
58
59
60 --- It seems that cbq-2.0 is not very accurate. At least, I cannot
61 interpret some places, which look like wrong translations
62 from NS. Anyone is advised to find these differences
63 and explain to me, why I am wrong 8).
64
65 --- Linux has no EOI event, so that we cannot estimate true class
66 idle time. Workaround is to consider the next dequeue event
67 as sign that previous packet is finished. This is wrong because of
68 internal device queueing, but on a permanently loaded link it is true.
69 Moreover, combined with clock integrator, this scheme looks
70 very close to an ideal solution. */
71
72struct cbq_sched_data;
73
74
Eric Dumazetcc7ec452011-01-19 19:26:56 +000075struct cbq_class {
Patrick McHardyd77fea22008-07-05 23:22:05 -070076 struct Qdisc_class_common common;
Linus Torvalds1da177e2005-04-16 15:20:36 -070077 struct cbq_class *next_alive; /* next class with backlog in this priority band */
78
79/* Parameters */
Linus Torvalds1da177e2005-04-16 15:20:36 -070080 unsigned char priority; /* class priority */
81 unsigned char priority2; /* priority to be used after overlimit */
82 unsigned char ewma_log; /* time constant for idle time calculation */
83 unsigned char ovl_strategy;
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -070084#ifdef CONFIG_NET_CLS_ACT
Linus Torvalds1da177e2005-04-16 15:20:36 -070085 unsigned char police;
86#endif
87
88 u32 defmap;
89
90 /* Link-sharing scheduler parameters */
91 long maxidle; /* Class parameters: see below. */
92 long offtime;
93 long minidle;
94 u32 avpkt;
95 struct qdisc_rate_table *R_tab;
96
97 /* Overlimit strategy parameters */
98 void (*overlimit)(struct cbq_class *cl);
Patrick McHardy1a13cb62007-03-16 01:22:20 -070099 psched_tdiff_t penalty;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700100
101 /* General scheduler (WRR) parameters */
102 long allot;
103 long quantum; /* Allotment per WRR round */
104 long weight; /* Relative allotment: see below */
105
106 struct Qdisc *qdisc; /* Ptr to CBQ discipline */
107 struct cbq_class *split; /* Ptr to split node */
108 struct cbq_class *share; /* Ptr to LS parent in the class tree */
109 struct cbq_class *tparent; /* Ptr to tree parent in the class tree */
110 struct cbq_class *borrow; /* NULL if class is bandwidth limited;
111 parent otherwise */
112 struct cbq_class *sibling; /* Sibling chain */
113 struct cbq_class *children; /* Pointer to children chain */
114
115 struct Qdisc *q; /* Elementary queueing discipline */
116
117
118/* Variables */
119 unsigned char cpriority; /* Effective priority */
120 unsigned char delayed;
121 unsigned char level; /* level of the class in hierarchy:
122 0 for leaf classes, and maximal
123 level of children + 1 for nodes.
124 */
125
126 psched_time_t last; /* Last end of service */
127 psched_time_t undertime;
128 long avgidle;
129 long deficit; /* Saved deficit for WRR */
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700130 psched_time_t penalized;
Eric Dumazetc1a8f1f2009-08-16 09:36:49 +0000131 struct gnet_stats_basic_packed bstats;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700132 struct gnet_stats_queue qstats;
Eric Dumazet45203a32013-06-06 08:43:22 -0700133 struct gnet_stats_rate_est64 rate_est;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700134 struct tc_cbq_xstats xstats;
135
John Fastabend25d8c0d2014-09-12 20:05:27 -0700136 struct tcf_proto __rcu *filter_list;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700137
138 int refcnt;
139 int filters;
140
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000141 struct cbq_class *defaults[TC_PRIO_MAX + 1];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700142};
143
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000144struct cbq_sched_data {
Patrick McHardyd77fea22008-07-05 23:22:05 -0700145 struct Qdisc_class_hash clhash; /* Hash table of all classes */
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000146 int nclasses[TC_CBQ_MAXPRIO + 1];
147 unsigned int quanta[TC_CBQ_MAXPRIO + 1];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700148
149 struct cbq_class link;
150
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000151 unsigned int activemask;
152 struct cbq_class *active[TC_CBQ_MAXPRIO + 1]; /* List of all classes
Linus Torvalds1da177e2005-04-16 15:20:36 -0700153 with backlog */
154
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -0700155#ifdef CONFIG_NET_CLS_ACT
Linus Torvalds1da177e2005-04-16 15:20:36 -0700156 struct cbq_class *rx_class;
157#endif
158 struct cbq_class *tx_class;
159 struct cbq_class *tx_borrowed;
160 int tx_len;
161 psched_time_t now; /* Cached timestamp */
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000162 unsigned int pmask;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700163
David S. Miller2fbd3da2009-09-01 17:59:25 -0700164 struct hrtimer delay_timer;
Patrick McHardy88a99352007-03-16 01:21:11 -0700165 struct qdisc_watchdog watchdog; /* Watchdog timer,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700166 started when CBQ has
167 backlog, but cannot
168 transmit just now */
Patrick McHardy88a99352007-03-16 01:21:11 -0700169 psched_tdiff_t wd_expires;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700170 int toplevel;
171 u32 hgenerator;
172};
173
174
/* Transmission time of @len bytes according to the rate table of class @cl. */
#define L2T(cl, len)	qdisc_l2t((cl)->R_tab, (len))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700176
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000177static inline struct cbq_class *
Linus Torvalds1da177e2005-04-16 15:20:36 -0700178cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
179{
Patrick McHardyd77fea22008-07-05 23:22:05 -0700180 struct Qdisc_class_common *clc;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700181
Patrick McHardyd77fea22008-07-05 23:22:05 -0700182 clc = qdisc_class_find(&q->clhash, classid);
183 if (clc == NULL)
184 return NULL;
185 return container_of(clc, struct cbq_class, common);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700186}
187
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -0700188#ifdef CONFIG_NET_CLS_ACT
Linus Torvalds1da177e2005-04-16 15:20:36 -0700189
190static struct cbq_class *
191cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
192{
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000193 struct cbq_class *cl;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700194
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000195 for (cl = this->tparent; cl; cl = cl->tparent) {
196 struct cbq_class *new = cl->defaults[TC_PRIO_BESTEFFORT];
197
198 if (new != NULL && new != this)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700199 return new;
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000200 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700201 return NULL;
202}
203
204#endif
205
206/* Classify packet. The procedure is pretty complicated, but
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000207 * it allows us to combine link sharing and priority scheduling
208 * transparently.
209 *
210 * Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
211 * so that it resolves to split nodes. Then packets are classified
212 * by logical priority, or a more specific classifier may be attached
213 * to the split node.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700214 */
215
216static struct cbq_class *
217cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
218{
219 struct cbq_sched_data *q = qdisc_priv(sch);
220 struct cbq_class *head = &q->link;
221 struct cbq_class **defmap;
222 struct cbq_class *cl = NULL;
223 u32 prio = skb->priority;
John Fastabend25d8c0d2014-09-12 20:05:27 -0700224 struct tcf_proto *fl;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700225 struct tcf_result res;
226
227 /*
228 * Step 1. If skb->priority points to one of our classes, use it.
229 */
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000230 if (TC_H_MAJ(prio ^ sch->handle) == 0 &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700231 (cl = cbq_class_lookup(q, prio)) != NULL)
232 return cl;
233
Jarek Poplawskic27f3392008-08-04 22:39:11 -0700234 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700235 for (;;) {
236 int result = 0;
237 defmap = head->defaults;
238
John Fastabend25d8c0d2014-09-12 20:05:27 -0700239 fl = rcu_dereference_bh(head->filter_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700240 /*
241 * Step 2+n. Apply classifier.
242 */
Daniel Borkmann3b3ae882015-08-26 23:00:06 +0200243 result = tc_classify(skb, fl, &res, true);
John Fastabend25d8c0d2014-09-12 20:05:27 -0700244 if (!fl || result < 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700245 goto fallback;
246
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000247 cl = (void *)res.class;
248 if (!cl) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700249 if (TC_H_MAJ(res.classid))
250 cl = cbq_class_lookup(q, res.classid);
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000251 else if ((cl = defmap[res.classid & TC_PRIO_MAX]) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700252 cl = defmap[TC_PRIO_BESTEFFORT];
253
Eric Dumazetbdfc87f2012-09-11 13:11:12 +0000254 if (cl == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700255 goto fallback;
256 }
Eric Dumazetbdfc87f2012-09-11 13:11:12 +0000257 if (cl->level >= head->level)
258 goto fallback;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700259#ifdef CONFIG_NET_CLS_ACT
260 switch (result) {
261 case TC_ACT_QUEUED:
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900262 case TC_ACT_STOLEN:
Jarek Poplawski378a2f02008-08-04 22:31:03 -0700263 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700264 case TC_ACT_SHOT:
265 return NULL;
Patrick McHardy73ca4912007-07-15 00:02:31 -0700266 case TC_ACT_RECLASSIFY:
267 return cbq_reclassify(skb, cl);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700268 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700269#endif
270 if (cl->level == 0)
271 return cl;
272
273 /*
274 * Step 3+n. If classifier selected a link sharing class,
275 * apply agency specific classifier.
276 * Repeat this procdure until we hit a leaf node.
277 */
278 head = cl;
279 }
280
281fallback:
282 cl = head;
283
284 /*
285 * Step 4. No success...
286 */
287 if (TC_H_MAJ(prio) == 0 &&
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000288 !(cl = head->defaults[prio & TC_PRIO_MAX]) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700289 !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
290 return head;
291
292 return cl;
293}
294
295/*
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000296 * A packet has just been enqueued on the empty class.
297 * cbq_activate_class adds it to the tail of active class list
298 * of its priority band.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700299 */
300
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000301static inline void cbq_activate_class(struct cbq_class *cl)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700302{
303 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
304 int prio = cl->cpriority;
305 struct cbq_class *cl_tail;
306
307 cl_tail = q->active[prio];
308 q->active[prio] = cl;
309
310 if (cl_tail != NULL) {
311 cl->next_alive = cl_tail->next_alive;
312 cl_tail->next_alive = cl;
313 } else {
314 cl->next_alive = cl;
315 q->activemask |= (1<<prio);
316 }
317}
318
319/*
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000320 * Unlink class from active chain.
321 * Note that this same procedure is done directly in cbq_dequeue*
322 * during round-robin procedure.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700323 */
324
325static void cbq_deactivate_class(struct cbq_class *this)
326{
327 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
328 int prio = this->cpriority;
329 struct cbq_class *cl;
330 struct cbq_class *cl_prev = q->active[prio];
331
332 do {
333 cl = cl_prev->next_alive;
334 if (cl == this) {
335 cl_prev->next_alive = cl->next_alive;
336 cl->next_alive = NULL;
337
338 if (cl == q->active[prio]) {
339 q->active[prio] = cl_prev;
340 if (cl == q->active[prio]) {
341 q->active[prio] = NULL;
342 q->activemask &= ~(1<<prio);
343 return;
344 }
345 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700346 return;
347 }
348 } while ((cl_prev = cl) != q->active[prio]);
349}
350
351static void
352cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
353{
354 int toplevel = q->toplevel;
355
Eric Dumazetfd245a42011-01-20 05:27:16 +0000356 if (toplevel > cl->level && !(qdisc_is_throttled(cl->q))) {
Vasily Averin7201c1d2014-08-14 12:27:59 +0400357 psched_time_t now = psched_get_time();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700358
359 do {
Patrick McHardy104e0872007-03-23 11:28:07 -0700360 if (cl->undertime < now) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700361 q->toplevel = cl->level;
362 return;
363 }
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000364 } while ((cl = cl->borrow) != NULL && toplevel > cl->level);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700365 }
366}
367
368static int
369cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
370{
371 struct cbq_sched_data *q = qdisc_priv(sch);
Satyam Sharmaddeee3c2007-09-16 14:54:05 -0700372 int uninitialized_var(ret);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700373 struct cbq_class *cl = cbq_classify(skb, sch, &ret);
374
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -0700375#ifdef CONFIG_NET_CLS_ACT
Linus Torvalds1da177e2005-04-16 15:20:36 -0700376 q->rx_class = cl;
377#endif
378 if (cl == NULL) {
Jarek Poplawskic27f3392008-08-04 22:39:11 -0700379 if (ret & __NET_XMIT_BYPASS)
John Fastabend25331d62014-09-28 11:53:29 -0700380 qdisc_qstats_drop(sch);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700381 kfree_skb(skb);
382 return ret;
383 }
384
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -0700385#ifdef CONFIG_NET_CLS_ACT
Linus Torvalds1da177e2005-04-16 15:20:36 -0700386 cl->q->__parent = sch;
387#endif
Jussi Kivilinna5f861732008-07-20 00:08:04 -0700388 ret = qdisc_enqueue(skb, cl->q);
389 if (ret == NET_XMIT_SUCCESS) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700390 sch->q.qlen++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700391 cbq_mark_toplevel(q, cl);
392 if (!cl->next_alive)
393 cbq_activate_class(cl);
394 return ret;
395 }
396
Jarek Poplawski378a2f02008-08-04 22:31:03 -0700397 if (net_xmit_drop_count(ret)) {
John Fastabend25331d62014-09-28 11:53:29 -0700398 qdisc_qstats_drop(sch);
Jarek Poplawski378a2f02008-08-04 22:31:03 -0700399 cbq_mark_toplevel(q, cl);
400 cl->qstats.drops++;
401 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700402 return ret;
403}
404
Linus Torvalds1da177e2005-04-16 15:20:36 -0700405/* Overlimit actions */
406
407/* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
408
409static void cbq_ovl_classic(struct cbq_class *cl)
410{
411 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
Patrick McHardy8edc0c32007-03-23 11:28:55 -0700412 psched_tdiff_t delay = cl->undertime - q->now;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700413
414 if (!cl->delayed) {
415 delay += cl->offtime;
416
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900417 /*
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000418 * Class goes to sleep, so that it will have no
419 * chance to work avgidle. Let's forgive it 8)
420 *
421 * BTW cbq-2.0 has a crap in this
422 * place, apparently they forgot to shift it by cl->ewma_log.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700423 */
424 if (cl->avgidle < 0)
425 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
426 if (cl->avgidle < cl->minidle)
427 cl->avgidle = cl->minidle;
428 if (delay <= 0)
429 delay = 1;
Patrick McHardy7c59e252007-03-23 11:27:45 -0700430 cl->undertime = q->now + delay;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700431
432 cl->xstats.overactions++;
433 cl->delayed = 1;
434 }
435 if (q->wd_expires == 0 || q->wd_expires > delay)
436 q->wd_expires = delay;
437
438 /* Dirty work! We must schedule wakeups based on
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000439 * real available rate, rather than leaf rate,
440 * which may be tiny (even zero).
Linus Torvalds1da177e2005-04-16 15:20:36 -0700441 */
442 if (q->toplevel == TC_CBQ_MAXLEVEL) {
443 struct cbq_class *b;
444 psched_tdiff_t base_delay = q->wd_expires;
445
446 for (b = cl->borrow; b; b = b->borrow) {
Patrick McHardy8edc0c32007-03-23 11:28:55 -0700447 delay = b->undertime - q->now;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700448 if (delay < base_delay) {
449 if (delay <= 0)
450 delay = 1;
451 base_delay = delay;
452 }
453 }
454
455 q->wd_expires = base_delay;
456 }
457}
458
459/* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000460 * they go overlimit
Linus Torvalds1da177e2005-04-16 15:20:36 -0700461 */
462
463static void cbq_ovl_rclassic(struct cbq_class *cl)
464{
465 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
466 struct cbq_class *this = cl;
467
468 do {
469 if (cl->level > q->toplevel) {
470 cl = NULL;
471 break;
472 }
473 } while ((cl = cl->borrow) != NULL);
474
475 if (cl == NULL)
476 cl = this;
477 cbq_ovl_classic(cl);
478}
479
480/* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */
481
482static void cbq_ovl_delay(struct cbq_class *cl)
483{
484 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
Patrick McHardy8edc0c32007-03-23 11:28:55 -0700485 psched_tdiff_t delay = cl->undertime - q->now;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700486
Jarek Poplawski2540e052008-08-21 05:11:14 -0700487 if (test_bit(__QDISC_STATE_DEACTIVATED,
488 &qdisc_root_sleeping(cl->qdisc)->state))
489 return;
490
Linus Torvalds1da177e2005-04-16 15:20:36 -0700491 if (!cl->delayed) {
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700492 psched_time_t sched = q->now;
493 ktime_t expires;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700494
495 delay += cl->offtime;
496 if (cl->avgidle < 0)
497 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
498 if (cl->avgidle < cl->minidle)
499 cl->avgidle = cl->minidle;
Patrick McHardy7c59e252007-03-23 11:27:45 -0700500 cl->undertime = q->now + delay;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700501
502 if (delay > 0) {
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700503 sched += delay + cl->penalty;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700504 cl->penalized = sched;
505 cl->cpriority = TC_CBQ_MAXPRIO;
506 q->pmask |= (1<<TC_CBQ_MAXPRIO);
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700507
Eric Dumazet46baac32012-10-20 00:40:51 +0000508 expires = ns_to_ktime(PSCHED_TICKS2NS(sched));
David S. Miller2fbd3da2009-09-01 17:59:25 -0700509 if (hrtimer_try_to_cancel(&q->delay_timer) &&
510 ktime_to_ns(ktime_sub(
511 hrtimer_get_expires(&q->delay_timer),
512 expires)) > 0)
513 hrtimer_set_expires(&q->delay_timer, expires);
514 hrtimer_restart(&q->delay_timer);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700515 cl->delayed = 1;
516 cl->xstats.overactions++;
517 return;
518 }
519 delay = 1;
520 }
521 if (q->wd_expires == 0 || q->wd_expires > delay)
522 q->wd_expires = delay;
523}
524
525/* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
526
527static void cbq_ovl_lowprio(struct cbq_class *cl)
528{
529 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
530
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700531 cl->penalized = q->now + cl->penalty;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700532
533 if (cl->cpriority != cl->priority2) {
534 cl->cpriority = cl->priority2;
535 q->pmask |= (1<<cl->cpriority);
536 cl->xstats.overactions++;
537 }
538 cbq_ovl_classic(cl);
539}
540
541/* TC_CBQ_OVL_DROP: penalize class by dropping */
542
543static void cbq_ovl_drop(struct cbq_class *cl)
544{
545 if (cl->q->ops->drop)
546 if (cl->q->ops->drop(cl->q))
547 cl->qdisc->q.qlen--;
548 cl->xstats.overactions++;
549 cbq_ovl_classic(cl);
550}
551
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700552static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
553 psched_time_t now)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700554{
555 struct cbq_class *cl;
556 struct cbq_class *cl_prev = q->active[prio];
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700557 psched_time_t sched = now;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700558
559 if (cl_prev == NULL)
Patrick McHardye9054a32007-03-16 01:21:40 -0700560 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700561
562 do {
563 cl = cl_prev->next_alive;
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700564 if (now - cl->penalized > 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700565 cl_prev->next_alive = cl->next_alive;
566 cl->next_alive = NULL;
567 cl->cpriority = cl->priority;
568 cl->delayed = 0;
569 cbq_activate_class(cl);
570
571 if (cl == q->active[prio]) {
572 q->active[prio] = cl_prev;
573 if (cl == q->active[prio]) {
574 q->active[prio] = NULL;
575 return 0;
576 }
577 }
578
579 cl = cl_prev->next_alive;
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700580 } else if (sched - cl->penalized > 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700581 sched = cl->penalized;
582 } while ((cl_prev = cl) != q->active[prio]);
583
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700584 return sched - now;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700585}
586
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700587static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700588{
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700589 struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
David S. Miller2fbd3da2009-09-01 17:59:25 -0700590 delay_timer);
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700591 struct Qdisc *sch = q->watchdog.qdisc;
592 psched_time_t now;
593 psched_tdiff_t delay = 0;
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000594 unsigned int pmask;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700595
Patrick McHardy3bebcda2007-03-23 11:29:25 -0700596 now = psched_get_time();
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700597
Linus Torvalds1da177e2005-04-16 15:20:36 -0700598 pmask = q->pmask;
599 q->pmask = 0;
600
601 while (pmask) {
602 int prio = ffz(~pmask);
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700603 psched_tdiff_t tmp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700604
605 pmask &= ~(1<<prio);
606
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700607 tmp = cbq_undelay_prio(q, prio, now);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700608 if (tmp > 0) {
609 q->pmask |= 1<<prio;
610 if (tmp < delay || delay == 0)
611 delay = tmp;
612 }
613 }
614
615 if (delay) {
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700616 ktime_t time;
617
618 time = ktime_set(0, 0);
Jarek Poplawskica44d6e2009-06-15 02:31:47 -0700619 time = ktime_add_ns(time, PSCHED_TICKS2NS(now + delay));
Eric Dumazet4a8e3202014-09-20 18:01:30 -0700620 hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS_PINNED);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700621 }
622
Eric Dumazetfd245a42011-01-20 05:27:16 +0000623 qdisc_unthrottled(sch);
David S. Miller8608db02008-08-18 20:51:18 -0700624 __netif_schedule(qdisc_root(sch));
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700625 return HRTIMER_NORESTART;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700626}
627
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -0700628#ifdef CONFIG_NET_CLS_ACT
Linus Torvalds1da177e2005-04-16 15:20:36 -0700629static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
630{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700631 struct Qdisc *sch = child->__parent;
632 struct cbq_sched_data *q = qdisc_priv(sch);
633 struct cbq_class *cl = q->rx_class;
634
635 q->rx_class = NULL;
636
637 if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
Jarek Poplawski378a2f02008-08-04 22:31:03 -0700638 int ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700639
640 cbq_mark_toplevel(q, cl);
641
642 q->rx_class = cl;
643 cl->q->__parent = sch;
644
Jarek Poplawski378a2f02008-08-04 22:31:03 -0700645 ret = qdisc_enqueue(skb, cl->q);
646 if (ret == NET_XMIT_SUCCESS) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700647 sch->q.qlen++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700648 if (!cl->next_alive)
649 cbq_activate_class(cl);
650 return 0;
651 }
Jarek Poplawski378a2f02008-08-04 22:31:03 -0700652 if (net_xmit_drop_count(ret))
John Fastabend25331d62014-09-28 11:53:29 -0700653 qdisc_qstats_drop(sch);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700654 return 0;
655 }
656
John Fastabend25331d62014-09-28 11:53:29 -0700657 qdisc_qstats_drop(sch);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700658 return -1;
659}
660#endif
661
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900662/*
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000663 * It is mission critical procedure.
664 *
665 * We "regenerate" toplevel cutoff, if transmitting class
666 * has backlog and it is not regulated. It is not part of
667 * original CBQ description, but looks more reasonable.
668 * Probably, it is wrong. This question needs further investigation.
669 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700670
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000671static inline void
Linus Torvalds1da177e2005-04-16 15:20:36 -0700672cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
673 struct cbq_class *borrowed)
674{
675 if (cl && q->toplevel >= borrowed->level) {
676 if (cl->q->q.qlen > 1) {
677 do {
Patrick McHardya0849802007-03-23 11:28:30 -0700678 if (borrowed->undertime == PSCHED_PASTPERFECT) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700679 q->toplevel = borrowed->level;
680 return;
681 }
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000682 } while ((borrowed = borrowed->borrow) != NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700683 }
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900684#if 0
Linus Torvalds1da177e2005-04-16 15:20:36 -0700685 /* It is not necessary now. Uncommenting it
686 will save CPU cycles, but decrease fairness.
687 */
688 q->toplevel = TC_CBQ_MAXLEVEL;
689#endif
690 }
691}
692
693static void
694cbq_update(struct cbq_sched_data *q)
695{
696 struct cbq_class *this = q->tx_class;
697 struct cbq_class *cl = this;
698 int len = q->tx_len;
Vasily Averin73d0f372014-08-14 12:27:47 +0400699 psched_time_t now;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700700
701 q->tx_class = NULL;
Vasily Averin73d0f372014-08-14 12:27:47 +0400702 /* Time integrator. We calculate EOS time
703 * by adding expected packet transmission time.
704 */
705 now = q->now + L2T(&q->link, len);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700706
707 for ( ; cl; cl = cl->share) {
708 long avgidle = cl->avgidle;
709 long idle;
710
711 cl->bstats.packets++;
712 cl->bstats.bytes += len;
713
714 /*
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000715 * (now - last) is total time between packet right edges.
716 * (last_pktlen/rate) is "virtual" busy time, so that
717 *
718 * idle = (now - last) - last_pktlen/rate
Linus Torvalds1da177e2005-04-16 15:20:36 -0700719 */
720
Vasily Averin73d0f372014-08-14 12:27:47 +0400721 idle = now - cl->last;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700722 if ((unsigned long)idle > 128*1024*1024) {
723 avgidle = cl->maxidle;
724 } else {
725 idle -= L2T(cl, len);
726
727 /* true_avgidle := (1-W)*true_avgidle + W*idle,
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000728 * where W=2^{-ewma_log}. But cl->avgidle is scaled:
729 * cl->avgidle == true_avgidle/W,
730 * hence:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700731 */
732 avgidle += idle - (avgidle>>cl->ewma_log);
733 }
734
735 if (avgidle <= 0) {
736 /* Overlimit or at-limit */
737
738 if (avgidle < cl->minidle)
739 avgidle = cl->minidle;
740
741 cl->avgidle = avgidle;
742
743 /* Calculate expected time, when this class
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000744 * will be allowed to send.
745 * It will occur, when:
746 * (1-W)*true_avgidle + W*delay = 0, i.e.
747 * idle = (1/W - 1)*(-true_avgidle)
748 * or
749 * idle = (1 - W)*(-cl->avgidle);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700750 */
751 idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
752
753 /*
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000754 * That is not all.
755 * To maintain the rate allocated to the class,
756 * we add to undertime virtual clock,
757 * necessary to complete transmitted packet.
758 * (len/phys_bandwidth has been already passed
759 * to the moment of cbq_update)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700760 */
761
762 idle -= L2T(&q->link, len);
763 idle += L2T(cl, len);
764
Vasily Averin73d0f372014-08-14 12:27:47 +0400765 cl->undertime = now + idle;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700766 } else {
767 /* Underlimit */
768
Patrick McHardya0849802007-03-23 11:28:30 -0700769 cl->undertime = PSCHED_PASTPERFECT;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700770 if (avgidle > cl->maxidle)
771 cl->avgidle = cl->maxidle;
772 else
773 cl->avgidle = avgidle;
774 }
Vasily Averin73d0f372014-08-14 12:27:47 +0400775 if ((s64)(now - cl->last) > 0)
776 cl->last = now;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700777 }
778
779 cbq_update_toplevel(q, this, q->tx_borrowed);
780}
781
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000782static inline struct cbq_class *
Linus Torvalds1da177e2005-04-16 15:20:36 -0700783cbq_under_limit(struct cbq_class *cl)
784{
785 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
786 struct cbq_class *this_cl = cl;
787
788 if (cl->tparent == NULL)
789 return cl;
790
Patrick McHardya0849802007-03-23 11:28:30 -0700791 if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700792 cl->delayed = 0;
793 return cl;
794 }
795
796 do {
797 /* It is very suspicious place. Now overlimit
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000798 * action is generated for not bounded classes
799 * only if link is completely congested.
800 * Though it is in agree with ancestor-only paradigm,
801 * it looks very stupid. Particularly,
802 * it means that this chunk of code will either
803 * never be called or result in strong amplification
804 * of burstiness. Dangerous, silly, and, however,
805 * no another solution exists.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700806 */
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000807 cl = cl->borrow;
808 if (!cl) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700809 this_cl->qstats.overlimits++;
810 this_cl->overlimit(this_cl);
811 return NULL;
812 }
813 if (cl->level > q->toplevel)
814 return NULL;
Patrick McHardya0849802007-03-23 11:28:30 -0700815 } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700816
817 cl->delayed = 0;
818 return cl;
819}
820
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000821static inline struct sk_buff *
Linus Torvalds1da177e2005-04-16 15:20:36 -0700822cbq_dequeue_prio(struct Qdisc *sch, int prio)
823{
824 struct cbq_sched_data *q = qdisc_priv(sch);
825 struct cbq_class *cl_tail, *cl_prev, *cl;
826 struct sk_buff *skb;
827 int deficit;
828
829 cl_tail = cl_prev = q->active[prio];
830 cl = cl_prev->next_alive;
831
832 do {
833 deficit = 0;
834
835 /* Start round */
836 do {
837 struct cbq_class *borrow = cl;
838
839 if (cl->q->q.qlen &&
840 (borrow = cbq_under_limit(cl)) == NULL)
841 goto skip_class;
842
843 if (cl->deficit <= 0) {
844 /* Class exhausted its allotment per
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000845 * this round. Switch to the next one.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700846 */
847 deficit = 1;
848 cl->deficit += cl->quantum;
849 goto next_class;
850 }
851
852 skb = cl->q->dequeue(cl->q);
853
854 /* Class did not give us any skb :-(
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000855 * It could occur even if cl->q->q.qlen != 0
856 * f.e. if cl->q == "tbf"
Linus Torvalds1da177e2005-04-16 15:20:36 -0700857 */
858 if (skb == NULL)
859 goto skip_class;
860
Jussi Kivilinna0abf77e2008-07-20 00:08:27 -0700861 cl->deficit -= qdisc_pkt_len(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700862 q->tx_class = cl;
863 q->tx_borrowed = borrow;
864 if (borrow != cl) {
865#ifndef CBQ_XSTATS_BORROWS_BYTES
866 borrow->xstats.borrows++;
867 cl->xstats.borrows++;
868#else
Jussi Kivilinna0abf77e2008-07-20 00:08:27 -0700869 borrow->xstats.borrows += qdisc_pkt_len(skb);
870 cl->xstats.borrows += qdisc_pkt_len(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700871#endif
872 }
Jussi Kivilinna0abf77e2008-07-20 00:08:27 -0700873 q->tx_len = qdisc_pkt_len(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700874
875 if (cl->deficit <= 0) {
876 q->active[prio] = cl;
877 cl = cl->next_alive;
878 cl->deficit += cl->quantum;
879 }
880 return skb;
881
882skip_class:
883 if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
884 /* Class is empty or penalized.
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000885 * Unlink it from active chain.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700886 */
887 cl_prev->next_alive = cl->next_alive;
888 cl->next_alive = NULL;
889
890 /* Did cl_tail point to it? */
891 if (cl == cl_tail) {
892 /* Repair it! */
893 cl_tail = cl_prev;
894
895 /* Was it the last class in this band? */
896 if (cl == cl_tail) {
897 /* Kill the band! */
898 q->active[prio] = NULL;
899 q->activemask &= ~(1<<prio);
900 if (cl->q->q.qlen)
901 cbq_activate_class(cl);
902 return NULL;
903 }
904
905 q->active[prio] = cl_tail;
906 }
907 if (cl->q->q.qlen)
908 cbq_activate_class(cl);
909
910 cl = cl_prev;
911 }
912
913next_class:
914 cl_prev = cl;
915 cl = cl->next_alive;
916 } while (cl_prev != cl_tail);
917 } while (deficit);
918
919 q->active[prio] = cl_prev;
920
921 return NULL;
922}
923
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000924static inline struct sk_buff *
Linus Torvalds1da177e2005-04-16 15:20:36 -0700925cbq_dequeue_1(struct Qdisc *sch)
926{
927 struct cbq_sched_data *q = qdisc_priv(sch);
928 struct sk_buff *skb;
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000929 unsigned int activemask;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700930
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000931 activemask = q->activemask & 0xFF;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700932 while (activemask) {
933 int prio = ffz(~activemask);
934 activemask &= ~(1<<prio);
935 skb = cbq_dequeue_prio(sch, prio);
936 if (skb)
937 return skb;
938 }
939 return NULL;
940}
941
942static struct sk_buff *
943cbq_dequeue(struct Qdisc *sch)
944{
945 struct sk_buff *skb;
946 struct cbq_sched_data *q = qdisc_priv(sch);
947 psched_time_t now;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700948
Patrick McHardy3bebcda2007-03-23 11:29:25 -0700949 now = psched_get_time();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700950
Vasily Averin73d0f372014-08-14 12:27:47 +0400951 if (q->tx_class)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700952 cbq_update(q);
Vasily Averin73d0f372014-08-14 12:27:47 +0400953
954 q->now = now;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700955
956 for (;;) {
957 q->wd_expires = 0;
958
959 skb = cbq_dequeue_1(sch);
960 if (skb) {
Eric Dumazet9190b3b2011-01-20 23:31:33 -0800961 qdisc_bstats_update(sch, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700962 sch->q.qlen--;
Eric Dumazetfd245a42011-01-20 05:27:16 +0000963 qdisc_unthrottled(sch);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700964 return skb;
965 }
966
967 /* All the classes are overlimit.
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000968 *
969 * It is possible, if:
970 *
971 * 1. Scheduler is empty.
972 * 2. Toplevel cutoff inhibited borrowing.
973 * 3. Root class is overlimit.
974 *
975 * Reset 2d and 3d conditions and retry.
976 *
977 * Note, that NS and cbq-2.0 are buggy, peeking
978 * an arbitrary class is appropriate for ancestor-only
979 * sharing, but not for toplevel algorithm.
980 *
981 * Our version is better, but slower, because it requires
982 * two passes, but it is unavoidable with top-level sharing.
983 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700984
985 if (q->toplevel == TC_CBQ_MAXLEVEL &&
Patrick McHardya0849802007-03-23 11:28:30 -0700986 q->link.undertime == PSCHED_PASTPERFECT)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700987 break;
988
989 q->toplevel = TC_CBQ_MAXLEVEL;
Patrick McHardya0849802007-03-23 11:28:30 -0700990 q->link.undertime = PSCHED_PASTPERFECT;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700991 }
992
993 /* No packets in scheduler or nobody wants to give them to us :-(
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000994 * Sigh... start watchdog timer in the last case.
995 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700996
997 if (sch->q.qlen) {
John Fastabend25331d62014-09-28 11:53:29 -0700998 qdisc_qstats_overlimit(sch);
Patrick McHardy88a99352007-03-16 01:21:11 -0700999 if (q->wd_expires)
1000 qdisc_watchdog_schedule(&q->watchdog,
Patrick McHardybb239ac2007-03-16 12:31:28 -07001001 now + q->wd_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001002 }
1003 return NULL;
1004}
1005
/* CBQ class maintenance routines */
1007
1008static void cbq_adjust_levels(struct cbq_class *this)
1009{
1010 if (this == NULL)
1011 return;
1012
1013 do {
1014 int level = 0;
1015 struct cbq_class *cl;
1016
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001017 cl = this->children;
1018 if (cl) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001019 do {
1020 if (cl->level > level)
1021 level = cl->level;
1022 } while ((cl = cl->sibling) != this->children);
1023 }
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001024 this->level = level + 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001025 } while ((this = this->tparent) != NULL);
1026}
1027
1028static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1029{
1030 struct cbq_class *cl;
Patrick McHardyd77fea22008-07-05 23:22:05 -07001031 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001032
1033 if (q->quanta[prio] == 0)
1034 return;
1035
Patrick McHardyd77fea22008-07-05 23:22:05 -07001036 for (h = 0; h < q->clhash.hashsize; h++) {
Sasha Levinb67bfe02013-02-27 17:06:00 -08001037 hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001038 /* BUGGGG... Beware! This expression suffer of
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001039 * arithmetic overflows!
Linus Torvalds1da177e2005-04-16 15:20:36 -07001040 */
1041 if (cl->priority == prio) {
1042 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1043 q->quanta[prio];
1044 }
Yang Yingliang833fa742013-12-10 20:55:32 +08001045 if (cl->quantum <= 0 ||
1046 cl->quantum > 32*qdisc_dev(cl->qdisc)->mtu) {
Yang Yingliangc17988a2013-12-23 17:38:58 +08001047 pr_warn("CBQ: class %08x has bad quantum==%ld, repaired.\n",
1048 cl->common.classid, cl->quantum);
David S. Miller5ce2d482008-07-08 17:06:30 -07001049 cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001050 }
1051 }
1052 }
1053}
1054
1055static void cbq_sync_defmap(struct cbq_class *cl)
1056{
1057 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1058 struct cbq_class *split = cl->split;
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001059 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001060 int i;
1061
1062 if (split == NULL)
1063 return;
1064
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001065 for (i = 0; i <= TC_PRIO_MAX; i++) {
1066 if (split->defaults[i] == cl && !(cl->defmap & (1<<i)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001067 split->defaults[i] = NULL;
1068 }
1069
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001070 for (i = 0; i <= TC_PRIO_MAX; i++) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001071 int level = split->level;
1072
1073 if (split->defaults[i])
1074 continue;
1075
Patrick McHardyd77fea22008-07-05 23:22:05 -07001076 for (h = 0; h < q->clhash.hashsize; h++) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001077 struct cbq_class *c;
1078
Sasha Levinb67bfe02013-02-27 17:06:00 -08001079 hlist_for_each_entry(c, &q->clhash.hash[h],
Patrick McHardyd77fea22008-07-05 23:22:05 -07001080 common.hnode) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001081 if (c->split == split && c->level < level &&
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001082 c->defmap & (1<<i)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001083 split->defaults[i] = c;
1084 level = c->level;
1085 }
1086 }
1087 }
1088 }
1089}
1090
1091static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1092{
1093 struct cbq_class *split = NULL;
1094
1095 if (splitid == 0) {
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001096 split = cl->split;
1097 if (!split)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001098 return;
Patrick McHardyd77fea22008-07-05 23:22:05 -07001099 splitid = split->common.classid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001100 }
1101
Patrick McHardyd77fea22008-07-05 23:22:05 -07001102 if (split == NULL || split->common.classid != splitid) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001103 for (split = cl->tparent; split; split = split->tparent)
Patrick McHardyd77fea22008-07-05 23:22:05 -07001104 if (split->common.classid == splitid)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001105 break;
1106 }
1107
1108 if (split == NULL)
1109 return;
1110
1111 if (cl->split != split) {
1112 cl->defmap = 0;
1113 cbq_sync_defmap(cl);
1114 cl->split = split;
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001115 cl->defmap = def & mask;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001116 } else
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001117 cl->defmap = (cl->defmap & ~mask) | (def & mask);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001118
1119 cbq_sync_defmap(cl);
1120}
1121
1122static void cbq_unlink_class(struct cbq_class *this)
1123{
1124 struct cbq_class *cl, **clp;
1125 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1126
Patrick McHardyd77fea22008-07-05 23:22:05 -07001127 qdisc_class_hash_remove(&q->clhash, &this->common);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001128
1129 if (this->tparent) {
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001130 clp = &this->sibling;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001131 cl = *clp;
1132 do {
1133 if (cl == this) {
1134 *clp = cl->sibling;
1135 break;
1136 }
1137 clp = &cl->sibling;
1138 } while ((cl = *clp) != this->sibling);
1139
1140 if (this->tparent->children == this) {
1141 this->tparent->children = this->sibling;
1142 if (this->sibling == this)
1143 this->tparent->children = NULL;
1144 }
1145 } else {
Ilpo Järvinen547b7922008-07-25 21:43:18 -07001146 WARN_ON(this->sibling != this);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001147 }
1148}
1149
1150static void cbq_link_class(struct cbq_class *this)
1151{
1152 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001153 struct cbq_class *parent = this->tparent;
1154
1155 this->sibling = this;
Patrick McHardyd77fea22008-07-05 23:22:05 -07001156 qdisc_class_hash_insert(&q->clhash, &this->common);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001157
1158 if (parent == NULL)
1159 return;
1160
1161 if (parent->children == NULL) {
1162 parent->children = this;
1163 } else {
1164 this->sibling = parent->children->sibling;
1165 parent->children->sibling = this;
1166 }
1167}
1168
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001169static unsigned int cbq_drop(struct Qdisc *sch)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001170{
1171 struct cbq_sched_data *q = qdisc_priv(sch);
1172 struct cbq_class *cl, *cl_head;
1173 int prio;
1174 unsigned int len;
1175
1176 for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001177 cl_head = q->active[prio];
1178 if (!cl_head)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001179 continue;
1180
1181 cl = cl_head;
1182 do {
1183 if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1184 sch->q.qlen--;
Jarek Poplawskia37ef2e2006-12-08 00:25:55 -08001185 if (!cl->q->q.qlen)
1186 cbq_deactivate_class(cl);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001187 return len;
1188 }
1189 } while ((cl = cl->next_alive) != cl_head);
1190 }
1191 return 0;
1192}
1193
1194static void
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001195cbq_reset(struct Qdisc *sch)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001196{
1197 struct cbq_sched_data *q = qdisc_priv(sch);
1198 struct cbq_class *cl;
1199 int prio;
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001200 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001201
1202 q->activemask = 0;
1203 q->pmask = 0;
1204 q->tx_class = NULL;
1205 q->tx_borrowed = NULL;
Patrick McHardy88a99352007-03-16 01:21:11 -07001206 qdisc_watchdog_cancel(&q->watchdog);
David S. Miller2fbd3da2009-09-01 17:59:25 -07001207 hrtimer_cancel(&q->delay_timer);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001208 q->toplevel = TC_CBQ_MAXLEVEL;
Patrick McHardy3bebcda2007-03-23 11:29:25 -07001209 q->now = psched_get_time();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001210
1211 for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1212 q->active[prio] = NULL;
1213
Patrick McHardyd77fea22008-07-05 23:22:05 -07001214 for (h = 0; h < q->clhash.hashsize; h++) {
Sasha Levinb67bfe02013-02-27 17:06:00 -08001215 hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001216 qdisc_reset(cl->q);
1217
1218 cl->next_alive = NULL;
Patrick McHardya0849802007-03-23 11:28:30 -07001219 cl->undertime = PSCHED_PASTPERFECT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001220 cl->avgidle = cl->maxidle;
1221 cl->deficit = cl->quantum;
1222 cl->cpriority = cl->priority;
1223 }
1224 }
1225 sch->q.qlen = 0;
1226}
1227
1228
1229static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1230{
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001231 if (lss->change & TCF_CBQ_LSS_FLAGS) {
1232 cl->share = (lss->flags & TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1233 cl->borrow = (lss->flags & TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001234 }
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001235 if (lss->change & TCF_CBQ_LSS_EWMA)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001236 cl->ewma_log = lss->ewma_log;
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001237 if (lss->change & TCF_CBQ_LSS_AVPKT)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001238 cl->avpkt = lss->avpkt;
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001239 if (lss->change & TCF_CBQ_LSS_MINIDLE)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001240 cl->minidle = -(long)lss->minidle;
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001241 if (lss->change & TCF_CBQ_LSS_MAXIDLE) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001242 cl->maxidle = lss->maxidle;
1243 cl->avgidle = lss->maxidle;
1244 }
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001245 if (lss->change & TCF_CBQ_LSS_OFFTIME)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001246 cl->offtime = lss->offtime;
1247 return 0;
1248}
1249
1250static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1251{
1252 q->nclasses[cl->priority]--;
1253 q->quanta[cl->priority] -= cl->weight;
1254 cbq_normalize_quanta(q, cl->priority);
1255}
1256
1257static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1258{
1259 q->nclasses[cl->priority]++;
1260 q->quanta[cl->priority] += cl->weight;
1261 cbq_normalize_quanta(q, cl->priority);
1262}
1263
1264static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1265{
1266 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1267
1268 if (wrr->allot)
1269 cl->allot = wrr->allot;
1270 if (wrr->weight)
1271 cl->weight = wrr->weight;
1272 if (wrr->priority) {
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001273 cl->priority = wrr->priority - 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001274 cl->cpriority = cl->priority;
1275 if (cl->priority >= cl->priority2)
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001276 cl->priority2 = TC_CBQ_MAXPRIO - 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001277 }
1278
1279 cbq_addprio(q, cl);
1280 return 0;
1281}
1282
1283static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1284{
1285 switch (ovl->strategy) {
1286 case TC_CBQ_OVL_CLASSIC:
1287 cl->overlimit = cbq_ovl_classic;
1288 break;
1289 case TC_CBQ_OVL_DELAY:
1290 cl->overlimit = cbq_ovl_delay;
1291 break;
1292 case TC_CBQ_OVL_LOWPRIO:
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001293 if (ovl->priority2 - 1 >= TC_CBQ_MAXPRIO ||
1294 ovl->priority2 - 1 <= cl->priority)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001295 return -EINVAL;
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001296 cl->priority2 = ovl->priority2 - 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001297 cl->overlimit = cbq_ovl_lowprio;
1298 break;
1299 case TC_CBQ_OVL_DROP:
1300 cl->overlimit = cbq_ovl_drop;
1301 break;
1302 case TC_CBQ_OVL_RCLASSIC:
1303 cl->overlimit = cbq_ovl_rclassic;
1304 break;
1305 default:
1306 return -EINVAL;
1307 }
Patrick McHardy1a13cb62007-03-16 01:22:20 -07001308 cl->penalty = ovl->penalty;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001309 return 0;
1310}
1311
#ifdef CONFIG_NET_CLS_ACT
/*
 * Record the policing mode of @cl.  If the child qdisc has a non-zero
 * handle, its reshape_fail hook is set to cbq_reshape_fail for
 * TC_POLICE_RECLASSIFY and cleared for every other mode.
 * Always returns 0.
 */
static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
{
	cl->police = p->police;

	if (!cl->q->handle)
		return 0;

	cl->q->reshape_fail = (p->police == TC_POLICE_RECLASSIFY) ?
			      cbq_reshape_fail : NULL;
	return 0;
}
#endif
1326
1327static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1328{
1329 cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1330 return 0;
1331}
1332
Patrick McHardy27a34212008-01-23 20:35:39 -08001333static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
1334 [TCA_CBQ_LSSOPT] = { .len = sizeof(struct tc_cbq_lssopt) },
1335 [TCA_CBQ_WRROPT] = { .len = sizeof(struct tc_cbq_wrropt) },
1336 [TCA_CBQ_FOPT] = { .len = sizeof(struct tc_cbq_fopt) },
1337 [TCA_CBQ_OVL_STRATEGY] = { .len = sizeof(struct tc_cbq_ovl) },
1338 [TCA_CBQ_RATE] = { .len = sizeof(struct tc_ratespec) },
1339 [TCA_CBQ_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1340 [TCA_CBQ_POLICE] = { .len = sizeof(struct tc_cbq_police) },
1341};
1342
Patrick McHardy1e904742008-01-22 22:11:17 -08001343static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001344{
1345 struct cbq_sched_data *q = qdisc_priv(sch);
Patrick McHardy1e904742008-01-22 22:11:17 -08001346 struct nlattr *tb[TCA_CBQ_MAX + 1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001347 struct tc_ratespec *r;
Patrick McHardycee63722008-01-23 20:33:32 -08001348 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001349
Patrick McHardy27a34212008-01-23 20:35:39 -08001350 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
Patrick McHardycee63722008-01-23 20:33:32 -08001351 if (err < 0)
1352 return err;
1353
Patrick McHardy27a34212008-01-23 20:35:39 -08001354 if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001355 return -EINVAL;
1356
Patrick McHardy1e904742008-01-22 22:11:17 -08001357 r = nla_data(tb[TCA_CBQ_RATE]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001358
Patrick McHardy1e904742008-01-22 22:11:17 -08001359 if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001360 return -EINVAL;
1361
Patrick McHardyd77fea22008-07-05 23:22:05 -07001362 err = qdisc_class_hash_init(&q->clhash);
1363 if (err < 0)
1364 goto put_rtab;
1365
Linus Torvalds1da177e2005-04-16 15:20:36 -07001366 q->link.refcnt = 1;
1367 q->link.sibling = &q->link;
Patrick McHardyd77fea22008-07-05 23:22:05 -07001368 q->link.common.classid = sch->handle;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001369 q->link.qdisc = sch;
Changli Gao3511c912010-10-16 13:04:08 +00001370 q->link.q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1371 sch->handle);
1372 if (!q->link.q)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001373 q->link.q = &noop_qdisc;
1374
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001375 q->link.priority = TC_CBQ_MAXPRIO - 1;
1376 q->link.priority2 = TC_CBQ_MAXPRIO - 1;
1377 q->link.cpriority = TC_CBQ_MAXPRIO - 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001378 q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1379 q->link.overlimit = cbq_ovl_classic;
David S. Miller5ce2d482008-07-08 17:06:30 -07001380 q->link.allot = psched_mtu(qdisc_dev(sch));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001381 q->link.quantum = q->link.allot;
1382 q->link.weight = q->link.R_tab->rate.rate;
1383
1384 q->link.ewma_log = TC_CBQ_DEF_EWMA;
1385 q->link.avpkt = q->link.allot/2;
1386 q->link.minidle = -0x7FFFFFFF;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001387
Patrick McHardy88a99352007-03-16 01:21:11 -07001388 qdisc_watchdog_init(&q->watchdog, sch);
Eric Dumazet4a8e3202014-09-20 18:01:30 -07001389 hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001390 q->delay_timer.function = cbq_undelay;
1391 q->toplevel = TC_CBQ_MAXLEVEL;
Patrick McHardy3bebcda2007-03-23 11:29:25 -07001392 q->now = psched_get_time();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001393
1394 cbq_link_class(&q->link);
1395
Patrick McHardy1e904742008-01-22 22:11:17 -08001396 if (tb[TCA_CBQ_LSSOPT])
1397 cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001398
1399 cbq_addprio(q, &q->link);
1400 return 0;
Patrick McHardyd77fea22008-07-05 23:22:05 -07001401
1402put_rtab:
1403 qdisc_put_rtab(q->link.R_tab);
1404 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001405}
1406
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001407static int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001408{
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001409 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001410
David S. Miller1b34ec42012-03-29 05:11:39 -04001411 if (nla_put(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate))
1412 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001413 return skb->len;
1414
Patrick McHardy1e904742008-01-22 22:11:17 -08001415nla_put_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001416 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001417 return -1;
1418}
1419
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001420static int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001421{
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001422 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001423 struct tc_cbq_lssopt opt;
1424
1425 opt.flags = 0;
1426 if (cl->borrow == NULL)
1427 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1428 if (cl->share == NULL)
1429 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1430 opt.ewma_log = cl->ewma_log;
1431 opt.level = cl->level;
1432 opt.avpkt = cl->avpkt;
1433 opt.maxidle = cl->maxidle;
1434 opt.minidle = (u32)(-cl->minidle);
1435 opt.offtime = cl->offtime;
1436 opt.change = ~0;
David S. Miller1b34ec42012-03-29 05:11:39 -04001437 if (nla_put(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt))
1438 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001439 return skb->len;
1440
Patrick McHardy1e904742008-01-22 22:11:17 -08001441nla_put_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001442 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001443 return -1;
1444}
1445
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001446static int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001447{
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001448 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001449 struct tc_cbq_wrropt opt;
1450
David S. Millera0db8562013-07-30 00:16:21 -07001451 memset(&opt, 0, sizeof(opt));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001452 opt.flags = 0;
1453 opt.allot = cl->allot;
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001454 opt.priority = cl->priority + 1;
1455 opt.cpriority = cl->cpriority + 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001456 opt.weight = cl->weight;
David S. Miller1b34ec42012-03-29 05:11:39 -04001457 if (nla_put(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt))
1458 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001459 return skb->len;
1460
Patrick McHardy1e904742008-01-22 22:11:17 -08001461nla_put_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001462 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001463 return -1;
1464}
1465
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001466static int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001467{
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001468 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001469 struct tc_cbq_ovl opt;
1470
1471 opt.strategy = cl->ovl_strategy;
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001472 opt.priority2 = cl->priority2 + 1;
Patrick McHardy8a470772005-06-28 12:56:45 -07001473 opt.pad = 0;
Patrick McHardy1a13cb62007-03-16 01:22:20 -07001474 opt.penalty = cl->penalty;
David S. Miller1b34ec42012-03-29 05:11:39 -04001475 if (nla_put(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt))
1476 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001477 return skb->len;
1478
Patrick McHardy1e904742008-01-22 22:11:17 -08001479nla_put_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001480 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001481 return -1;
1482}
1483
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001484static int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001485{
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001486 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001487 struct tc_cbq_fopt opt;
1488
1489 if (cl->split || cl->defmap) {
Patrick McHardyd77fea22008-07-05 23:22:05 -07001490 opt.split = cl->split ? cl->split->common.classid : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001491 opt.defmap = cl->defmap;
1492 opt.defchange = ~0;
David S. Miller1b34ec42012-03-29 05:11:39 -04001493 if (nla_put(skb, TCA_CBQ_FOPT, sizeof(opt), &opt))
1494 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001495 }
1496 return skb->len;
1497
Patrick McHardy1e904742008-01-22 22:11:17 -08001498nla_put_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001499 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001500 return -1;
1501}
1502
#ifdef CONFIG_NET_CLS_ACT
/*
 * Append the class's policing mode as a TCA_CBQ_POLICE attribute, but
 * only when a mode is set (cl->police is non-zero).  Reserved fields are
 * reported as zero.
 * Returns skb->len on success; on failure the skb is trimmed back to
 * where it was on entry and -1 is returned.
 */
static int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *mark = skb_tail_pointer(skb);
	struct tc_cbq_police opt;

	if (!cl->police)
		return skb->len;

	opt.__res1 = 0;
	opt.__res2 = 0;
	opt.police = cl->police;

	if (nla_put(skb, TCA_CBQ_POLICE, sizeof(opt), &opt)) {
		nlmsg_trim(skb, mark);
		return -1;
	}

	return skb->len;
}
#endif
1523
/*
 * Emit every CBQ attribute of @cl into @skb, in the order LSS, rate,
 * WRR, overlimit, police (only with CONFIG_NET_CLS_ACT) and filter
 * options.  Stops at the first helper that fails.
 * Returns 0 on success, -1 on failure.
 */
static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
{
	if (cbq_dump_lss(skb, cl) < 0)
		return -1;
	if (cbq_dump_rate(skb, cl) < 0)
		return -1;
	if (cbq_dump_wrr(skb, cl) < 0)
		return -1;
	if (cbq_dump_ovl(skb, cl) < 0)
		return -1;
#ifdef CONFIG_NET_CLS_ACT
	if (cbq_dump_police(skb, cl) < 0)
		return -1;
#endif
	if (cbq_dump_fopt(skb, cl) < 0)
		return -1;

	return 0;
}
1537
1538static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1539{
1540 struct cbq_sched_data *q = qdisc_priv(sch);
Patrick McHardy4b3550ef2008-01-23 20:34:11 -08001541 struct nlattr *nest;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001542
Patrick McHardy4b3550ef2008-01-23 20:34:11 -08001543 nest = nla_nest_start(skb, TCA_OPTIONS);
1544 if (nest == NULL)
1545 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001546 if (cbq_dump_attr(skb, &q->link) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001547 goto nla_put_failure;
Yang Yingliangd59b7d82014-03-12 10:20:32 +08001548 return nla_nest_end(skb, nest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001549
Patrick McHardy1e904742008-01-22 22:11:17 -08001550nla_put_failure:
Patrick McHardy4b3550ef2008-01-23 20:34:11 -08001551 nla_nest_cancel(skb, nest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001552 return -1;
1553}
1554
1555static int
1556cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1557{
1558 struct cbq_sched_data *q = qdisc_priv(sch);
1559
1560 q->link.xstats.avgidle = q->link.avgidle;
1561 return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1562}
1563
1564static int
1565cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1566 struct sk_buff *skb, struct tcmsg *tcm)
1567{
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001568 struct cbq_class *cl = (struct cbq_class *)arg;
Patrick McHardy4b3550ef2008-01-23 20:34:11 -08001569 struct nlattr *nest;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001570
1571 if (cl->tparent)
Patrick McHardyd77fea22008-07-05 23:22:05 -07001572 tcm->tcm_parent = cl->tparent->common.classid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001573 else
1574 tcm->tcm_parent = TC_H_ROOT;
Patrick McHardyd77fea22008-07-05 23:22:05 -07001575 tcm->tcm_handle = cl->common.classid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001576 tcm->tcm_info = cl->q->handle;
1577
Patrick McHardy4b3550ef2008-01-23 20:34:11 -08001578 nest = nla_nest_start(skb, TCA_OPTIONS);
1579 if (nest == NULL)
1580 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001581 if (cbq_dump_attr(skb, cl) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001582 goto nla_put_failure;
Yang Yingliangd59b7d82014-03-12 10:20:32 +08001583 return nla_nest_end(skb, nest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001584
Patrick McHardy1e904742008-01-22 22:11:17 -08001585nla_put_failure:
Patrick McHardy4b3550ef2008-01-23 20:34:11 -08001586 nla_nest_cancel(skb, nest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001587 return -1;
1588}
1589
1590static int
1591cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1592 struct gnet_dump *d)
1593{
1594 struct cbq_sched_data *q = qdisc_priv(sch);
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001595 struct cbq_class *cl = (struct cbq_class *)arg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001596
Linus Torvalds1da177e2005-04-16 15:20:36 -07001597 cl->xstats.avgidle = cl->avgidle;
1598 cl->xstats.undertime = 0;
1599
Patrick McHardya0849802007-03-23 11:28:30 -07001600 if (cl->undertime != PSCHED_PASTPERFECT)
Patrick McHardy8edc0c32007-03-23 11:28:55 -07001601 cl->xstats.undertime = cl->undertime - q->now;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001602
Eric Dumazetedb09eb2016-06-06 09:37:16 -07001603 if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
1604 d, NULL, &cl->bstats) < 0 ||
Eric Dumazetd250a5f2009-10-02 10:32:18 +00001605 gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
John Fastabendb0ab6f92014-09-28 11:54:24 -07001606 gnet_stats_copy_queue(d, NULL, &cl->qstats, cl->q->q.qlen) < 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001607 return -1;
1608
1609 return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1610}
1611
1612static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1613 struct Qdisc **old)
1614{
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001615 struct cbq_class *cl = (struct cbq_class *)arg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001616
Patrick McHardy5b9a9cc2009-09-04 06:41:17 +00001617 if (new == NULL) {
Changli Gao3511c912010-10-16 13:04:08 +00001618 new = qdisc_create_dflt(sch->dev_queue,
Patrick McHardy5b9a9cc2009-09-04 06:41:17 +00001619 &pfifo_qdisc_ops, cl->common.classid);
1620 if (new == NULL)
1621 return -ENOBUFS;
1622 } else {
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -07001623#ifdef CONFIG_NET_CLS_ACT
Patrick McHardy5b9a9cc2009-09-04 06:41:17 +00001624 if (cl->police == TC_POLICE_RECLASSIFY)
1625 new->reshape_fail = cbq_reshape_fail;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001626#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001627 }
Patrick McHardy5b9a9cc2009-09-04 06:41:17 +00001628
WANG Cong86a79962016-02-25 14:55:00 -08001629 *old = qdisc_replace(sch, new, &cl->q);
Patrick McHardy5b9a9cc2009-09-04 06:41:17 +00001630 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001631}
1632
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001633static struct Qdisc *cbq_leaf(struct Qdisc *sch, unsigned long arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001634{
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001635 struct cbq_class *cl = (struct cbq_class *)arg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001636
Patrick McHardy5b9a9cc2009-09-04 06:41:17 +00001637 return cl->q;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001638}
1639
Jarek Poplawskia37ef2e2006-12-08 00:25:55 -08001640static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1641{
1642 struct cbq_class *cl = (struct cbq_class *)arg;
1643
1644 if (cl->q->q.qlen == 0)
1645 cbq_deactivate_class(cl);
1646}
1647
Linus Torvalds1da177e2005-04-16 15:20:36 -07001648static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1649{
1650 struct cbq_sched_data *q = qdisc_priv(sch);
1651 struct cbq_class *cl = cbq_class_lookup(q, classid);
1652
1653 if (cl) {
1654 cl->refcnt++;
1655 return (unsigned long)cl;
1656 }
1657 return 0;
1658}
1659
Linus Torvalds1da177e2005-04-16 15:20:36 -07001660static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1661{
1662 struct cbq_sched_data *q = qdisc_priv(sch);
1663
Ilpo Järvinen547b7922008-07-25 21:43:18 -07001664 WARN_ON(cl->filters);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001665
Patrick McHardyff31ab52008-07-01 19:52:38 -07001666 tcf_destroy_chain(&cl->filter_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001667 qdisc_destroy(cl->q);
1668 qdisc_put_rtab(cl->R_tab);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001669 gen_kill_estimator(&cl->bstats, &cl->rate_est);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001670 if (cl != &q->link)
1671 kfree(cl);
1672}
1673
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001674static void cbq_destroy(struct Qdisc *sch)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001675{
1676 struct cbq_sched_data *q = qdisc_priv(sch);
Sasha Levinb67bfe02013-02-27 17:06:00 -08001677 struct hlist_node *next;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001678 struct cbq_class *cl;
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001679 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001680
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -07001681#ifdef CONFIG_NET_CLS_ACT
Linus Torvalds1da177e2005-04-16 15:20:36 -07001682 q->rx_class = NULL;
1683#endif
1684 /*
1685 * Filters must be destroyed first because we don't destroy the
1686 * classes from root to leafs which means that filters can still
1687 * be bound to classes which have been destroyed already. --TGR '04
1688 */
Patrick McHardyd77fea22008-07-05 23:22:05 -07001689 for (h = 0; h < q->clhash.hashsize; h++) {
Sasha Levinb67bfe02013-02-27 17:06:00 -08001690 hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode)
Patrick McHardyff31ab52008-07-01 19:52:38 -07001691 tcf_destroy_chain(&cl->filter_list);
Patrick McHardyb00b4bf2007-06-05 16:06:59 -07001692 }
Patrick McHardyd77fea22008-07-05 23:22:05 -07001693 for (h = 0; h < q->clhash.hashsize; h++) {
Sasha Levinb67bfe02013-02-27 17:06:00 -08001694 hlist_for_each_entry_safe(cl, next, &q->clhash.hash[h],
Patrick McHardyd77fea22008-07-05 23:22:05 -07001695 common.hnode)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001696 cbq_destroy_class(sch, cl);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001697 }
Patrick McHardyd77fea22008-07-05 23:22:05 -07001698 qdisc_class_hash_destroy(&q->clhash);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001699}
1700
1701static void cbq_put(struct Qdisc *sch, unsigned long arg)
1702{
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001703 struct cbq_class *cl = (struct cbq_class *)arg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001704
1705 if (--cl->refcnt == 0) {
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -07001706#ifdef CONFIG_NET_CLS_ACT
Jarek Poplawski102396a2008-08-29 14:21:52 -07001707 spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001708 struct cbq_sched_data *q = qdisc_priv(sch);
1709
David S. Miller7698b4f2008-07-16 01:42:40 -07001710 spin_lock_bh(root_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001711 if (q->rx_class == cl)
1712 q->rx_class = NULL;
David S. Miller7698b4f2008-07-16 01:42:40 -07001713 spin_unlock_bh(root_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001714#endif
1715
1716 cbq_destroy_class(sch, cl);
1717 }
1718}
1719
1720static int
Patrick McHardy1e904742008-01-22 22:11:17 -08001721cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001722 unsigned long *arg)
1723{
1724 int err;
1725 struct cbq_sched_data *q = qdisc_priv(sch);
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001726 struct cbq_class *cl = (struct cbq_class *)*arg;
Patrick McHardy1e904742008-01-22 22:11:17 -08001727 struct nlattr *opt = tca[TCA_OPTIONS];
1728 struct nlattr *tb[TCA_CBQ_MAX + 1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001729 struct cbq_class *parent;
1730 struct qdisc_rate_table *rtab = NULL;
1731
Patrick McHardycee63722008-01-23 20:33:32 -08001732 if (opt == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001733 return -EINVAL;
1734
Patrick McHardy27a34212008-01-23 20:35:39 -08001735 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
Patrick McHardycee63722008-01-23 20:33:32 -08001736 if (err < 0)
1737 return err;
1738
Linus Torvalds1da177e2005-04-16 15:20:36 -07001739 if (cl) {
1740 /* Check parent */
1741 if (parentid) {
Patrick McHardyd77fea22008-07-05 23:22:05 -07001742 if (cl->tparent &&
1743 cl->tparent->common.classid != parentid)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001744 return -EINVAL;
1745 if (!cl->tparent && parentid != TC_H_ROOT)
1746 return -EINVAL;
1747 }
1748
Patrick McHardy1e904742008-01-22 22:11:17 -08001749 if (tb[TCA_CBQ_RATE]) {
Stephen Hemminger71bcb092008-11-25 21:13:31 -08001750 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]),
1751 tb[TCA_CBQ_RTAB]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001752 if (rtab == NULL)
1753 return -EINVAL;
1754 }
1755
Stephen Hemminger71bcb092008-11-25 21:13:31 -08001756 if (tca[TCA_RATE]) {
John Fastabend22e0f8b2014-09-28 11:52:56 -07001757 err = gen_replace_estimator(&cl->bstats, NULL,
1758 &cl->rate_est,
Eric Dumazetedb09eb2016-06-06 09:37:16 -07001759 NULL,
1760 qdisc_root_sleeping_running(sch),
Stephen Hemminger71bcb092008-11-25 21:13:31 -08001761 tca[TCA_RATE]);
1762 if (err) {
Yang Yingliang79c11f22013-12-17 15:29:17 +08001763 qdisc_put_rtab(rtab);
Stephen Hemminger71bcb092008-11-25 21:13:31 -08001764 return err;
1765 }
1766 }
1767
Linus Torvalds1da177e2005-04-16 15:20:36 -07001768 /* Change class parameters */
1769 sch_tree_lock(sch);
1770
1771 if (cl->next_alive != NULL)
1772 cbq_deactivate_class(cl);
1773
1774 if (rtab) {
Patrick McHardyb94c8af2008-11-20 04:11:36 -08001775 qdisc_put_rtab(cl->R_tab);
1776 cl->R_tab = rtab;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001777 }
1778
Patrick McHardy1e904742008-01-22 22:11:17 -08001779 if (tb[TCA_CBQ_LSSOPT])
1780 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001781
Patrick McHardy1e904742008-01-22 22:11:17 -08001782 if (tb[TCA_CBQ_WRROPT]) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001783 cbq_rmprio(q, cl);
Patrick McHardy1e904742008-01-22 22:11:17 -08001784 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001785 }
1786
Patrick McHardy1e904742008-01-22 22:11:17 -08001787 if (tb[TCA_CBQ_OVL_STRATEGY])
1788 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001789
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -07001790#ifdef CONFIG_NET_CLS_ACT
Patrick McHardy1e904742008-01-22 22:11:17 -08001791 if (tb[TCA_CBQ_POLICE])
1792 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001793#endif
1794
Patrick McHardy1e904742008-01-22 22:11:17 -08001795 if (tb[TCA_CBQ_FOPT])
1796 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001797
1798 if (cl->q->q.qlen)
1799 cbq_activate_class(cl);
1800
1801 sch_tree_unlock(sch);
1802
Linus Torvalds1da177e2005-04-16 15:20:36 -07001803 return 0;
1804 }
1805
1806 if (parentid == TC_H_ROOT)
1807 return -EINVAL;
1808
Patrick McHardy1e904742008-01-22 22:11:17 -08001809 if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
1810 tb[TCA_CBQ_LSSOPT] == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001811 return -EINVAL;
1812
Patrick McHardy1e904742008-01-22 22:11:17 -08001813 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001814 if (rtab == NULL)
1815 return -EINVAL;
1816
1817 if (classid) {
1818 err = -EINVAL;
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001819 if (TC_H_MAJ(classid ^ sch->handle) ||
1820 cbq_class_lookup(q, classid))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001821 goto failure;
1822 } else {
1823 int i;
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001824 classid = TC_H_MAKE(sch->handle, 0x8000);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001825
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001826 for (i = 0; i < 0x8000; i++) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001827 if (++q->hgenerator >= 0x8000)
1828 q->hgenerator = 1;
1829 if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1830 break;
1831 }
1832 err = -ENOSR;
1833 if (i >= 0x8000)
1834 goto failure;
1835 classid = classid|q->hgenerator;
1836 }
1837
1838 parent = &q->link;
1839 if (parentid) {
1840 parent = cbq_class_lookup(q, parentid);
1841 err = -EINVAL;
1842 if (parent == NULL)
1843 goto failure;
1844 }
1845
1846 err = -ENOBUFS;
Panagiotis Issaris0da974f2006-07-21 14:51:30 -07001847 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001848 if (cl == NULL)
1849 goto failure;
Stephen Hemminger71bcb092008-11-25 21:13:31 -08001850
1851 if (tca[TCA_RATE]) {
John Fastabend22e0f8b2014-09-28 11:52:56 -07001852 err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est,
Eric Dumazetedb09eb2016-06-06 09:37:16 -07001853 NULL,
1854 qdisc_root_sleeping_running(sch),
Stephen Hemminger71bcb092008-11-25 21:13:31 -08001855 tca[TCA_RATE]);
1856 if (err) {
1857 kfree(cl);
1858 goto failure;
1859 }
1860 }
1861
Linus Torvalds1da177e2005-04-16 15:20:36 -07001862 cl->R_tab = rtab;
1863 rtab = NULL;
1864 cl->refcnt = 1;
Changli Gao3511c912010-10-16 13:04:08 +00001865 cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid);
1866 if (!cl->q)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001867 cl->q = &noop_qdisc;
Patrick McHardyd77fea22008-07-05 23:22:05 -07001868 cl->common.classid = classid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001869 cl->tparent = parent;
1870 cl->qdisc = sch;
1871 cl->allot = parent->allot;
1872 cl->quantum = cl->allot;
1873 cl->weight = cl->R_tab->rate.rate;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001874
1875 sch_tree_lock(sch);
1876 cbq_link_class(cl);
1877 cl->borrow = cl->tparent;
1878 if (cl->tparent != &q->link)
1879 cl->share = cl->tparent;
1880 cbq_adjust_levels(parent);
1881 cl->minidle = -0x7FFFFFFF;
Patrick McHardy1e904742008-01-22 22:11:17 -08001882 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1883 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001884 if (cl->ewma_log == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001885 cl->ewma_log = q->link.ewma_log;
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001886 if (cl->maxidle == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001887 cl->maxidle = q->link.maxidle;
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001888 if (cl->avpkt == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001889 cl->avpkt = q->link.avpkt;
1890 cl->overlimit = cbq_ovl_classic;
Patrick McHardy1e904742008-01-22 22:11:17 -08001891 if (tb[TCA_CBQ_OVL_STRATEGY])
1892 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -07001893#ifdef CONFIG_NET_CLS_ACT
Patrick McHardy1e904742008-01-22 22:11:17 -08001894 if (tb[TCA_CBQ_POLICE])
1895 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001896#endif
Patrick McHardy1e904742008-01-22 22:11:17 -08001897 if (tb[TCA_CBQ_FOPT])
1898 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001899 sch_tree_unlock(sch);
1900
Patrick McHardyd77fea22008-07-05 23:22:05 -07001901 qdisc_class_hash_grow(sch, &q->clhash);
1902
Linus Torvalds1da177e2005-04-16 15:20:36 -07001903 *arg = (unsigned long)cl;
1904 return 0;
1905
1906failure:
1907 qdisc_put_rtab(rtab);
1908 return err;
1909}
1910
1911static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1912{
1913 struct cbq_sched_data *q = qdisc_priv(sch);
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001914 struct cbq_class *cl = (struct cbq_class *)arg;
WANG Cong2ccccf52016-02-25 14:55:01 -08001915 unsigned int qlen, backlog;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001916
1917 if (cl->filters || cl->children || cl == &q->link)
1918 return -EBUSY;
1919
1920 sch_tree_lock(sch);
1921
Jarek Poplawskia37ef2e2006-12-08 00:25:55 -08001922 qlen = cl->q->q.qlen;
WANG Cong2ccccf52016-02-25 14:55:01 -08001923 backlog = cl->q->qstats.backlog;
Jarek Poplawskia37ef2e2006-12-08 00:25:55 -08001924 qdisc_reset(cl->q);
WANG Cong2ccccf52016-02-25 14:55:01 -08001925 qdisc_tree_reduce_backlog(cl->q, qlen, backlog);
Jarek Poplawskia37ef2e2006-12-08 00:25:55 -08001926
Linus Torvalds1da177e2005-04-16 15:20:36 -07001927 if (cl->next_alive)
1928 cbq_deactivate_class(cl);
1929
1930 if (q->tx_borrowed == cl)
1931 q->tx_borrowed = q->tx_class;
1932 if (q->tx_class == cl) {
1933 q->tx_class = NULL;
1934 q->tx_borrowed = NULL;
1935 }
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -07001936#ifdef CONFIG_NET_CLS_ACT
Linus Torvalds1da177e2005-04-16 15:20:36 -07001937 if (q->rx_class == cl)
1938 q->rx_class = NULL;
1939#endif
1940
1941 cbq_unlink_class(cl);
1942 cbq_adjust_levels(cl->tparent);
1943 cl->defmap = 0;
1944 cbq_sync_defmap(cl);
1945
1946 cbq_rmprio(q, cl);
1947 sch_tree_unlock(sch);
1948
Jarek Poplawski7cd0a632009-03-15 20:00:19 -07001949 BUG_ON(--cl->refcnt == 0);
1950 /*
1951 * This shouldn't happen: we "hold" one cops->get() when called
1952 * from tc_ctl_tclass; the destroy method is done from cops->put().
1953 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001954
1955 return 0;
1956}
1957
John Fastabend25d8c0d2014-09-12 20:05:27 -07001958static struct tcf_proto __rcu **cbq_find_tcf(struct Qdisc *sch,
1959 unsigned long arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001960{
1961 struct cbq_sched_data *q = qdisc_priv(sch);
1962 struct cbq_class *cl = (struct cbq_class *)arg;
1963
1964 if (cl == NULL)
1965 cl = &q->link;
1966
1967 return &cl->filter_list;
1968}
1969
1970static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
1971 u32 classid)
1972{
1973 struct cbq_sched_data *q = qdisc_priv(sch);
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001974 struct cbq_class *p = (struct cbq_class *)parent;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001975 struct cbq_class *cl = cbq_class_lookup(q, classid);
1976
1977 if (cl) {
1978 if (p && p->level <= cl->level)
1979 return 0;
1980 cl->filters++;
1981 return (unsigned long)cl;
1982 }
1983 return 0;
1984}
1985
1986static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
1987{
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001988 struct cbq_class *cl = (struct cbq_class *)arg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001989
1990 cl->filters--;
1991}
1992
1993static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
1994{
1995 struct cbq_sched_data *q = qdisc_priv(sch);
Patrick McHardyd77fea22008-07-05 23:22:05 -07001996 struct cbq_class *cl;
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001997 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001998
1999 if (arg->stop)
2000 return;
2001
Patrick McHardyd77fea22008-07-05 23:22:05 -07002002 for (h = 0; h < q->clhash.hashsize; h++) {
Sasha Levinb67bfe02013-02-27 17:06:00 -08002003 hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002004 if (arg->count < arg->skip) {
2005 arg->count++;
2006 continue;
2007 }
2008 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2009 arg->stop = 1;
2010 return;
2011 }
2012 arg->count++;
2013 }
2014 }
2015}
2016
Eric Dumazet20fea082007-11-14 01:44:41 -08002017static const struct Qdisc_class_ops cbq_class_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002018 .graft = cbq_graft,
2019 .leaf = cbq_leaf,
Jarek Poplawskia37ef2e2006-12-08 00:25:55 -08002020 .qlen_notify = cbq_qlen_notify,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002021 .get = cbq_get,
2022 .put = cbq_put,
2023 .change = cbq_change_class,
2024 .delete = cbq_delete,
2025 .walk = cbq_walk,
2026 .tcf_chain = cbq_find_tcf,
2027 .bind_tcf = cbq_bind_filter,
2028 .unbind_tcf = cbq_unbind_filter,
2029 .dump = cbq_dump_class,
2030 .dump_stats = cbq_dump_class_stats,
2031};
2032
Eric Dumazet20fea082007-11-14 01:44:41 -08002033static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002034 .next = NULL,
2035 .cl_ops = &cbq_class_ops,
2036 .id = "cbq",
2037 .priv_size = sizeof(struct cbq_sched_data),
2038 .enqueue = cbq_enqueue,
2039 .dequeue = cbq_dequeue,
Jarek Poplawski77be1552008-10-31 00:47:01 -07002040 .peek = qdisc_peek_dequeued,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002041 .drop = cbq_drop,
2042 .init = cbq_init,
2043 .reset = cbq_reset,
2044 .destroy = cbq_destroy,
2045 .change = NULL,
2046 .dump = cbq_dump,
2047 .dump_stats = cbq_dump_stats,
2048 .owner = THIS_MODULE,
2049};
2050
2051static int __init cbq_module_init(void)
2052{
2053 return register_qdisc(&cbq_qdisc_ops);
2054}
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +09002055static void __exit cbq_module_exit(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002056{
2057 unregister_qdisc(&cbq_qdisc_ops);
2058}
2059module_init(cbq_module_init)
2060module_exit(cbq_module_exit)
2061MODULE_LICENSE("GPL");