blob: 7f8474cdce329b4fea382cab83bc00ab0952e72a [file] [log] [blame]
/*
 * net/sched/sch_cbq.c	Class-Based Queueing discipline.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 */
12
Linus Torvalds1da177e2005-04-16 15:20:36 -070013#include <linux/module.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090014#include <linux/slab.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070015#include <linux/types.h>
16#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070017#include <linux/string.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <linux/errno.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070019#include <linux/skbuff.h>
Patrick McHardy0ba48052007-07-02 22:49:07 -070020#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070021#include <net/pkt_sched.h>
22
23
/*	Class-Based Queueing (CBQ) algorithm.
	=======================================

	Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
		 Management Models for Packet Networks",
		 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995

		 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995

		 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
		 Parameters", 1996

		 [4] Sally Floyd and Michael Speer, "Experimental Results
		 for Class-Based Queueing", 1998, not published.

	-----------------------------------------------------------------------

	The algorithm skeleton was taken from the NS simulator's cbq.cc.
	Anyone who wants to check this code against the LBL version
	should take into account that ONLY the skeleton was borrowed;
	the implementation is different. In particular:

	--- The WRR algorithm is different. Our version looks more
	reasonable (I hope) and works when quanta are allowed to be
	less than MTU, which is always the case when real time classes
	have small rates. Note that the statement of [3] is
	incomplete: delay may actually be estimated even if the class
	per-round allotment is less than MTU. Namely, if the per-round
	allotment is W*r_i, and r_1+...+r_k = r < 1

	delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B

	In the worst case we have the IntServ estimate with D = W*r+k*MTU
	and C = MTU*r. The proof (if correct at all) is trivial.


	--- It seems that cbq-2.0 is not very accurate. At least, I cannot
	interpret some places, which look like wrong translations
	from NS. Anyone is advised to find these differences
	and explain to me why I am wrong 8).

	--- Linux has no EOI event, so we cannot estimate true class
	idle time. The workaround is to consider the next dequeue event
	as a sign that the previous packet is finished. This is wrong because
	of internal device queueing, but on a permanently loaded link it is
	true. Moreover, combined with the clock integrator, this scheme looks
	very close to an ideal solution.  */
71
72struct cbq_sched_data;
73
74
Eric Dumazetcc7ec452011-01-19 19:26:56 +000075struct cbq_class {
Patrick McHardyd77fea22008-07-05 23:22:05 -070076 struct Qdisc_class_common common;
Linus Torvalds1da177e2005-04-16 15:20:36 -070077 struct cbq_class *next_alive; /* next class with backlog in this priority band */
78
79/* Parameters */
Linus Torvalds1da177e2005-04-16 15:20:36 -070080 unsigned char priority; /* class priority */
81 unsigned char priority2; /* priority to be used after overlimit */
82 unsigned char ewma_log; /* time constant for idle time calculation */
83 unsigned char ovl_strategy;
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -070084#ifdef CONFIG_NET_CLS_ACT
Linus Torvalds1da177e2005-04-16 15:20:36 -070085 unsigned char police;
86#endif
87
88 u32 defmap;
89
90 /* Link-sharing scheduler parameters */
91 long maxidle; /* Class parameters: see below. */
92 long offtime;
93 long minidle;
94 u32 avpkt;
95 struct qdisc_rate_table *R_tab;
96
97 /* Overlimit strategy parameters */
98 void (*overlimit)(struct cbq_class *cl);
Patrick McHardy1a13cb62007-03-16 01:22:20 -070099 psched_tdiff_t penalty;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700100
101 /* General scheduler (WRR) parameters */
102 long allot;
103 long quantum; /* Allotment per WRR round */
104 long weight; /* Relative allotment: see below */
105
106 struct Qdisc *qdisc; /* Ptr to CBQ discipline */
107 struct cbq_class *split; /* Ptr to split node */
108 struct cbq_class *share; /* Ptr to LS parent in the class tree */
109 struct cbq_class *tparent; /* Ptr to tree parent in the class tree */
110 struct cbq_class *borrow; /* NULL if class is bandwidth limited;
111 parent otherwise */
112 struct cbq_class *sibling; /* Sibling chain */
113 struct cbq_class *children; /* Pointer to children chain */
114
115 struct Qdisc *q; /* Elementary queueing discipline */
116
117
118/* Variables */
119 unsigned char cpriority; /* Effective priority */
120 unsigned char delayed;
121 unsigned char level; /* level of the class in hierarchy:
122 0 for leaf classes, and maximal
123 level of children + 1 for nodes.
124 */
125
126 psched_time_t last; /* Last end of service */
127 psched_time_t undertime;
128 long avgidle;
129 long deficit; /* Saved deficit for WRR */
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700130 psched_time_t penalized;
Eric Dumazetc1a8f1f2009-08-16 09:36:49 +0000131 struct gnet_stats_basic_packed bstats;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700132 struct gnet_stats_queue qstats;
Eric Dumazet45203a32013-06-06 08:43:22 -0700133 struct gnet_stats_rate_est64 rate_est;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700134 struct tc_cbq_xstats xstats;
135
John Fastabend25d8c0d2014-09-12 20:05:27 -0700136 struct tcf_proto __rcu *filter_list;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700137
138 int refcnt;
139 int filters;
140
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000141 struct cbq_class *defaults[TC_PRIO_MAX + 1];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700142};
143
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000144struct cbq_sched_data {
Patrick McHardyd77fea22008-07-05 23:22:05 -0700145 struct Qdisc_class_hash clhash; /* Hash table of all classes */
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000146 int nclasses[TC_CBQ_MAXPRIO + 1];
147 unsigned int quanta[TC_CBQ_MAXPRIO + 1];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700148
149 struct cbq_class link;
150
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000151 unsigned int activemask;
152 struct cbq_class *active[TC_CBQ_MAXPRIO + 1]; /* List of all classes
Linus Torvalds1da177e2005-04-16 15:20:36 -0700153 with backlog */
154
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -0700155#ifdef CONFIG_NET_CLS_ACT
Linus Torvalds1da177e2005-04-16 15:20:36 -0700156 struct cbq_class *rx_class;
157#endif
158 struct cbq_class *tx_class;
159 struct cbq_class *tx_borrowed;
160 int tx_len;
161 psched_time_t now; /* Cached timestamp */
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000162 unsigned int pmask;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700163
David S. Miller2fbd3da2009-09-01 17:59:25 -0700164 struct hrtimer delay_timer;
Patrick McHardy88a99352007-03-16 01:21:11 -0700165 struct qdisc_watchdog watchdog; /* Watchdog timer,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700166 started when CBQ has
167 backlog, but cannot
168 transmit just now */
Patrick McHardy88a99352007-03-16 01:21:11 -0700169 psched_tdiff_t wd_expires;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700170 int toplevel;
171 u32 hgenerator;
172};
173
174
/* Look up @len in the rate table of class @cl via qdisc_l2t(). */
#define L2T(cl, len)	qdisc_l2t((cl)->R_tab, (len))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700176
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000177static inline struct cbq_class *
Linus Torvalds1da177e2005-04-16 15:20:36 -0700178cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
179{
Patrick McHardyd77fea22008-07-05 23:22:05 -0700180 struct Qdisc_class_common *clc;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700181
Patrick McHardyd77fea22008-07-05 23:22:05 -0700182 clc = qdisc_class_find(&q->clhash, classid);
183 if (clc == NULL)
184 return NULL;
185 return container_of(clc, struct cbq_class, common);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700186}
187
#ifdef CONFIG_NET_CLS_ACT

/*
 * Walk up the tree parents of @this and return the first best-effort
 * default class that is set and is not @this itself.
 * Returns NULL when no ancestor provides one. @skb is not inspected.
 */
static struct cbq_class *
cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
{
	struct cbq_class *ancestor = this->tparent;

	while (ancestor) {
		struct cbq_class *candidate = ancestor->defaults[TC_PRIO_BESTEFFORT];

		if (candidate && candidate != this)
			return candidate;
		ancestor = ancestor->tparent;
	}
	return NULL;
}

#endif
205
206/* Classify packet. The procedure is pretty complicated, but
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000207 * it allows us to combine link sharing and priority scheduling
208 * transparently.
209 *
210 * Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
211 * so that it resolves to split nodes. Then packets are classified
212 * by logical priority, or a more specific classifier may be attached
213 * to the split node.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700214 */
215
216static struct cbq_class *
217cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
218{
219 struct cbq_sched_data *q = qdisc_priv(sch);
220 struct cbq_class *head = &q->link;
221 struct cbq_class **defmap;
222 struct cbq_class *cl = NULL;
223 u32 prio = skb->priority;
John Fastabend25d8c0d2014-09-12 20:05:27 -0700224 struct tcf_proto *fl;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700225 struct tcf_result res;
226
227 /*
228 * Step 1. If skb->priority points to one of our classes, use it.
229 */
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000230 if (TC_H_MAJ(prio ^ sch->handle) == 0 &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700231 (cl = cbq_class_lookup(q, prio)) != NULL)
232 return cl;
233
Jarek Poplawskic27f3392008-08-04 22:39:11 -0700234 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700235 for (;;) {
236 int result = 0;
237 defmap = head->defaults;
238
John Fastabend25d8c0d2014-09-12 20:05:27 -0700239 fl = rcu_dereference_bh(head->filter_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700240 /*
241 * Step 2+n. Apply classifier.
242 */
Daniel Borkmann3b3ae882015-08-26 23:00:06 +0200243 result = tc_classify(skb, fl, &res, true);
John Fastabend25d8c0d2014-09-12 20:05:27 -0700244 if (!fl || result < 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700245 goto fallback;
246
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000247 cl = (void *)res.class;
248 if (!cl) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700249 if (TC_H_MAJ(res.classid))
250 cl = cbq_class_lookup(q, res.classid);
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000251 else if ((cl = defmap[res.classid & TC_PRIO_MAX]) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700252 cl = defmap[TC_PRIO_BESTEFFORT];
253
Eric Dumazetbdfc87f2012-09-11 13:11:12 +0000254 if (cl == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700255 goto fallback;
256 }
Eric Dumazetbdfc87f2012-09-11 13:11:12 +0000257 if (cl->level >= head->level)
258 goto fallback;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700259#ifdef CONFIG_NET_CLS_ACT
260 switch (result) {
261 case TC_ACT_QUEUED:
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900262 case TC_ACT_STOLEN:
Jarek Poplawski378a2f02008-08-04 22:31:03 -0700263 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700264 case TC_ACT_SHOT:
265 return NULL;
Patrick McHardy73ca4912007-07-15 00:02:31 -0700266 case TC_ACT_RECLASSIFY:
267 return cbq_reclassify(skb, cl);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700268 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700269#endif
270 if (cl->level == 0)
271 return cl;
272
273 /*
274 * Step 3+n. If classifier selected a link sharing class,
275 * apply agency specific classifier.
276 * Repeat this procdure until we hit a leaf node.
277 */
278 head = cl;
279 }
280
281fallback:
282 cl = head;
283
284 /*
285 * Step 4. No success...
286 */
287 if (TC_H_MAJ(prio) == 0 &&
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000288 !(cl = head->defaults[prio & TC_PRIO_MAX]) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -0700289 !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
290 return head;
291
292 return cl;
293}
294
295/*
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000296 * A packet has just been enqueued on the empty class.
297 * cbq_activate_class adds it to the tail of active class list
298 * of its priority band.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700299 */
300
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000301static inline void cbq_activate_class(struct cbq_class *cl)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700302{
303 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
304 int prio = cl->cpriority;
305 struct cbq_class *cl_tail;
306
307 cl_tail = q->active[prio];
308 q->active[prio] = cl;
309
310 if (cl_tail != NULL) {
311 cl->next_alive = cl_tail->next_alive;
312 cl_tail->next_alive = cl;
313 } else {
314 cl->next_alive = cl;
315 q->activemask |= (1<<prio);
316 }
317}
318
319/*
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000320 * Unlink class from active chain.
321 * Note that this same procedure is done directly in cbq_dequeue*
322 * during round-robin procedure.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700323 */
324
325static void cbq_deactivate_class(struct cbq_class *this)
326{
327 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
328 int prio = this->cpriority;
329 struct cbq_class *cl;
330 struct cbq_class *cl_prev = q->active[prio];
331
332 do {
333 cl = cl_prev->next_alive;
334 if (cl == this) {
335 cl_prev->next_alive = cl->next_alive;
336 cl->next_alive = NULL;
337
338 if (cl == q->active[prio]) {
339 q->active[prio] = cl_prev;
340 if (cl == q->active[prio]) {
341 q->active[prio] = NULL;
342 q->activemask &= ~(1<<prio);
343 return;
344 }
345 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700346 return;
347 }
348 } while ((cl_prev = cl) != q->active[prio]);
349}
350
351static void
352cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
353{
354 int toplevel = q->toplevel;
355
Eric Dumazetfd245a42011-01-20 05:27:16 +0000356 if (toplevel > cl->level && !(qdisc_is_throttled(cl->q))) {
Vasily Averin7201c1d2014-08-14 12:27:59 +0400357 psched_time_t now = psched_get_time();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700358
359 do {
Patrick McHardy104e0872007-03-23 11:28:07 -0700360 if (cl->undertime < now) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700361 q->toplevel = cl->level;
362 return;
363 }
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000364 } while ((cl = cl->borrow) != NULL && toplevel > cl->level);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700365 }
366}
367
368static int
369cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
370{
371 struct cbq_sched_data *q = qdisc_priv(sch);
Satyam Sharmaddeee3c2007-09-16 14:54:05 -0700372 int uninitialized_var(ret);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700373 struct cbq_class *cl = cbq_classify(skb, sch, &ret);
374
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -0700375#ifdef CONFIG_NET_CLS_ACT
Linus Torvalds1da177e2005-04-16 15:20:36 -0700376 q->rx_class = cl;
377#endif
378 if (cl == NULL) {
Jarek Poplawskic27f3392008-08-04 22:39:11 -0700379 if (ret & __NET_XMIT_BYPASS)
John Fastabend25331d62014-09-28 11:53:29 -0700380 qdisc_qstats_drop(sch);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700381 kfree_skb(skb);
382 return ret;
383 }
384
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -0700385#ifdef CONFIG_NET_CLS_ACT
Linus Torvalds1da177e2005-04-16 15:20:36 -0700386 cl->q->__parent = sch;
387#endif
Jussi Kivilinna5f861732008-07-20 00:08:04 -0700388 ret = qdisc_enqueue(skb, cl->q);
389 if (ret == NET_XMIT_SUCCESS) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700390 sch->q.qlen++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700391 cbq_mark_toplevel(q, cl);
392 if (!cl->next_alive)
393 cbq_activate_class(cl);
394 return ret;
395 }
396
Jarek Poplawski378a2f02008-08-04 22:31:03 -0700397 if (net_xmit_drop_count(ret)) {
John Fastabend25331d62014-09-28 11:53:29 -0700398 qdisc_qstats_drop(sch);
Jarek Poplawski378a2f02008-08-04 22:31:03 -0700399 cbq_mark_toplevel(q, cl);
400 cl->qstats.drops++;
401 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700402 return ret;
403}
404
Linus Torvalds1da177e2005-04-16 15:20:36 -0700405/* Overlimit actions */
406
407/* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
408
409static void cbq_ovl_classic(struct cbq_class *cl)
410{
411 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
Patrick McHardy8edc0c32007-03-23 11:28:55 -0700412 psched_tdiff_t delay = cl->undertime - q->now;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700413
414 if (!cl->delayed) {
415 delay += cl->offtime;
416
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900417 /*
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000418 * Class goes to sleep, so that it will have no
419 * chance to work avgidle. Let's forgive it 8)
420 *
421 * BTW cbq-2.0 has a crap in this
422 * place, apparently they forgot to shift it by cl->ewma_log.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700423 */
424 if (cl->avgidle < 0)
425 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
426 if (cl->avgidle < cl->minidle)
427 cl->avgidle = cl->minidle;
428 if (delay <= 0)
429 delay = 1;
Patrick McHardy7c59e252007-03-23 11:27:45 -0700430 cl->undertime = q->now + delay;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700431
432 cl->xstats.overactions++;
433 cl->delayed = 1;
434 }
435 if (q->wd_expires == 0 || q->wd_expires > delay)
436 q->wd_expires = delay;
437
438 /* Dirty work! We must schedule wakeups based on
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000439 * real available rate, rather than leaf rate,
440 * which may be tiny (even zero).
Linus Torvalds1da177e2005-04-16 15:20:36 -0700441 */
442 if (q->toplevel == TC_CBQ_MAXLEVEL) {
443 struct cbq_class *b;
444 psched_tdiff_t base_delay = q->wd_expires;
445
446 for (b = cl->borrow; b; b = b->borrow) {
Patrick McHardy8edc0c32007-03-23 11:28:55 -0700447 delay = b->undertime - q->now;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700448 if (delay < base_delay) {
449 if (delay <= 0)
450 delay = 1;
451 base_delay = delay;
452 }
453 }
454
455 q->wd_expires = base_delay;
456 }
457}
458
459/* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000460 * they go overlimit
Linus Torvalds1da177e2005-04-16 15:20:36 -0700461 */
462
463static void cbq_ovl_rclassic(struct cbq_class *cl)
464{
465 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
466 struct cbq_class *this = cl;
467
468 do {
469 if (cl->level > q->toplevel) {
470 cl = NULL;
471 break;
472 }
473 } while ((cl = cl->borrow) != NULL);
474
475 if (cl == NULL)
476 cl = this;
477 cbq_ovl_classic(cl);
478}
479
480/* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */
481
482static void cbq_ovl_delay(struct cbq_class *cl)
483{
484 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
Patrick McHardy8edc0c32007-03-23 11:28:55 -0700485 psched_tdiff_t delay = cl->undertime - q->now;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700486
Jarek Poplawski2540e052008-08-21 05:11:14 -0700487 if (test_bit(__QDISC_STATE_DEACTIVATED,
488 &qdisc_root_sleeping(cl->qdisc)->state))
489 return;
490
Linus Torvalds1da177e2005-04-16 15:20:36 -0700491 if (!cl->delayed) {
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700492 psched_time_t sched = q->now;
493 ktime_t expires;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700494
495 delay += cl->offtime;
496 if (cl->avgidle < 0)
497 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
498 if (cl->avgidle < cl->minidle)
499 cl->avgidle = cl->minidle;
Patrick McHardy7c59e252007-03-23 11:27:45 -0700500 cl->undertime = q->now + delay;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700501
502 if (delay > 0) {
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700503 sched += delay + cl->penalty;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700504 cl->penalized = sched;
505 cl->cpriority = TC_CBQ_MAXPRIO;
506 q->pmask |= (1<<TC_CBQ_MAXPRIO);
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700507
Eric Dumazet46baac32012-10-20 00:40:51 +0000508 expires = ns_to_ktime(PSCHED_TICKS2NS(sched));
David S. Miller2fbd3da2009-09-01 17:59:25 -0700509 if (hrtimer_try_to_cancel(&q->delay_timer) &&
510 ktime_to_ns(ktime_sub(
511 hrtimer_get_expires(&q->delay_timer),
512 expires)) > 0)
513 hrtimer_set_expires(&q->delay_timer, expires);
514 hrtimer_restart(&q->delay_timer);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700515 cl->delayed = 1;
516 cl->xstats.overactions++;
517 return;
518 }
519 delay = 1;
520 }
521 if (q->wd_expires == 0 || q->wd_expires > delay)
522 q->wd_expires = delay;
523}
524
525/* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
526
527static void cbq_ovl_lowprio(struct cbq_class *cl)
528{
529 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
530
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700531 cl->penalized = q->now + cl->penalty;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700532
533 if (cl->cpriority != cl->priority2) {
534 cl->cpriority = cl->priority2;
535 q->pmask |= (1<<cl->cpriority);
536 cl->xstats.overactions++;
537 }
538 cbq_ovl_classic(cl);
539}
540
541/* TC_CBQ_OVL_DROP: penalize class by dropping */
542
543static void cbq_ovl_drop(struct cbq_class *cl)
544{
545 if (cl->q->ops->drop)
546 if (cl->q->ops->drop(cl->q))
547 cl->qdisc->q.qlen--;
548 cl->xstats.overactions++;
549 cbq_ovl_classic(cl);
550}
551
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700552static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
553 psched_time_t now)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700554{
555 struct cbq_class *cl;
556 struct cbq_class *cl_prev = q->active[prio];
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700557 psched_time_t sched = now;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700558
559 if (cl_prev == NULL)
Patrick McHardye9054a32007-03-16 01:21:40 -0700560 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700561
562 do {
563 cl = cl_prev->next_alive;
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700564 if (now - cl->penalized > 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700565 cl_prev->next_alive = cl->next_alive;
566 cl->next_alive = NULL;
567 cl->cpriority = cl->priority;
568 cl->delayed = 0;
569 cbq_activate_class(cl);
570
571 if (cl == q->active[prio]) {
572 q->active[prio] = cl_prev;
573 if (cl == q->active[prio]) {
574 q->active[prio] = NULL;
575 return 0;
576 }
577 }
578
579 cl = cl_prev->next_alive;
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700580 } else if (sched - cl->penalized > 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700581 sched = cl->penalized;
582 } while ((cl_prev = cl) != q->active[prio]);
583
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700584 return sched - now;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700585}
586
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700587static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700588{
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700589 struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
David S. Miller2fbd3da2009-09-01 17:59:25 -0700590 delay_timer);
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700591 struct Qdisc *sch = q->watchdog.qdisc;
592 psched_time_t now;
593 psched_tdiff_t delay = 0;
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000594 unsigned int pmask;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700595
Patrick McHardy3bebcda2007-03-23 11:29:25 -0700596 now = psched_get_time();
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700597
Linus Torvalds1da177e2005-04-16 15:20:36 -0700598 pmask = q->pmask;
599 q->pmask = 0;
600
601 while (pmask) {
602 int prio = ffz(~pmask);
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700603 psched_tdiff_t tmp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700604
605 pmask &= ~(1<<prio);
606
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700607 tmp = cbq_undelay_prio(q, prio, now);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700608 if (tmp > 0) {
609 q->pmask |= 1<<prio;
610 if (tmp < delay || delay == 0)
611 delay = tmp;
612 }
613 }
614
615 if (delay) {
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700616 ktime_t time;
617
618 time = ktime_set(0, 0);
Jarek Poplawskica44d6e2009-06-15 02:31:47 -0700619 time = ktime_add_ns(time, PSCHED_TICKS2NS(now + delay));
Eric Dumazet4a8e3202014-09-20 18:01:30 -0700620 hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS_PINNED);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700621 }
622
Eric Dumazetfd245a42011-01-20 05:27:16 +0000623 qdisc_unthrottled(sch);
David S. Miller8608db02008-08-18 20:51:18 -0700624 __netif_schedule(qdisc_root(sch));
Patrick McHardy1a13cb62007-03-16 01:22:20 -0700625 return HRTIMER_NORESTART;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700626}
627
#ifdef CONFIG_NET_CLS_ACT
/*
 * Try to requeue @skb, which @child could not take, on the class that
 * cbq_reclassify() picks for the class recorded in q->rx_class.
 *
 * Returns 0 when a new class was found (whatever its enqueue result),
 * -1 when there was no class to fall back to.
 */
static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
{
	struct Qdisc *sch = child->__parent;
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = q->rx_class;
	int ret;

	q->rx_class = NULL;

	if (cl != NULL)
		cl = cbq_reclassify(skb, cl);
	if (cl == NULL) {
		qdisc_qstats_drop(sch);
		return -1;
	}

	cbq_mark_toplevel(q, cl);

	q->rx_class = cl;
	cl->q->__parent = sch;

	ret = qdisc_enqueue(skb, cl->q);
	if (ret == NET_XMIT_SUCCESS) {
		sch->q.qlen++;
		if (!cl->next_alive)
			cbq_activate_class(cl);
	} else if (net_xmit_drop_count(ret)) {
		qdisc_qstats_drop(sch);
	}
	return 0;
}
#endif
661
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900662/*
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000663 * It is mission critical procedure.
664 *
665 * We "regenerate" toplevel cutoff, if transmitting class
666 * has backlog and it is not regulated. It is not part of
667 * original CBQ description, but looks more reasonable.
668 * Probably, it is wrong. This question needs further investigation.
669 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700670
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000671static inline void
Linus Torvalds1da177e2005-04-16 15:20:36 -0700672cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
673 struct cbq_class *borrowed)
674{
675 if (cl && q->toplevel >= borrowed->level) {
676 if (cl->q->q.qlen > 1) {
677 do {
Patrick McHardya0849802007-03-23 11:28:30 -0700678 if (borrowed->undertime == PSCHED_PASTPERFECT) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700679 q->toplevel = borrowed->level;
680 return;
681 }
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000682 } while ((borrowed = borrowed->borrow) != NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700683 }
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +0900684#if 0
Linus Torvalds1da177e2005-04-16 15:20:36 -0700685 /* It is not necessary now. Uncommenting it
686 will save CPU cycles, but decrease fairness.
687 */
688 q->toplevel = TC_CBQ_MAXLEVEL;
689#endif
690 }
691}
692
693static void
694cbq_update(struct cbq_sched_data *q)
695{
696 struct cbq_class *this = q->tx_class;
697 struct cbq_class *cl = this;
698 int len = q->tx_len;
Vasily Averin73d0f372014-08-14 12:27:47 +0400699 psched_time_t now;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700700
701 q->tx_class = NULL;
Vasily Averin73d0f372014-08-14 12:27:47 +0400702 /* Time integrator. We calculate EOS time
703 * by adding expected packet transmission time.
704 */
705 now = q->now + L2T(&q->link, len);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700706
707 for ( ; cl; cl = cl->share) {
708 long avgidle = cl->avgidle;
709 long idle;
710
711 cl->bstats.packets++;
712 cl->bstats.bytes += len;
713
714 /*
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000715 * (now - last) is total time between packet right edges.
716 * (last_pktlen/rate) is "virtual" busy time, so that
717 *
718 * idle = (now - last) - last_pktlen/rate
Linus Torvalds1da177e2005-04-16 15:20:36 -0700719 */
720
Vasily Averin73d0f372014-08-14 12:27:47 +0400721 idle = now - cl->last;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700722 if ((unsigned long)idle > 128*1024*1024) {
723 avgidle = cl->maxidle;
724 } else {
725 idle -= L2T(cl, len);
726
727 /* true_avgidle := (1-W)*true_avgidle + W*idle,
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000728 * where W=2^{-ewma_log}. But cl->avgidle is scaled:
729 * cl->avgidle == true_avgidle/W,
730 * hence:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700731 */
732 avgidle += idle - (avgidle>>cl->ewma_log);
733 }
734
735 if (avgidle <= 0) {
736 /* Overlimit or at-limit */
737
738 if (avgidle < cl->minidle)
739 avgidle = cl->minidle;
740
741 cl->avgidle = avgidle;
742
743 /* Calculate expected time, when this class
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000744 * will be allowed to send.
745 * It will occur, when:
746 * (1-W)*true_avgidle + W*delay = 0, i.e.
747 * idle = (1/W - 1)*(-true_avgidle)
748 * or
749 * idle = (1 - W)*(-cl->avgidle);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700750 */
751 idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
752
753 /*
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000754 * That is not all.
755 * To maintain the rate allocated to the class,
756 * we add to undertime virtual clock,
757 * necessary to complete transmitted packet.
758 * (len/phys_bandwidth has been already passed
759 * to the moment of cbq_update)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700760 */
761
762 idle -= L2T(&q->link, len);
763 idle += L2T(cl, len);
764
Vasily Averin73d0f372014-08-14 12:27:47 +0400765 cl->undertime = now + idle;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700766 } else {
767 /* Underlimit */
768
Patrick McHardya0849802007-03-23 11:28:30 -0700769 cl->undertime = PSCHED_PASTPERFECT;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700770 if (avgidle > cl->maxidle)
771 cl->avgidle = cl->maxidle;
772 else
773 cl->avgidle = avgidle;
774 }
Vasily Averin73d0f372014-08-14 12:27:47 +0400775 if ((s64)(now - cl->last) > 0)
776 cl->last = now;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700777 }
778
779 cbq_update_toplevel(q, this, q->tx_borrowed);
780}
781
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000782static inline struct cbq_class *
Linus Torvalds1da177e2005-04-16 15:20:36 -0700783cbq_under_limit(struct cbq_class *cl)
784{
785 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
786 struct cbq_class *this_cl = cl;
787
788 if (cl->tparent == NULL)
789 return cl;
790
Patrick McHardya0849802007-03-23 11:28:30 -0700791 if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700792 cl->delayed = 0;
793 return cl;
794 }
795
796 do {
797 /* It is very suspicious place. Now overlimit
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000798 * action is generated for not bounded classes
799 * only if link is completely congested.
800 * Though it is in agree with ancestor-only paradigm,
801 * it looks very stupid. Particularly,
802 * it means that this chunk of code will either
803 * never be called or result in strong amplification
804 * of burstiness. Dangerous, silly, and, however,
805 * no another solution exists.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700806 */
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000807 cl = cl->borrow;
808 if (!cl) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700809 this_cl->qstats.overlimits++;
810 this_cl->overlimit(this_cl);
811 return NULL;
812 }
813 if (cl->level > q->toplevel)
814 return NULL;
Patrick McHardya0849802007-03-23 11:28:30 -0700815 } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700816
817 cl->delayed = 0;
818 return cl;
819}
820
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000821static inline struct sk_buff *
Linus Torvalds1da177e2005-04-16 15:20:36 -0700822cbq_dequeue_prio(struct Qdisc *sch, int prio)
823{
824 struct cbq_sched_data *q = qdisc_priv(sch);
825 struct cbq_class *cl_tail, *cl_prev, *cl;
826 struct sk_buff *skb;
827 int deficit;
828
829 cl_tail = cl_prev = q->active[prio];
830 cl = cl_prev->next_alive;
831
832 do {
833 deficit = 0;
834
835 /* Start round */
836 do {
837 struct cbq_class *borrow = cl;
838
839 if (cl->q->q.qlen &&
840 (borrow = cbq_under_limit(cl)) == NULL)
841 goto skip_class;
842
843 if (cl->deficit <= 0) {
844 /* Class exhausted its allotment per
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000845 * this round. Switch to the next one.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700846 */
847 deficit = 1;
848 cl->deficit += cl->quantum;
849 goto next_class;
850 }
851
852 skb = cl->q->dequeue(cl->q);
853
854 /* Class did not give us any skb :-(
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000855 * It could occur even if cl->q->q.qlen != 0
856 * f.e. if cl->q == "tbf"
Linus Torvalds1da177e2005-04-16 15:20:36 -0700857 */
858 if (skb == NULL)
859 goto skip_class;
860
Jussi Kivilinna0abf77e2008-07-20 00:08:27 -0700861 cl->deficit -= qdisc_pkt_len(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700862 q->tx_class = cl;
863 q->tx_borrowed = borrow;
864 if (borrow != cl) {
865#ifndef CBQ_XSTATS_BORROWS_BYTES
866 borrow->xstats.borrows++;
867 cl->xstats.borrows++;
868#else
Jussi Kivilinna0abf77e2008-07-20 00:08:27 -0700869 borrow->xstats.borrows += qdisc_pkt_len(skb);
870 cl->xstats.borrows += qdisc_pkt_len(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700871#endif
872 }
Jussi Kivilinna0abf77e2008-07-20 00:08:27 -0700873 q->tx_len = qdisc_pkt_len(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700874
875 if (cl->deficit <= 0) {
876 q->active[prio] = cl;
877 cl = cl->next_alive;
878 cl->deficit += cl->quantum;
879 }
880 return skb;
881
882skip_class:
883 if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
884 /* Class is empty or penalized.
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000885 * Unlink it from active chain.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700886 */
887 cl_prev->next_alive = cl->next_alive;
888 cl->next_alive = NULL;
889
890 /* Did cl_tail point to it? */
891 if (cl == cl_tail) {
892 /* Repair it! */
893 cl_tail = cl_prev;
894
895 /* Was it the last class in this band? */
896 if (cl == cl_tail) {
897 /* Kill the band! */
898 q->active[prio] = NULL;
899 q->activemask &= ~(1<<prio);
900 if (cl->q->q.qlen)
901 cbq_activate_class(cl);
902 return NULL;
903 }
904
905 q->active[prio] = cl_tail;
906 }
907 if (cl->q->q.qlen)
908 cbq_activate_class(cl);
909
910 cl = cl_prev;
911 }
912
913next_class:
914 cl_prev = cl;
915 cl = cl->next_alive;
916 } while (cl_prev != cl_tail);
917 } while (deficit);
918
919 q->active[prio] = cl_prev;
920
921 return NULL;
922}
923
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000924static inline struct sk_buff *
Linus Torvalds1da177e2005-04-16 15:20:36 -0700925cbq_dequeue_1(struct Qdisc *sch)
926{
927 struct cbq_sched_data *q = qdisc_priv(sch);
928 struct sk_buff *skb;
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000929 unsigned int activemask;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700930
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000931 activemask = q->activemask & 0xFF;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700932 while (activemask) {
933 int prio = ffz(~activemask);
934 activemask &= ~(1<<prio);
935 skb = cbq_dequeue_prio(sch, prio);
936 if (skb)
937 return skb;
938 }
939 return NULL;
940}
941
942static struct sk_buff *
943cbq_dequeue(struct Qdisc *sch)
944{
945 struct sk_buff *skb;
946 struct cbq_sched_data *q = qdisc_priv(sch);
947 psched_time_t now;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700948
Patrick McHardy3bebcda2007-03-23 11:29:25 -0700949 now = psched_get_time();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700950
Vasily Averin73d0f372014-08-14 12:27:47 +0400951 if (q->tx_class)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700952 cbq_update(q);
Vasily Averin73d0f372014-08-14 12:27:47 +0400953
954 q->now = now;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700955
956 for (;;) {
957 q->wd_expires = 0;
958
959 skb = cbq_dequeue_1(sch);
960 if (skb) {
Eric Dumazet9190b3b2011-01-20 23:31:33 -0800961 qdisc_bstats_update(sch, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700962 sch->q.qlen--;
Eric Dumazetfd245a42011-01-20 05:27:16 +0000963 qdisc_unthrottled(sch);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700964 return skb;
965 }
966
967 /* All the classes are overlimit.
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000968 *
969 * It is possible, if:
970 *
971 * 1. Scheduler is empty.
972 * 2. Toplevel cutoff inhibited borrowing.
973 * 3. Root class is overlimit.
974 *
975 * Reset 2d and 3d conditions and retry.
976 *
977 * Note, that NS and cbq-2.0 are buggy, peeking
978 * an arbitrary class is appropriate for ancestor-only
979 * sharing, but not for toplevel algorithm.
980 *
981 * Our version is better, but slower, because it requires
982 * two passes, but it is unavoidable with top-level sharing.
983 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700984
985 if (q->toplevel == TC_CBQ_MAXLEVEL &&
Patrick McHardya0849802007-03-23 11:28:30 -0700986 q->link.undertime == PSCHED_PASTPERFECT)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700987 break;
988
989 q->toplevel = TC_CBQ_MAXLEVEL;
Patrick McHardya0849802007-03-23 11:28:30 -0700990 q->link.undertime = PSCHED_PASTPERFECT;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700991 }
992
993 /* No packets in scheduler or nobody wants to give them to us :-(
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000994 * Sigh... start watchdog timer in the last case.
995 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700996
997 if (sch->q.qlen) {
John Fastabend25331d62014-09-28 11:53:29 -0700998 qdisc_qstats_overlimit(sch);
Patrick McHardy88a99352007-03-16 01:21:11 -0700999 if (q->wd_expires)
1000 qdisc_watchdog_schedule(&q->watchdog,
Patrick McHardybb239ac2007-03-16 12:31:28 -07001001 now + q->wd_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001002 }
1003 return NULL;
1004}
1005
/* CBQ class maintenance routines */
1007
1008static void cbq_adjust_levels(struct cbq_class *this)
1009{
1010 if (this == NULL)
1011 return;
1012
1013 do {
1014 int level = 0;
1015 struct cbq_class *cl;
1016
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001017 cl = this->children;
1018 if (cl) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001019 do {
1020 if (cl->level > level)
1021 level = cl->level;
1022 } while ((cl = cl->sibling) != this->children);
1023 }
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001024 this->level = level + 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001025 } while ((this = this->tparent) != NULL);
1026}
1027
1028static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1029{
1030 struct cbq_class *cl;
Patrick McHardyd77fea22008-07-05 23:22:05 -07001031 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001032
1033 if (q->quanta[prio] == 0)
1034 return;
1035
Patrick McHardyd77fea22008-07-05 23:22:05 -07001036 for (h = 0; h < q->clhash.hashsize; h++) {
Sasha Levinb67bfe02013-02-27 17:06:00 -08001037 hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001038 /* BUGGGG... Beware! This expression suffer of
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001039 * arithmetic overflows!
Linus Torvalds1da177e2005-04-16 15:20:36 -07001040 */
1041 if (cl->priority == prio) {
1042 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1043 q->quanta[prio];
1044 }
Yang Yingliang833fa742013-12-10 20:55:32 +08001045 if (cl->quantum <= 0 ||
1046 cl->quantum > 32*qdisc_dev(cl->qdisc)->mtu) {
Yang Yingliangc17988a2013-12-23 17:38:58 +08001047 pr_warn("CBQ: class %08x has bad quantum==%ld, repaired.\n",
1048 cl->common.classid, cl->quantum);
David S. Miller5ce2d482008-07-08 17:06:30 -07001049 cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001050 }
1051 }
1052 }
1053}
1054
1055static void cbq_sync_defmap(struct cbq_class *cl)
1056{
1057 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1058 struct cbq_class *split = cl->split;
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001059 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001060 int i;
1061
1062 if (split == NULL)
1063 return;
1064
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001065 for (i = 0; i <= TC_PRIO_MAX; i++) {
1066 if (split->defaults[i] == cl && !(cl->defmap & (1<<i)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001067 split->defaults[i] = NULL;
1068 }
1069
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001070 for (i = 0; i <= TC_PRIO_MAX; i++) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001071 int level = split->level;
1072
1073 if (split->defaults[i])
1074 continue;
1075
Patrick McHardyd77fea22008-07-05 23:22:05 -07001076 for (h = 0; h < q->clhash.hashsize; h++) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001077 struct cbq_class *c;
1078
Sasha Levinb67bfe02013-02-27 17:06:00 -08001079 hlist_for_each_entry(c, &q->clhash.hash[h],
Patrick McHardyd77fea22008-07-05 23:22:05 -07001080 common.hnode) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001081 if (c->split == split && c->level < level &&
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001082 c->defmap & (1<<i)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001083 split->defaults[i] = c;
1084 level = c->level;
1085 }
1086 }
1087 }
1088 }
1089}
1090
1091static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1092{
1093 struct cbq_class *split = NULL;
1094
1095 if (splitid == 0) {
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001096 split = cl->split;
1097 if (!split)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001098 return;
Patrick McHardyd77fea22008-07-05 23:22:05 -07001099 splitid = split->common.classid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001100 }
1101
Patrick McHardyd77fea22008-07-05 23:22:05 -07001102 if (split == NULL || split->common.classid != splitid) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001103 for (split = cl->tparent; split; split = split->tparent)
Patrick McHardyd77fea22008-07-05 23:22:05 -07001104 if (split->common.classid == splitid)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001105 break;
1106 }
1107
1108 if (split == NULL)
1109 return;
1110
1111 if (cl->split != split) {
1112 cl->defmap = 0;
1113 cbq_sync_defmap(cl);
1114 cl->split = split;
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001115 cl->defmap = def & mask;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001116 } else
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001117 cl->defmap = (cl->defmap & ~mask) | (def & mask);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001118
1119 cbq_sync_defmap(cl);
1120}
1121
1122static void cbq_unlink_class(struct cbq_class *this)
1123{
1124 struct cbq_class *cl, **clp;
1125 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1126
Patrick McHardyd77fea22008-07-05 23:22:05 -07001127 qdisc_class_hash_remove(&q->clhash, &this->common);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001128
1129 if (this->tparent) {
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001130 clp = &this->sibling;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001131 cl = *clp;
1132 do {
1133 if (cl == this) {
1134 *clp = cl->sibling;
1135 break;
1136 }
1137 clp = &cl->sibling;
1138 } while ((cl = *clp) != this->sibling);
1139
1140 if (this->tparent->children == this) {
1141 this->tparent->children = this->sibling;
1142 if (this->sibling == this)
1143 this->tparent->children = NULL;
1144 }
1145 } else {
Ilpo Järvinen547b7922008-07-25 21:43:18 -07001146 WARN_ON(this->sibling != this);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001147 }
1148}
1149
1150static void cbq_link_class(struct cbq_class *this)
1151{
1152 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001153 struct cbq_class *parent = this->tparent;
1154
1155 this->sibling = this;
Patrick McHardyd77fea22008-07-05 23:22:05 -07001156 qdisc_class_hash_insert(&q->clhash, &this->common);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001157
1158 if (parent == NULL)
1159 return;
1160
1161 if (parent->children == NULL) {
1162 parent->children = this;
1163 } else {
1164 this->sibling = parent->children->sibling;
1165 parent->children->sibling = this;
1166 }
1167}
1168
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001169static unsigned int cbq_drop(struct Qdisc *sch)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001170{
1171 struct cbq_sched_data *q = qdisc_priv(sch);
1172 struct cbq_class *cl, *cl_head;
1173 int prio;
1174 unsigned int len;
1175
1176 for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001177 cl_head = q->active[prio];
1178 if (!cl_head)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001179 continue;
1180
1181 cl = cl_head;
1182 do {
1183 if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1184 sch->q.qlen--;
Jarek Poplawskia37ef2e2006-12-08 00:25:55 -08001185 if (!cl->q->q.qlen)
1186 cbq_deactivate_class(cl);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001187 return len;
1188 }
1189 } while ((cl = cl->next_alive) != cl_head);
1190 }
1191 return 0;
1192}
1193
1194static void
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001195cbq_reset(struct Qdisc *sch)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001196{
1197 struct cbq_sched_data *q = qdisc_priv(sch);
1198 struct cbq_class *cl;
1199 int prio;
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001200 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001201
1202 q->activemask = 0;
1203 q->pmask = 0;
1204 q->tx_class = NULL;
1205 q->tx_borrowed = NULL;
Patrick McHardy88a99352007-03-16 01:21:11 -07001206 qdisc_watchdog_cancel(&q->watchdog);
David S. Miller2fbd3da2009-09-01 17:59:25 -07001207 hrtimer_cancel(&q->delay_timer);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001208 q->toplevel = TC_CBQ_MAXLEVEL;
Patrick McHardy3bebcda2007-03-23 11:29:25 -07001209 q->now = psched_get_time();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001210
1211 for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1212 q->active[prio] = NULL;
1213
Patrick McHardyd77fea22008-07-05 23:22:05 -07001214 for (h = 0; h < q->clhash.hashsize; h++) {
Sasha Levinb67bfe02013-02-27 17:06:00 -08001215 hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001216 qdisc_reset(cl->q);
1217
1218 cl->next_alive = NULL;
Patrick McHardya0849802007-03-23 11:28:30 -07001219 cl->undertime = PSCHED_PASTPERFECT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001220 cl->avgidle = cl->maxidle;
1221 cl->deficit = cl->quantum;
1222 cl->cpriority = cl->priority;
1223 }
1224 }
1225 sch->q.qlen = 0;
1226}
1227
1228
1229static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1230{
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001231 if (lss->change & TCF_CBQ_LSS_FLAGS) {
1232 cl->share = (lss->flags & TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1233 cl->borrow = (lss->flags & TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001234 }
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001235 if (lss->change & TCF_CBQ_LSS_EWMA)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001236 cl->ewma_log = lss->ewma_log;
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001237 if (lss->change & TCF_CBQ_LSS_AVPKT)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001238 cl->avpkt = lss->avpkt;
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001239 if (lss->change & TCF_CBQ_LSS_MINIDLE)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001240 cl->minidle = -(long)lss->minidle;
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001241 if (lss->change & TCF_CBQ_LSS_MAXIDLE) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001242 cl->maxidle = lss->maxidle;
1243 cl->avgidle = lss->maxidle;
1244 }
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001245 if (lss->change & TCF_CBQ_LSS_OFFTIME)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001246 cl->offtime = lss->offtime;
1247 return 0;
1248}
1249
1250static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1251{
1252 q->nclasses[cl->priority]--;
1253 q->quanta[cl->priority] -= cl->weight;
1254 cbq_normalize_quanta(q, cl->priority);
1255}
1256
1257static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1258{
1259 q->nclasses[cl->priority]++;
1260 q->quanta[cl->priority] += cl->weight;
1261 cbq_normalize_quanta(q, cl->priority);
1262}
1263
1264static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1265{
1266 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1267
1268 if (wrr->allot)
1269 cl->allot = wrr->allot;
1270 if (wrr->weight)
1271 cl->weight = wrr->weight;
1272 if (wrr->priority) {
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001273 cl->priority = wrr->priority - 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001274 cl->cpriority = cl->priority;
1275 if (cl->priority >= cl->priority2)
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001276 cl->priority2 = TC_CBQ_MAXPRIO - 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001277 }
1278
1279 cbq_addprio(q, cl);
1280 return 0;
1281}
1282
1283static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1284{
1285 switch (ovl->strategy) {
1286 case TC_CBQ_OVL_CLASSIC:
1287 cl->overlimit = cbq_ovl_classic;
1288 break;
1289 case TC_CBQ_OVL_DELAY:
1290 cl->overlimit = cbq_ovl_delay;
1291 break;
1292 case TC_CBQ_OVL_LOWPRIO:
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001293 if (ovl->priority2 - 1 >= TC_CBQ_MAXPRIO ||
1294 ovl->priority2 - 1 <= cl->priority)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001295 return -EINVAL;
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001296 cl->priority2 = ovl->priority2 - 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001297 cl->overlimit = cbq_ovl_lowprio;
1298 break;
1299 case TC_CBQ_OVL_DROP:
1300 cl->overlimit = cbq_ovl_drop;
1301 break;
1302 case TC_CBQ_OVL_RCLASSIC:
1303 cl->overlimit = cbq_ovl_rclassic;
1304 break;
1305 default:
1306 return -EINVAL;
1307 }
Patrick McHardy1a13cb62007-03-16 01:22:20 -07001308 cl->penalty = ovl->penalty;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001309 return 0;
1310}
1311
#ifdef CONFIG_NET_CLS_ACT
/*
 * Record the policing mode of @cl.  If the child qdisc has a non-zero
 * handle, its ->reshape_fail hook is set to cbq_reshape_fail for
 * TC_POLICE_RECLASSIFY and cleared for every other mode.
 * Always returns 0.
 */
static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
{
	cl->police = p->police;

	if (!cl->q->handle)
		return 0;

	cl->q->reshape_fail = (p->police == TC_POLICE_RECLASSIFY) ?
			      cbq_reshape_fail : NULL;
	return 0;
}
#endif
1326
1327static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1328{
1329 cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1330 return 0;
1331}
1332
Patrick McHardy27a34212008-01-23 20:35:39 -08001333static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
1334 [TCA_CBQ_LSSOPT] = { .len = sizeof(struct tc_cbq_lssopt) },
1335 [TCA_CBQ_WRROPT] = { .len = sizeof(struct tc_cbq_wrropt) },
1336 [TCA_CBQ_FOPT] = { .len = sizeof(struct tc_cbq_fopt) },
1337 [TCA_CBQ_OVL_STRATEGY] = { .len = sizeof(struct tc_cbq_ovl) },
1338 [TCA_CBQ_RATE] = { .len = sizeof(struct tc_ratespec) },
1339 [TCA_CBQ_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1340 [TCA_CBQ_POLICE] = { .len = sizeof(struct tc_cbq_police) },
1341};
1342
Patrick McHardy1e904742008-01-22 22:11:17 -08001343static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001344{
1345 struct cbq_sched_data *q = qdisc_priv(sch);
Patrick McHardy1e904742008-01-22 22:11:17 -08001346 struct nlattr *tb[TCA_CBQ_MAX + 1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001347 struct tc_ratespec *r;
Patrick McHardycee63722008-01-23 20:33:32 -08001348 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001349
Patrick McHardy27a34212008-01-23 20:35:39 -08001350 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
Patrick McHardycee63722008-01-23 20:33:32 -08001351 if (err < 0)
1352 return err;
1353
Patrick McHardy27a34212008-01-23 20:35:39 -08001354 if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001355 return -EINVAL;
1356
Patrick McHardy1e904742008-01-22 22:11:17 -08001357 r = nla_data(tb[TCA_CBQ_RATE]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001358
Patrick McHardy1e904742008-01-22 22:11:17 -08001359 if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001360 return -EINVAL;
1361
Patrick McHardyd77fea22008-07-05 23:22:05 -07001362 err = qdisc_class_hash_init(&q->clhash);
1363 if (err < 0)
1364 goto put_rtab;
1365
Linus Torvalds1da177e2005-04-16 15:20:36 -07001366 q->link.refcnt = 1;
1367 q->link.sibling = &q->link;
Patrick McHardyd77fea22008-07-05 23:22:05 -07001368 q->link.common.classid = sch->handle;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001369 q->link.qdisc = sch;
Changli Gao3511c912010-10-16 13:04:08 +00001370 q->link.q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1371 sch->handle);
1372 if (!q->link.q)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001373 q->link.q = &noop_qdisc;
1374
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001375 q->link.priority = TC_CBQ_MAXPRIO - 1;
1376 q->link.priority2 = TC_CBQ_MAXPRIO - 1;
1377 q->link.cpriority = TC_CBQ_MAXPRIO - 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001378 q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1379 q->link.overlimit = cbq_ovl_classic;
David S. Miller5ce2d482008-07-08 17:06:30 -07001380 q->link.allot = psched_mtu(qdisc_dev(sch));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001381 q->link.quantum = q->link.allot;
1382 q->link.weight = q->link.R_tab->rate.rate;
1383
1384 q->link.ewma_log = TC_CBQ_DEF_EWMA;
1385 q->link.avpkt = q->link.allot/2;
1386 q->link.minidle = -0x7FFFFFFF;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001387
Patrick McHardy88a99352007-03-16 01:21:11 -07001388 qdisc_watchdog_init(&q->watchdog, sch);
Eric Dumazet4a8e3202014-09-20 18:01:30 -07001389 hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001390 q->delay_timer.function = cbq_undelay;
1391 q->toplevel = TC_CBQ_MAXLEVEL;
Patrick McHardy3bebcda2007-03-23 11:29:25 -07001392 q->now = psched_get_time();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001393
1394 cbq_link_class(&q->link);
1395
Patrick McHardy1e904742008-01-22 22:11:17 -08001396 if (tb[TCA_CBQ_LSSOPT])
1397 cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001398
1399 cbq_addprio(q, &q->link);
1400 return 0;
Patrick McHardyd77fea22008-07-05 23:22:05 -07001401
1402put_rtab:
1403 qdisc_put_rtab(q->link.R_tab);
1404 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001405}
1406
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001407static int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001408{
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001409 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001410
David S. Miller1b34ec42012-03-29 05:11:39 -04001411 if (nla_put(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate))
1412 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001413 return skb->len;
1414
Patrick McHardy1e904742008-01-22 22:11:17 -08001415nla_put_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001416 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001417 return -1;
1418}
1419
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001420static int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001421{
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001422 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001423 struct tc_cbq_lssopt opt;
1424
1425 opt.flags = 0;
1426 if (cl->borrow == NULL)
1427 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1428 if (cl->share == NULL)
1429 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1430 opt.ewma_log = cl->ewma_log;
1431 opt.level = cl->level;
1432 opt.avpkt = cl->avpkt;
1433 opt.maxidle = cl->maxidle;
1434 opt.minidle = (u32)(-cl->minidle);
1435 opt.offtime = cl->offtime;
1436 opt.change = ~0;
David S. Miller1b34ec42012-03-29 05:11:39 -04001437 if (nla_put(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt))
1438 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001439 return skb->len;
1440
Patrick McHardy1e904742008-01-22 22:11:17 -08001441nla_put_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001442 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001443 return -1;
1444}
1445
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001446static int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001447{
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001448 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001449 struct tc_cbq_wrropt opt;
1450
David S. Millera0db8562013-07-30 00:16:21 -07001451 memset(&opt, 0, sizeof(opt));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001452 opt.flags = 0;
1453 opt.allot = cl->allot;
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001454 opt.priority = cl->priority + 1;
1455 opt.cpriority = cl->cpriority + 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001456 opt.weight = cl->weight;
David S. Miller1b34ec42012-03-29 05:11:39 -04001457 if (nla_put(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt))
1458 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001459 return skb->len;
1460
Patrick McHardy1e904742008-01-22 22:11:17 -08001461nla_put_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001462 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001463 return -1;
1464}
1465
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001466static int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001467{
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001468 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001469 struct tc_cbq_ovl opt;
1470
1471 opt.strategy = cl->ovl_strategy;
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001472 opt.priority2 = cl->priority2 + 1;
Patrick McHardy8a470772005-06-28 12:56:45 -07001473 opt.pad = 0;
Patrick McHardy1a13cb62007-03-16 01:22:20 -07001474 opt.penalty = cl->penalty;
David S. Miller1b34ec42012-03-29 05:11:39 -04001475 if (nla_put(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt))
1476 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001477 return skb->len;
1478
Patrick McHardy1e904742008-01-22 22:11:17 -08001479nla_put_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001480 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001481 return -1;
1482}
1483
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001484static int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001485{
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001486 unsigned char *b = skb_tail_pointer(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001487 struct tc_cbq_fopt opt;
1488
1489 if (cl->split || cl->defmap) {
Patrick McHardyd77fea22008-07-05 23:22:05 -07001490 opt.split = cl->split ? cl->split->common.classid : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001491 opt.defmap = cl->defmap;
1492 opt.defchange = ~0;
David S. Miller1b34ec42012-03-29 05:11:39 -04001493 if (nla_put(skb, TCA_CBQ_FOPT, sizeof(opt), &opt))
1494 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001495 }
1496 return skb->len;
1497
Patrick McHardy1e904742008-01-22 22:11:17 -08001498nla_put_failure:
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -07001499 nlmsg_trim(skb, b);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001500 return -1;
1501}
1502
#ifdef CONFIG_NET_CLS_ACT
/*
 * Append the policing mode of @cl as a TCA_CBQ_POLICE attribute.  Nothing
 * is emitted when the mode is zero.  The reserved fields are sent as zero.
 * Returns the (possibly unchanged) skb length, or -1 after trimming the
 * message back to where it was on entry.
 */
static int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
{
	unsigned char *mark = skb_tail_pointer(skb);
	struct tc_cbq_police opt;

	if (!cl->police)
		return skb->len;

	opt.police = cl->police;
	opt.__res1 = 0;
	opt.__res2 = 0;

	if (nla_put(skb, TCA_CBQ_POLICE, sizeof(opt), &opt)) {
		nlmsg_trim(skb, mark);
		return -1;
	}
	return skb->len;
}
#endif
1523
/*
 * Dump every option group of @cl in a fixed order: link-sharing, rate,
 * round-robin, overlimit, policing (only with CONFIG_NET_CLS_ACT) and
 * filter options.  Stops at the first helper that fails.
 * Returns 0 on success, -1 on failure.
 */
static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
{
	if (cbq_dump_lss(skb, cl) < 0)
		return -1;
	if (cbq_dump_rate(skb, cl) < 0)
		return -1;
	if (cbq_dump_wrr(skb, cl) < 0)
		return -1;
	if (cbq_dump_ovl(skb, cl) < 0)
		return -1;
#ifdef CONFIG_NET_CLS_ACT
	if (cbq_dump_police(skb, cl) < 0)
		return -1;
#endif
	if (cbq_dump_fopt(skb, cl) < 0)
		return -1;

	return 0;
}
1537
1538static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1539{
1540 struct cbq_sched_data *q = qdisc_priv(sch);
Patrick McHardy4b3550ef2008-01-23 20:34:11 -08001541 struct nlattr *nest;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001542
Patrick McHardy4b3550ef2008-01-23 20:34:11 -08001543 nest = nla_nest_start(skb, TCA_OPTIONS);
1544 if (nest == NULL)
1545 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001546 if (cbq_dump_attr(skb, &q->link) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001547 goto nla_put_failure;
Yang Yingliangd59b7d82014-03-12 10:20:32 +08001548 return nla_nest_end(skb, nest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001549
Patrick McHardy1e904742008-01-22 22:11:17 -08001550nla_put_failure:
Patrick McHardy4b3550ef2008-01-23 20:34:11 -08001551 nla_nest_cancel(skb, nest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001552 return -1;
1553}
1554
1555static int
1556cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1557{
1558 struct cbq_sched_data *q = qdisc_priv(sch);
1559
1560 q->link.xstats.avgidle = q->link.avgidle;
1561 return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1562}
1563
1564static int
1565cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1566 struct sk_buff *skb, struct tcmsg *tcm)
1567{
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001568 struct cbq_class *cl = (struct cbq_class *)arg;
Patrick McHardy4b3550ef2008-01-23 20:34:11 -08001569 struct nlattr *nest;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001570
1571 if (cl->tparent)
Patrick McHardyd77fea22008-07-05 23:22:05 -07001572 tcm->tcm_parent = cl->tparent->common.classid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001573 else
1574 tcm->tcm_parent = TC_H_ROOT;
Patrick McHardyd77fea22008-07-05 23:22:05 -07001575 tcm->tcm_handle = cl->common.classid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001576 tcm->tcm_info = cl->q->handle;
1577
Patrick McHardy4b3550ef2008-01-23 20:34:11 -08001578 nest = nla_nest_start(skb, TCA_OPTIONS);
1579 if (nest == NULL)
1580 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001581 if (cbq_dump_attr(skb, cl) < 0)
Patrick McHardy1e904742008-01-22 22:11:17 -08001582 goto nla_put_failure;
Yang Yingliangd59b7d82014-03-12 10:20:32 +08001583 return nla_nest_end(skb, nest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001584
Patrick McHardy1e904742008-01-22 22:11:17 -08001585nla_put_failure:
Patrick McHardy4b3550ef2008-01-23 20:34:11 -08001586 nla_nest_cancel(skb, nest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001587 return -1;
1588}
1589
1590static int
1591cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1592 struct gnet_dump *d)
1593{
1594 struct cbq_sched_data *q = qdisc_priv(sch);
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001595 struct cbq_class *cl = (struct cbq_class *)arg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001596
Linus Torvalds1da177e2005-04-16 15:20:36 -07001597 cl->xstats.avgidle = cl->avgidle;
1598 cl->xstats.undertime = 0;
1599
Patrick McHardya0849802007-03-23 11:28:30 -07001600 if (cl->undertime != PSCHED_PASTPERFECT)
Patrick McHardy8edc0c32007-03-23 11:28:55 -07001601 cl->xstats.undertime = cl->undertime - q->now;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001602
John Fastabend22e0f8b2014-09-28 11:52:56 -07001603 if (gnet_stats_copy_basic(d, NULL, &cl->bstats) < 0 ||
Eric Dumazetd250a5f2009-10-02 10:32:18 +00001604 gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
John Fastabendb0ab6f92014-09-28 11:54:24 -07001605 gnet_stats_copy_queue(d, NULL, &cl->qstats, cl->q->q.qlen) < 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001606 return -1;
1607
1608 return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1609}
1610
1611static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1612 struct Qdisc **old)
1613{
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001614 struct cbq_class *cl = (struct cbq_class *)arg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001615
Patrick McHardy5b9a9cc2009-09-04 06:41:17 +00001616 if (new == NULL) {
Changli Gao3511c912010-10-16 13:04:08 +00001617 new = qdisc_create_dflt(sch->dev_queue,
Patrick McHardy5b9a9cc2009-09-04 06:41:17 +00001618 &pfifo_qdisc_ops, cl->common.classid);
1619 if (new == NULL)
1620 return -ENOBUFS;
1621 } else {
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -07001622#ifdef CONFIG_NET_CLS_ACT
Patrick McHardy5b9a9cc2009-09-04 06:41:17 +00001623 if (cl->police == TC_POLICE_RECLASSIFY)
1624 new->reshape_fail = cbq_reshape_fail;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001625#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001626 }
Patrick McHardy5b9a9cc2009-09-04 06:41:17 +00001627
WANG Cong86a79962016-02-25 14:55:00 -08001628 *old = qdisc_replace(sch, new, &cl->q);
Patrick McHardy5b9a9cc2009-09-04 06:41:17 +00001629 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001630}
1631
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001632static struct Qdisc *cbq_leaf(struct Qdisc *sch, unsigned long arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001633{
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001634 struct cbq_class *cl = (struct cbq_class *)arg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001635
Patrick McHardy5b9a9cc2009-09-04 06:41:17 +00001636 return cl->q;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001637}
1638
Jarek Poplawskia37ef2e2006-12-08 00:25:55 -08001639static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1640{
1641 struct cbq_class *cl = (struct cbq_class *)arg;
1642
1643 if (cl->q->q.qlen == 0)
1644 cbq_deactivate_class(cl);
1645}
1646
Linus Torvalds1da177e2005-04-16 15:20:36 -07001647static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1648{
1649 struct cbq_sched_data *q = qdisc_priv(sch);
1650 struct cbq_class *cl = cbq_class_lookup(q, classid);
1651
1652 if (cl) {
1653 cl->refcnt++;
1654 return (unsigned long)cl;
1655 }
1656 return 0;
1657}
1658
Linus Torvalds1da177e2005-04-16 15:20:36 -07001659static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1660{
1661 struct cbq_sched_data *q = qdisc_priv(sch);
1662
Ilpo Järvinen547b7922008-07-25 21:43:18 -07001663 WARN_ON(cl->filters);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001664
Patrick McHardyff31ab52008-07-01 19:52:38 -07001665 tcf_destroy_chain(&cl->filter_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001666 qdisc_destroy(cl->q);
1667 qdisc_put_rtab(cl->R_tab);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001668 gen_kill_estimator(&cl->bstats, &cl->rate_est);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001669 if (cl != &q->link)
1670 kfree(cl);
1671}
1672
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001673static void cbq_destroy(struct Qdisc *sch)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001674{
1675 struct cbq_sched_data *q = qdisc_priv(sch);
Sasha Levinb67bfe02013-02-27 17:06:00 -08001676 struct hlist_node *next;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001677 struct cbq_class *cl;
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001678 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001679
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -07001680#ifdef CONFIG_NET_CLS_ACT
Linus Torvalds1da177e2005-04-16 15:20:36 -07001681 q->rx_class = NULL;
1682#endif
1683 /*
1684 * Filters must be destroyed first because we don't destroy the
1685 * classes from root to leafs which means that filters can still
1686 * be bound to classes which have been destroyed already. --TGR '04
1687 */
Patrick McHardyd77fea22008-07-05 23:22:05 -07001688 for (h = 0; h < q->clhash.hashsize; h++) {
Sasha Levinb67bfe02013-02-27 17:06:00 -08001689 hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode)
Patrick McHardyff31ab52008-07-01 19:52:38 -07001690 tcf_destroy_chain(&cl->filter_list);
Patrick McHardyb00b4bf2007-06-05 16:06:59 -07001691 }
Patrick McHardyd77fea22008-07-05 23:22:05 -07001692 for (h = 0; h < q->clhash.hashsize; h++) {
Sasha Levinb67bfe02013-02-27 17:06:00 -08001693 hlist_for_each_entry_safe(cl, next, &q->clhash.hash[h],
Patrick McHardyd77fea22008-07-05 23:22:05 -07001694 common.hnode)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001695 cbq_destroy_class(sch, cl);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001696 }
Patrick McHardyd77fea22008-07-05 23:22:05 -07001697 qdisc_class_hash_destroy(&q->clhash);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001698}
1699
1700static void cbq_put(struct Qdisc *sch, unsigned long arg)
1701{
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001702 struct cbq_class *cl = (struct cbq_class *)arg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001703
1704 if (--cl->refcnt == 0) {
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -07001705#ifdef CONFIG_NET_CLS_ACT
Jarek Poplawski102396a2008-08-29 14:21:52 -07001706 spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001707 struct cbq_sched_data *q = qdisc_priv(sch);
1708
David S. Miller7698b4f2008-07-16 01:42:40 -07001709 spin_lock_bh(root_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001710 if (q->rx_class == cl)
1711 q->rx_class = NULL;
David S. Miller7698b4f2008-07-16 01:42:40 -07001712 spin_unlock_bh(root_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001713#endif
1714
1715 cbq_destroy_class(sch, cl);
1716 }
1717}
1718
1719static int
Patrick McHardy1e904742008-01-22 22:11:17 -08001720cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001721 unsigned long *arg)
1722{
1723 int err;
1724 struct cbq_sched_data *q = qdisc_priv(sch);
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001725 struct cbq_class *cl = (struct cbq_class *)*arg;
Patrick McHardy1e904742008-01-22 22:11:17 -08001726 struct nlattr *opt = tca[TCA_OPTIONS];
1727 struct nlattr *tb[TCA_CBQ_MAX + 1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001728 struct cbq_class *parent;
1729 struct qdisc_rate_table *rtab = NULL;
1730
Patrick McHardycee63722008-01-23 20:33:32 -08001731 if (opt == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001732 return -EINVAL;
1733
Patrick McHardy27a34212008-01-23 20:35:39 -08001734 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
Patrick McHardycee63722008-01-23 20:33:32 -08001735 if (err < 0)
1736 return err;
1737
Linus Torvalds1da177e2005-04-16 15:20:36 -07001738 if (cl) {
1739 /* Check parent */
1740 if (parentid) {
Patrick McHardyd77fea22008-07-05 23:22:05 -07001741 if (cl->tparent &&
1742 cl->tparent->common.classid != parentid)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001743 return -EINVAL;
1744 if (!cl->tparent && parentid != TC_H_ROOT)
1745 return -EINVAL;
1746 }
1747
Patrick McHardy1e904742008-01-22 22:11:17 -08001748 if (tb[TCA_CBQ_RATE]) {
Stephen Hemminger71bcb092008-11-25 21:13:31 -08001749 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]),
1750 tb[TCA_CBQ_RTAB]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001751 if (rtab == NULL)
1752 return -EINVAL;
1753 }
1754
Stephen Hemminger71bcb092008-11-25 21:13:31 -08001755 if (tca[TCA_RATE]) {
John Fastabend22e0f8b2014-09-28 11:52:56 -07001756 err = gen_replace_estimator(&cl->bstats, NULL,
1757 &cl->rate_est,
Stephen Hemminger71bcb092008-11-25 21:13:31 -08001758 qdisc_root_sleeping_lock(sch),
1759 tca[TCA_RATE]);
1760 if (err) {
Yang Yingliang79c11f22013-12-17 15:29:17 +08001761 qdisc_put_rtab(rtab);
Stephen Hemminger71bcb092008-11-25 21:13:31 -08001762 return err;
1763 }
1764 }
1765
Linus Torvalds1da177e2005-04-16 15:20:36 -07001766 /* Change class parameters */
1767 sch_tree_lock(sch);
1768
1769 if (cl->next_alive != NULL)
1770 cbq_deactivate_class(cl);
1771
1772 if (rtab) {
Patrick McHardyb94c8af2008-11-20 04:11:36 -08001773 qdisc_put_rtab(cl->R_tab);
1774 cl->R_tab = rtab;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001775 }
1776
Patrick McHardy1e904742008-01-22 22:11:17 -08001777 if (tb[TCA_CBQ_LSSOPT])
1778 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001779
Patrick McHardy1e904742008-01-22 22:11:17 -08001780 if (tb[TCA_CBQ_WRROPT]) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001781 cbq_rmprio(q, cl);
Patrick McHardy1e904742008-01-22 22:11:17 -08001782 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001783 }
1784
Patrick McHardy1e904742008-01-22 22:11:17 -08001785 if (tb[TCA_CBQ_OVL_STRATEGY])
1786 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001787
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -07001788#ifdef CONFIG_NET_CLS_ACT
Patrick McHardy1e904742008-01-22 22:11:17 -08001789 if (tb[TCA_CBQ_POLICE])
1790 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001791#endif
1792
Patrick McHardy1e904742008-01-22 22:11:17 -08001793 if (tb[TCA_CBQ_FOPT])
1794 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001795
1796 if (cl->q->q.qlen)
1797 cbq_activate_class(cl);
1798
1799 sch_tree_unlock(sch);
1800
Linus Torvalds1da177e2005-04-16 15:20:36 -07001801 return 0;
1802 }
1803
1804 if (parentid == TC_H_ROOT)
1805 return -EINVAL;
1806
Patrick McHardy1e904742008-01-22 22:11:17 -08001807 if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
1808 tb[TCA_CBQ_LSSOPT] == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001809 return -EINVAL;
1810
Patrick McHardy1e904742008-01-22 22:11:17 -08001811 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001812 if (rtab == NULL)
1813 return -EINVAL;
1814
1815 if (classid) {
1816 err = -EINVAL;
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001817 if (TC_H_MAJ(classid ^ sch->handle) ||
1818 cbq_class_lookup(q, classid))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001819 goto failure;
1820 } else {
1821 int i;
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001822 classid = TC_H_MAKE(sch->handle, 0x8000);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001823
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001824 for (i = 0; i < 0x8000; i++) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001825 if (++q->hgenerator >= 0x8000)
1826 q->hgenerator = 1;
1827 if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1828 break;
1829 }
1830 err = -ENOSR;
1831 if (i >= 0x8000)
1832 goto failure;
1833 classid = classid|q->hgenerator;
1834 }
1835
1836 parent = &q->link;
1837 if (parentid) {
1838 parent = cbq_class_lookup(q, parentid);
1839 err = -EINVAL;
1840 if (parent == NULL)
1841 goto failure;
1842 }
1843
1844 err = -ENOBUFS;
Panagiotis Issaris0da974f2006-07-21 14:51:30 -07001845 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001846 if (cl == NULL)
1847 goto failure;
Stephen Hemminger71bcb092008-11-25 21:13:31 -08001848
1849 if (tca[TCA_RATE]) {
John Fastabend22e0f8b2014-09-28 11:52:56 -07001850 err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est,
Stephen Hemminger71bcb092008-11-25 21:13:31 -08001851 qdisc_root_sleeping_lock(sch),
1852 tca[TCA_RATE]);
1853 if (err) {
1854 kfree(cl);
1855 goto failure;
1856 }
1857 }
1858
Linus Torvalds1da177e2005-04-16 15:20:36 -07001859 cl->R_tab = rtab;
1860 rtab = NULL;
1861 cl->refcnt = 1;
Changli Gao3511c912010-10-16 13:04:08 +00001862 cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid);
1863 if (!cl->q)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001864 cl->q = &noop_qdisc;
Patrick McHardyd77fea22008-07-05 23:22:05 -07001865 cl->common.classid = classid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001866 cl->tparent = parent;
1867 cl->qdisc = sch;
1868 cl->allot = parent->allot;
1869 cl->quantum = cl->allot;
1870 cl->weight = cl->R_tab->rate.rate;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001871
1872 sch_tree_lock(sch);
1873 cbq_link_class(cl);
1874 cl->borrow = cl->tparent;
1875 if (cl->tparent != &q->link)
1876 cl->share = cl->tparent;
1877 cbq_adjust_levels(parent);
1878 cl->minidle = -0x7FFFFFFF;
Patrick McHardy1e904742008-01-22 22:11:17 -08001879 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1880 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001881 if (cl->ewma_log == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001882 cl->ewma_log = q->link.ewma_log;
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001883 if (cl->maxidle == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001884 cl->maxidle = q->link.maxidle;
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001885 if (cl->avpkt == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001886 cl->avpkt = q->link.avpkt;
1887 cl->overlimit = cbq_ovl_classic;
Patrick McHardy1e904742008-01-22 22:11:17 -08001888 if (tb[TCA_CBQ_OVL_STRATEGY])
1889 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -07001890#ifdef CONFIG_NET_CLS_ACT
Patrick McHardy1e904742008-01-22 22:11:17 -08001891 if (tb[TCA_CBQ_POLICE])
1892 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001893#endif
Patrick McHardy1e904742008-01-22 22:11:17 -08001894 if (tb[TCA_CBQ_FOPT])
1895 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001896 sch_tree_unlock(sch);
1897
Patrick McHardyd77fea22008-07-05 23:22:05 -07001898 qdisc_class_hash_grow(sch, &q->clhash);
1899
Linus Torvalds1da177e2005-04-16 15:20:36 -07001900 *arg = (unsigned long)cl;
1901 return 0;
1902
1903failure:
1904 qdisc_put_rtab(rtab);
1905 return err;
1906}
1907
1908static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1909{
1910 struct cbq_sched_data *q = qdisc_priv(sch);
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001911 struct cbq_class *cl = (struct cbq_class *)arg;
Jarek Poplawskia37ef2e2006-12-08 00:25:55 -08001912 unsigned int qlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001913
1914 if (cl->filters || cl->children || cl == &q->link)
1915 return -EBUSY;
1916
1917 sch_tree_lock(sch);
1918
Jarek Poplawskia37ef2e2006-12-08 00:25:55 -08001919 qlen = cl->q->q.qlen;
1920 qdisc_reset(cl->q);
1921 qdisc_tree_decrease_qlen(cl->q, qlen);
1922
Linus Torvalds1da177e2005-04-16 15:20:36 -07001923 if (cl->next_alive)
1924 cbq_deactivate_class(cl);
1925
1926 if (q->tx_borrowed == cl)
1927 q->tx_borrowed = q->tx_class;
1928 if (q->tx_class == cl) {
1929 q->tx_class = NULL;
1930 q->tx_borrowed = NULL;
1931 }
Patrick McHardyc3bc7cf2007-07-15 00:03:05 -07001932#ifdef CONFIG_NET_CLS_ACT
Linus Torvalds1da177e2005-04-16 15:20:36 -07001933 if (q->rx_class == cl)
1934 q->rx_class = NULL;
1935#endif
1936
1937 cbq_unlink_class(cl);
1938 cbq_adjust_levels(cl->tparent);
1939 cl->defmap = 0;
1940 cbq_sync_defmap(cl);
1941
1942 cbq_rmprio(q, cl);
1943 sch_tree_unlock(sch);
1944
Jarek Poplawski7cd0a632009-03-15 20:00:19 -07001945 BUG_ON(--cl->refcnt == 0);
1946 /*
1947 * This shouldn't happen: we "hold" one cops->get() when called
1948 * from tc_ctl_tclass; the destroy method is done from cops->put().
1949 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001950
1951 return 0;
1952}
1953
John Fastabend25d8c0d2014-09-12 20:05:27 -07001954static struct tcf_proto __rcu **cbq_find_tcf(struct Qdisc *sch,
1955 unsigned long arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001956{
1957 struct cbq_sched_data *q = qdisc_priv(sch);
1958 struct cbq_class *cl = (struct cbq_class *)arg;
1959
1960 if (cl == NULL)
1961 cl = &q->link;
1962
1963 return &cl->filter_list;
1964}
1965
1966static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
1967 u32 classid)
1968{
1969 struct cbq_sched_data *q = qdisc_priv(sch);
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001970 struct cbq_class *p = (struct cbq_class *)parent;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001971 struct cbq_class *cl = cbq_class_lookup(q, classid);
1972
1973 if (cl) {
1974 if (p && p->level <= cl->level)
1975 return 0;
1976 cl->filters++;
1977 return (unsigned long)cl;
1978 }
1979 return 0;
1980}
1981
1982static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
1983{
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001984 struct cbq_class *cl = (struct cbq_class *)arg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001985
1986 cl->filters--;
1987}
1988
1989static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
1990{
1991 struct cbq_sched_data *q = qdisc_priv(sch);
Patrick McHardyd77fea22008-07-05 23:22:05 -07001992 struct cbq_class *cl;
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001993 unsigned int h;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001994
1995 if (arg->stop)
1996 return;
1997
Patrick McHardyd77fea22008-07-05 23:22:05 -07001998 for (h = 0; h < q->clhash.hashsize; h++) {
Sasha Levinb67bfe02013-02-27 17:06:00 -08001999 hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002000 if (arg->count < arg->skip) {
2001 arg->count++;
2002 continue;
2003 }
2004 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2005 arg->stop = 1;
2006 return;
2007 }
2008 arg->count++;
2009 }
2010 }
2011}
2012
Eric Dumazet20fea082007-11-14 01:44:41 -08002013static const struct Qdisc_class_ops cbq_class_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002014 .graft = cbq_graft,
2015 .leaf = cbq_leaf,
Jarek Poplawskia37ef2e2006-12-08 00:25:55 -08002016 .qlen_notify = cbq_qlen_notify,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002017 .get = cbq_get,
2018 .put = cbq_put,
2019 .change = cbq_change_class,
2020 .delete = cbq_delete,
2021 .walk = cbq_walk,
2022 .tcf_chain = cbq_find_tcf,
2023 .bind_tcf = cbq_bind_filter,
2024 .unbind_tcf = cbq_unbind_filter,
2025 .dump = cbq_dump_class,
2026 .dump_stats = cbq_dump_class_stats,
2027};
2028
Eric Dumazet20fea082007-11-14 01:44:41 -08002029static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002030 .next = NULL,
2031 .cl_ops = &cbq_class_ops,
2032 .id = "cbq",
2033 .priv_size = sizeof(struct cbq_sched_data),
2034 .enqueue = cbq_enqueue,
2035 .dequeue = cbq_dequeue,
Jarek Poplawski77be1552008-10-31 00:47:01 -07002036 .peek = qdisc_peek_dequeued,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002037 .drop = cbq_drop,
2038 .init = cbq_init,
2039 .reset = cbq_reset,
2040 .destroy = cbq_destroy,
2041 .change = NULL,
2042 .dump = cbq_dump,
2043 .dump_stats = cbq_dump_stats,
2044 .owner = THIS_MODULE,
2045};
2046
2047static int __init cbq_module_init(void)
2048{
2049 return register_qdisc(&cbq_qdisc_ops);
2050}
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +09002051static void __exit cbq_module_exit(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002052{
2053 unregister_qdisc(&cbq_qdisc_ops);
2054}
2055module_init(cbq_module_init)
2056module_exit(cbq_module_exit)
2057MODULE_LICENSE("GPL");