blob: 547b4a88ae2aa5fa9c850973cb6db9ad26e1035c [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * net/sched/sch_api.c Packet scheduler API.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 *
11 * Fixes:
12 *
13 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
14 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
15 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
16 */
17
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <linux/module.h>
19#include <linux/types.h>
20#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070021#include <linux/string.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070022#include <linux/errno.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070023#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070024#include <linux/init.h>
25#include <linux/proc_fs.h>
26#include <linux/seq_file.h>
27#include <linux/kmod.h>
28#include <linux/list.h>
Patrick McHardy41794772007-03-16 01:19:15 -070029#include <linux/hrtimer.h>
Jarek Poplawski25bfcd52008-08-18 20:53:34 -070030#include <linux/lockdep.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090031#include <linux/slab.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070032
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020033#include <net/net_namespace.h>
Denis V. Lunevb8542722007-12-01 00:21:31 +110034#include <net/sock.h>
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -070035#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070036#include <net/pkt_sched.h>
37
Tom Goff7316ae82010-03-19 15:40:13 +000038static int qdisc_notify(struct net *net, struct sk_buff *oskb,
39 struct nlmsghdr *n, u32 clid,
Linus Torvalds1da177e2005-04-16 15:20:36 -070040 struct Qdisc *old, struct Qdisc *new);
Tom Goff7316ae82010-03-19 15:40:13 +000041static int tclass_notify(struct net *net, struct sk_buff *oskb,
42 struct nlmsghdr *n, struct Qdisc *q,
43 unsigned long cl, int event);
Linus Torvalds1da177e2005-04-16 15:20:36 -070044
45/*
46
47 Short review.
48 -------------
49
50 This file consists of two interrelated parts:
51
52 1. queueing disciplines manager frontend.
53 2. traffic classes manager frontend.
54
55 Generally, queueing discipline ("qdisc") is a black box,
56 which is able to enqueue packets and to dequeue them (when
57 device is ready to send something) in order and at times
58 determined by algorithm hidden in it.
59
   qdisc's are divided into two categories:
61 - "queues", which have no internal structure visible from outside.
62 - "schedulers", which split all the packets to "traffic classes",
63 using "packet classifiers" (look at cls_api.c)
64
65 In turn, classes may have child qdiscs (as rule, queues)
66 attached to them etc. etc. etc.
67
68 The goal of the routines in this file is to translate
69 information supplied by user in the form of handles
70 to more intelligible for kernel form, to make some sanity
71 checks and part of work, which is common to all qdiscs
72 and to provide rtnetlink notifications.
73
74 All real intelligent work is done inside qdisc modules.
75
76
77
78 Every discipline has two major routines: enqueue and dequeue.
79
80 ---dequeue
81
82 dequeue usually returns a skb to send. It is allowed to return NULL,
83 but it does not mean that queue is empty, it just means that
84 discipline does not want to send anything this time.
85 Queue is really empty if q->q.qlen == 0.
86 For complicated disciplines with multiple queues q->q is not
87 real packet queue, but however q->q.qlen must be valid.
88
89 ---enqueue
90
91 enqueue returns 0, if packet was enqueued successfully.
92 If packet (this one or another one) was dropped, it returns
93 not zero error code.
94 NET_XMIT_DROP - this packet dropped
95 Expected action: do not backoff, but wait until queue will clear.
96 NET_XMIT_CN - probably this packet enqueued, but another one dropped.
97 Expected action: backoff or ignore
98 NET_XMIT_POLICED - dropped by police.
99 Expected action: backoff or error to real-time apps.
100
101 Auxiliary routines:
102
Jarek Poplawski99c0db22008-10-31 00:45:27 -0700103 ---peek
104
105 like dequeue but without removing a packet from the queue
106
Linus Torvalds1da177e2005-04-16 15:20:36 -0700107 ---reset
108
109 returns qdisc to initial state: purge all buffers, clear all
110 timers, counters (except for statistics) etc.
111
112 ---init
113
114 initializes newly created qdisc.
115
116 ---destroy
117
118 destroys resources allocated by init and during lifetime of qdisc.
119
120 ---change
121
122 changes qdisc parameters.
123 */
124
125/* Protects list of registered TC modules. It is pure SMP lock. */
126static DEFINE_RWLOCK(qdisc_mod_lock);
127
128
129/************************************************
130 * Queueing disciplines manipulation. *
131 ************************************************/
132
133
134/* The list of all installed queueing disciplines. */
135
136static struct Qdisc_ops *qdisc_base;
137
/* Register/unregister queueing discipline */
139
/*
 * register_qdisc - add a qdisc type to the global list of schedulers.
 * @qops: ops table describing the new qdisc type
 *
 * Fills in default enqueue/dequeue/peek hooks where the type left them
 * NULL and sanity-checks the class ops.  Returns 0 on success, -EEXIST
 * if an ops with the same id is already registered, or -EINVAL for an
 * inconsistent ops table.
 */
int register_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int rc = -EEXIST;

	write_lock(&qdisc_mod_lock);
	/* Walk to the list tail, rejecting duplicate ids on the way. */
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (!strcmp(qops->id, q->id))
			goto out;

	if (qops->enqueue == NULL)
		qops->enqueue = noop_qdisc_ops.enqueue;
	if (qops->peek == NULL) {
		if (qops->dequeue == NULL)
			qops->peek = noop_qdisc_ops.peek;
		else
			goto out_einval; /* custom dequeue without peek is invalid */
	}
	if (qops->dequeue == NULL)
		qops->dequeue = noop_qdisc_ops.dequeue;

	if (qops->cl_ops) {
		const struct Qdisc_class_ops *cops = qops->cl_ops;

		/* Classful qdiscs must provide the core class hooks. */
		if (!(cops->get && cops->put && cops->walk && cops->leaf))
			goto out_einval;

		/* A filter chain requires bind/unbind of filters to classes. */
		if (cops->tcf_chain && !(cops->bind_tcf && cops->unbind_tcf))
			goto out_einval;
	}

	qops->next = NULL;
	*qp = qops;
	rc = 0;
out:
	write_unlock(&qdisc_mod_lock);
	return rc;

out_einval:
	rc = -EINVAL;
	goto out;
}
EXPORT_SYMBOL(register_qdisc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700183
/*
 * unregister_qdisc - remove a qdisc type from the global list.
 * @qops: the exact ops table previously passed to register_qdisc()
 *
 * Matches by pointer identity, not by id.  Returns 0 on success or
 * -ENOENT if @qops was never registered.
 */
int unregister_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int err = -ENOENT;

	write_lock(&qdisc_mod_lock);
	/* qp trails q so the entry can be unlinked in place. */
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (q == qops)
			break;
	if (q) {
		*qp = q->next;
		q->next = NULL;
		err = 0;
	}
	write_unlock(&qdisc_mod_lock);
	return err;
}
EXPORT_SYMBOL(unregister_qdisc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700202
/* Get default qdisc if not otherwise specified */
void qdisc_get_default(char *name, size_t len)
{
	/* Copy the current default qdisc id out under the module lock. */
	read_lock(&qdisc_mod_lock);
	strlcpy(name, default_qdisc_ops->id, len);
	read_unlock(&qdisc_mod_lock);
}
210
211static struct Qdisc_ops *qdisc_lookup_default(const char *name)
212{
213 struct Qdisc_ops *q = NULL;
214
215 for (q = qdisc_base; q; q = q->next) {
216 if (!strcmp(name, q->id)) {
217 if (!try_module_get(q->owner))
218 q = NULL;
219 break;
220 }
221 }
222
223 return q;
224}
225
/* Set new default qdisc to use */
int qdisc_set_default(const char *name)
{
	const struct Qdisc_ops *ops;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	write_lock(&qdisc_mod_lock);
	ops = qdisc_lookup_default(name);
	if (!ops) {
		/* Not found, drop lock and try to load module */
		write_unlock(&qdisc_mod_lock);
		request_module("sch_%s", name);
		write_lock(&qdisc_mod_lock);

		/* Retry: the module load may have registered the type. */
		ops = qdisc_lookup_default(name);
	}

	if (ops) {
		/* Set new default; drop the ref held on the old default. */
		module_put(default_qdisc_ops->owner);
		default_qdisc_ops = ops;
	}
	write_unlock(&qdisc_mod_lock);

	return ops ? 0 : -ENOENT;
}
254
Linus Torvalds1da177e2005-04-16 15:20:36 -0700255/* We know handle. Find qdisc among all qdisc's attached to device
256 (root qdisc, all its children, children of children etc.)
257 */
258
Hannes Eder6113b742008-11-28 03:06:46 -0800259static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
David S. Miller8123b422008-08-08 23:23:39 -0700260{
261 struct Qdisc *q;
262
263 if (!(root->flags & TCQ_F_BUILTIN) &&
264 root->handle == handle)
265 return root;
266
267 list_for_each_entry(q, &root->list, list) {
268 if (q->handle == handle)
269 return q;
270 }
271 return NULL;
272}
273
/*
 * qdisc_list_add - make @q findable via qdisc_lookup().
 * Root and ingress qdiscs are not kept on the device list.
 */
void qdisc_list_add(struct Qdisc *q)
{
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS))
		list_add_tail(&q->list, &qdisc_dev(q)->qdisc->list);
}
EXPORT_SYMBOL(qdisc_list_add);
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700280
/*
 * qdisc_list_del - counterpart of qdisc_list_add(); the same root and
 * ingress qdiscs that were never linked are skipped here too.
 */
void qdisc_list_del(struct Qdisc *q)
{
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS))
		list_del(&q->list);
}
EXPORT_SYMBOL(qdisc_list_del);
287
/*
 * qdisc_lookup - find a qdisc on @dev by its 32-bit handle.
 * Searches the egress tree first, then the ingress qdisc if present.
 * Returns NULL when no qdisc with @handle exists on the device.
 */
struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
{
	struct Qdisc *q;

	q = qdisc_match_from_root(dev->qdisc, handle);
	if (q)
		goto out;

	if (dev_ingress_queue(dev))
		q = qdisc_match_from_root(
			dev_ingress_queue(dev)->qdisc_sleeping,
			handle);
out:
	return q;
}
303
Linus Torvalds1da177e2005-04-16 15:20:36 -0700304static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
305{
306 unsigned long cl;
307 struct Qdisc *leaf;
Eric Dumazet20fea082007-11-14 01:44:41 -0800308 const struct Qdisc_class_ops *cops = p->ops->cl_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700309
310 if (cops == NULL)
311 return NULL;
312 cl = cops->get(p, classid);
313
314 if (cl == 0)
315 return NULL;
316 leaf = cops->leaf(p, cl);
317 cops->put(p, cl);
318 return leaf;
319}
320
/* Find queueing discipline by name */

static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
{
	struct Qdisc_ops *q = NULL;

	if (kind) {
		read_lock(&qdisc_mod_lock);
		for (q = qdisc_base; q; q = q->next) {
			if (nla_strcmp(kind, q->id) == 0) {
				/* Pin the owning module; NULL if it is unloading. */
				if (!try_module_get(q->owner))
					q = NULL;
				break;
			}
		}
		read_unlock(&qdisc_mod_lock);
	}
	return q;
}
340
Jesper Dangaard Brouer8a8e3d82013-08-14 23:47:11 +0200341/* The linklayer setting were not transferred from iproute2, in older
342 * versions, and the rate tables lookup systems have been dropped in
343 * the kernel. To keep backward compatible with older iproute2 tc
344 * utils, we detect the linklayer setting by detecting if the rate
345 * table were modified.
346 *
347 * For linklayer ATM table entries, the rate table will be aligned to
348 * 48 bytes, thus some table entries will contain the same value. The
349 * mpu (min packet unit) is also encoded into the old rate table, thus
350 * starting from the mpu, we find low and high table entries for
351 * mapping this cell. If these entries contain the same value, when
352 * the rate tables have been modified for linklayer ATM.
353 *
354 * This is done by rounding mpu to the nearest 48 bytes cell/entry,
355 * and then roundup to the next cell, calc the table entry one below,
356 * and compare.
357 */
358static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
359{
360 int low = roundup(r->mpu, 48);
361 int high = roundup(low+1, 48);
362 int cell_low = low >> r->cell_log;
363 int cell_high = (high >> r->cell_log) - 1;
364
365 /* rtab is too inaccurate at rates > 100Mbit/s */
366 if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
367 pr_debug("TC linklayer: Giving up ATM detection\n");
368 return TC_LINKLAYER_ETHERNET;
369 }
370
371 if ((cell_high > cell_low) && (cell_high < 256)
372 && (rtab[cell_low] == rtab[cell_high])) {
373 pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
374 cell_low, cell_high, rtab[cell_high]);
375 return TC_LINKLAYER_ATM;
376 }
377 return TC_LINKLAYER_ETHERNET;
378}
379
Linus Torvalds1da177e2005-04-16 15:20:36 -0700380static struct qdisc_rate_table *qdisc_rtab_list;
381
/*
 * qdisc_get_rtab - obtain a (possibly shared) rate table for spec @r.
 * @r:   rate spec from userspace; linklayer may be filled in here
 * @tab: TCA_RATE-style netlink attribute holding the 1024-byte table
 *
 * Identical tables are refcounted and shared.  Returns NULL on invalid
 * input or allocation failure.
 */
struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab)
{
	struct qdisc_rate_table *rtab;

	if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
	    nla_len(tab) != TC_RTAB_SIZE)
		return NULL;

	/* Share an existing table with identical spec and data. */
	for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
		if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
		    !memcmp(&rtab->data, nla_data(tab), 1024)) {
			rtab->refcnt++;
			return rtab;
		}
	}

	rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
	if (rtab) {
		rtab->rate = *r;
		rtab->refcnt = 1;
		memcpy(rtab->data, nla_data(tab), 1024);
		/* Old iproute2 did not set linklayer; detect it from the table. */
		if (r->linklayer == TC_LINKLAYER_UNAWARE)
			r->linklayer = __detect_linklayer(r, rtab->data);
		rtab->next = qdisc_rtab_list;
		qdisc_rtab_list = rtab;
	}
	return rtab;
}
EXPORT_SYMBOL(qdisc_get_rtab);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700411
/*
 * qdisc_put_rtab - drop one reference on @tab; unlink and free it when
 * the last reference goes away.  NULL is a no-op.
 */
void qdisc_put_rtab(struct qdisc_rate_table *tab)
{
	struct qdisc_rate_table *rtab, **rtabp;

	if (!tab || --tab->refcnt)
		return;

	/* Last ref dropped: find the entry on the list and unlink it. */
	for (rtabp = &qdisc_rtab_list;
	     (rtab = *rtabp) != NULL;
	     rtabp = &rtab->next) {
		if (rtab == tab) {
			*rtabp = rtab->next;
			kfree(rtab);
			return;
		}
	}
}
EXPORT_SYMBOL(qdisc_put_rtab);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700430
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700431static LIST_HEAD(qdisc_stab_list);
432static DEFINE_SPINLOCK(qdisc_stab_lock);
433
434static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
435 [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) },
436 [TCA_STAB_DATA] = { .type = NLA_BINARY },
437};
438
/*
 * qdisc_get_stab - parse a TCA_STAB attribute into a size table.
 * Identical tables are refcounted and shared via qdisc_stab_list.
 * Returns the table or an ERR_PTR on parse/allocation failure.
 */
static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
{
	struct nlattr *tb[TCA_STAB_MAX + 1];
	struct qdisc_size_table *stab;
	struct tc_sizespec *s;
	unsigned int tsize = 0;
	u16 *tab = NULL;
	int err;

	err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy);
	if (err < 0)
		return ERR_PTR(err);
	if (!tb[TCA_STAB_BASE])
		return ERR_PTR(-EINVAL);

	s = nla_data(tb[TCA_STAB_BASE]);

	if (s->tsize > 0) {
		if (!tb[TCA_STAB_DATA])
			return ERR_PTR(-EINVAL);
		tab = nla_data(tb[TCA_STAB_DATA]);
		tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
	}

	/* The data attribute must carry exactly tsize entries. */
	if (tsize != s->tsize || (!tab && tsize > 0))
		return ERR_PTR(-EINVAL);

	spin_lock(&qdisc_stab_lock);

	/* Share an existing table with identical spec and data. */
	list_for_each_entry(stab, &qdisc_stab_list, list) {
		if (memcmp(&stab->szopts, s, sizeof(*s)))
			continue;
		if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
			continue;
		stab->refcnt++;
		spin_unlock(&qdisc_stab_lock);
		return stab;
	}

	/* Drop the spinlock before the sleeping allocation below. */
	spin_unlock(&qdisc_stab_lock);

	stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
	if (!stab)
		return ERR_PTR(-ENOMEM);

	stab->refcnt = 1;
	stab->szopts = *s;
	if (tsize > 0)
		memcpy(stab->data, tab, tsize * sizeof(u16));

	spin_lock(&qdisc_stab_lock);
	list_add_tail(&stab->list, &qdisc_stab_list);
	spin_unlock(&qdisc_stab_lock);

	return stab;
}
495
/* RCU callback: free a size table after the grace period elapses. */
static void stab_kfree_rcu(struct rcu_head *head)
{
	kfree(container_of(head, struct qdisc_size_table, rcu));
}
500
/*
 * qdisc_put_stab - drop one reference on a size table; when the last
 * reference goes, unlink it and free it after an RCU grace period so
 * concurrent readers of the table stay safe.
 */
void qdisc_put_stab(struct qdisc_size_table *tab)
{
	if (!tab)
		return;

	spin_lock(&qdisc_stab_lock);

	if (--tab->refcnt == 0) {
		list_del(&tab->list);
		call_rcu_bh(&tab->rcu, stab_kfree_rcu);
	}

	spin_unlock(&qdisc_stab_lock);
}
EXPORT_SYMBOL(qdisc_put_stab);
516
/*
 * Dump a size table's spec into a nested TCA_STAB netlink attribute.
 * Returns the new skb length on success or -1 if the skb ran out of room.
 */
static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, TCA_STAB);
	if (nest == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
		goto nla_put_failure;
	nla_nest_end(skb, nest);

	return skb->len;

nla_put_failure:
	return -1;
}
533
/*
 * Compute the size-table-adjusted packet length for @skb and store it
 * in the skb's qdisc cb.  Slots past the end of the table are
 * extrapolated from the last entry; the result is clamped to >= 1.
 */
void __qdisc_calculate_pkt_len(struct sk_buff *skb, const struct qdisc_size_table *stab)
{
	int pkt_len, slot;

	pkt_len = skb->len + stab->szopts.overhead;
	if (unlikely(!stab->szopts.tsize))
		goto out;

	slot = pkt_len + stab->szopts.cell_align;
	if (unlikely(slot < 0))
		slot = 0;

	slot >>= stab->szopts.cell_log;
	if (likely(slot < stab->szopts.tsize))
		pkt_len = stab->data[slot];
	else
		/* Beyond the table: scale the last entry and wrap the rest. */
		pkt_len = stab->data[stab->szopts.tsize - 1] *
				(slot / stab->szopts.tsize) +
				stab->data[slot % stab->szopts.tsize];

	pkt_len <<= stab->szopts.size_log;
out:
	if (unlikely(pkt_len < 1))
		pkt_len = 1;
	qdisc_skb_cb(skb)->pkt_len = pkt_len;
}
EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700561
/*
 * One-shot warning that a qdisc appears non-work-conserving; the
 * TCQ_F_WARN_NONWC flag suppresses repeats for the same qdisc.
 */
void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc)
{
	if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
		pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
			txt, qdisc->ops->id, qdisc->handle >> 16);
		qdisc->flags |= TCQ_F_WARN_NONWC;
	}
}
EXPORT_SYMBOL(qdisc_warn_nonwc);
571
/*
 * hrtimer callback: the throttle period expired, so clear the
 * throttled state and reschedule the root qdisc for transmission.
 */
static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
{
	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
						 timer);

	qdisc_unthrottled(wd->qdisc);
	__netif_schedule(qdisc_root(wd->qdisc));

	return HRTIMER_NORESTART;
}
582
/* Initialize a watchdog: absolute monotonic hrtimer bound to @qdisc. */
void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
{
	hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	wd->timer.function = qdisc_watchdog;
	wd->qdisc = qdisc;
}
EXPORT_SYMBOL(qdisc_watchdog_init);
590
/*
 * Arm the watchdog to fire at absolute time @expires (nanoseconds) and
 * mark the qdisc throttled.  Does nothing if the root qdisc has been
 * deactivated, so a dying device is not rescheduled.
 */
void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires)
{
	if (test_bit(__QDISC_STATE_DEACTIVATED,
		     &qdisc_root_sleeping(wd->qdisc)->state))
		return;

	qdisc_throttled(wd->qdisc);

	hrtimer_start(&wd->timer,
		      ns_to_ktime(expires),
		      HRTIMER_MODE_ABS);
}
EXPORT_SYMBOL(qdisc_watchdog_schedule_ns);
Patrick McHardy41794772007-03-16 01:19:15 -0700604
/* Cancel a pending watchdog timer and clear the throttled state. */
void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
{
	hrtimer_cancel(&wd->timer);
	qdisc_unthrottled(wd->qdisc);
}
EXPORT_SYMBOL(qdisc_watchdog_cancel);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700611
/*
 * Allocate and initialize @n hlist heads for a class hash table.
 * Small tables come from kmalloc; larger ones fall back to whole
 * pages.  Returns NULL on allocation failure.
 */
static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
{
	unsigned int size = n * sizeof(struct hlist_head), i;
	struct hlist_head *h;

	if (size <= PAGE_SIZE)
		h = kmalloc(size, GFP_KERNEL);
	else
		h = (struct hlist_head *)
			__get_free_pages(GFP_KERNEL, get_order(size));

	if (h != NULL) {
		for (i = 0; i < n; i++)
			INIT_HLIST_HEAD(&h[i]);
	}
	return h;
}
629
/* Free a table obtained from qdisc_class_hash_alloc(), matching its
 * kmalloc-vs-pages allocation strategy by recomputing the size.
 */
static void qdisc_class_hash_free(struct hlist_head *h, unsigned int n)
{
	unsigned int size = n * sizeof(struct hlist_head);

	if (size <= PAGE_SIZE)
		kfree(h);
	else
		free_pages((unsigned long)h, get_order(size));
}
639
/*
 * Double the class hash table when its load factor exceeds 0.75.
 * Rehashing happens under sch_tree_lock so lookups stay consistent;
 * an allocation failure simply leaves the old table in place.
 */
void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
{
	struct Qdisc_class_common *cl;
	struct hlist_node *next;
	struct hlist_head *nhash, *ohash;
	unsigned int nsize, nmask, osize;
	unsigned int i, h;

	/* Rehash when load factor exceeds 0.75 */
	if (clhash->hashelems * 4 <= clhash->hashsize * 3)
		return;
	nsize = clhash->hashsize * 2;
	nmask = nsize - 1;
	nhash = qdisc_class_hash_alloc(nsize);
	if (nhash == NULL)
		return;

	ohash = clhash->hash;
	osize = clhash->hashsize;

	sch_tree_lock(sch);
	/* Move every class into its bucket under the new mask. */
	for (i = 0; i < osize; i++) {
		hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
			h = qdisc_class_hash(cl->classid, nmask);
			hlist_add_head(&cl->hnode, &nhash[h]);
		}
	}
	clhash->hash = nhash;
	clhash->hashsize = nsize;
	clhash->hashmask = nmask;
	sch_tree_unlock(sch);

	qdisc_class_hash_free(ohash, osize);
}
EXPORT_SYMBOL(qdisc_class_hash_grow);
675
/* Initialize an empty 4-bucket class hash; returns -ENOMEM on failure. */
int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
{
	unsigned int size = 4;

	clhash->hash = qdisc_class_hash_alloc(size);
	if (clhash->hash == NULL)
		return -ENOMEM;
	clhash->hashsize = size;
	clhash->hashmask = size - 1;
	clhash->hashelems = 0;
	return 0;
}
EXPORT_SYMBOL(qdisc_class_hash_init);
689
/* Release the bucket array; entries themselves are owned elsewhere. */
void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
{
	qdisc_class_hash_free(clhash->hash, clhash->hashsize);
}
EXPORT_SYMBOL(qdisc_class_hash_destroy);
695
/* Insert @cl into the bucket selected by its classid and bump the count. */
void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	unsigned int h;

	INIT_HLIST_NODE(&cl->hnode);
	h = qdisc_class_hash(cl->classid, clhash->hashmask);
	hlist_add_head(&cl->hnode, &clhash->hash[h]);
	clhash->hashelems++;
}
EXPORT_SYMBOL(qdisc_class_hash_insert);
707
/* Unlink @cl from its bucket and decrement the element count. */
void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	hlist_del(&cl->hnode);
	clhash->hashelems--;
}
EXPORT_SYMBOL(qdisc_class_hash_remove);
715
/* Allocate an unique handle from space managed by kernel
 * Possible range is [8000-FFFF]:0000 (0x8000 values)
 */
static u32 qdisc_alloc_handle(struct net_device *dev)
{
	int i = 0x8000;
	/* Persistent cursor so successive allocations keep advancing. */
	static u32 autohandle = TC_H_MAKE(0x80000000U, 0);

	do {
		autohandle += TC_H_MAKE(0x10000U, 0);
		if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
			autohandle = TC_H_MAKE(0x80000000U, 0);
		if (!qdisc_lookup(dev, autohandle))
			return autohandle;
		cond_resched();
	} while (--i > 0);

	/* Entire 0x8000-handle space is in use on this device. */
	return 0;
}
735
/*
 * Propagate a queue-length decrease of @n packets from @sch up through
 * every ancestor qdisc, notifying classful parents and charging the
 * same count to each ancestor's drop statistics.  Stops at the root or
 * when the parent lives under the ingress major handle.
 */
void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
{
	const struct Qdisc_class_ops *cops;
	unsigned long cl;
	u32 parentid;
	int drops;

	if (n == 0)
		return;
	drops = max_t(int, n, 0);
	while ((parentid = sch->parent)) {
		if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
			return;

		sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
		if (sch == NULL) {
			/* Only the root may legitimately have no parent qdisc. */
			WARN_ON(parentid != TC_H_ROOT);
			return;
		}
		cops = sch->ops->cl_ops;
		if (cops->qlen_notify) {
			cl = cops->get(sch, parentid);
			cops->qlen_notify(sch, cl);
			cops->put(sch, cl);
		}
		sch->q.qlen -= n;
		sch->qstats.drops += drops;
	}
}
EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700766
/*
 * Send a netlink notification about a qdisc replacement and destroy
 * the old qdisc (if any) afterwards.
 */
static void notify_and_destroy(struct net *net, struct sk_buff *skb,
			       struct nlmsghdr *n, u32 clid,
			       struct Qdisc *old, struct Qdisc *new)
{
	if (new || old)
		qdisc_notify(net, skb, n, clid, old, new);

	if (old)
		qdisc_destroy(old);
}
777
/* Graft qdisc "new" to class "classid" of qdisc "parent" or
 * to device "dev".
 *
 * When appropriate send a netlink notification using 'skb'
 * and "n".
 *
 * On success, destroy old qdisc.
 */

static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
		       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
		       struct Qdisc *new, struct Qdisc *old)
{
	struct Qdisc *q = old;
	struct net *net = dev_net(dev);
	int err = 0;

	if (parent == NULL) {
		/* Grafting at device level: either all tx queues or ingress. */
		unsigned int i, num_q, ingress;

		ingress = 0;
		num_q = dev->num_tx_queues;
		if ((q && q->flags & TCQ_F_INGRESS) ||
		    (new && new->flags & TCQ_F_INGRESS)) {
			num_q = 1;
			ingress = 1;
			if (!dev_ingress_queue(dev))
				return -ENOENT;
		}

		/* Quiesce the device while its qdiscs are swapped. */
		if (dev->flags & IFF_UP)
			dev_deactivate(dev);

		/* A multiqueue-aware qdisc attaches itself to all queues. */
		if (new && new->ops->attach) {
			new->ops->attach(new);
			num_q = 0;
		}

		for (i = 0; i < num_q; i++) {
			struct netdev_queue *dev_queue = dev_ingress_queue(dev);

			if (!ingress)
				dev_queue = netdev_get_tx_queue(dev, i);

			old = dev_graft_qdisc(dev_queue, new);
			/* One reference per extra tx queue sharing @new. */
			if (new && i > 0)
				atomic_inc(&new->refcnt);

			if (!ingress)
				qdisc_destroy(old);
		}

		if (!ingress) {
			notify_and_destroy(net, skb, n, classid,
					   dev->qdisc, new);
			if (new && !new->ops->attach)
				atomic_inc(&new->refcnt);
			dev->qdisc = new ? : &noop_qdisc;
		} else {
			notify_and_destroy(net, skb, n, classid, old, new);
		}

		if (dev->flags & IFF_UP)
			dev_activate(dev);
	} else {
		/* Grafting into a class of a classful parent qdisc. */
		const struct Qdisc_class_ops *cops = parent->ops->cl_ops;

		err = -EOPNOTSUPP;
		if (cops && cops->graft) {
			unsigned long cl = cops->get(parent, classid);
			if (cl) {
				err = cops->graft(parent, cl, new, &old);
				cops->put(parent, cl);
			} else
				err = -ENOENT;
		}
		if (!err)
			notify_and_destroy(net, skb, n, classid, old, new);
	}
	return err;
}
859
Jarek Poplawski25bfcd52008-08-18 20:53:34 -0700860/* lockdep annotation is needed for ingress; egress gets it only for name */
861static struct lock_class_key qdisc_tx_lock;
862static struct lock_class_key qdisc_rx_lock;
863
Linus Torvalds1da177e2005-04-16 15:20:36 -0700864/*
865 Allocate and initialize new qdisc.
866
867 Parameters are passed via opt.
868 */
869
870static struct Qdisc *
David S. Millerbb949fb2008-07-08 16:55:56 -0700871qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
Patrick McHardy23bcf632009-09-09 18:11:23 -0700872 struct Qdisc *p, u32 parent, u32 handle,
873 struct nlattr **tca, int *errp)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700874{
875 int err;
Patrick McHardy1e904742008-01-22 22:11:17 -0800876 struct nlattr *kind = tca[TCA_KIND];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700877 struct Qdisc *sch;
878 struct Qdisc_ops *ops;
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700879 struct qdisc_size_table *stab;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700880
881 ops = qdisc_lookup_ops(kind);
Johannes Berg95a5afc2008-10-16 15:24:51 -0700882#ifdef CONFIG_MODULES
Linus Torvalds1da177e2005-04-16 15:20:36 -0700883 if (ops == NULL && kind != NULL) {
884 char name[IFNAMSIZ];
Patrick McHardy1e904742008-01-22 22:11:17 -0800885 if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700886 /* We dropped the RTNL semaphore in order to
887 * perform the module load. So, even if we
888 * succeeded in loading the module we have to
889 * tell the caller to replay the request. We
890 * indicate this using -EAGAIN.
891 * We replay the request because the device may
892 * go away in the mean time.
893 */
894 rtnl_unlock();
895 request_module("sch_%s", name);
896 rtnl_lock();
897 ops = qdisc_lookup_ops(kind);
898 if (ops != NULL) {
899 /* We will try again qdisc_lookup_ops,
900 * so don't keep a reference.
901 */
902 module_put(ops->owner);
903 err = -EAGAIN;
904 goto err_out;
905 }
906 }
907 }
908#endif
909
Jamal Hadi Salimb9e2cc02006-08-03 16:36:51 -0700910 err = -ENOENT;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700911 if (ops == NULL)
912 goto err_out;
913
David S. Miller5ce2d482008-07-08 17:06:30 -0700914 sch = qdisc_alloc(dev_queue, ops);
Thomas Graf3d54b822005-07-05 14:15:09 -0700915 if (IS_ERR(sch)) {
916 err = PTR_ERR(sch);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700917 goto err_out2;
Thomas Graf3d54b822005-07-05 14:15:09 -0700918 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700919
Patrick McHardyffc8fef2007-07-30 17:11:50 -0700920 sch->parent = parent;
921
Thomas Graf3d54b822005-07-05 14:15:09 -0700922 if (handle == TC_H_INGRESS) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700923 sch->flags |= TCQ_F_INGRESS;
Thomas Graf3d54b822005-07-05 14:15:09 -0700924 handle = TC_H_MAKE(TC_H_INGRESS, 0);
Jarek Poplawski25bfcd52008-08-18 20:53:34 -0700925 lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock);
Patrick McHardyfd44de72007-04-16 17:07:08 -0700926 } else {
Patrick McHardyfd44de72007-04-16 17:07:08 -0700927 if (handle == 0) {
928 handle = qdisc_alloc_handle(dev);
929 err = -ENOMEM;
930 if (handle == 0)
931 goto err_out3;
932 }
Jarek Poplawski25bfcd52008-08-18 20:53:34 -0700933 lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
Eric Dumazet1abbe132012-12-11 15:54:33 +0000934 if (!netif_is_multiqueue(dev))
935 sch->flags |= TCQ_F_ONETXQUEUE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700936 }
937
Thomas Graf3d54b822005-07-05 14:15:09 -0700938 sch->handle = handle;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700939
Patrick McHardy1e904742008-01-22 22:11:17 -0800940 if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700941 if (tca[TCA_STAB]) {
942 stab = qdisc_get_stab(tca[TCA_STAB]);
943 if (IS_ERR(stab)) {
944 err = PTR_ERR(stab);
Jarek Poplawski7c64b9f2009-09-15 23:42:05 -0700945 goto err_out4;
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700946 }
Eric Dumazeta2da5702011-01-20 03:48:19 +0000947 rcu_assign_pointer(sch->stab, stab);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700948 }
Patrick McHardy1e904742008-01-22 22:11:17 -0800949 if (tca[TCA_RATE]) {
Jarek Poplawskif6f9b932008-08-27 02:25:17 -0700950 spinlock_t *root_lock;
951
Patrick McHardy23bcf632009-09-09 18:11:23 -0700952 err = -EOPNOTSUPP;
953 if (sch->flags & TCQ_F_MQROOT)
954 goto err_out4;
955
Jarek Poplawskif6f9b932008-08-27 02:25:17 -0700956 if ((sch->parent != TC_H_ROOT) &&
Patrick McHardy23bcf632009-09-09 18:11:23 -0700957 !(sch->flags & TCQ_F_INGRESS) &&
958 (!p || !(p->flags & TCQ_F_MQROOT)))
Jarek Poplawskif6f9b932008-08-27 02:25:17 -0700959 root_lock = qdisc_root_sleeping_lock(sch);
960 else
961 root_lock = qdisc_lock(sch);
962
Thomas Graf023e09a2005-07-05 14:15:53 -0700963 err = gen_new_estimator(&sch->bstats, &sch->rate_est,
Jarek Poplawskif6f9b932008-08-27 02:25:17 -0700964 root_lock, tca[TCA_RATE]);
Patrick McHardy23bcf632009-09-09 18:11:23 -0700965 if (err)
966 goto err_out4;
Thomas Graf023e09a2005-07-05 14:15:53 -0700967 }
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700968
969 qdisc_list_add(sch);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700970
Linus Torvalds1da177e2005-04-16 15:20:36 -0700971 return sch;
972 }
973err_out3:
974 dev_put(dev);
Thomas Graf3d54b822005-07-05 14:15:09 -0700975 kfree((char *) sch - sch->padded);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700976err_out2:
977 module_put(ops->owner);
978err_out:
979 *errp = err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700980 return NULL;
Patrick McHardy23bcf632009-09-09 18:11:23 -0700981
982err_out4:
983 /*
984 * Any broken qdiscs that would require a ops->reset() here?
985 * The qdisc was never in action so it shouldn't be necessary.
986 */
Eric Dumazeta2da5702011-01-20 03:48:19 +0000987 qdisc_put_stab(rtnl_dereference(sch->stab));
Patrick McHardy23bcf632009-09-09 18:11:23 -0700988 if (ops->destroy)
989 ops->destroy(sch);
990 goto err_out3;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700991}
992
Patrick McHardy1e904742008-01-22 22:11:17 -0800993static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700994{
Eric Dumazeta2da5702011-01-20 03:48:19 +0000995 struct qdisc_size_table *ostab, *stab = NULL;
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700996 int err = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700997
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700998 if (tca[TCA_OPTIONS]) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700999 if (sch->ops->change == NULL)
1000 return -EINVAL;
Patrick McHardy1e904742008-01-22 22:11:17 -08001001 err = sch->ops->change(sch, tca[TCA_OPTIONS]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001002 if (err)
1003 return err;
1004 }
Jussi Kivilinna175f9c12008-07-20 00:08:47 -07001005
1006 if (tca[TCA_STAB]) {
1007 stab = qdisc_get_stab(tca[TCA_STAB]);
1008 if (IS_ERR(stab))
1009 return PTR_ERR(stab);
1010 }
1011
Eric Dumazeta2da5702011-01-20 03:48:19 +00001012 ostab = rtnl_dereference(sch->stab);
1013 rcu_assign_pointer(sch->stab, stab);
1014 qdisc_put_stab(ostab);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -07001015
Patrick McHardy23bcf632009-09-09 18:11:23 -07001016 if (tca[TCA_RATE]) {
Stephen Hemminger71bcb092008-11-25 21:13:31 -08001017 /* NB: ignores errors from replace_estimator
1018 because change can't be undone. */
Patrick McHardy23bcf632009-09-09 18:11:23 -07001019 if (sch->flags & TCQ_F_MQROOT)
1020 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001021 gen_replace_estimator(&sch->bstats, &sch->rate_est,
Stephen Hemminger71bcb092008-11-25 21:13:31 -08001022 qdisc_root_sleeping_lock(sch),
1023 tca[TCA_RATE]);
Patrick McHardy23bcf632009-09-09 18:11:23 -07001024 }
1025out:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001026 return 0;
1027}
1028
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001029struct check_loop_arg {
1030 struct qdisc_walker w;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001031 struct Qdisc *p;
1032 int depth;
1033};
1034
1035static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w);
1036
1037static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
1038{
1039 struct check_loop_arg arg;
1040
1041 if (q->ops->cl_ops == NULL)
1042 return 0;
1043
1044 arg.w.stop = arg.w.skip = arg.w.count = 0;
1045 arg.w.fn = check_loop_fn;
1046 arg.depth = depth;
1047 arg.p = p;
1048 q->ops->cl_ops->walk(q, &arg.w);
1049 return arg.w.stop ? -ELOOP : 0;
1050}
1051
1052static int
1053check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
1054{
1055 struct Qdisc *leaf;
Eric Dumazet20fea082007-11-14 01:44:41 -08001056 const struct Qdisc_class_ops *cops = q->ops->cl_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001057 struct check_loop_arg *arg = (struct check_loop_arg *)w;
1058
1059 leaf = cops->leaf(q, cl);
1060 if (leaf) {
1061 if (leaf == arg->p || arg->depth > 7)
1062 return -ELOOP;
1063 return check_loop(leaf, arg->p, arg->depth + 1);
1064 }
1065 return 0;
1066}
1067
1068/*
1069 * Delete/get qdisc.
1070 */
1071
Thomas Graf661d2962013-03-21 07:45:29 +00001072static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001073{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001074 struct net *net = sock_net(skb->sk);
David S. Miller02ef22c2012-06-26 21:50:05 -07001075 struct tcmsg *tcm = nlmsg_data(n);
Patrick McHardy1e904742008-01-22 22:11:17 -08001076 struct nlattr *tca[TCA_MAX + 1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001077 struct net_device *dev;
Hong zhi guode179c82013-03-25 17:36:33 +00001078 u32 clid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001079 struct Qdisc *q = NULL;
1080 struct Qdisc *p = NULL;
1081 int err;
1082
Eric W. Biedermandfc47ef2012-11-16 03:03:00 +00001083 if ((n->nlmsg_type != RTM_GETQDISC) && !capable(CAP_NET_ADMIN))
1084 return -EPERM;
1085
Patrick McHardy1e904742008-01-22 22:11:17 -08001086 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
1087 if (err < 0)
1088 return err;
1089
Hong zhi guode179c82013-03-25 17:36:33 +00001090 dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1091 if (!dev)
1092 return -ENODEV;
1093
1094 clid = tcm->tcm_parent;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001095 if (clid) {
1096 if (clid != TC_H_ROOT) {
1097 if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001098 p = qdisc_lookup(dev, TC_H_MAJ(clid));
1099 if (!p)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001100 return -ENOENT;
1101 q = qdisc_leaf(p, clid);
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001102 } else if (dev_ingress_queue(dev)) {
1103 q = dev_ingress_queue(dev)->qdisc_sleeping;
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +09001104 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001105 } else {
Patrick McHardyaf356af2009-09-04 06:41:18 +00001106 q = dev->qdisc;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001107 }
1108 if (!q)
1109 return -ENOENT;
1110
1111 if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
1112 return -EINVAL;
1113 } else {
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001114 q = qdisc_lookup(dev, tcm->tcm_handle);
1115 if (!q)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001116 return -ENOENT;
1117 }
1118
Patrick McHardy1e904742008-01-22 22:11:17 -08001119 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001120 return -EINVAL;
1121
1122 if (n->nlmsg_type == RTM_DELQDISC) {
1123 if (!clid)
1124 return -EINVAL;
1125 if (q->handle == 0)
1126 return -ENOENT;
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001127 err = qdisc_graft(dev, p, skb, n, clid, NULL, q);
1128 if (err != 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001129 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001130 } else {
Tom Goff7316ae82010-03-19 15:40:13 +00001131 qdisc_notify(net, skb, n, clid, NULL, q);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001132 }
1133 return 0;
1134}
1135
1136/*
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001137 * Create/change qdisc.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001138 */
1139
Thomas Graf661d2962013-03-21 07:45:29 +00001140static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001141{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001142 struct net *net = sock_net(skb->sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001143 struct tcmsg *tcm;
Patrick McHardy1e904742008-01-22 22:11:17 -08001144 struct nlattr *tca[TCA_MAX + 1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001145 struct net_device *dev;
1146 u32 clid;
1147 struct Qdisc *q, *p;
1148 int err;
1149
Eric W. Biedermandfc47ef2012-11-16 03:03:00 +00001150 if (!capable(CAP_NET_ADMIN))
1151 return -EPERM;
1152
Linus Torvalds1da177e2005-04-16 15:20:36 -07001153replay:
1154 /* Reinit, just in case something touches this. */
Hong zhi guode179c82013-03-25 17:36:33 +00001155 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
1156 if (err < 0)
1157 return err;
1158
David S. Miller02ef22c2012-06-26 21:50:05 -07001159 tcm = nlmsg_data(n);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001160 clid = tcm->tcm_parent;
1161 q = p = NULL;
1162
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001163 dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1164 if (!dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001165 return -ENODEV;
1166
Patrick McHardy1e904742008-01-22 22:11:17 -08001167
Linus Torvalds1da177e2005-04-16 15:20:36 -07001168 if (clid) {
1169 if (clid != TC_H_ROOT) {
1170 if (clid != TC_H_INGRESS) {
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001171 p = qdisc_lookup(dev, TC_H_MAJ(clid));
1172 if (!p)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001173 return -ENOENT;
1174 q = qdisc_leaf(p, clid);
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001175 } else if (dev_ingress_queue_create(dev)) {
1176 q = dev_ingress_queue(dev)->qdisc_sleeping;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001177 }
1178 } else {
Patrick McHardyaf356af2009-09-04 06:41:18 +00001179 q = dev->qdisc;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001180 }
1181
1182 /* It may be default qdisc, ignore it */
1183 if (q && q->handle == 0)
1184 q = NULL;
1185
1186 if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
1187 if (tcm->tcm_handle) {
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001188 if (q && !(n->nlmsg_flags & NLM_F_REPLACE))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001189 return -EEXIST;
1190 if (TC_H_MIN(tcm->tcm_handle))
1191 return -EINVAL;
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001192 q = qdisc_lookup(dev, tcm->tcm_handle);
1193 if (!q)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001194 goto create_n_graft;
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001195 if (n->nlmsg_flags & NLM_F_EXCL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001196 return -EEXIST;
Patrick McHardy1e904742008-01-22 22:11:17 -08001197 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001198 return -EINVAL;
1199 if (q == p ||
1200 (p && check_loop(q, p, 0)))
1201 return -ELOOP;
1202 atomic_inc(&q->refcnt);
1203 goto graft;
1204 } else {
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001205 if (!q)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001206 goto create_n_graft;
1207
1208 /* This magic test requires explanation.
1209 *
1210 * We know, that some child q is already
1211 * attached to this parent and have choice:
1212 * either to change it or to create/graft new one.
1213 *
1214 * 1. We are allowed to create/graft only
1215 * if CREATE and REPLACE flags are set.
1216 *
1217 * 2. If EXCL is set, requestor wanted to say,
1218 * that qdisc tcm_handle is not expected
1219 * to exist, so that we choose create/graft too.
1220 *
1221 * 3. The last case is when no flags are set.
1222 * Alas, it is sort of hole in API, we
1223 * cannot decide what to do unambiguously.
1224 * For now we select create/graft, if
1225 * user gave KIND, which does not match existing.
1226 */
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001227 if ((n->nlmsg_flags & NLM_F_CREATE) &&
1228 (n->nlmsg_flags & NLM_F_REPLACE) &&
1229 ((n->nlmsg_flags & NLM_F_EXCL) ||
Patrick McHardy1e904742008-01-22 22:11:17 -08001230 (tca[TCA_KIND] &&
1231 nla_strcmp(tca[TCA_KIND], q->ops->id))))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001232 goto create_n_graft;
1233 }
1234 }
1235 } else {
1236 if (!tcm->tcm_handle)
1237 return -EINVAL;
1238 q = qdisc_lookup(dev, tcm->tcm_handle);
1239 }
1240
1241 /* Change qdisc parameters */
1242 if (q == NULL)
1243 return -ENOENT;
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001244 if (n->nlmsg_flags & NLM_F_EXCL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001245 return -EEXIST;
Patrick McHardy1e904742008-01-22 22:11:17 -08001246 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001247 return -EINVAL;
1248 err = qdisc_change(q, tca);
1249 if (err == 0)
Tom Goff7316ae82010-03-19 15:40:13 +00001250 qdisc_notify(net, skb, n, clid, NULL, q);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001251 return err;
1252
1253create_n_graft:
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001254 if (!(n->nlmsg_flags & NLM_F_CREATE))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001255 return -ENOENT;
Eric Dumazet24824a02010-10-02 06:11:55 +00001256 if (clid == TC_H_INGRESS) {
1257 if (dev_ingress_queue(dev))
1258 q = qdisc_create(dev, dev_ingress_queue(dev), p,
1259 tcm->tcm_parent, tcm->tcm_parent,
1260 tca, &err);
1261 else
1262 err = -ENOENT;
1263 } else {
Jarek Poplawski926e61b2009-09-15 02:53:07 -07001264 struct netdev_queue *dev_queue;
David S. Miller6ec1c692009-09-06 01:58:51 -07001265
1266 if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
Jarek Poplawski926e61b2009-09-15 02:53:07 -07001267 dev_queue = p->ops->cl_ops->select_queue(p, tcm);
1268 else if (p)
1269 dev_queue = p->dev_queue;
1270 else
1271 dev_queue = netdev_get_tx_queue(dev, 0);
David S. Miller6ec1c692009-09-06 01:58:51 -07001272
Jarek Poplawski926e61b2009-09-15 02:53:07 -07001273 q = qdisc_create(dev, dev_queue, p,
David S. Millerbb949fb2008-07-08 16:55:56 -07001274 tcm->tcm_parent, tcm->tcm_handle,
Patrick McHardyffc8fef2007-07-30 17:11:50 -07001275 tca, &err);
David S. Miller6ec1c692009-09-06 01:58:51 -07001276 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001277 if (q == NULL) {
1278 if (err == -EAGAIN)
1279 goto replay;
1280 return err;
1281 }
1282
1283graft:
Ilpo Järvinene5befbd2008-08-18 22:30:01 -07001284 err = qdisc_graft(dev, p, skb, n, clid, q, NULL);
1285 if (err) {
1286 if (q)
1287 qdisc_destroy(q);
1288 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001289 }
Ilpo Järvinene5befbd2008-08-18 22:30:01 -07001290
Linus Torvalds1da177e2005-04-16 15:20:36 -07001291 return 0;
1292}
1293
/* tc_fill_qdisc - append one qdisc description to a netlink skb
 *
 * Emits the tcmsg header, kind, qdisc-specific options, size table and
 * statistics.  On any failure the skb is trimmed back to its state on
 * entry (@b) and -1 is returned, so a partial message never escapes.
 */
static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
			 u32 portid, u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);	/* rollback point */
	struct gnet_dump d;
	struct qdisc_size_table *stab;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = clid;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = atomic_read(&q->refcnt);
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	if (q->ops->dump && q->ops->dump(q, skb) < 0)
		goto nla_put_failure;
	q->qstats.qlen = q->q.qlen;

	stab = rtnl_dereference(q->stab);
	if (stab && qdisc_dump_stab(skb, stab) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 qdisc_root_sleeping_lock(q), &d) < 0)
		goto nla_put_failure;

	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
		goto nla_put_failure;

	if (gnet_stats_copy_basic(&d, &q->bstats) < 0 ||
	    gnet_stats_copy_rate_est(&d, &q->bstats, &q->rate_est) < 0 ||
	    gnet_stats_copy_queue(&d, &q->qstats) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	/* patch up the final message length now that all attrs are in */
	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
1347
Eric Dumazet53b0f082010-05-22 20:37:44 +00001348static bool tc_qdisc_dump_ignore(struct Qdisc *q)
1349{
1350 return (q->flags & TCQ_F_BUILTIN) ? true : false;
1351}
1352
Tom Goff7316ae82010-03-19 15:40:13 +00001353static int qdisc_notify(struct net *net, struct sk_buff *oskb,
1354 struct nlmsghdr *n, u32 clid,
1355 struct Qdisc *old, struct Qdisc *new)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001356{
1357 struct sk_buff *skb;
Eric W. Biederman15e47302012-09-07 20:12:54 +00001358 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001359
1360 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1361 if (!skb)
1362 return -ENOBUFS;
1363
Eric Dumazet53b0f082010-05-22 20:37:44 +00001364 if (old && !tc_qdisc_dump_ignore(old)) {
Eric W. Biederman15e47302012-09-07 20:12:54 +00001365 if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001366 0, RTM_DELQDISC) < 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001367 goto err_out;
1368 }
Eric Dumazet53b0f082010-05-22 20:37:44 +00001369 if (new && !tc_qdisc_dump_ignore(new)) {
Eric W. Biederman15e47302012-09-07 20:12:54 +00001370 if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001371 old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001372 goto err_out;
1373 }
1374
1375 if (skb->len)
Eric W. Biederman15e47302012-09-07 20:12:54 +00001376 return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001377 n->nlmsg_flags & NLM_F_ECHO);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001378
1379err_out:
1380 kfree_skb(skb);
1381 return -EINVAL;
1382}
1383
David S. Miller30723672008-07-18 22:50:15 -07001384static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
1385 struct netlink_callback *cb,
1386 int *q_idx_p, int s_q_idx)
1387{
1388 int ret = 0, q_idx = *q_idx_p;
1389 struct Qdisc *q;
1390
1391 if (!root)
1392 return 0;
1393
1394 q = root;
1395 if (q_idx < s_q_idx) {
1396 q_idx++;
1397 } else {
1398 if (!tc_qdisc_dump_ignore(q) &&
Eric W. Biederman15e47302012-09-07 20:12:54 +00001399 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
David S. Miller30723672008-07-18 22:50:15 -07001400 cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
1401 goto done;
1402 q_idx++;
1403 }
1404 list_for_each_entry(q, &root->list, list) {
1405 if (q_idx < s_q_idx) {
1406 q_idx++;
1407 continue;
1408 }
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001409 if (!tc_qdisc_dump_ignore(q) &&
Eric W. Biederman15e47302012-09-07 20:12:54 +00001410 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
David S. Miller30723672008-07-18 22:50:15 -07001411 cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
1412 goto done;
1413 q_idx++;
1414 }
1415
1416out:
1417 *q_idx_p = q_idx;
1418 return ret;
1419done:
1420 ret = -1;
1421 goto out;
1422}
1423
Linus Torvalds1da177e2005-04-16 15:20:36 -07001424static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
1425{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001426 struct net *net = sock_net(skb->sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001427 int idx, q_idx;
1428 int s_idx, s_q_idx;
1429 struct net_device *dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001430
1431 s_idx = cb->args[0];
1432 s_q_idx = q_idx = cb->args[1];
stephen hemmingerf1e90162009-11-10 07:54:49 +00001433
1434 rcu_read_lock();
Pavel Emelianov7562f872007-05-03 15:13:45 -07001435 idx = 0;
Tom Goff7316ae82010-03-19 15:40:13 +00001436 for_each_netdev_rcu(net, dev) {
David S. Miller30723672008-07-18 22:50:15 -07001437 struct netdev_queue *dev_queue;
1438
Linus Torvalds1da177e2005-04-16 15:20:36 -07001439 if (idx < s_idx)
Pavel Emelianov7562f872007-05-03 15:13:45 -07001440 goto cont;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001441 if (idx > s_idx)
1442 s_q_idx = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001443 q_idx = 0;
David S. Miller30723672008-07-18 22:50:15 -07001444
Patrick McHardyaf356af2009-09-04 06:41:18 +00001445 if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx) < 0)
David S. Miller30723672008-07-18 22:50:15 -07001446 goto done;
1447
Eric Dumazet24824a02010-10-02 06:11:55 +00001448 dev_queue = dev_ingress_queue(dev);
1449 if (dev_queue &&
1450 tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
1451 &q_idx, s_q_idx) < 0)
David S. Miller30723672008-07-18 22:50:15 -07001452 goto done;
1453
Pavel Emelianov7562f872007-05-03 15:13:45 -07001454cont:
1455 idx++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001456 }
1457
1458done:
stephen hemmingerf1e90162009-11-10 07:54:49 +00001459 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001460
1461 cb->args[0] = idx;
1462 cb->args[1] = q_idx;
1463
1464 return skb->len;
1465}
1466
1467
1468
1469/************************************************
1470 * Traffic classes manipulation. *
1471 ************************************************/
1472
1473
1474
Thomas Graf661d2962013-03-21 07:45:29 +00001475static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001476{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001477 struct net *net = sock_net(skb->sk);
David S. Miller02ef22c2012-06-26 21:50:05 -07001478 struct tcmsg *tcm = nlmsg_data(n);
Patrick McHardy1e904742008-01-22 22:11:17 -08001479 struct nlattr *tca[TCA_MAX + 1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001480 struct net_device *dev;
1481 struct Qdisc *q = NULL;
Eric Dumazet20fea082007-11-14 01:44:41 -08001482 const struct Qdisc_class_ops *cops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001483 unsigned long cl = 0;
1484 unsigned long new_cl;
Hong zhi guode179c82013-03-25 17:36:33 +00001485 u32 portid;
1486 u32 clid;
1487 u32 qid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001488 int err;
1489
Eric W. Biedermandfc47ef2012-11-16 03:03:00 +00001490 if ((n->nlmsg_type != RTM_GETTCLASS) && !capable(CAP_NET_ADMIN))
1491 return -EPERM;
1492
Patrick McHardy1e904742008-01-22 22:11:17 -08001493 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
1494 if (err < 0)
1495 return err;
1496
Hong zhi guode179c82013-03-25 17:36:33 +00001497 dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1498 if (!dev)
1499 return -ENODEV;
1500
Linus Torvalds1da177e2005-04-16 15:20:36 -07001501 /*
1502 parent == TC_H_UNSPEC - unspecified parent.
1503 parent == TC_H_ROOT - class is root, which has no parent.
1504 parent == X:0 - parent is root class.
1505 parent == X:Y - parent is a node in hierarchy.
1506 parent == 0:Y - parent is X:Y, where X:0 is qdisc.
1507
1508 handle == 0:0 - generate handle from kernel pool.
1509 handle == 0:Y - class is X:Y, where X:0 is qdisc.
1510 handle == X:Y - clear.
1511 handle == X:0 - root class.
1512 */
1513
1514 /* Step 1. Determine qdisc handle X:0 */
1515
Hong zhi guode179c82013-03-25 17:36:33 +00001516 portid = tcm->tcm_parent;
1517 clid = tcm->tcm_handle;
1518 qid = TC_H_MAJ(clid);
1519
Eric W. Biederman15e47302012-09-07 20:12:54 +00001520 if (portid != TC_H_ROOT) {
1521 u32 qid1 = TC_H_MAJ(portid);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001522
1523 if (qid && qid1) {
1524 /* If both majors are known, they must be identical. */
1525 if (qid != qid1)
1526 return -EINVAL;
1527 } else if (qid1) {
1528 qid = qid1;
1529 } else if (qid == 0)
Patrick McHardyaf356af2009-09-04 06:41:18 +00001530 qid = dev->qdisc->handle;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001531
1532 /* Now qid is genuine qdisc handle consistent
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001533 * both with parent and child.
1534 *
Eric W. Biederman15e47302012-09-07 20:12:54 +00001535 * TC_H_MAJ(portid) still may be unspecified, complete it now.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001536 */
Eric W. Biederman15e47302012-09-07 20:12:54 +00001537 if (portid)
1538 portid = TC_H_MAKE(qid, portid);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001539 } else {
1540 if (qid == 0)
Patrick McHardyaf356af2009-09-04 06:41:18 +00001541 qid = dev->qdisc->handle;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001542 }
1543
1544 /* OK. Locate qdisc */
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001545 q = qdisc_lookup(dev, qid);
1546 if (!q)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001547 return -ENOENT;
1548
1549 /* An check that it supports classes */
1550 cops = q->ops->cl_ops;
1551 if (cops == NULL)
1552 return -EINVAL;
1553
1554 /* Now try to get class */
1555 if (clid == 0) {
Eric W. Biederman15e47302012-09-07 20:12:54 +00001556 if (portid == TC_H_ROOT)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001557 clid = qid;
1558 } else
1559 clid = TC_H_MAKE(qid, clid);
1560
1561 if (clid)
1562 cl = cops->get(q, clid);
1563
1564 if (cl == 0) {
1565 err = -ENOENT;
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001566 if (n->nlmsg_type != RTM_NEWTCLASS ||
1567 !(n->nlmsg_flags & NLM_F_CREATE))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001568 goto out;
1569 } else {
1570 switch (n->nlmsg_type) {
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +09001571 case RTM_NEWTCLASS:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001572 err = -EEXIST;
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001573 if (n->nlmsg_flags & NLM_F_EXCL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001574 goto out;
1575 break;
1576 case RTM_DELTCLASS:
Patrick McHardyde6d5cd2009-09-04 06:41:16 +00001577 err = -EOPNOTSUPP;
1578 if (cops->delete)
1579 err = cops->delete(q, cl);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001580 if (err == 0)
Tom Goff7316ae82010-03-19 15:40:13 +00001581 tclass_notify(net, skb, n, q, cl, RTM_DELTCLASS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001582 goto out;
1583 case RTM_GETTCLASS:
Tom Goff7316ae82010-03-19 15:40:13 +00001584 err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001585 goto out;
1586 default:
1587 err = -EINVAL;
1588 goto out;
1589 }
1590 }
1591
1592 new_cl = cl;
Patrick McHardyde6d5cd2009-09-04 06:41:16 +00001593 err = -EOPNOTSUPP;
1594 if (cops->change)
Eric W. Biederman15e47302012-09-07 20:12:54 +00001595 err = cops->change(q, clid, portid, tca, &new_cl);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001596 if (err == 0)
Tom Goff7316ae82010-03-19 15:40:13 +00001597 tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001598
1599out:
1600 if (cl)
1601 cops->put(q, cl);
1602
1603 return err;
1604}
1605
1606
/* Build one traffic-class netlink message (RTM_NEWTCLASS / RTM_DELTCLASS)
 * describing class @cl of qdisc @q and append it to @skb.
 *
 * @portid, @seq, @flags and @event populate the netlink message header.
 *
 * Returns the resulting skb length on success, or -1 on failure.  On
 * failure the skb is trimmed back to its length on entry (via @b), so a
 * partially written message never leaks to userspace.
 */
static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
			  unsigned long cl,
			  u32 portid, u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);	/* rollback point */
	struct gnet_dump d;
	const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	/* Explicitly clear padding so no kernel stack bytes reach userspace. */
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = q->handle;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = 0;
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	/* Class-specific attributes; dump() may also refine tcm fields. */
	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 qdisc_root_sleeping_lock(q), &d) < 0)
		goto nla_put_failure;

	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	/* Finalize the header now that the payload length is known. */
	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
1651
Tom Goff7316ae82010-03-19 15:40:13 +00001652static int tclass_notify(struct net *net, struct sk_buff *oskb,
1653 struct nlmsghdr *n, struct Qdisc *q,
1654 unsigned long cl, int event)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001655{
1656 struct sk_buff *skb;
Eric W. Biederman15e47302012-09-07 20:12:54 +00001657 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001658
1659 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1660 if (!skb)
1661 return -ENOBUFS;
1662
Eric W. Biederman15e47302012-09-07 20:12:54 +00001663 if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001664 kfree_skb(skb);
1665 return -EINVAL;
1666 }
1667
Eric W. Biederman15e47302012-09-07 20:12:54 +00001668 return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001669 n->nlmsg_flags & NLM_F_ECHO);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001670}
1671
/* Context carried through a qdisc class walk while dumping all classes
 * of one qdisc into a netlink dump skb.
 */
struct qdisc_dump_args {
	struct qdisc_walker w;	/* must be first: qdisc_class_dump() casts
				 * the walker pointer back to this struct */
	struct sk_buff *skb;	/* skb being filled for the dump */
	struct netlink_callback *cb;	/* netlink dump state (seq, portid) */
};
1677
1678static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
1679{
1680 struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
1681
Eric W. Biederman15e47302012-09-07 20:12:54 +00001682 return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001683 a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
1684}
1685
/* Dump the classes of a single qdisc @q into @skb, honouring netlink
 * dump resume state.
 *
 * @t_p counts qdiscs visited so far in this dump pass; @s_t is the
 * qdisc index (saved in cb->args[0] by the caller) at which the
 * previous pass stopped.  cb->args[1] holds the class index to resume
 * at within that qdisc.
 *
 * Returns 0 to continue with the next qdisc, -1 when the skb is full
 * and the dump must stop (state is saved for the next pass).
 */
static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
				struct tcmsg *tcm, struct netlink_callback *cb,
				int *t_p, int s_t)
{
	struct qdisc_dump_args arg;

	/* Skip qdiscs that are invisible, already dumped (index < s_t),
	 * classless, or filtered out by the requested parent handle.
	 */
	if (tc_qdisc_dump_ignore(q) ||
	    *t_p < s_t || !q->ops->cl_ops ||
	    (tcm->tcm_parent &&
	     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
		(*t_p)++;
		return 0;
	}
	/* A fresh qdisc (not the one we resumed at) starts from class 0:
	 * clear all per-qdisc resume state beyond args[0].
	 */
	if (*t_p > s_t)
		memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
	arg.w.fn = qdisc_class_dump;
	arg.skb = skb;
	arg.cb = cb;
	arg.w.stop = 0;
	arg.w.skip = cb->args[1];	/* classes already emitted last pass */
	arg.w.count = 0;
	q->ops->cl_ops->walk(q, &arg.w);
	cb->args[1] = arg.w.count;	/* remember where to resume */
	if (arg.w.stop)
		return -1;
	(*t_p)++;
	return 0;
}
1714
1715static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
1716 struct tcmsg *tcm, struct netlink_callback *cb,
1717 int *t_p, int s_t)
1718{
1719 struct Qdisc *q;
1720
1721 if (!root)
1722 return 0;
1723
1724 if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
1725 return -1;
1726
1727 list_for_each_entry(q, &root->list, list) {
1728 if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
1729 return -1;
1730 }
1731
1732 return 0;
1733}
1734
/* Netlink dump handler for RTM_GETTCLASS: dump all classes of all
 * qdiscs (egress root tree plus the ingress tree, if present) on the
 * device named by tcm_ifindex.
 *
 * cb->args[0] persists the qdisc index between dump passes; deeper
 * resume state lives in cb->args[1..] (see tc_dump_tclass_qdisc()).
 *
 * Returns skb->len so netlink keeps calling back until a pass adds
 * nothing, or 0 on malformed/unknown-device requests.
 */
static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	struct net *net = sock_net(skb->sk);
	struct netdev_queue *dev_queue;
	struct net_device *dev;
	int t, s_t;

	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
		return 0;
	/* Takes a device reference; released via dev_put() below. */
	dev = dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return 0;

	s_t = cb->args[0];	/* qdisc index where the last pass stopped */
	t = 0;

	if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0)
		goto done;

	dev_queue = dev_ingress_queue(dev);
	if (dev_queue &&
	    tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
				&t, s_t) < 0)
		goto done;	/* falls through to done anyway; kept for symmetry */

done:
	cb->args[0] = t;	/* save progress for the next pass */

	dev_put(dev);
	return skb->len;
}
1767
1768/* Main classifier routine: scans classifier chain attached
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001769 * to this qdisc, (optionally) tests for protocol and asks
1770 * specific classifiers.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001771 */
Eric Dumazetdc7f9f62011-07-05 23:25:42 +00001772int tc_classify_compat(struct sk_buff *skb, const struct tcf_proto *tp,
Patrick McHardy73ca4912007-07-15 00:02:31 -07001773 struct tcf_result *res)
1774{
1775 __be16 protocol = skb->protocol;
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001776 int err;
Patrick McHardy73ca4912007-07-15 00:02:31 -07001777
1778 for (; tp; tp = tp->next) {
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001779 if (tp->protocol != protocol &&
1780 tp->protocol != htons(ETH_P_ALL))
1781 continue;
1782 err = tp->classify(skb, tp, res);
1783
1784 if (err >= 0) {
Patrick McHardy73ca4912007-07-15 00:02:31 -07001785#ifdef CONFIG_NET_CLS_ACT
1786 if (err != TC_ACT_RECLASSIFY && skb->tc_verd)
1787 skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0);
1788#endif
1789 return err;
1790 }
1791 }
1792 return -1;
1793}
1794EXPORT_SYMBOL(tc_classify_compat);
1795
/* Classify @skb against chain @tp, restarting from the head of the
 * chain whenever an action returns TC_ACT_RECLASSIFY.
 *
 * With CONFIG_NET_CLS_ACT, a per-skb loop counter (kept in tc_verd)
 * bounds the number of restarts at MAX_REC_LOOP; exceeding it logs a
 * rate-limited notice and shoots the packet (TC_ACT_SHOT) to break
 * misconfigured reclassify loops.
 *
 * Returns the classifier verdict, or -1 if nothing matched.
 */
int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
		struct tcf_result *res)
{
	int err = 0;
#ifdef CONFIG_NET_CLS_ACT
	const struct tcf_proto *otp = tp;	/* head of chain, for restarts */
reclassify:
#endif

	err = tc_classify_compat(skb, tp, res);
#ifdef CONFIG_NET_CLS_ACT
	if (err == TC_ACT_RECLASSIFY) {
		u32 verd = G_TC_VERD(skb->tc_verd);
		tp = otp;	/* restart from the first classifier */

		if (verd++ >= MAX_REC_LOOP) {
			net_notice_ratelimited("%s: packet reclassify loop rule prio %u protocol %02x\n",
					       tp->q->ops->id,
					       tp->prio & 0xffff,
					       ntohs(tp->protocol));
			return TC_ACT_SHOT;
		}
		/* Persist the bumped loop count on the skb before retrying. */
		skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd);
		goto reclassify;
	}
#endif
	return err;
}
EXPORT_SYMBOL(tc_classify);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001825
Patrick McHardya48b5a62007-03-23 11:29:43 -07001826void tcf_destroy(struct tcf_proto *tp)
1827{
1828 tp->ops->destroy(tp);
1829 module_put(tp->ops->owner);
1830 kfree(tp);
1831}
1832
Patrick McHardyff31ab52008-07-01 19:52:38 -07001833void tcf_destroy_chain(struct tcf_proto **fl)
Patrick McHardya48b5a62007-03-23 11:29:43 -07001834{
1835 struct tcf_proto *tp;
1836
Patrick McHardyff31ab52008-07-01 19:52:38 -07001837 while ((tp = *fl) != NULL) {
1838 *fl = tp->next;
Patrick McHardya48b5a62007-03-23 11:29:43 -07001839 tcf_destroy(tp);
1840 }
1841}
1842EXPORT_SYMBOL(tcf_destroy_chain);
1843
Linus Torvalds1da177e2005-04-16 15:20:36 -07001844#ifdef CONFIG_PROC_FS
1845static int psched_show(struct seq_file *seq, void *v)
1846{
Patrick McHardy3c0cfc12007-10-10 16:32:41 -07001847 struct timespec ts;
1848
1849 hrtimer_get_res(CLOCK_MONOTONIC, &ts);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001850 seq_printf(seq, "%08x %08x %08x %08x\n",
Jarek Poplawskica44d6e2009-06-15 02:31:47 -07001851 (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
Patrick McHardy514bca32007-03-16 12:34:52 -07001852 1000000,
Patrick McHardy3c0cfc12007-10-10 16:32:41 -07001853 (u32)NSEC_PER_SEC/(u32)ktime_to_ns(timespec_to_ktime(ts)));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001854
1855 return 0;
1856}
1857
/* open() handler for /proc/net/psched: hook up the single-record
 * seq_file show routine.
 */
static int psched_open(struct inode *inode, struct file *file)
{
	return single_open(file, psched_show, NULL);
}
1862
/* File operations for /proc/net/psched (single_open-based seq_file). */
static const struct file_operations psched_fops = {
	.owner = THIS_MODULE,
	.open = psched_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
Tom Goff7316ae82010-03-19 15:40:13 +00001870
1871static int __net_init psched_net_init(struct net *net)
1872{
1873 struct proc_dir_entry *e;
1874
Gao fengd4beaa62013-02-18 01:34:54 +00001875 e = proc_create("psched", 0, net->proc_net, &psched_fops);
Tom Goff7316ae82010-03-19 15:40:13 +00001876 if (e == NULL)
1877 return -ENOMEM;
1878
1879 return 0;
1880}
1881
/* Per-netns teardown: remove this namespace's /proc/net/psched entry. */
static void __net_exit psched_net_exit(struct net *net)
{
	remove_proc_entry("psched", net->proc_net);
}
1886#else
1887static int __net_init psched_net_init(struct net *net)
1888{
1889 return 0;
1890}
1891
/* CONFIG_PROC_FS=n stub: nothing to remove. */
static void __net_exit psched_net_exit(struct net *net)
{
}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001895#endif
1896
/* Per-network-namespace hooks for the /proc/net/psched entry. */
static struct pernet_operations psched_net_ops = {
	.init = psched_net_init,
	.exit = psched_net_exit,
};
1901
/* Packet-scheduler subsystem init: set up the per-netns proc entry,
 * register the built-in qdiscs, and hook the qdisc/class rtnetlink
 * message types to their handlers.
 *
 * Returns 0 on success or the pernet registration error.
 */
static int __init pktsched_init(void)
{
	int err;

	err = register_pernet_subsys(&psched_net_ops);
	if (err) {
		pr_err("pktsched_init: "
		       "cannot initialize per netns operations\n");
		return err;
	}

	/* Built-in qdiscs; return values intentionally not checked here —
	 * NOTE(review): confirm registration of built-ins cannot fail.
	 */
	register_qdisc(&pfifo_fast_ops);
	register_qdisc(&pfifo_qdisc_ops);
	register_qdisc(&bfifo_qdisc_ops);
	register_qdisc(&pfifo_head_drop_qdisc_ops);
	register_qdisc(&mq_qdisc_ops);

	/* RTM_GETTCLASS supports dump; the others are doit-only. */
	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc, NULL);
	rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass, NULL);

	return 0;
}

subsys_initcall(pktsched_init);