blob: d4a8bbfcc9530c305a77b349bb30d6c19b6ca2f1 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * net/sched/sch_api.c Packet scheduler API.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 *
11 * Fixes:
12 *
13 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
14 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
15 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
16 */
17
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <linux/module.h>
19#include <linux/types.h>
20#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070021#include <linux/string.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070022#include <linux/errno.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070023#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070024#include <linux/init.h>
25#include <linux/proc_fs.h>
26#include <linux/seq_file.h>
27#include <linux/kmod.h>
28#include <linux/list.h>
Patrick McHardy41794772007-03-16 01:19:15 -070029#include <linux/hrtimer.h>
Jarek Poplawski25bfcd52008-08-18 20:53:34 -070030#include <linux/lockdep.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090031#include <linux/slab.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070032
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020033#include <net/net_namespace.h>
Denis V. Lunevb8542722007-12-01 00:21:31 +110034#include <net/sock.h>
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -070035#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070036#include <net/pkt_sched.h>
37
Tom Goff7316ae82010-03-19 15:40:13 +000038static int qdisc_notify(struct net *net, struct sk_buff *oskb,
39 struct nlmsghdr *n, u32 clid,
Linus Torvalds1da177e2005-04-16 15:20:36 -070040 struct Qdisc *old, struct Qdisc *new);
Tom Goff7316ae82010-03-19 15:40:13 +000041static int tclass_notify(struct net *net, struct sk_buff *oskb,
42 struct nlmsghdr *n, struct Qdisc *q,
43 unsigned long cl, int event);
Linus Torvalds1da177e2005-04-16 15:20:36 -070044
45/*
46
47 Short review.
48 -------------
49
50 This file consists of two interrelated parts:
51
52 1. queueing disciplines manager frontend.
53 2. traffic classes manager frontend.
54
55 Generally, queueing discipline ("qdisc") is a black box,
56 which is able to enqueue packets and to dequeue them (when
57 device is ready to send something) in order and at times
58 determined by algorithm hidden in it.
59
60 qdisc's are divided to two categories:
61 - "queues", which have no internal structure visible from outside.
62 - "schedulers", which split all the packets to "traffic classes",
63 using "packet classifiers" (look at cls_api.c)
64
65 In turn, classes may have child qdiscs (as rule, queues)
66 attached to them etc. etc. etc.
67
68 The goal of the routines in this file is to translate
69 information supplied by user in the form of handles
70 to more intelligible for kernel form, to make some sanity
71 checks and part of work, which is common to all qdiscs
72 and to provide rtnetlink notifications.
73
74 All real intelligent work is done inside qdisc modules.
75
76
77
78 Every discipline has two major routines: enqueue and dequeue.
79
80 ---dequeue
81
82 dequeue usually returns a skb to send. It is allowed to return NULL,
83 but it does not mean that queue is empty, it just means that
84 discipline does not want to send anything this time.
85 Queue is really empty if q->q.qlen == 0.
86 For complicated disciplines with multiple queues q->q is not
87 real packet queue, but however q->q.qlen must be valid.
88
89 ---enqueue
90
91 enqueue returns 0, if packet was enqueued successfully.
92 If packet (this one or another one) was dropped, it returns
93 not zero error code.
94 NET_XMIT_DROP - this packet dropped
95 Expected action: do not backoff, but wait until queue will clear.
96 NET_XMIT_CN - probably this packet enqueued, but another one dropped.
97 Expected action: backoff or ignore
98 NET_XMIT_POLICED - dropped by police.
99 Expected action: backoff or error to real-time apps.
100
101 Auxiliary routines:
102
Jarek Poplawski99c0db22008-10-31 00:45:27 -0700103 ---peek
104
105 like dequeue but without removing a packet from the queue
106
Linus Torvalds1da177e2005-04-16 15:20:36 -0700107 ---reset
108
109 returns qdisc to initial state: purge all buffers, clear all
110 timers, counters (except for statistics) etc.
111
112 ---init
113
114 initializes newly created qdisc.
115
116 ---destroy
117
118 destroys resources allocated by init and during lifetime of qdisc.
119
120 ---change
121
122 changes qdisc parameters.
123 */
124
125/* Protects list of registered TC modules. It is pure SMP lock. */
126static DEFINE_RWLOCK(qdisc_mod_lock);
127
128
129/************************************************
130 * Queueing disciplines manipulation. *
131 ************************************************/
132
133
134/* The list of all installed queueing disciplines. */
135
136static struct Qdisc_ops *qdisc_base;
137
Zhi Yong Wu21eb2182014-01-01 04:34:51 +0800138/* Register/unregister queueing discipline */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700139
/**
 * register_qdisc - register a qdisc type with the packet scheduler core
 * @qops: ops table describing the new qdisc type
 *
 * Adds @qops to the global singly-linked list of installed disciplines.
 * Missing enqueue/dequeue/peek hooks are filled in from noop_qdisc_ops,
 * and a classful qdisc's class ops are sanity-checked before acceptance.
 *
 * Returns 0 on success, -EEXIST if a qdisc with the same id is already
 * registered, or -EINVAL if the ops table is inconsistent.
 */
int register_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int rc = -EEXIST;

	write_lock(&qdisc_mod_lock);
	/* Walk to the tail while rejecting duplicate ids; qp ends up
	 * pointing at the terminating NULL link, ready for the append.
	 */
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (!strcmp(qops->id, q->id))
			goto out;

	if (qops->enqueue == NULL)
		qops->enqueue = noop_qdisc_ops.enqueue;
	if (qops->peek == NULL) {
		/* A peek default only makes sense when dequeue is also the
		 * noop default; a real dequeue without peek is rejected.
		 */
		if (qops->dequeue == NULL)
			qops->peek = noop_qdisc_ops.peek;
		else
			goto out_einval;
	}
	if (qops->dequeue == NULL)
		qops->dequeue = noop_qdisc_ops.dequeue;

	if (qops->cl_ops) {
		const struct Qdisc_class_ops *cops = qops->cl_ops;

		/* Classful qdiscs must implement the basic class lifecycle. */
		if (!(cops->get && cops->put && cops->walk && cops->leaf))
			goto out_einval;

		/* A filter chain requires the bind/unbind pair as well. */
		if (cops->tcf_chain && !(cops->bind_tcf && cops->unbind_tcf))
			goto out_einval;
	}

	qops->next = NULL;
	*qp = qops;
	rc = 0;
out:
	write_unlock(&qdisc_mod_lock);
	return rc;

out_einval:
	rc = -EINVAL;
	goto out;
}
EXPORT_SYMBOL(register_qdisc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700183
184int unregister_qdisc(struct Qdisc_ops *qops)
185{
186 struct Qdisc_ops *q, **qp;
187 int err = -ENOENT;
188
189 write_lock(&qdisc_mod_lock);
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000190 for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700191 if (q == qops)
192 break;
193 if (q) {
194 *qp = q->next;
195 q->next = NULL;
196 err = 0;
197 }
198 write_unlock(&qdisc_mod_lock);
199 return err;
200}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800201EXPORT_SYMBOL(unregister_qdisc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700202
/* Get default qdisc if not otherwise specified */
void qdisc_get_default(char *name, size_t len)
{
	/* Copy the current default qdisc id into @name (at most @len bytes,
	 * NUL-terminated by strlcpy).  The read lock keeps default_qdisc_ops
	 * stable against a concurrent qdisc_set_default().
	 */
	read_lock(&qdisc_mod_lock);
	strlcpy(name, default_qdisc_ops->id, len);
	read_unlock(&qdisc_mod_lock);
}
210
211static struct Qdisc_ops *qdisc_lookup_default(const char *name)
212{
213 struct Qdisc_ops *q = NULL;
214
215 for (q = qdisc_base; q; q = q->next) {
216 if (!strcmp(name, q->id)) {
217 if (!try_module_get(q->owner))
218 q = NULL;
219 break;
220 }
221 }
222
223 return q;
224}
225
/* Set new default qdisc to use */
int qdisc_set_default(const char *name)
{
	const struct Qdisc_ops *ops;

	/* Changing the system-wide default is a privileged operation. */
	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	write_lock(&qdisc_mod_lock);
	ops = qdisc_lookup_default(name);
	if (!ops) {
		/* Not found, drop lock and try to load module */
		/* request_module() may sleep, so it cannot run under the
		 * rwlock; re-acquire and look up again afterwards.
		 */
		write_unlock(&qdisc_mod_lock);
		request_module("sch_%s", name);
		write_lock(&qdisc_mod_lock);

		ops = qdisc_lookup_default(name);
	}

	if (ops) {
		/* Set new default */
		/* Drop the reference held on the previous default; the new
		 * ops already carries a ref from qdisc_lookup_default().
		 */
		module_put(default_qdisc_ops->owner);
		default_qdisc_ops = ops;
	}
	write_unlock(&qdisc_mod_lock);

	return ops ? 0 : -ENOENT;
}
254
Linus Torvalds1da177e2005-04-16 15:20:36 -0700255/* We know handle. Find qdisc among all qdisc's attached to device
Eric Dumazet4eaf3b82015-12-01 20:08:51 -0800256 * (root qdisc, all its children, children of children etc.)
257 * Note: caller either uses rtnl or rcu_read_lock()
Linus Torvalds1da177e2005-04-16 15:20:36 -0700258 */
259
/* Search @root and every qdisc linked on its ->list for @handle.
 * Built-in qdiscs (TCQ_F_BUILTIN) never match by handle.
 * Caller holds either rtnl or rcu_read_lock() (see comment above).
 */
static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
{
	struct Qdisc *q;

	if (!(root->flags & TCQ_F_BUILTIN) &&
	    root->handle == handle)
		return root;

	/* RCU-safe traversal of the per-root qdisc list. */
	list_for_each_entry_rcu(q, &root->list, list) {
		if (q->handle == handle)
			return q;
	}
	return NULL;
}
274
/* Link @q onto its device root qdisc's list so qdisc_lookup() can find it.
 * Root and ingress qdiscs are not listed: the root IS the list head, and
 * ingress is reached separately via dev_ingress_queue().
 */
void qdisc_list_add(struct Qdisc *q)
{
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
		struct Qdisc *root = qdisc_dev(q)->qdisc;

		/* Attaching under the noop qdisc would lose the entry. */
		WARN_ON_ONCE(root == &noop_qdisc);
		ASSERT_RTNL();
		list_add_tail_rcu(&q->list, &root->list);
	}
}
EXPORT_SYMBOL(qdisc_list_add);
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700286
/* Undo qdisc_list_add(): unlink @q from its root's list (RCU-safe).
 * Same exclusions apply — root and ingress qdiscs were never listed.
 */
void qdisc_list_del(struct Qdisc *q)
{
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
		ASSERT_RTNL();
		list_del_rcu(&q->list);
	}
}
EXPORT_SYMBOL(qdisc_list_del);
295
/* Find the qdisc with @handle on @dev: first search the egress hierarchy
 * rooted at dev->qdisc, then (if present) the ingress queue's sleeping
 * qdisc hierarchy.  Returns NULL if no match.
 */
struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
{
	struct Qdisc *q;

	q = qdisc_match_from_root(dev->qdisc, handle);
	if (q)
		goto out;

	if (dev_ingress_queue(dev))
		q = qdisc_match_from_root(
			dev_ingress_queue(dev)->qdisc_sleeping,
			handle);
out:
	return q;
}
311
/* Resolve @classid within parent qdisc @p and return the child (leaf)
 * qdisc attached to that class, or NULL if @p is classless or the class
 * does not exist.  The class reference taken by get() is dropped before
 * returning; the leaf pointer itself carries no reference.
 */
static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
{
	unsigned long cl;
	struct Qdisc *leaf;
	const struct Qdisc_class_ops *cops = p->ops->cl_ops;

	if (cops == NULL)
		return NULL;
	cl = cops->get(p, classid);

	if (cl == 0)
		return NULL;
	leaf = cops->leaf(p, cl);
	cops->put(p, cl);
	return leaf;
}
328
329/* Find queueing discipline by name */
330
Patrick McHardy1e904742008-01-22 22:11:17 -0800331static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700332{
333 struct Qdisc_ops *q = NULL;
334
335 if (kind) {
336 read_lock(&qdisc_mod_lock);
337 for (q = qdisc_base; q; q = q->next) {
Patrick McHardy1e904742008-01-22 22:11:17 -0800338 if (nla_strcmp(kind, q->id) == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700339 if (!try_module_get(q->owner))
340 q = NULL;
341 break;
342 }
343 }
344 read_unlock(&qdisc_mod_lock);
345 }
346 return q;
347}
348
Jesper Dangaard Brouer8a8e3d82013-08-14 23:47:11 +0200349/* The linklayer setting were not transferred from iproute2, in older
350 * versions, and the rate tables lookup systems have been dropped in
351 * the kernel. To keep backward compatible with older iproute2 tc
352 * utils, we detect the linklayer setting by detecting if the rate
353 * table were modified.
354 *
355 * For linklayer ATM table entries, the rate table will be aligned to
356 * 48 bytes, thus some table entries will contain the same value. The
357 * mpu (min packet unit) is also encoded into the old rate table, thus
358 * starting from the mpu, we find low and high table entries for
359 * mapping this cell. If these entries contain the same value, when
360 * the rate tables have been modified for linklayer ATM.
361 *
362 * This is done by rounding mpu to the nearest 48 bytes cell/entry,
363 * and then roundup to the next cell, calc the table entry one below,
364 * and compare.
365 */
/* Heuristically classify a legacy rate table as ATM or Ethernet framed;
 * see the block comment above for the full rationale.  Returns one of
 * the TC_LINKLAYER_* constants.
 */
static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
{
	/* Round mpu to its 48-byte ATM cell, then probe the table slots
	 * just below/above that cell boundary.
	 */
	int low = roundup(r->mpu, 48);
	int high = roundup(low+1, 48);
	int cell_low = low >> r->cell_log;
	int cell_high = (high >> r->cell_log) - 1;

	/* rtab is too inaccurate at rates > 100Mbit/s */
	if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
		pr_debug("TC linklayer: Giving up ATM detection\n");
		return TC_LINKLAYER_ETHERNET;
	}

	/* Equal entries across the cell boundary (within the 256-entry
	 * table) mean the table was pre-scaled for ATM cells.
	 */
	if ((cell_high > cell_low) && (cell_high < 256)
	    && (rtab[cell_low] == rtab[cell_high])) {
		pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
			 cell_low, cell_high, rtab[cell_high]);
		return TC_LINKLAYER_ATM;
	}
	return TC_LINKLAYER_ETHERNET;
}
387
Linus Torvalds1da177e2005-04-16 15:20:36 -0700388static struct qdisc_rate_table *qdisc_rtab_list;
389
Patrick McHardy1e904742008-01-22 22:11:17 -0800390struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700391{
392 struct qdisc_rate_table *rtab;
393
Eric Dumazet40edeff2013-06-02 11:15:55 +0000394 if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
395 nla_len(tab) != TC_RTAB_SIZE)
396 return NULL;
397
Linus Torvalds1da177e2005-04-16 15:20:36 -0700398 for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
Eric Dumazet40edeff2013-06-02 11:15:55 +0000399 if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
400 !memcmp(&rtab->data, nla_data(tab), 1024)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700401 rtab->refcnt++;
402 return rtab;
403 }
404 }
405
Linus Torvalds1da177e2005-04-16 15:20:36 -0700406 rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
407 if (rtab) {
408 rtab->rate = *r;
409 rtab->refcnt = 1;
Patrick McHardy1e904742008-01-22 22:11:17 -0800410 memcpy(rtab->data, nla_data(tab), 1024);
Jesper Dangaard Brouer8a8e3d82013-08-14 23:47:11 +0200411 if (r->linklayer == TC_LINKLAYER_UNAWARE)
412 r->linklayer = __detect_linklayer(r, rtab->data);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700413 rtab->next = qdisc_rtab_list;
414 qdisc_rtab_list = rtab;
415 }
416 return rtab;
417}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800418EXPORT_SYMBOL(qdisc_get_rtab);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700419
420void qdisc_put_rtab(struct qdisc_rate_table *tab)
421{
422 struct qdisc_rate_table *rtab, **rtabp;
423
424 if (!tab || --tab->refcnt)
425 return;
426
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000427 for (rtabp = &qdisc_rtab_list;
428 (rtab = *rtabp) != NULL;
429 rtabp = &rtab->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700430 if (rtab == tab) {
431 *rtabp = rtab->next;
432 kfree(rtab);
433 return;
434 }
435 }
436}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800437EXPORT_SYMBOL(qdisc_put_rtab);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700438
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700439static LIST_HEAD(qdisc_stab_list);
440static DEFINE_SPINLOCK(qdisc_stab_lock);
441
442static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
443 [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) },
444 [TCA_STAB_DATA] = { .type = NLA_BINARY },
445};
446
/* Parse a TCA_STAB netlink attribute into a size table, sharing an
 * identical table from qdisc_stab_list when possible (refcounted).
 * Returns the table or an ERR_PTR on parse/validation/allocation failure.
 */
static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
{
	struct nlattr *tb[TCA_STAB_MAX + 1];
	struct qdisc_size_table *stab;
	struct tc_sizespec *s;
	unsigned int tsize = 0;
	u16 *tab = NULL;
	int err;

	err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy);
	if (err < 0)
		return ERR_PTR(err);
	if (!tb[TCA_STAB_BASE])
		return ERR_PTR(-EINVAL);

	s = nla_data(tb[TCA_STAB_BASE]);

	/* A nonzero tsize in the sizespec promises a data attribute. */
	if (s->tsize > 0) {
		if (!tb[TCA_STAB_DATA])
			return ERR_PTR(-EINVAL);
		tab = nla_data(tb[TCA_STAB_DATA]);
		tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
	}

	/* Declared size must match the data actually supplied. */
	if (tsize != s->tsize || (!tab && tsize > 0))
		return ERR_PTR(-EINVAL);

	spin_lock(&qdisc_stab_lock);

	/* Reuse an identical table (same szopts and same data). */
	list_for_each_entry(stab, &qdisc_stab_list, list) {
		if (memcmp(&stab->szopts, s, sizeof(*s)))
			continue;
		if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
			continue;
		stab->refcnt++;
		spin_unlock(&qdisc_stab_lock);
		return stab;
	}

	/* Not found: allocate outside the spinlock (GFP_KERNEL may sleep). */
	spin_unlock(&qdisc_stab_lock);

	stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
	if (!stab)
		return ERR_PTR(-ENOMEM);

	stab->refcnt = 1;
	stab->szopts = *s;
	if (tsize > 0)
		memcpy(stab->data, tab, tsize * sizeof(u16));

	/* NOTE(review): a racing allocator may insert a duplicate table
	 * between the unlock and this relock; both copies stay valid,
	 * presumably only the sharing opportunity is lost — confirm.
	 */
	spin_lock(&qdisc_stab_lock);
	list_add_tail(&stab->list, &qdisc_stab_list);
	spin_unlock(&qdisc_stab_lock);

	return stab;
}
503
/* RCU callback: free the size table embedding @head after a grace period. */
static void stab_kfree_rcu(struct rcu_head *head)
{
	kfree(container_of(head, struct qdisc_size_table, rcu));
}
508
/* Drop one reference on a shared size table; the last put unlinks it and
 * defers the free past an RCU-bh grace period, since readers may still
 * be using it from __qdisc_calculate_pkt_len().  NULL is a no-op.
 */
void qdisc_put_stab(struct qdisc_size_table *tab)
{
	if (!tab)
		return;

	spin_lock(&qdisc_stab_lock);

	if (--tab->refcnt == 0) {
		list_del(&tab->list);
		call_rcu_bh(&tab->rcu, stab_kfree_rcu);
	}

	spin_unlock(&qdisc_stab_lock);
}
EXPORT_SYMBOL(qdisc_put_stab);
524
525static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
526{
527 struct nlattr *nest;
528
529 nest = nla_nest_start(skb, TCA_STAB);
Patrick McHardy3aa46142008-11-20 04:07:14 -0800530 if (nest == NULL)
531 goto nla_put_failure;
David S. Miller1b34ec42012-03-29 05:11:39 -0400532 if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
533 goto nla_put_failure;
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700534 nla_nest_end(skb, nest);
535
536 return skb->len;
537
538nla_put_failure:
539 return -1;
540}
541
/* Translate skb->len through the size table @stab and store the result
 * in qdisc_skb_cb(skb)->pkt_len.  The table maps (len + overhead),
 * cell-aligned and scaled by cell_log, to a table entry; lengths beyond
 * the table are extrapolated from the last entry.  Result is clamped to
 * a minimum of 1.
 */
void __qdisc_calculate_pkt_len(struct sk_buff *skb, const struct qdisc_size_table *stab)
{
	int pkt_len, slot;

	pkt_len = skb->len + stab->szopts.overhead;
	/* An empty table means only the overhead adjustment applies. */
	if (unlikely(!stab->szopts.tsize))
		goto out;

	slot = pkt_len + stab->szopts.cell_align;
	if (unlikely(slot < 0))
		slot = 0;

	slot >>= stab->szopts.cell_log;
	if (likely(slot < stab->szopts.tsize))
		pkt_len = stab->data[slot];
	else
		/* Past the end: extrapolate by whole-table multiples of the
		 * last entry plus the wrapped remainder slot.
		 */
		pkt_len = stab->data[stab->szopts.tsize - 1] *
				(slot / stab->szopts.tsize) +
				stab->data[slot % stab->szopts.tsize];

	pkt_len <<= stab->szopts.size_log;
out:
	if (unlikely(pkt_len < 1))
		pkt_len = 1;
	qdisc_skb_cb(skb)->pkt_len = pkt_len;
}
EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700569
/* One-shot warning that a qdisc appears non-work-conserving; the
 * TCQ_F_WARN_NONWC flag suppresses repeats for the same qdisc.
 */
void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
{
	if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
		pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
			txt, qdisc->ops->id, qdisc->handle >> 16);
		qdisc->flags |= TCQ_F_WARN_NONWC;
	}
}
EXPORT_SYMBOL(qdisc_warn_nonwc);
579
/* hrtimer callback: the throttling period has expired, so clear the
 * throttled state and reschedule the root qdisc for transmission.
 * Runs in hrtimer (softirq) context; rcu_read_lock protects the qdisc
 * pointers dereferenced via qdisc_root().
 */
static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
{
	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
						 timer);

	rcu_read_lock();
	qdisc_unthrottled(wd->qdisc);
	__netif_schedule(qdisc_root(wd->qdisc));
	rcu_read_unlock();

	return HRTIMER_NORESTART;
}
592
/* Initialize a qdisc watchdog: an absolute, CPU-pinned monotonic hrtimer
 * whose expiry calls qdisc_watchdog() for @qdisc.
 */
void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
{
	hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
	wd->timer.function = qdisc_watchdog;
	wd->qdisc = qdisc;
}
EXPORT_SYMBOL(qdisc_watchdog_init);
600
/* Arm the watchdog to fire at absolute time @expires (nanoseconds),
 * optionally marking the qdisc throttled.  No-op if the root qdisc is
 * being deactivated, or if the timer is already set for the same expiry
 * (avoids a redundant hrtimer reprogram).
 */
void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires, bool throttle)
{
	if (test_bit(__QDISC_STATE_DEACTIVATED,
		     &qdisc_root_sleeping(wd->qdisc)->state))
		return;

	if (throttle)
		qdisc_throttled(wd->qdisc);

	if (wd->last_expires == expires)
		return;

	wd->last_expires = expires;
	hrtimer_start(&wd->timer,
		      ns_to_ktime(expires),
		      HRTIMER_MODE_ABS_PINNED);
}
EXPORT_SYMBOL(qdisc_watchdog_schedule_ns);
Patrick McHardy41794772007-03-16 01:19:15 -0700619
/* Stop a pending watchdog timer and clear the qdisc's throttled state. */
void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
{
	hrtimer_cancel(&wd->timer);
	qdisc_unthrottled(wd->qdisc);
}
EXPORT_SYMBOL(qdisc_watchdog_cancel);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700626
Adrian Bunka94f7792008-07-22 14:20:11 -0700627static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
Patrick McHardy6fe1c7a2008-07-05 23:21:31 -0700628{
629 unsigned int size = n * sizeof(struct hlist_head), i;
630 struct hlist_head *h;
631
632 if (size <= PAGE_SIZE)
633 h = kmalloc(size, GFP_KERNEL);
634 else
635 h = (struct hlist_head *)
636 __get_free_pages(GFP_KERNEL, get_order(size));
637
638 if (h != NULL) {
639 for (i = 0; i < n; i++)
640 INIT_HLIST_HEAD(&h[i]);
641 }
642 return h;
643}
644
645static void qdisc_class_hash_free(struct hlist_head *h, unsigned int n)
646{
647 unsigned int size = n * sizeof(struct hlist_head);
648
649 if (size <= PAGE_SIZE)
650 kfree(h);
651 else
652 free_pages((unsigned long)h, get_order(size));
653}
654
/* Double the class hash table when its load factor exceeds 0.75.
 * The new table is allocated outside the qdisc tree lock (allocation may
 * sleep); only the rehash and pointer swap run under sch_tree_lock(),
 * and the old table is freed after unlocking.  Allocation failure is
 * silently tolerated — the table simply stays at its current size.
 */
void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
{
	struct Qdisc_class_common *cl;
	struct hlist_node *next;
	struct hlist_head *nhash, *ohash;
	unsigned int nsize, nmask, osize;
	unsigned int i, h;

	/* Rehash when load factor exceeds 0.75 */
	if (clhash->hashelems * 4 <= clhash->hashsize * 3)
		return;
	nsize = clhash->hashsize * 2;
	nmask = nsize - 1;
	nhash = qdisc_class_hash_alloc(nsize);
	if (nhash == NULL)
		return;

	ohash = clhash->hash;
	osize = clhash->hashsize;

	sch_tree_lock(sch);
	/* _safe variant: hlist_add_head() unlinks entries from the old
	 * chain as we move them.
	 */
	for (i = 0; i < osize; i++) {
		hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
			h = qdisc_class_hash(cl->classid, nmask);
			hlist_add_head(&cl->hnode, &nhash[h]);
		}
	}
	clhash->hash = nhash;
	clhash->hashsize = nsize;
	clhash->hashmask = nmask;
	sch_tree_unlock(sch);

	qdisc_class_hash_free(ohash, osize);
}
EXPORT_SYMBOL(qdisc_class_hash_grow);
690
691int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
692{
693 unsigned int size = 4;
694
695 clhash->hash = qdisc_class_hash_alloc(size);
696 if (clhash->hash == NULL)
697 return -ENOMEM;
698 clhash->hashsize = size;
699 clhash->hashmask = size - 1;
700 clhash->hashelems = 0;
701 return 0;
702}
703EXPORT_SYMBOL(qdisc_class_hash_init);
704
/* Free the bucket array of a class hash (entries must already be gone). */
void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
{
	qdisc_class_hash_free(clhash->hash, clhash->hashsize);
}
EXPORT_SYMBOL(qdisc_class_hash_destroy);
710
/* Insert @cl into the bucket selected by its classid and bump the
 * element count (which drives qdisc_class_hash_grow()).
 */
void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	unsigned int h;

	INIT_HLIST_NODE(&cl->hnode);
	h = qdisc_class_hash(cl->classid, clhash->hashmask);
	hlist_add_head(&cl->hnode, &clhash->hash[h]);
	clhash->hashelems++;
}
EXPORT_SYMBOL(qdisc_class_hash_insert);
722
/* Unlink @cl from its bucket and decrement the element count. */
void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	hlist_del(&cl->hnode);
	clhash->hashelems--;
}
EXPORT_SYMBOL(qdisc_class_hash_remove);
730
/* Allocate an unique handle from space managed by kernel
 * Possible range is [8000-FFFF]:0000 (0x8000 values)
 */
static u32 qdisc_alloc_handle(struct net_device *dev)
{
	/* At most 0x8000 probes — one per candidate major number. */
	int i = 0x8000;
	/* Rotating cursor shared across devices; uniqueness is verified
	 * per-device by qdisc_lookup() below.
	 */
	static u32 autohandle = TC_H_MAKE(0x80000000U, 0);

	do {
		autohandle += TC_H_MAKE(0x10000U, 0);
		/* Wrap around, skipping the reserved TC_H_ROOT value. */
		if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
			autohandle = TC_H_MAKE(0x80000000U, 0);
		if (!qdisc_lookup(dev, autohandle))
			return autohandle;
		cond_resched();
	} while (--i > 0);

	/* Entire range in use on this device. */
	return 0;
}
750
/* Propagate a decrease of @n packets / @len bytes up the qdisc tree:
 * every ancestor's qlen/backlog is reduced and its drop counter bumped,
 * and classful ancestors are told via qlen_notify so they can deactivate
 * now-empty classes.  The walk stops at the root, at an ingress parent,
 * or at a TCQ_F_NOPARENT qdisc.
 */
void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
			       unsigned int len)
{
	const struct Qdisc_class_ops *cops;
	unsigned long cl;
	u32 parentid;
	int drops;

	if (n == 0 && len == 0)
		return;
	drops = max_t(int, n, 0);
	rcu_read_lock();
	while ((parentid = sch->parent)) {
		if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
			break;

		if (sch->flags & TCQ_F_NOPARENT)
			break;
		/* TODO: perform the search on a per txq basis */
		sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
		if (sch == NULL) {
			/* Only the root major may legitimately be missing. */
			WARN_ON_ONCE(parentid != TC_H_ROOT);
			break;
		}
		/* Parents found by handle are classful, so cl_ops is
		 * presumably non-NULL here — relied on, not checked.
		 */
		cops = sch->ops->cl_ops;
		if (cops->qlen_notify) {
			cl = cops->get(sch, parentid);
			cops->qlen_notify(sch, cl);
			cops->put(sch, cl);
		}
		sch->q.qlen -= n;
		sch->qstats.backlog -= len;
		__qdisc_qstats_drop(sch, drops);
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700788
/* Send an rtnetlink notification about a qdisc replacement (if either
 * side exists) and destroy the old qdisc.  @new may be NULL (pure
 * deletion) and @old may be NULL (pure addition).
 */
static void notify_and_destroy(struct net *net, struct sk_buff *skb,
			       struct nlmsghdr *n, u32 clid,
			       struct Qdisc *old, struct Qdisc *new)
{
	if (new || old)
		qdisc_notify(net, skb, n, clid, old, new);

	if (old)
		qdisc_destroy(old);
}
799
800/* Graft qdisc "new" to class "classid" of qdisc "parent" or
801 * to device "dev".
802 *
803 * When appropriate send a netlink notification using 'skb'
804 * and "n".
805 *
806 * On success, destroy old qdisc.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700807 */
808
static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
		       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
		       struct Qdisc *new, struct Qdisc *old)
{
	struct Qdisc *q = old;
	struct net *net = dev_net(dev);
	int err = 0;

	if (parent == NULL) {
		/* Root (or ingress) graft: attach directly to the device. */
		unsigned int i, num_q, ingress;

		ingress = 0;
		num_q = dev->num_tx_queues;
		if ((q && q->flags & TCQ_F_INGRESS) ||
		    (new && new->flags & TCQ_F_INGRESS)) {
			/* Ingress has exactly one (special) queue. */
			num_q = 1;
			ingress = 1;
			if (!dev_ingress_queue(dev))
				return -ENOENT;
		}

		/* Stop traffic while the qdisc tree is being rewired. */
		if (dev->flags & IFF_UP)
			dev_deactivate(dev);

		/* Qdiscs with an ->attach() hook (e.g. mq-style) distribute
		 * themselves over the tx queues; skip the per-queue grafting
		 * loop below and let ->attach() do it later.
		 */
		if (new && new->ops->attach)
			goto skip;

		for (i = 0; i < num_q; i++) {
			struct netdev_queue *dev_queue = dev_ingress_queue(dev);

			if (!ingress)
				dev_queue = netdev_get_tx_queue(dev, i);

			old = dev_graft_qdisc(dev_queue, new);
			/* One reference per tx queue beyond the first. */
			if (new && i > 0)
				atomic_inc(&new->refcnt);

			/* Ingress "old" is destroyed via notify_and_destroy()
			 * below instead, so only free egress qdiscs here.
			 */
			if (!ingress)
				qdisc_destroy(old);
		}

skip:
		if (!ingress) {
			notify_and_destroy(net, skb, n, classid,
					   dev->qdisc, new);
			/* dev->qdisc holds its own reference. */
			if (new && !new->ops->attach)
				atomic_inc(&new->refcnt);
			dev->qdisc = new ? : &noop_qdisc;

			if (new && new->ops->attach)
				new->ops->attach(new);
		} else {
			notify_and_destroy(net, skb, n, classid, old, new);
		}

		if (dev->flags & IFF_UP)
			dev_activate(dev);
	} else {
		/* Graft into a class of the parent qdisc via its class ops. */
		const struct Qdisc_class_ops *cops = parent->ops->cl_ops;

		err = -EOPNOTSUPP;
		if (cops && cops->graft) {
			unsigned long cl = cops->get(parent, classid);
			if (cl) {
				err = cops->graft(parent, cl, new, &old);
				cops->put(parent, cl);
			} else
				err = -ENOENT;
		}
		if (!err)
			notify_and_destroy(net, skb, n, classid, old, new);
	}
	return err;
}
883
/* lockdep annotation is needed for ingress; egress gets it only for name.
 * qdisc_create() assigns the rx class to TC_H_INGRESS qdiscs and the tx
 * class to everything else, so the two directions get distinct lock classes.
 */
static struct lock_class_key qdisc_tx_lock;
static struct lock_class_key qdisc_rx_lock;
887
Linus Torvalds1da177e2005-04-16 15:20:36 -0700888/*
889 Allocate and initialize new qdisc.
890
891 Parameters are passed via opt.
892 */
893
/* Allocate and initialize a new qdisc of the kind given in tca[TCA_KIND].
 * On success returns the qdisc, already added to the qdisc list; on failure
 * returns NULL and stores the error in *errp.  A return of NULL with
 * *errp == -EAGAIN means a module was loaded and the caller must replay
 * the whole request.
 */
static struct Qdisc *
qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
	     struct Qdisc *p, u32 parent, u32 handle,
	     struct nlattr **tca, int *errp)
{
	int err;
	struct nlattr *kind = tca[TCA_KIND];
	struct Qdisc *sch;
	struct Qdisc_ops *ops;
	struct qdisc_size_table *stab;

	ops = qdisc_lookup_ops(kind);
#ifdef CONFIG_MODULES
	if (ops == NULL && kind != NULL) {
		char name[IFNAMSIZ];
		if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
			/* We dropped the RTNL semaphore in order to
			 * perform the module load. So, even if we
			 * succeeded in loading the module we have to
			 * tell the caller to replay the request. We
			 * indicate this using -EAGAIN.
			 * We replay the request because the device may
			 * go away in the mean time.
			 */
			rtnl_unlock();
			request_module("sch_%s", name);
			rtnl_lock();
			ops = qdisc_lookup_ops(kind);
			if (ops != NULL) {
				/* We will try again qdisc_lookup_ops,
				 * so don't keep a reference.
				 */
				module_put(ops->owner);
				err = -EAGAIN;
				goto err_out;
			}
		}
	}
#endif

	err = -ENOENT;
	if (ops == NULL)
		goto err_out;

	/* qdisc_alloc() takes a reference on the device (released via
	 * dev_put() on the error path below).
	 */
	sch = qdisc_alloc(dev_queue, ops);
	if (IS_ERR(sch)) {
		err = PTR_ERR(sch);
		goto err_out2;
	}

	sch->parent = parent;

	if (handle == TC_H_INGRESS) {
		sch->flags |= TCQ_F_INGRESS;
		handle = TC_H_MAKE(TC_H_INGRESS, 0);
		lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock);
	} else {
		if (handle == 0) {
			/* No handle requested: allocate one from the pool. */
			handle = qdisc_alloc_handle(dev);
			err = -ENOMEM;
			if (handle == 0)
				goto err_out3;
		}
		lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
		if (!netif_is_multiqueue(dev))
			sch->flags |= TCQ_F_ONETXQUEUE;
	}

	sch->handle = handle;

	if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
		if (qdisc_is_percpu_stats(sch)) {
			sch->cpu_bstats =
				netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
			if (!sch->cpu_bstats)
				goto err_out4;

			sch->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
			if (!sch->cpu_qstats)
				goto err_out4;
		}

		if (tca[TCA_STAB]) {
			stab = qdisc_get_stab(tca[TCA_STAB]);
			if (IS_ERR(stab)) {
				err = PTR_ERR(stab);
				goto err_out4;
			}
			rcu_assign_pointer(sch->stab, stab);
		}
		if (tca[TCA_RATE]) {
			seqcount_t *running;

			err = -EOPNOTSUPP;
			if (sch->flags & TCQ_F_MQROOT)
				goto err_out4;

			/* Pick which seqcount guards the stats: the root
			 * qdisc's for ordinary children, our own otherwise.
			 */
			if ((sch->parent != TC_H_ROOT) &&
			    !(sch->flags & TCQ_F_INGRESS) &&
			    (!p || !(p->flags & TCQ_F_MQROOT)))
				running = qdisc_root_sleeping_running(sch);
			else
				running = &sch->running;

			err = gen_new_estimator(&sch->bstats,
						sch->cpu_bstats,
						&sch->rate_est,
						NULL,
						running,
						tca[TCA_RATE]);
			if (err)
				goto err_out4;
		}

		qdisc_list_add(sch);

		return sch;
	}
err_out3:
	dev_put(dev);
	kfree((char *) sch - sch->padded);
err_out2:
	module_put(ops->owner);
err_out:
	*errp = err;
	return NULL;

err_out4:
	free_percpu(sch->cpu_bstats);
	free_percpu(sch->cpu_qstats);
	/*
	 * Any broken qdiscs that would require a ops->reset() here?
	 * The qdisc was never in action so it shouldn't be necessary.
	 */
	qdisc_put_stab(rtnl_dereference(sch->stab));
	if (ops->destroy)
		ops->destroy(sch);
	goto err_out3;
}
1033
/* Reconfigure an existing qdisc in place: forward TCA_OPTIONS to the
 * qdisc's ->change(), swap in a new size table (TCA_STAB), and replace
 * the rate estimator (TCA_RATE).  Returns 0 on success or a negative
 * errno from ->change()/stab lookup; failures after ->change() succeeded
 * cannot be rolled back.
 */
static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
{
	struct qdisc_size_table *ostab, *stab = NULL;
	int err = 0;

	if (tca[TCA_OPTIONS]) {
		if (sch->ops->change == NULL)
			return -EINVAL;
		err = sch->ops->change(sch, tca[TCA_OPTIONS]);
		if (err)
			return err;
	}

	if (tca[TCA_STAB]) {
		stab = qdisc_get_stab(tca[TCA_STAB]);
		if (IS_ERR(stab))
			return PTR_ERR(stab);
	}

	/* Publish the new stab (NULL if none was supplied) via RCU and
	 * drop the old one.
	 */
	ostab = rtnl_dereference(sch->stab);
	rcu_assign_pointer(sch->stab, stab);
	qdisc_put_stab(ostab);

	if (tca[TCA_RATE]) {
		/* NB: ignores errors from replace_estimator
		   because change can't be undone. */
		if (sch->flags & TCQ_F_MQROOT)
			goto out;
		gen_replace_estimator(&sch->bstats,
				      sch->cpu_bstats,
				      &sch->rate_est,
				      NULL,
				      qdisc_root_sleeping_running(sch),
				      tca[TCA_RATE]);
	}
out:
	return 0;
}
1072
/* Walker state for check_loop()/check_loop_fn(): detects whether grafting
 * qdisc p under some descendant would create a cycle in the hierarchy.
 */
struct check_loop_arg {
	struct qdisc_walker	w;	/* must be first: check_loop_fn() casts back */
	struct Qdisc *p;		/* the qdisc being grafted */
	int depth;			/* current recursion depth (capped at 8) */
};

static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w);
1080
1081static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
1082{
1083 struct check_loop_arg arg;
1084
1085 if (q->ops->cl_ops == NULL)
1086 return 0;
1087
1088 arg.w.stop = arg.w.skip = arg.w.count = 0;
1089 arg.w.fn = check_loop_fn;
1090 arg.depth = depth;
1091 arg.p = p;
1092 q->ops->cl_ops->walk(q, &arg.w);
1093 return arg.w.stop ? -ELOOP : 0;
1094}
1095
1096static int
1097check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
1098{
1099 struct Qdisc *leaf;
Eric Dumazet20fea082007-11-14 01:44:41 -08001100 const struct Qdisc_class_ops *cops = q->ops->cl_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001101 struct check_loop_arg *arg = (struct check_loop_arg *)w;
1102
1103 leaf = cops->leaf(q, cl);
1104 if (leaf) {
1105 if (leaf == arg->p || arg->depth > 7)
1106 return -ELOOP;
1107 return check_loop(leaf, arg->p, arg->depth + 1);
1108 }
1109 return 0;
1110}
1111
1112/*
1113 * Delete/get qdisc.
1114 */
1115
/* Handle RTM_DELQDISC and RTM_GETQDISC netlink requests: locate the
 * qdisc addressed by tcm_parent/tcm_handle, then either unlink and
 * destroy it (DEL) or send it back to the requester (GET).
 */
static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q = NULL;
	struct Qdisc *p = NULL;
	int err;

	/* Deleting requires CAP_NET_ADMIN; plain GET does not. */
	if ((n->nlmsg_type != RTM_GETQDISC) &&
	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
	if (err < 0)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	clid = tcm->tcm_parent;
	if (clid) {
		/* Parent given: resolve the child qdisc through it. */
		if (clid != TC_H_ROOT) {
			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p)
					return -ENOENT;
				q = qdisc_leaf(p, clid);
			} else if (dev_ingress_queue(dev)) {
				q = dev_ingress_queue(dev)->qdisc_sleeping;
			}
		} else {
			q = dev->qdisc;
		}
		if (!q)
			return -ENOENT;

		/* If a handle was also supplied it must match. */
		if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
			return -EINVAL;
	} else {
		/* No parent: look the qdisc up by handle directly. */
		q = qdisc_lookup(dev, tcm->tcm_handle);
		if (!q)
			return -ENOENT;
	}

	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
		return -EINVAL;

	if (n->nlmsg_type == RTM_DELQDISC) {
		if (!clid)
			return -EINVAL;
		/* handle == 0 marks a default/builtin qdisc: not deletable. */
		if (q->handle == 0)
			return -ENOENT;
		/* Graft NULL in place of q: unlinks, notifies and destroys. */
		err = qdisc_graft(dev, p, skb, n, clid, NULL, q);
		if (err != 0)
			return err;
	} else {
		qdisc_notify(net, skb, n, clid, NULL, q);
	}
	return 0;
}
1180
1181/*
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001182 * Create/change qdisc.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001183 */
1184
/* Handle RTM_NEWQDISC: create a new qdisc, replace an existing one, or
 * change parameters in place, depending on the NLM_F_* flags and which
 * of tcm_parent/tcm_handle are set.  May loop back to "replay:" when
 * qdisc_create() had to load a scheduler module (-EAGAIN).
 */
static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm;
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q, *p;
	int err;

	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

replay:
	/* Reinit, just in case something touches this. */
	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
	if (err < 0)
		return err;

	tcm = nlmsg_data(n);
	clid = tcm->tcm_parent;
	q = p = NULL;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;


	if (clid) {
		/* Resolve the current occupant q of the target slot. */
		if (clid != TC_H_ROOT) {
			if (clid != TC_H_INGRESS) {
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p)
					return -ENOENT;
				q = qdisc_leaf(p, clid);
			} else if (dev_ingress_queue_create(dev)) {
				q = dev_ingress_queue(dev)->qdisc_sleeping;
			}
		} else {
			q = dev->qdisc;
		}

		/* It may be default qdisc, ignore it */
		if (q && q->handle == 0)
			q = NULL;

		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
			if (tcm->tcm_handle) {
				/* Caller named a specific handle. */
				if (q && !(n->nlmsg_flags & NLM_F_REPLACE))
					return -EEXIST;
				if (TC_H_MIN(tcm->tcm_handle))
					return -EINVAL;
				q = qdisc_lookup(dev, tcm->tcm_handle);
				if (!q)
					goto create_n_graft;
				if (n->nlmsg_flags & NLM_F_EXCL)
					return -EEXIST;
				if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
					return -EINVAL;
				/* Moving q under p must not create a cycle. */
				if (q == p ||
				    (p && check_loop(q, p, 0)))
					return -ELOOP;
				atomic_inc(&q->refcnt);
				goto graft;
			} else {
				if (!q)
					goto create_n_graft;

				/* This magic test requires explanation.
				 *
				 * We know, that some child q is already
				 * attached to this parent and have choice:
				 * either to change it or to create/graft new one.
				 *
				 * 1. We are allowed to create/graft only
				 * if CREATE and REPLACE flags are set.
				 *
				 * 2. If EXCL is set, requestor wanted to say,
				 * that qdisc tcm_handle is not expected
				 * to exist, so that we choose create/graft too.
				 *
				 * 3. The last case is when no flags are set.
				 * Alas, it is sort of hole in API, we
				 * cannot decide what to do unambiguously.
				 * For now we select create/graft, if
				 * user gave KIND, which does not match existing.
				 */
				if ((n->nlmsg_flags & NLM_F_CREATE) &&
				    (n->nlmsg_flags & NLM_F_REPLACE) &&
				    ((n->nlmsg_flags & NLM_F_EXCL) ||
				     (tca[TCA_KIND] &&
				      nla_strcmp(tca[TCA_KIND], q->ops->id))))
					goto create_n_graft;
			}
		}
	} else {
		if (!tcm->tcm_handle)
			return -EINVAL;
		q = qdisc_lookup(dev, tcm->tcm_handle);
	}

	/* Change qdisc parameters */
	if (q == NULL)
		return -ENOENT;
	if (n->nlmsg_flags & NLM_F_EXCL)
		return -EEXIST;
	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
		return -EINVAL;
	err = qdisc_change(q, tca);
	if (err == 0)
		qdisc_notify(net, skb, n, clid, NULL, q);
	return err;

create_n_graft:
	if (!(n->nlmsg_flags & NLM_F_CREATE))
		return -ENOENT;
	if (clid == TC_H_INGRESS) {
		if (dev_ingress_queue(dev))
			q = qdisc_create(dev, dev_ingress_queue(dev), p,
					 tcm->tcm_parent, tcm->tcm_parent,
					 tca, &err);
		else
			err = -ENOENT;
	} else {
		struct netdev_queue *dev_queue;

		/* Let a classful parent pick the tx queue if it can. */
		if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
			dev_queue = p->ops->cl_ops->select_queue(p, tcm);
		else if (p)
			dev_queue = p->dev_queue;
		else
			dev_queue = netdev_get_tx_queue(dev, 0);

		q = qdisc_create(dev, dev_queue, p,
				 tcm->tcm_parent, tcm->tcm_handle,
				 tca, &err);
	}
	if (q == NULL) {
		/* -EAGAIN: a module was loaded, replay the whole request. */
		if (err == -EAGAIN)
			goto replay;
		return err;
	}

graft:
	err = qdisc_graft(dev, p, skb, n, clid, q, NULL);
	if (err) {
		if (q)
			qdisc_destroy(q);
		return err;
	}

	return 0;
}
1338
/* Serialize one qdisc into a netlink message on skb: tcmsg header, kind,
 * qdisc-specific dump, size table and statistics.  Returns skb->len on
 * success; on any failure the partial message is trimmed back off the
 * skb and -1 is returned.
 */
static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
			 u32 portid, u32 seq, u16 flags, int event)
{
	struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
	struct gnet_stats_queue __percpu *cpu_qstats = NULL;
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);	/* rollback point */
	struct gnet_dump d;
	struct qdisc_size_table *stab;
	__u32 qlen;

	/* Dumps can cover many qdiscs; be nice to the scheduler. */
	cond_resched();
	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = clid;
	tcm->tcm_handle = q->handle;
	/* tcm_info carries the reference count for qdisc dumps. */
	tcm->tcm_info = atomic_read(&q->refcnt);
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	if (q->ops->dump && q->ops->dump(q, skb) < 0)
		goto nla_put_failure;
	qlen = q->q.qlen;

	stab = rtnl_dereference(q->stab);
	if (stab && qdisc_dump_stab(skb, stab) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 NULL, &d, TCA_PAD) < 0)
		goto nla_put_failure;

	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
		goto nla_put_failure;

	/* Per-cpu counters (if this qdisc uses them) take precedence. */
	if (qdisc_is_percpu_stats(q)) {
		cpu_bstats = q->cpu_bstats;
		cpu_qstats = q->cpu_qstats;
	}

	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
				  &d, cpu_bstats, &q->bstats) < 0 ||
	    gnet_stats_copy_rate_est(&d, &q->bstats, &q->rate_est) < 0 ||
	    gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	/* Undo everything written for this (partial) message. */
	nlmsg_trim(skb, b);
	return -1;
}
1402
Eric Dumazet53b0f082010-05-22 20:37:44 +00001403static bool tc_qdisc_dump_ignore(struct Qdisc *q)
1404{
1405 return (q->flags & TCQ_F_BUILTIN) ? true : false;
1406}
1407
Tom Goff7316ae82010-03-19 15:40:13 +00001408static int qdisc_notify(struct net *net, struct sk_buff *oskb,
1409 struct nlmsghdr *n, u32 clid,
1410 struct Qdisc *old, struct Qdisc *new)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001411{
1412 struct sk_buff *skb;
Eric W. Biederman15e47302012-09-07 20:12:54 +00001413 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001414
1415 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1416 if (!skb)
1417 return -ENOBUFS;
1418
Eric Dumazet53b0f082010-05-22 20:37:44 +00001419 if (old && !tc_qdisc_dump_ignore(old)) {
Eric W. Biederman15e47302012-09-07 20:12:54 +00001420 if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001421 0, RTM_DELQDISC) < 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001422 goto err_out;
1423 }
Eric Dumazet53b0f082010-05-22 20:37:44 +00001424 if (new && !tc_qdisc_dump_ignore(new)) {
Eric W. Biederman15e47302012-09-07 20:12:54 +00001425 if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001426 old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001427 goto err_out;
1428 }
1429
1430 if (skb->len)
Eric W. Biederman15e47302012-09-07 20:12:54 +00001431 return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001432 n->nlmsg_flags & NLM_F_ECHO);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001433
1434err_out:
1435 kfree_skb(skb);
1436 return -EINVAL;
1437}
1438
/* Dump one qdisc tree (root first, then its ->list siblings) into skb.
 * *q_idx_p / s_q_idx implement netlink dump resumption: entries with an
 * index below s_q_idx were already sent in a previous callback and are
 * only counted.  Returns 0 when everything fit, -1 when the skb filled
 * up (with *q_idx_p set so the next call resumes at the right entry).
 */
static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
			      struct netlink_callback *cb,
			      int *q_idx_p, int s_q_idx)
{
	int ret = 0, q_idx = *q_idx_p;
	struct Qdisc *q;

	if (!root)
		return 0;

	/* The root itself first ... */
	q = root;
	if (q_idx < s_q_idx) {
		q_idx++;
	} else {
		if (!tc_qdisc_dump_ignore(q) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
			goto done;
		q_idx++;
	}
	/* ... then every other qdisc linked on the root's list. */
	list_for_each_entry(q, &root->list, list) {
		if (q_idx < s_q_idx) {
			q_idx++;
			continue;
		}
		if (!tc_qdisc_dump_ignore(q) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
			goto done;
		q_idx++;
	}

out:
	*q_idx_p = q_idx;
	return ret;
done:
	/* skb full (or fill error): report where to resume. */
	ret = -1;
	goto out;
}
1478
/* Netlink dump callback for RTM_GETQDISC: walk every device in the net
 * namespace and dump its egress and ingress qdisc trees.  cb->args[0]
 * (device index) and cb->args[1] (qdisc index within the device) form
 * the resume cursor across successive dump callbacks.
 */
static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int idx, q_idx;
	int s_idx, s_q_idx;
	struct net_device *dev;

	s_idx = cb->args[0];
	s_q_idx = q_idx = cb->args[1];

	idx = 0;
	ASSERT_RTNL();
	for_each_netdev(net, dev) {
		struct netdev_queue *dev_queue;

		if (idx < s_idx)
			goto cont;
		/* Only the device we stopped on keeps its qdisc offset;
		 * later devices start from their first qdisc.
		 */
		if (idx > s_idx)
			s_q_idx = 0;
		q_idx = 0;

		if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx) < 0)
			goto done;

		dev_queue = dev_ingress_queue(dev);
		if (dev_queue &&
		    tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
				       &q_idx, s_q_idx) < 0)
			goto done;

cont:
		idx++;
	}

done:
	/* Save the cursor for the next dump callback. */
	cb->args[0] = idx;
	cb->args[1] = q_idx;

	return skb->len;
}
1519
1520
1521
1522/************************************************
1523 * Traffic classes manipulation. *
1524 ************************************************/
1525
1526
1527
/* RTM_NEWTCLASS/RTM_DELTCLASS/RTM_GETTCLASS handler: create, change,
 * delete or query a traffic class on a qdisc.  Called with RTNL held
 * (uses __dev_get_by_index()).  Returns 0 or a negative errno.
 */
static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	struct Qdisc *q = NULL;
	const struct Qdisc_class_ops *cops;
	unsigned long cl = 0;
	unsigned long new_cl;
	u32 portid;	/* NOTE: despite the name, this holds the parent classid */
	u32 clid;
	u32 qid;
	int err;

	/* Only GET is permitted without CAP_NET_ADMIN in the owning netns. */
	if ((n->nlmsg_type != RTM_GETTCLASS) &&
	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
	if (err < 0)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	/*
	 * parent == TC_H_UNSPEC - unspecified parent.
	 * parent == TC_H_ROOT   - class is root, which has no parent.
	 * parent == X:0         - parent is root class.
	 * parent == X:Y         - parent is a node in hierarchy.
	 * parent == 0:Y         - parent is X:Y, where X:0 is qdisc.
	 *
	 * handle == 0:0         - generate handle from kernel pool.
	 * handle == 0:Y         - class is X:Y, where X:0 is qdisc.
	 * handle == X:Y         - clear.
	 * handle == X:0         - root class.
	 */

	/* Step 1. Determine qdisc handle X:0 */

	portid = tcm->tcm_parent;
	clid = tcm->tcm_handle;
	qid = TC_H_MAJ(clid);

	if (portid != TC_H_ROOT) {
		u32 qid1 = TC_H_MAJ(portid);

		if (qid && qid1) {
			/* If both majors are known, they must be identical. */
			if (qid != qid1)
				return -EINVAL;
		} else if (qid1) {
			qid = qid1;
		} else if (qid == 0)
			qid = dev->qdisc->handle;	/* default to root qdisc */

		/* Now qid is genuine qdisc handle consistent
		 * both with parent and child.
		 *
		 * TC_H_MAJ(portid) still may be unspecified, complete it now.
		 */
		if (portid)
			portid = TC_H_MAKE(qid, portid);
	} else {
		if (qid == 0)
			qid = dev->qdisc->handle;
	}

	/* OK. Locate qdisc */
	q = qdisc_lookup(dev, qid);
	if (!q)
		return -ENOENT;

	/* Check that this qdisc supports classes at all. */
	cops = q->ops->cl_ops;
	if (cops == NULL)
		return -EINVAL;

	/* Now try to get class */
	if (clid == 0) {
		if (portid == TC_H_ROOT)
			clid = qid;
	} else
		clid = TC_H_MAKE(qid, clid);

	/* cops->get() takes a class reference, dropped via cops->put()
	 * at 'out' below.
	 */
	if (clid)
		cl = cops->get(q, clid);

	if (cl == 0) {
		/* Class not found: only NEW with NLM_F_CREATE may proceed. */
		err = -ENOENT;
		if (n->nlmsg_type != RTM_NEWTCLASS ||
		    !(n->nlmsg_flags & NLM_F_CREATE))
			goto out;
	} else {
		switch (n->nlmsg_type) {
		case RTM_NEWTCLASS:
			err = -EEXIST;
			if (n->nlmsg_flags & NLM_F_EXCL)
				goto out;
			break;
		case RTM_DELTCLASS:
			err = -EOPNOTSUPP;
			if (cops->delete)
				err = cops->delete(q, cl);
			if (err == 0)
				tclass_notify(net, skb, n, q, cl, RTM_DELTCLASS);
			goto out;
		case RTM_GETTCLASS:
			err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
			goto out;
		default:
			err = -EINVAL;
			goto out;
		}
	}

	/* Create or update: delegate to the qdisc's change() hook. */
	new_cl = cl;
	err = -EOPNOTSUPP;
	if (cops->change)
		err = cops->change(q, clid, portid, tca, &new_cl);
	if (err == 0)
		tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);

out:
	if (cl)
		cops->put(q, cl);

	return err;
}
1659
1660
/* Fill one RTM_*TCLASS netlink message for class @cl of qdisc @q into
 * @skb.  Returns skb->len on success; on any failure the partial
 * message is trimmed away and -1 is returned (caller decides whether
 * that means "stop dump" or "error").
 */
static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
			  unsigned long cl,
			  u32 portid, u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);	/* rollback point */
	struct gnet_dump d;
	const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;

	cond_resched();	/* dumps can iterate many classes; be preemption-friendly */
	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;	/* zero padding: don't leak kernel stack */
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = q->handle;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = 0;
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	/* Let the qdisc fill in class-specific attributes (incl. handle). */
	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 NULL, &d, TCA_PAD) < 0)
		goto nla_put_failure;

	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	/* Patch the final message length now that all attributes are in. */
	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);	/* discard the half-built message */
	return -1;
}
1706
Tom Goff7316ae82010-03-19 15:40:13 +00001707static int tclass_notify(struct net *net, struct sk_buff *oskb,
1708 struct nlmsghdr *n, struct Qdisc *q,
1709 unsigned long cl, int event)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001710{
1711 struct sk_buff *skb;
Eric W. Biederman15e47302012-09-07 20:12:54 +00001712 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001713
1714 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1715 if (!skb)
1716 return -ENOBUFS;
1717
Eric W. Biederman15e47302012-09-07 20:12:54 +00001718 if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001719 kfree_skb(skb);
1720 return -EINVAL;
1721 }
1722
Eric W. Biederman15e47302012-09-07 20:12:54 +00001723 return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001724 n->nlmsg_flags & NLM_F_ECHO);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001725}
1726
/* Argument bundle for walking a qdisc's classes during a dump.
 * The embedded walker must stay the first member: qdisc_class_dump()
 * casts the qdisc_walker pointer back to this struct.
 */
struct qdisc_dump_args {
	struct qdisc_walker w;		/* generic walker state (fn/skip/count/stop) */
	struct sk_buff *skb;		/* dump skb being filled */
	struct netlink_callback *cb;	/* netlink dump context (seq, portid) */
};
1732
1733static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
1734{
1735 struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
1736
Eric W. Biederman15e47302012-09-07 20:12:54 +00001737 return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001738 a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
1739}
1740
/* Dump all classes of one qdisc @q into @skb.
 * @t_p counts qdiscs visited so far; @s_t is the qdisc index to resume
 * from.  Returns 0 to continue with the next qdisc, -1 when the skb is
 * full and the dump must stop (state saved in cb->args[1]).
 */
static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
				struct tcmsg *tcm, struct netlink_callback *cb,
				int *t_p, int s_t)
{
	struct qdisc_dump_args arg;

	/* Skip qdiscs that are invisible, already dumped, classless, or
	 * filtered out by an explicit tcm_parent request.
	 */
	if (tc_qdisc_dump_ignore(q) ||
	    *t_p < s_t || !q->ops->cl_ops ||
	    (tcm->tcm_parent &&
	     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
		(*t_p)++;
		return 0;
	}
	/* Past the resume point: clear per-qdisc resume state (args[1..]). */
	if (*t_p > s_t)
		memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
	arg.w.fn = qdisc_class_dump;
	arg.skb = skb;
	arg.cb = cb;
	arg.w.stop = 0;
	arg.w.skip = cb->args[1];	/* classes already emitted for this qdisc */
	arg.w.count = 0;
	q->ops->cl_ops->walk(q, &arg.w);
	cb->args[1] = arg.w.count;	/* remember progress for a resumed dump */
	if (arg.w.stop)
		return -1;
	(*t_p)++;
	return 0;
}
1769
1770static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
1771 struct tcmsg *tcm, struct netlink_callback *cb,
1772 int *t_p, int s_t)
1773{
1774 struct Qdisc *q;
1775
1776 if (!root)
1777 return 0;
1778
1779 if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
1780 return -1;
1781
1782 list_for_each_entry(q, &root->list, list) {
1783 if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
1784 return -1;
1785 }
1786
1787 return 0;
1788}
1789
/* RTM_GETTCLASS dump handler: emit all classes of the requested device
 * (root hierarchy plus ingress, if any).  Resumable via cb->args[0]
 * (qdisc index) and cb->args[1..] (per-qdisc class progress).
 */
static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	struct net *net = sock_net(skb->sk);
	struct netdev_queue *dev_queue;
	struct net_device *dev;
	int t, s_t;

	/* Malformed request (too short for a tcmsg): nothing to dump. */
	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
		return 0;
	/* dev_get_by_index() takes a reference; dropped before returning. */
	dev = dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return 0;

	s_t = cb->args[0];	/* resume point from the previous call */
	t = 0;

	if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0)
		goto done;

	dev_queue = dev_ingress_queue(dev);
	if (dev_queue &&
	    tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
				&t, s_t) < 0)
		goto done;

done:
	cb->args[0] = t;	/* save progress for the next invocation */

	dev_put(dev);
	return skb->len;
}
1822
/* Main classifier routine: scans the classifier chain attached to this
 * qdisc, (optionally) tests for protocol and asks specific classifiers.
 *
 * Returns a classifier verdict (>= 0) from the first matching filter,
 * or TC_ACT_UNSPEC when no filter in the chain claimed the packet.
 * With CONFIG_NET_CLS_ACT, a TC_ACT_RECLASSIFY verdict (outside compat
 * mode) restarts the scan from the head of the chain, bounded by
 * MAX_REC_LOOP to break reclassification loops.
 */
int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
		struct tcf_result *res, bool compat_mode)
{
	__be16 protocol = tc_skb_protocol(skb);
#ifdef CONFIG_NET_CLS_ACT
	const struct tcf_proto *old_tp = tp;	/* chain head, for restarts */
	int limit = 0;

reclassify:
#endif
	for (; tp; tp = rcu_dereference_bh(tp->next)) {
		int err;

		/* Filter applies only to its protocol, or to all of them. */
		if (tp->protocol != protocol &&
		    tp->protocol != htons(ETH_P_ALL))
			continue;

		err = tp->classify(skb, tp, res);
#ifdef CONFIG_NET_CLS_ACT
		if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode))
			goto reset;
#endif
		if (err >= 0)
			return err;
	}

	return TC_ACT_UNSPEC; /* signal: continue lookup */
#ifdef CONFIG_NET_CLS_ACT
reset:
	if (unlikely(limit++ >= MAX_REC_LOOP)) {
		net_notice_ratelimited("%s: reclassify loop, rule prio %u, protocol %02x\n",
				       tp->q->ops->id, tp->prio & 0xffff,
				       ntohs(tp->protocol));
		return TC_ACT_SHOT;
	}

	/* Restart from the head; an action may have rewritten the packet,
	 * so re-read its protocol as well.
	 */
	tp = old_tp;
	protocol = tc_skb_protocol(skb);
	goto reclassify;
#endif
}
EXPORT_SYMBOL(tc_classify);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001869
Cong Wang1e052be2015-03-06 11:47:59 -08001870bool tcf_destroy(struct tcf_proto *tp, bool force)
Patrick McHardya48b5a62007-03-23 11:29:43 -07001871{
Cong Wang1e052be2015-03-06 11:47:59 -08001872 if (tp->ops->destroy(tp, force)) {
1873 module_put(tp->ops->owner);
1874 kfree_rcu(tp, rcu);
1875 return true;
1876 }
1877
1878 return false;
Patrick McHardya48b5a62007-03-23 11:29:43 -07001879}
1880
John Fastabend25d8c0d2014-09-12 20:05:27 -07001881void tcf_destroy_chain(struct tcf_proto __rcu **fl)
Patrick McHardya48b5a62007-03-23 11:29:43 -07001882{
1883 struct tcf_proto *tp;
1884
John Fastabend25d8c0d2014-09-12 20:05:27 -07001885 while ((tp = rtnl_dereference(*fl)) != NULL) {
1886 RCU_INIT_POINTER(*fl, tp->next);
Cong Wang1e052be2015-03-06 11:47:59 -08001887 tcf_destroy(tp, true);
Patrick McHardya48b5a62007-03-23 11:29:43 -07001888 }
1889}
1890EXPORT_SYMBOL(tcf_destroy_chain);
1891
#ifdef CONFIG_PROC_FS
/* Emit the contents of /proc/net/psched: NSEC_PER_USEC,
 * PSCHED_TICKS2NS(1), the constant 1000000, and
 * NSEC_PER_SEC / hrtimer_resolution — each as 8-digit hex.
 * NOTE(review): format appears to be a legacy ABI read by userspace tc;
 * do not change the field order — confirm against iproute2 before touching.
 */
static int psched_show(struct seq_file *seq, void *v)
{
	seq_printf(seq, "%08x %08x %08x %08x\n",
		   (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
		   1000000,
		   (u32)NSEC_PER_SEC / hrtimer_resolution);

	return 0;
}

/* open() hook: single-record seq_file, no private data needed. */
static int psched_open(struct inode *inode, struct file *file)
{
	return single_open(file, psched_show, NULL);
}

/* File operations backing /proc/net/psched. */
static const struct file_operations psched_fops = {
	.owner = THIS_MODULE,
	.open = psched_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

/* Create /proc/net/psched for a newly initialized network namespace. */
static int __net_init psched_net_init(struct net *net)
{
	struct proc_dir_entry *e;

	e = proc_create("psched", 0, net->proc_net, &psched_fops);
	if (e == NULL)
		return -ENOMEM;

	return 0;
}

/* Remove /proc/net/psched when the namespace is torn down. */
static void __net_exit psched_net_exit(struct net *net)
{
	remove_proc_entry("psched", net->proc_net);
}
#else
/* Without procfs the per-netns hooks are no-ops. */
static int __net_init psched_net_init(struct net *net)
{
	return 0;
}

static void __net_exit psched_net_exit(struct net *net)
{
}
#endif
1941
/* Per-network-namespace lifecycle hooks for the /proc/net/psched entry. */
static struct pernet_operations psched_net_ops = {
	.init = psched_net_init,
	.exit = psched_net_exit,
};
1946
Linus Torvalds1da177e2005-04-16 15:20:36 -07001947static int __init pktsched_init(void)
1948{
Tom Goff7316ae82010-03-19 15:40:13 +00001949 int err;
1950
1951 err = register_pernet_subsys(&psched_net_ops);
1952 if (err) {
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001953 pr_err("pktsched_init: "
Tom Goff7316ae82010-03-19 15:40:13 +00001954 "cannot initialize per netns operations\n");
1955 return err;
1956 }
1957
stephen hemminger6da7c8f2013-08-27 16:19:08 -07001958 register_qdisc(&pfifo_fast_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001959 register_qdisc(&pfifo_qdisc_ops);
1960 register_qdisc(&bfifo_qdisc_ops);
Hagen Paul Pfeifer57dbb2d2010-01-24 12:30:59 +00001961 register_qdisc(&pfifo_head_drop_qdisc_ops);
David S. Miller6ec1c692009-09-06 01:58:51 -07001962 register_qdisc(&mq_qdisc_ops);
Phil Sutterd66d6c32015-08-27 21:21:38 +02001963 register_qdisc(&noqueue_qdisc_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001964
Greg Rosec7ac8672011-06-10 01:27:09 +00001965 rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, NULL);
1966 rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, NULL);
1967 rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc, NULL);
1968 rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, NULL);
1969 rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, NULL);
1970 rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass, NULL);
Thomas Grafbe577dd2007-03-22 11:55:50 -07001971
Linus Torvalds1da177e2005-04-16 15:20:36 -07001972 return 0;
1973}
1974
1975subsys_initcall(pktsched_init);