blob: fb8f138b97763bdf917b202ba1b93ad773d0407f [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * net/sched/sch_api.c Packet scheduler API.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 *
11 * Fixes:
12 *
13 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
14 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
15 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
16 */
17
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <linux/module.h>
19#include <linux/types.h>
20#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070021#include <linux/string.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070022#include <linux/errno.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070023#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070024#include <linux/init.h>
25#include <linux/proc_fs.h>
26#include <linux/seq_file.h>
27#include <linux/kmod.h>
28#include <linux/list.h>
Patrick McHardy41794772007-03-16 01:19:15 -070029#include <linux/hrtimer.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090030#include <linux/slab.h>
Jiri Kosina59cc1f62016-08-10 11:05:15 +020031#include <linux/hashtable.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070032
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020033#include <net/net_namespace.h>
Denis V. Lunevb8542722007-12-01 00:21:31 +110034#include <net/sock.h>
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -070035#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070036#include <net/pkt_sched.h>
Cong Wang07d79fc2017-08-30 14:30:36 -070037#include <net/pkt_cls.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070038
Linus Torvalds1da177e2005-04-16 15:20:36 -070039/*
40
41 Short review.
42 -------------
43
44 This file consists of two interrelated parts:
45
46 1. queueing disciplines manager frontend.
47 2. traffic classes manager frontend.
48
49 Generally, queueing discipline ("qdisc") is a black box,
50 which is able to enqueue packets and to dequeue them (when
51 device is ready to send something) in order and at times
52 determined by algorithm hidden in it.
53
   qdiscs are divided into two categories:
55 - "queues", which have no internal structure visible from outside.
56 - "schedulers", which split all the packets to "traffic classes",
57 using "packet classifiers" (look at cls_api.c)
58
   In turn, classes may have child qdiscs (as a rule, queues)
60 attached to them etc. etc. etc.
61
62 The goal of the routines in this file is to translate
63 information supplied by user in the form of handles
64 to more intelligible for kernel form, to make some sanity
65 checks and part of work, which is common to all qdiscs
66 and to provide rtnetlink notifications.
67
68 All real intelligent work is done inside qdisc modules.
69
70
71
72 Every discipline has two major routines: enqueue and dequeue.
73
74 ---dequeue
75
76 dequeue usually returns a skb to send. It is allowed to return NULL,
77 but it does not mean that queue is empty, it just means that
78 discipline does not want to send anything this time.
79 Queue is really empty if q->q.qlen == 0.
80 For complicated disciplines with multiple queues q->q is not
81 real packet queue, but however q->q.qlen must be valid.
82
83 ---enqueue
84
85 enqueue returns 0, if packet was enqueued successfully.
   If this packet (or another one) was dropped, it returns a
   nonzero error code.
88 NET_XMIT_DROP - this packet dropped
89 Expected action: do not backoff, but wait until queue will clear.
90 NET_XMIT_CN - probably this packet enqueued, but another one dropped.
91 Expected action: backoff or ignore
Linus Torvalds1da177e2005-04-16 15:20:36 -070092
93 Auxiliary routines:
94
Jarek Poplawski99c0db22008-10-31 00:45:27 -070095 ---peek
96
97 like dequeue but without removing a packet from the queue
98
Linus Torvalds1da177e2005-04-16 15:20:36 -070099 ---reset
100
101 returns qdisc to initial state: purge all buffers, clear all
102 timers, counters (except for statistics) etc.
103
104 ---init
105
106 initializes newly created qdisc.
107
108 ---destroy
109
110 destroys resources allocated by init and during lifetime of qdisc.
111
112 ---change
113
114 changes qdisc parameters.
115 */
116
117/* Protects list of registered TC modules. It is pure SMP lock. */
118static DEFINE_RWLOCK(qdisc_mod_lock);
119
120
121/************************************************
122 * Queueing disciplines manipulation. *
123 ************************************************/
124
125
126/* The list of all installed queueing disciplines. */
127
128static struct Qdisc_ops *qdisc_base;
129
Zhi Yong Wu21eb2182014-01-01 04:34:51 +0800130/* Register/unregister queueing discipline */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700131
132int register_qdisc(struct Qdisc_ops *qops)
133{
134 struct Qdisc_ops *q, **qp;
135 int rc = -EEXIST;
136
137 write_lock(&qdisc_mod_lock);
138 for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
139 if (!strcmp(qops->id, q->id))
140 goto out;
141
142 if (qops->enqueue == NULL)
143 qops->enqueue = noop_qdisc_ops.enqueue;
Jarek Poplawski99c0db22008-10-31 00:45:27 -0700144 if (qops->peek == NULL) {
Jarek Poplawski68fd26b2010-08-09 12:18:48 +0000145 if (qops->dequeue == NULL)
Jarek Poplawski99c0db22008-10-31 00:45:27 -0700146 qops->peek = noop_qdisc_ops.peek;
Jarek Poplawski68fd26b2010-08-09 12:18:48 +0000147 else
148 goto out_einval;
Jarek Poplawski99c0db22008-10-31 00:45:27 -0700149 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700150 if (qops->dequeue == NULL)
151 qops->dequeue = noop_qdisc_ops.dequeue;
152
Jarek Poplawski68fd26b2010-08-09 12:18:48 +0000153 if (qops->cl_ops) {
154 const struct Qdisc_class_ops *cops = qops->cl_ops;
155
WANG Cong143976c2017-08-24 16:51:29 -0700156 if (!(cops->find && cops->walk && cops->leaf))
Jarek Poplawski68fd26b2010-08-09 12:18:48 +0000157 goto out_einval;
158
Jiri Pirko6529eab2017-05-17 11:07:55 +0200159 if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf))
Jarek Poplawski68fd26b2010-08-09 12:18:48 +0000160 goto out_einval;
161 }
162
Linus Torvalds1da177e2005-04-16 15:20:36 -0700163 qops->next = NULL;
164 *qp = qops;
165 rc = 0;
166out:
167 write_unlock(&qdisc_mod_lock);
168 return rc;
Jarek Poplawski68fd26b2010-08-09 12:18:48 +0000169
170out_einval:
171 rc = -EINVAL;
172 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700173}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800174EXPORT_SYMBOL(register_qdisc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700175
176int unregister_qdisc(struct Qdisc_ops *qops)
177{
178 struct Qdisc_ops *q, **qp;
179 int err = -ENOENT;
180
181 write_lock(&qdisc_mod_lock);
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000182 for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700183 if (q == qops)
184 break;
185 if (q) {
186 *qp = q->next;
187 q->next = NULL;
188 err = 0;
189 }
190 write_unlock(&qdisc_mod_lock);
191 return err;
192}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800193EXPORT_SYMBOL(unregister_qdisc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700194
/* Get default qdisc if not otherwise specified.
 * Copies the current default qdisc id into @name (at most @len bytes,
 * always NUL-terminated via strlcpy) under the module lock.
 */
void qdisc_get_default(char *name, size_t len)
{
	read_lock(&qdisc_mod_lock);
	strlcpy(name, default_qdisc_ops->id, len);
	read_unlock(&qdisc_mod_lock);
}
202
203static struct Qdisc_ops *qdisc_lookup_default(const char *name)
204{
205 struct Qdisc_ops *q = NULL;
206
207 for (q = qdisc_base; q; q = q->next) {
208 if (!strcmp(name, q->id)) {
209 if (!try_module_get(q->owner))
210 q = NULL;
211 break;
212 }
213 }
214
215 return q;
216}
217
/* Set new default qdisc to use.
 * Requires CAP_NET_ADMIN. Tries to autoload "sch_<name>" when the ops
 * are not yet registered. The module reference taken by
 * qdisc_lookup_default() is kept by default_qdisc_ops; the previous
 * default's module reference is dropped.
 * Returns 0 on success, -EPERM or -ENOENT on failure.
 */
int qdisc_set_default(const char *name)
{
	const struct Qdisc_ops *ops;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	write_lock(&qdisc_mod_lock);
	ops = qdisc_lookup_default(name);
	if (!ops) {
		/* Not found, drop lock and try to load module */
		write_unlock(&qdisc_mod_lock);
		request_module("sch_%s", name);
		write_lock(&qdisc_mod_lock);

		/* Retry: the module load may have registered the ops. */
		ops = qdisc_lookup_default(name);
	}

	if (ops) {
		/* Set new default */
		module_put(default_qdisc_ops->owner);
		default_qdisc_ops = ops;
	}
	write_unlock(&qdisc_mod_lock);

	return ops ? 0 : -ENOENT;
}
246
#ifdef CONFIG_NET_SCH_DEFAULT
/* Set default value from kernel config.
 * Run as a late_initcall — presumably so built-in qdiscs have had a
 * chance to register first; TODO confirm ordering requirement.
 */
static int __init sch_default_qdisc(void)
{
	return qdisc_set_default(CONFIG_DEFAULT_NET_SCH);
}
late_initcall(sch_default_qdisc);
#endif
255
/* We know handle. Find qdisc among all qdisc's attached to device
 * (root qdisc, all its children, children of children etc.)
 * Note: caller either uses rtnl or rcu_read_lock()
 */
static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
{
	struct Qdisc *q;

	/* No underlying device (e.g. builtin qdiscs): only the root
	 * itself can match.
	 */
	if (!qdisc_dev(root))
		return (root->handle == handle ? root : NULL);

	if (!(root->flags & TCQ_F_BUILTIN) &&
	    root->handle == handle)
		return root;

	/* Non-root qdiscs live in the device's handle-keyed hash
	 * (filled by qdisc_hash_add() below).
	 */
	hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle) {
		if (q->handle == handle)
			return q;
	}
	return NULL;
}
278
Jiri Kosina49b49972017-03-08 16:03:32 +0100279void qdisc_hash_add(struct Qdisc *q, bool invisible)
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700280{
Eric Dumazet37314362014-03-08 08:01:19 -0800281 if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
Eric Dumazet4eaf3b82015-12-01 20:08:51 -0800282 ASSERT_RTNL();
Jiri Kosina59cc1f62016-08-10 11:05:15 +0200283 hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
Jiri Kosina49b49972017-03-08 16:03:32 +0100284 if (invisible)
285 q->flags |= TCQ_F_INVISIBLE;
Eric Dumazet37314362014-03-08 08:01:19 -0800286 }
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700287}
Jiri Kosina59cc1f62016-08-10 11:05:15 +0200288EXPORT_SYMBOL(qdisc_hash_add);
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700289
Jiri Kosina59cc1f62016-08-10 11:05:15 +0200290void qdisc_hash_del(struct Qdisc *q)
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700291{
Eric Dumazet4eaf3b82015-12-01 20:08:51 -0800292 if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
293 ASSERT_RTNL();
Jiri Kosina59cc1f62016-08-10 11:05:15 +0200294 hash_del_rcu(&q->hash);
Eric Dumazet4eaf3b82015-12-01 20:08:51 -0800295 }
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700296}
Jiri Kosina59cc1f62016-08-10 11:05:15 +0200297EXPORT_SYMBOL(qdisc_hash_del);
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700298
David S. Milleread81cc2008-07-17 00:50:32 -0700299struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
Patrick McHardy43effa12006-11-29 17:35:48 -0800300{
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700301 struct Qdisc *q;
302
Cong Wang50317fc2017-10-27 22:08:56 -0700303 if (!handle)
304 return NULL;
Patrick McHardyaf356af2009-09-04 06:41:18 +0000305 q = qdisc_match_from_root(dev->qdisc, handle);
306 if (q)
307 goto out;
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700308
Eric Dumazet24824a02010-10-02 06:11:55 +0000309 if (dev_ingress_queue(dev))
310 q = qdisc_match_from_root(
311 dev_ingress_queue(dev)->qdisc_sleeping,
312 handle);
Jarek Poplawskif6486d42008-11-25 13:56:06 -0800313out:
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700314 return q;
Patrick McHardy43effa12006-11-29 17:35:48 -0800315}
316
Vlad Buslov3a7d0d02018-09-24 19:22:51 +0300317struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle)
318{
319 struct netdev_queue *nq;
320 struct Qdisc *q;
321
322 if (!handle)
323 return NULL;
324 q = qdisc_match_from_root(dev->qdisc, handle);
325 if (q)
326 goto out;
327
328 nq = dev_ingress_queue_rcu(dev);
329 if (nq)
330 q = qdisc_match_from_root(nq->qdisc_sleeping, handle);
331out:
332 return q;
333}
334
Linus Torvalds1da177e2005-04-16 15:20:36 -0700335static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
336{
337 unsigned long cl;
Eric Dumazet20fea082007-11-14 01:44:41 -0800338 const struct Qdisc_class_ops *cops = p->ops->cl_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700339
340 if (cops == NULL)
341 return NULL;
WANG Cong143976c2017-08-24 16:51:29 -0700342 cl = cops->find(p, classid);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700343
344 if (cl == 0)
345 return NULL;
Tonghao Zhang2561f972018-12-13 00:43:23 -0800346 return cops->leaf(p, cl);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700347}
348
349/* Find queueing discipline by name */
350
Patrick McHardy1e904742008-01-22 22:11:17 -0800351static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700352{
353 struct Qdisc_ops *q = NULL;
354
355 if (kind) {
356 read_lock(&qdisc_mod_lock);
357 for (q = qdisc_base; q; q = q->next) {
Patrick McHardy1e904742008-01-22 22:11:17 -0800358 if (nla_strcmp(kind, q->id) == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700359 if (!try_module_get(q->owner))
360 q = NULL;
361 break;
362 }
363 }
364 read_unlock(&qdisc_mod_lock);
365 }
366 return q;
367}
368
Jesper Dangaard Brouer8a8e3d82013-08-14 23:47:11 +0200369/* The linklayer setting were not transferred from iproute2, in older
370 * versions, and the rate tables lookup systems have been dropped in
371 * the kernel. To keep backward compatible with older iproute2 tc
372 * utils, we detect the linklayer setting by detecting if the rate
373 * table were modified.
374 *
375 * For linklayer ATM table entries, the rate table will be aligned to
376 * 48 bytes, thus some table entries will contain the same value. The
377 * mpu (min packet unit) is also encoded into the old rate table, thus
378 * starting from the mpu, we find low and high table entries for
379 * mapping this cell. If these entries contain the same value, when
380 * the rate tables have been modified for linklayer ATM.
381 *
382 * This is done by rounding mpu to the nearest 48 bytes cell/entry,
383 * and then roundup to the next cell, calc the table entry one below,
384 * and compare.
385 */
386static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
387{
388 int low = roundup(r->mpu, 48);
389 int high = roundup(low+1, 48);
390 int cell_low = low >> r->cell_log;
391 int cell_high = (high >> r->cell_log) - 1;
392
393 /* rtab is too inaccurate at rates > 100Mbit/s */
394 if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
395 pr_debug("TC linklayer: Giving up ATM detection\n");
396 return TC_LINKLAYER_ETHERNET;
397 }
398
399 if ((cell_high > cell_low) && (cell_high < 256)
400 && (rtab[cell_low] == rtab[cell_high])) {
401 pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
402 cell_low, cell_high, rtab[cell_high]);
403 return TC_LINKLAYER_ATM;
404 }
405 return TC_LINKLAYER_ETHERNET;
406}
407
Linus Torvalds1da177e2005-04-16 15:20:36 -0700408static struct qdisc_rate_table *qdisc_rtab_list;
409
/* Look up (or create) a shared rate table matching @r and the raw
 * table attribute @tab. On success the returned table holds one more
 * reference (drop with qdisc_put_rtab()). May set r->linklayer as a
 * side effect for legacy userspace. Returns NULL on error, with an
 * extack message set.
 */
struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
					struct nlattr *tab,
					struct netlink_ext_ack *extack)
{
	struct qdisc_rate_table *rtab;

	/* Reject malformed input: no table, zero rate/cell_log, or a
	 * table attribute of the wrong size.
	 */
	if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
	    nla_len(tab) != TC_RTAB_SIZE) {
		NL_SET_ERR_MSG(extack, "Invalid rate table parameters for searching");
		return NULL;
	}

	/* Share an existing entry when both the ratespec and the raw
	 * 1024-byte table match; just bump its refcount.
	 */
	for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
		if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
		    !memcmp(&rtab->data, nla_data(tab), 1024)) {
			rtab->refcnt++;
			return rtab;
		}
	}

	rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
	if (rtab) {
		rtab->rate = *r;
		rtab->refcnt = 1;
		memcpy(rtab->data, nla_data(tab), 1024);
		/* Old iproute2 did not transmit the linklayer type;
		 * detect ATM from the table shape (__detect_linklayer).
		 */
		if (r->linklayer == TC_LINKLAYER_UNAWARE)
			r->linklayer = __detect_linklayer(r, rtab->data);
		rtab->next = qdisc_rtab_list;
		qdisc_rtab_list = rtab;
	} else {
		NL_SET_ERR_MSG(extack, "Failed to allocate new qdisc rate table");
	}
	return rtab;
}
EXPORT_SYMBOL(qdisc_get_rtab);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700445
446void qdisc_put_rtab(struct qdisc_rate_table *tab)
447{
448 struct qdisc_rate_table *rtab, **rtabp;
449
450 if (!tab || --tab->refcnt)
451 return;
452
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000453 for (rtabp = &qdisc_rtab_list;
454 (rtab = *rtabp) != NULL;
455 rtabp = &rtab->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700456 if (rtab == tab) {
457 *rtabp = rtab->next;
458 kfree(rtab);
459 return;
460 }
461 }
462}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800463EXPORT_SYMBOL(qdisc_put_rtab);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700464
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700465static LIST_HEAD(qdisc_stab_list);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700466
467static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
468 [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) },
469 [TCA_STAB_DATA] = { .type = NLA_BINARY },
470};
471
/* Parse a TCA_STAB attribute and return a (possibly shared) size
 * table, with a reference held. Identical existing tables are reused.
 * Returns ERR_PTR() on parse/validation/allocation failure, with an
 * extack message set where applicable.
 */
static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt,
					       struct netlink_ext_ack *extack)
{
	struct nlattr *tb[TCA_STAB_MAX + 1];
	struct qdisc_size_table *stab;
	struct tc_sizespec *s;
	unsigned int tsize = 0;
	u16 *tab = NULL;
	int err;

	err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy, extack);
	if (err < 0)
		return ERR_PTR(err);
	if (!tb[TCA_STAB_BASE]) {
		NL_SET_ERR_MSG(extack, "Size table base attribute is missing");
		return ERR_PTR(-EINVAL);
	}

	s = nla_data(tb[TCA_STAB_BASE]);

	/* A non-zero declared tsize requires the data attribute. */
	if (s->tsize > 0) {
		if (!tb[TCA_STAB_DATA]) {
			NL_SET_ERR_MSG(extack, "Size table data attribute is missing");
			return ERR_PTR(-EINVAL);
		}
		tab = nla_data(tb[TCA_STAB_DATA]);
		tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
	}

	/* The declared size must match the data actually supplied. */
	if (tsize != s->tsize || (!tab && tsize > 0)) {
		NL_SET_ERR_MSG(extack, "Invalid size of size table");
		return ERR_PTR(-EINVAL);
	}

	/* Reuse an identical existing table when possible. */
	list_for_each_entry(stab, &qdisc_stab_list, list) {
		if (memcmp(&stab->szopts, s, sizeof(*s)))
			continue;
		if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
			continue;
		stab->refcnt++;
		return stab;
	}

	/* Allocate sizespec plus the trailing u16 slot array in one go. */
	stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
	if (!stab)
		return ERR_PTR(-ENOMEM);

	stab->refcnt = 1;
	stab->szopts = *s;
	if (tsize > 0)
		memcpy(stab->data, tab, tsize * sizeof(u16));

	list_add_tail(&stab->list, &qdisc_stab_list);

	return stab;
}
528
529void qdisc_put_stab(struct qdisc_size_table *tab)
530{
531 if (!tab)
532 return;
533
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700534 if (--tab->refcnt == 0) {
535 list_del(&tab->list);
Wei Yongjun6e079022019-02-16 08:19:55 +0000536 kfree_rcu(tab, rcu);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700537 }
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700538}
539EXPORT_SYMBOL(qdisc_put_stab);
540
541static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
542{
543 struct nlattr *nest;
544
545 nest = nla_nest_start(skb, TCA_STAB);
Patrick McHardy3aa46142008-11-20 04:07:14 -0800546 if (nest == NULL)
547 goto nla_put_failure;
David S. Miller1b34ec42012-03-29 05:11:39 -0400548 if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
549 goto nla_put_failure;
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700550 nla_nest_end(skb, nest);
551
552 return skb->len;
553
554nla_put_failure:
555 return -1;
556}
557
/* Compute the size-table-adjusted packet length of @skb and store it
 * in qdisc_skb_cb(skb)->pkt_len. With an empty table only the fixed
 * overhead is applied; slots past the end of the table are
 * extrapolated from the last entry. The result is clamped to >= 1.
 */
void __qdisc_calculate_pkt_len(struct sk_buff *skb,
			       const struct qdisc_size_table *stab)
{
	int pkt_len, slot;

	pkt_len = skb->len + stab->szopts.overhead;
	if (unlikely(!stab->szopts.tsize))
		goto out;

	/* Map the adjusted length to a table slot. */
	slot = pkt_len + stab->szopts.cell_align;
	if (unlikely(slot < 0))
		slot = 0;

	slot >>= stab->szopts.cell_log;
	if (likely(slot < stab->szopts.tsize))
		pkt_len = stab->data[slot];
	else
		/* Past the table: scale the last entry by the number of
		 * full table spans and add the wrapped slot's entry.
		 */
		pkt_len = stab->data[stab->szopts.tsize - 1] *
				(slot / stab->szopts.tsize) +
				stab->data[slot % stab->szopts.tsize];

	pkt_len <<= stab->szopts.size_log;
out:
	if (unlikely(pkt_len < 1))
		pkt_len = 1;
	qdisc_skb_cb(skb)->pkt_len = pkt_len;
}
EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700586
Florian Westphal6e765a02014-06-11 20:35:18 +0200587void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
Jarek Poplawskib00355d2009-02-01 01:12:42 -0800588{
589 if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000590 pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
591 txt, qdisc->ops->id, qdisc->handle >> 16);
Jarek Poplawskib00355d2009-02-01 01:12:42 -0800592 qdisc->flags |= TCQ_F_WARN_NONWC;
593 }
594}
595EXPORT_SYMBOL(qdisc_warn_nonwc);
596
/* hrtimer callback of the qdisc watchdog: reschedule the watched
 * qdisc's root so transmission is retried.
 */
static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
{
	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
						 timer);

	rcu_read_lock();
	__netif_schedule(qdisc_root(wd->qdisc));
	rcu_read_unlock();

	/* One-shot timer; re-armed by qdisc_watchdog_schedule_ns(). */
	return HRTIMER_NORESTART;
}
608
Vinicius Costa Gomes860b6422018-07-03 15:42:52 -0700609void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc,
610 clockid_t clockid)
Patrick McHardy41794772007-03-16 01:19:15 -0700611{
Vinicius Costa Gomes860b6422018-07-03 15:42:52 -0700612 hrtimer_init(&wd->timer, clockid, HRTIMER_MODE_ABS_PINNED);
David S. Miller2fbd3da2009-09-01 17:59:25 -0700613 wd->timer.function = qdisc_watchdog;
Patrick McHardy41794772007-03-16 01:19:15 -0700614 wd->qdisc = qdisc;
615}
Vinicius Costa Gomes860b6422018-07-03 15:42:52 -0700616EXPORT_SYMBOL(qdisc_watchdog_init_clockid);
617
/* Initialise a qdisc watchdog with the default CLOCK_MONOTONIC clock. */
void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
{
	qdisc_watchdog_init_clockid(wd, qdisc, CLOCK_MONOTONIC);
}
EXPORT_SYMBOL(qdisc_watchdog_init);
623
/* (Re)arm the watchdog to fire at absolute time @expires (ns). */
void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires)
{
	/* Don't arm the timer while the root qdisc is deactivated. */
	if (test_bit(__QDISC_STATE_DEACTIVATED,
		     &qdisc_root_sleeping(wd->qdisc)->state))
		return;

	/* Already armed for exactly this expiry - nothing to do. */
	if (wd->last_expires == expires)
		return;

	wd->last_expires = expires;
	hrtimer_start(&wd->timer,
		      ns_to_ktime(expires),
		      HRTIMER_MODE_ABS_PINNED);
}
EXPORT_SYMBOL(qdisc_watchdog_schedule_ns);
Patrick McHardy41794772007-03-16 01:19:15 -0700639
/* Cancel a pending watchdog timer (waits for a running callback). */
void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
{
	hrtimer_cancel(&wd->timer);
}
EXPORT_SYMBOL(qdisc_watchdog_cancel);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700645
Adrian Bunka94f7792008-07-22 14:20:11 -0700646static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
Patrick McHardy6fe1c7a2008-07-05 23:21:31 -0700647{
Patrick McHardy6fe1c7a2008-07-05 23:21:31 -0700648 struct hlist_head *h;
Eric Dumazet9695fe62017-08-22 12:26:46 -0700649 unsigned int i;
Patrick McHardy6fe1c7a2008-07-05 23:21:31 -0700650
Eric Dumazet9695fe62017-08-22 12:26:46 -0700651 h = kvmalloc_array(n, sizeof(struct hlist_head), GFP_KERNEL);
Patrick McHardy6fe1c7a2008-07-05 23:21:31 -0700652
653 if (h != NULL) {
654 for (i = 0; i < n; i++)
655 INIT_HLIST_HEAD(&h[i]);
656 }
657 return h;
658}
659
/* Double the class hash table once the load factor exceeds 0.75.
 * Classes are rehashed into the new table under sch_tree_lock();
 * allocation failure simply keeps the old (smaller) table.
 */
void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
{
	struct Qdisc_class_common *cl;
	struct hlist_node *next;
	struct hlist_head *nhash, *ohash;
	unsigned int nsize, nmask, osize;
	unsigned int i, h;

	/* Rehash when load factor exceeds 0.75 */
	if (clhash->hashelems * 4 <= clhash->hashsize * 3)
		return;
	nsize = clhash->hashsize * 2;
	nmask = nsize - 1;
	nhash = qdisc_class_hash_alloc(nsize);
	if (nhash == NULL)
		return;

	ohash = clhash->hash;
	osize = clhash->hashsize;

	sch_tree_lock(sch);
	for (i = 0; i < osize; i++) {
		hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
			h = qdisc_class_hash(cl->classid, nmask);
			hlist_add_head(&cl->hnode, &nhash[h]);
		}
	}
	clhash->hash = nhash;
	clhash->hashsize = nsize;
	clhash->hashmask = nmask;
	sch_tree_unlock(sch);

	/* Old bucket array is no longer referenced. */
	kvfree(ohash);
}
EXPORT_SYMBOL(qdisc_class_hash_grow);
695
/* Initialise @clhash with a minimal 4-bucket table.
 * Returns 0 on success or -ENOMEM.
 */
int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
{
	unsigned int size = 4;

	clhash->hash = qdisc_class_hash_alloc(size);
	if (!clhash->hash)
		return -ENOMEM;
	clhash->hashsize = size;
	clhash->hashmask = size - 1;
	clhash->hashelems = 0;
	return 0;
}
EXPORT_SYMBOL(qdisc_class_hash_init);
709
/* Free the bucket array allocated by qdisc_class_hash_init()/grow(). */
void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
{
	kvfree(clhash->hash);
}
EXPORT_SYMBOL(qdisc_class_hash_destroy);
715
716void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
717 struct Qdisc_class_common *cl)
718{
719 unsigned int h;
720
721 INIT_HLIST_NODE(&cl->hnode);
722 h = qdisc_class_hash(cl->classid, clhash->hashmask);
723 hlist_add_head(&cl->hnode, &clhash->hash[h]);
724 clhash->hashelems++;
725}
726EXPORT_SYMBOL(qdisc_class_hash_insert);
727
/* Unlink class @cl from @clhash and keep the element count in sync. */
void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	hlist_del(&cl->hnode);
	clhash->hashelems--;
}
EXPORT_SYMBOL(qdisc_class_hash_remove);
735
Eric Dumazetfa0f5aa2012-01-03 00:00:11 +0000736/* Allocate an unique handle from space managed by kernel
737 * Possible range is [8000-FFFF]:0000 (0x8000 values)
738 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700739static u32 qdisc_alloc_handle(struct net_device *dev)
740{
Eric Dumazetfa0f5aa2012-01-03 00:00:11 +0000741 int i = 0x8000;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700742 static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
743
744 do {
745 autohandle += TC_H_MAKE(0x10000U, 0);
746 if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
747 autohandle = TC_H_MAKE(0x80000000U, 0);
Eric Dumazetfa0f5aa2012-01-03 00:00:11 +0000748 if (!qdisc_lookup(dev, autohandle))
749 return autohandle;
750 cond_resched();
751 } while (--i > 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700752
Eric Dumazetfa0f5aa2012-01-03 00:00:11 +0000753 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700754}
755
/* Propagate a decrease of @n packets / @len bytes up the qdisc tree
 * so every ancestor's qlen/backlog stays accurate, notifying a parent
 * class (qlen_notify) when its child qdisc became empty.
 */
void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len)
{
	bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
	const struct Qdisc_class_ops *cops;
	unsigned long cl;
	u32 parentid;
	bool notify;
	int drops;

	if (n == 0 && len == 0)
		return;
	drops = max_t(int, n, 0);
	rcu_read_lock();
	while ((parentid = sch->parent)) {
		/* Ingress hierarchy and parent-less qdiscs stop the walk. */
		if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
			break;

		if (sch->flags & TCQ_F_NOPARENT)
			break;
		/* Notify parent qdisc only if child qdisc becomes empty.
		 *
		 * If child was empty even before update then backlog
		 * counter is screwed and we skip notification because
		 * parent class is already passive.
		 *
		 * If the original child was offloaded then it is allowed
		 * to be seem as empty, so the parent is notified anyway.
		 */
		notify = !sch->q.qlen && !WARN_ON_ONCE(!n &&
						       !qdisc_is_offloaded);
		/* TODO: perform the search on a per txq basis */
		sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
		if (sch == NULL) {
			/* Only the root handle may legitimately not resolve. */
			WARN_ON_ONCE(parentid != TC_H_ROOT);
			break;
		}
		cops = sch->ops->cl_ops;
		if (notify && cops->qlen_notify) {
			cl = cops->find(sch, parentid);
			cops->qlen_notify(sch, cl);
		}
		sch->q.qlen -= n;
		sch->qstats.backlog -= len;
		__qdisc_qstats_drop(sch, drops);
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700804
Jakub Kicinskib5928432018-11-07 17:33:34 -0800805int qdisc_offload_dump_helper(struct Qdisc *sch, enum tc_setup_type type,
806 void *type_data)
807{
808 struct net_device *dev = qdisc_dev(sch);
809 int err;
810
811 sch->flags &= ~TCQ_F_OFFLOADED;
812 if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
813 return 0;
814
815 err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);
816 if (err == -EOPNOTSUPP)
817 return 0;
818
819 if (!err)
820 sch->flags |= TCQ_F_OFFLOADED;
821
822 return err;
823}
824EXPORT_SYMBOL(qdisc_offload_dump_helper);
825
Jakub Kicinskibfaee912018-11-07 17:33:37 -0800826void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch,
827 struct Qdisc *new, struct Qdisc *old,
828 enum tc_setup_type type, void *type_data,
829 struct netlink_ext_ack *extack)
830{
831 bool any_qdisc_is_offloaded;
832 int err;
833
834 if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
835 return;
836
837 err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);
838
839 /* Don't report error if the graft is part of destroy operation. */
840 if (!err || !new || new == &noop_qdisc)
841 return;
842
843 /* Don't report error if the parent, the old child and the new
844 * one are not offloaded.
845 */
846 any_qdisc_is_offloaded = new->flags & TCQ_F_OFFLOADED;
847 any_qdisc_is_offloaded |= sch && sch->flags & TCQ_F_OFFLOADED;
848 any_qdisc_is_offloaded |= old && old->flags & TCQ_F_OFFLOADED;
849
850 if (any_qdisc_is_offloaded)
851 NL_SET_ERR_MSG(extack, "Offloading graft operation failed.");
852}
853EXPORT_SYMBOL(qdisc_offload_graft_helper);
854
Jakub Kicinski98b0e5f2018-11-12 14:58:10 -0800855static void qdisc_offload_graft_root(struct net_device *dev,
856 struct Qdisc *new, struct Qdisc *old,
857 struct netlink_ext_ack *extack)
858{
859 struct tc_root_qopt_offload graft_offload = {
860 .command = TC_ROOT_GRAFT,
861 .handle = new ? new->handle : 0,
862 .ingress = (new && new->flags & TCQ_F_INGRESS) ||
863 (old && old->flags & TCQ_F_INGRESS),
864 };
865
866 qdisc_offload_graft_helper(dev, NULL, new, old,
867 TC_SETUP_ROOT_QDISC, &graft_offload, extack);
868}
869
/* Fill one RTM_{NEW,DEL}QDISC netlink message describing @q into @skb.
 *
 * Emits the tcmsg header plus TCA_KIND, optional shared-block indices,
 * the qdisc's own options (->dump), TCA_HW_OFFLOAD, the size table and
 * the statistics blocks.  Returns skb->len on success; on any failure
 * the partially written message is trimmed away and -1 is returned so
 * the caller can treat the skb as untouched.
 */
static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
			 u32 portid, u32 seq, u16 flags, int event)
{
	struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
	struct gnet_stats_queue __percpu *cpu_qstats = NULL;
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);	/* rollback point */
	struct gnet_dump d;
	struct qdisc_size_table *stab;
	u32 block_index;
	__u32 qlen;

	cond_resched();
	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = clid;
	tcm->tcm_handle = q->handle;
	/* tcm_info carries the current refcount for qdisc dumps */
	tcm->tcm_info = refcount_read(&q->refcnt);
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	/* Shared block indices are reported only when non-zero. */
	if (q->ops->ingress_block_get) {
		block_index = q->ops->ingress_block_get(q);
		if (block_index &&
		    nla_put_u32(skb, TCA_INGRESS_BLOCK, block_index))
			goto nla_put_failure;
	}
	if (q->ops->egress_block_get) {
		block_index = q->ops->egress_block_get(q);
		if (block_index &&
		    nla_put_u32(skb, TCA_EGRESS_BLOCK, block_index))
			goto nla_put_failure;
	}
	if (q->ops->dump && q->ops->dump(q, skb) < 0)
		goto nla_put_failure;
	if (nla_put_u8(skb, TCA_HW_OFFLOAD, !!(q->flags & TCQ_F_OFFLOADED)))
		goto nla_put_failure;
	/* Sums per-cpu qlen counters for NOLOCK qdiscs. */
	qlen = qdisc_qlen_sum(q);

	stab = rtnl_dereference(q->stab);
	if (stab && qdisc_dump_stab(skb, stab) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 NULL, &d, TCA_PAD) < 0)
		goto nla_put_failure;

	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
		goto nla_put_failure;

	if (qdisc_is_percpu_stats(q)) {
		cpu_bstats = q->cpu_bstats;
		cpu_qstats = q->cpu_qstats;
	}

	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
				  &d, cpu_bstats, &q->bstats) < 0 ||
	    gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
	    gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	/* Undo everything written since entry so the skb stays reusable. */
	nlmsg_trim(skb, b);
	return -1;
}
948
949static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
950{
951 if (q->flags & TCQ_F_BUILTIN)
952 return true;
953 if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible)
954 return true;
955
956 return false;
957}
958
959static int qdisc_notify(struct net *net, struct sk_buff *oskb,
960 struct nlmsghdr *n, u32 clid,
961 struct Qdisc *old, struct Qdisc *new)
962{
963 struct sk_buff *skb;
964 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
965
966 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
967 if (!skb)
968 return -ENOBUFS;
969
970 if (old && !tc_qdisc_dump_ignore(old, false)) {
971 if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
972 0, RTM_DELQDISC) < 0)
973 goto err_out;
974 }
975 if (new && !tc_qdisc_dump_ignore(new, false)) {
976 if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
977 old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
978 goto err_out;
979 }
980
981 if (skb->len)
982 return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
983 n->nlmsg_flags & NLM_F_ECHO);
984
985err_out:
986 kfree_skb(skb);
987 return -EINVAL;
988}
989
Tom Goff7316ae82010-03-19 15:40:13 +0000990static void notify_and_destroy(struct net *net, struct sk_buff *skb,
991 struct nlmsghdr *n, u32 clid,
David S. Miller99194cf2008-07-17 04:54:10 -0700992 struct Qdisc *old, struct Qdisc *new)
993{
994 if (new || old)
Tom Goff7316ae82010-03-19 15:40:13 +0000995 qdisc_notify(net, skb, n, clid, old, new);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700996
David S. Miller4d8863a2008-08-18 21:03:15 -0700997 if (old)
Vlad Buslov86bd4462018-09-24 19:22:50 +0300998 qdisc_put(old);
David S. Miller99194cf2008-07-17 04:54:10 -0700999}
1000
/* Graft qdisc "new" to class "classid" of qdisc "parent" or
 * to device "dev".
 *
 * When appropriate send a netlink notification using 'skb'
 * and "n".
 *
 * On success, destroy old qdisc.
 *
 * With parent == NULL the qdisc is installed as the device root
 * (egress: one per tx queue; ingress: the single ingress queue);
 * otherwise it is delegated to the parent's class ops ->graft().
 */
static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
		       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
		       struct Qdisc *new, struct Qdisc *old,
		       struct netlink_ext_ack *extack)
{
	struct Qdisc *q = old;
	struct net *net = dev_net(dev);

	if (parent == NULL) {
		unsigned int i, num_q, ingress;

		ingress = 0;
		num_q = dev->num_tx_queues;
		if ((q && q->flags & TCQ_F_INGRESS) ||
		    (new && new->flags & TCQ_F_INGRESS)) {
			/* Ingress has exactly one queue. */
			num_q = 1;
			ingress = 1;
			if (!dev_ingress_queue(dev)) {
				NL_SET_ERR_MSG(extack, "Device does not have an ingress queue");
				return -ENOENT;
			}
		}

		/* Quiesce the device while its root is swapped. */
		if (dev->flags & IFF_UP)
			dev_deactivate(dev);

		qdisc_offload_graft_root(dev, new, old, extack);

		/* Qdiscs with ->attach (e.g. mq) distribute themselves
		 * over the tx queues; skip the per-queue graft loop.
		 */
		if (new && new->ops->attach)
			goto skip;

		for (i = 0; i < num_q; i++) {
			struct netdev_queue *dev_queue = dev_ingress_queue(dev);

			if (!ingress)
				dev_queue = netdev_get_tx_queue(dev, i);

			old = dev_graft_qdisc(dev_queue, new);
			/* One reference per tx queue beyond the first. */
			if (new && i > 0)
				qdisc_refcount_inc(new);

			if (!ingress)
				qdisc_put(old);
		}

skip:
		if (!ingress) {
			notify_and_destroy(net, skb, n, classid,
					   dev->qdisc, new);
			if (new && !new->ops->attach)
				qdisc_refcount_inc(new);
			dev->qdisc = new ? : &noop_qdisc;

			if (new && new->ops->attach)
				new->ops->attach(new);
		} else {
			notify_and_destroy(net, skb, n, classid, old, new);
		}

		if (dev->flags & IFF_UP)
			dev_activate(dev);
	} else {
		const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
		unsigned long cl;
		int err;

		/* Only support running class lockless if parent is lockless */
		if (new && (new->flags & TCQ_F_NOLOCK) &&
		    parent && !(parent->flags & TCQ_F_NOLOCK))
			new->flags &= ~TCQ_F_NOLOCK;

		if (!cops || !cops->graft)
			return -EOPNOTSUPP;

		cl = cops->find(parent, classid);
		if (!cl) {
			NL_SET_ERR_MSG(extack, "Specified class not found");
			return -ENOENT;
		}

		err = cops->graft(parent, cl, new, &old, extack);
		if (err)
			return err;
		notify_and_destroy(net, skb, n, classid, old, new);
	}
	return 0;
}
1097
Jiri Pirkod47a6b02018-01-17 11:46:52 +01001098static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca,
1099 struct netlink_ext_ack *extack)
1100{
1101 u32 block_index;
1102
1103 if (tca[TCA_INGRESS_BLOCK]) {
1104 block_index = nla_get_u32(tca[TCA_INGRESS_BLOCK]);
1105
1106 if (!block_index) {
1107 NL_SET_ERR_MSG(extack, "Ingress block index cannot be 0");
1108 return -EINVAL;
1109 }
1110 if (!sch->ops->ingress_block_set) {
1111 NL_SET_ERR_MSG(extack, "Ingress block sharing is not supported");
1112 return -EOPNOTSUPP;
1113 }
1114 sch->ops->ingress_block_set(sch, block_index);
1115 }
1116 if (tca[TCA_EGRESS_BLOCK]) {
1117 block_index = nla_get_u32(tca[TCA_EGRESS_BLOCK]);
1118
1119 if (!block_index) {
1120 NL_SET_ERR_MSG(extack, "Egress block index cannot be 0");
1121 return -EINVAL;
1122 }
1123 if (!sch->ops->egress_block_set) {
1124 NL_SET_ERR_MSG(extack, "Egress block sharing is not supported");
1125 return -EOPNOTSUPP;
1126 }
1127 sch->ops->egress_block_set(sch, block_index);
1128 }
1129 return 0;
1130}
1131
/*
   Allocate and initialize new qdisc.

   Parameters are passed via opt.

   Error handling note: err_out5 (init failed) and err_out4 (stab or
   estimator failed) both destroy the qdisc, but err_out4 additionally
   releases the size table; both fall through to err_out3 which drops
   the device reference and frees the qdisc memory.
 */

static struct Qdisc *qdisc_create(struct net_device *dev,
				  struct netdev_queue *dev_queue,
				  struct Qdisc *p, u32 parent, u32 handle,
				  struct nlattr **tca, int *errp,
				  struct netlink_ext_ack *extack)
{
	int err;
	struct nlattr *kind = tca[TCA_KIND];
	struct Qdisc *sch;
	struct Qdisc_ops *ops;
	struct qdisc_size_table *stab;

	ops = qdisc_lookup_ops(kind);
#ifdef CONFIG_MODULES
	if (ops == NULL && kind != NULL) {
		char name[IFNAMSIZ];
		if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
			/* We dropped the RTNL semaphore in order to
			 * perform the module load. So, even if we
			 * succeeded in loading the module we have to
			 * tell the caller to replay the request. We
			 * indicate this using -EAGAIN.
			 * We replay the request because the device may
			 * go away in the mean time.
			 */
			rtnl_unlock();
			request_module("sch_%s", name);
			rtnl_lock();
			ops = qdisc_lookup_ops(kind);
			if (ops != NULL) {
				/* We will try again qdisc_lookup_ops,
				 * so don't keep a reference.
				 */
				module_put(ops->owner);
				err = -EAGAIN;
				goto err_out;
			}
		}
	}
#endif

	err = -ENOENT;
	if (!ops) {
		NL_SET_ERR_MSG(extack, "Specified qdisc not found");
		goto err_out;
	}

	/* Takes a module reference and a device reference on success. */
	sch = qdisc_alloc(dev_queue, ops, extack);
	if (IS_ERR(sch)) {
		err = PTR_ERR(sch);
		goto err_out2;
	}

	sch->parent = parent;

	if (handle == TC_H_INGRESS) {
		sch->flags |= TCQ_F_INGRESS;
		handle = TC_H_MAKE(TC_H_INGRESS, 0);
	} else {
		if (handle == 0) {
			/* User did not pick a handle: auto-allocate one. */
			handle = qdisc_alloc_handle(dev);
			if (handle == 0) {
				NL_SET_ERR_MSG(extack, "Maximum number of qdisc handles was exceeded");
				err = -ENOSPC;
				goto err_out3;
			}
		}
		if (!netif_is_multiqueue(dev))
			sch->flags |= TCQ_F_ONETXQUEUE;
	}

	sch->handle = handle;

	/* This exist to keep backward compatible with a userspace
	 * loophole, what allowed userspace to get IFF_NO_QUEUE
	 * facility on older kernels by setting tx_queue_len=0 (prior
	 * to qdisc init), and then forgot to reinit tx_queue_len
	 * before again attaching a qdisc.
	 */
	if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
		dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
		netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
	}

	err = qdisc_block_indexes_set(sch, tca, extack);
	if (err)
		goto err_out3;

	if (ops->init) {
		err = ops->init(sch, tca[TCA_OPTIONS], extack);
		if (err != 0)
			goto err_out5;
	}

	if (tca[TCA_STAB]) {
		stab = qdisc_get_stab(tca[TCA_STAB], extack);
		if (IS_ERR(stab)) {
			err = PTR_ERR(stab);
			goto err_out4;
		}
		rcu_assign_pointer(sch->stab, stab);
	}
	if (tca[TCA_RATE]) {
		seqcount_t *running;

		err = -EOPNOTSUPP;
		if (sch->flags & TCQ_F_MQROOT) {
			NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc");
			goto err_out4;
		}

		/* Pick the seqcount guarding this qdisc's stats: the
		 * root's for ordinary children, its own otherwise.
		 */
		if (sch->parent != TC_H_ROOT &&
		    !(sch->flags & TCQ_F_INGRESS) &&
		    (!p || !(p->flags & TCQ_F_MQROOT)))
			running = qdisc_root_sleeping_running(sch);
		else
			running = &sch->running;

		err = gen_new_estimator(&sch->bstats,
					sch->cpu_bstats,
					&sch->rate_est,
					NULL,
					running,
					tca[TCA_RATE]);
		if (err) {
			NL_SET_ERR_MSG(extack, "Failed to generate new estimator");
			goto err_out4;
		}
	}

	qdisc_hash_add(sch, false);

	return sch;

err_out5:
	/* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */
	if (ops->destroy)
		ops->destroy(sch);
err_out3:
	dev_put(dev);
	qdisc_free(sch);
err_out2:
	module_put(ops->owner);
err_out:
	*errp = err;
	return NULL;

err_out4:
	/*
	 * Any broken qdiscs that would require a ops->reset() here?
	 * The qdisc was never in action so it shouldn't be necessary.
	 */
	qdisc_put_stab(rtnl_dereference(sch->stab));
	if (ops->destroy)
		ops->destroy(sch);
	goto err_out3;
}
1295
/* Change an existing qdisc in place from the netlink attributes in @tca.
 *
 * Applies, in order: the qdisc's own ->change() for TCA_OPTIONS, a swap
 * of the size table (note: when TCA_STAB is absent the old table is
 * replaced by NULL), and a rate-estimator replacement for TCA_RATE.
 * Changing shared block indices is explicitly not supported here.
 */
static int qdisc_change(struct Qdisc *sch, struct nlattr **tca,
			struct netlink_ext_ack *extack)
{
	struct qdisc_size_table *ostab, *stab = NULL;
	int err = 0;

	if (tca[TCA_OPTIONS]) {
		if (!sch->ops->change) {
			NL_SET_ERR_MSG(extack, "Change operation not supported by specified qdisc");
			return -EINVAL;
		}
		if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
			NL_SET_ERR_MSG(extack, "Change of blocks is not supported");
			return -EOPNOTSUPP;
		}
		err = sch->ops->change(sch, tca[TCA_OPTIONS], extack);
		if (err)
			return err;
	}

	if (tca[TCA_STAB]) {
		stab = qdisc_get_stab(tca[TCA_STAB], extack);
		if (IS_ERR(stab))
			return PTR_ERR(stab);
	}

	/* Publish the new stab (possibly NULL) and drop the old one. */
	ostab = rtnl_dereference(sch->stab);
	rcu_assign_pointer(sch->stab, stab);
	qdisc_put_stab(ostab);

	if (tca[TCA_RATE]) {
		/* NB: ignores errors from replace_estimator
		   because change can't be undone. */
		if (sch->flags & TCQ_F_MQROOT)
			goto out;
		gen_replace_estimator(&sch->bstats,
				      sch->cpu_bstats,
				      &sch->rate_est,
				      NULL,
				      qdisc_root_sleeping_running(sch),
				      tca[TCA_RATE]);
	}
out:
	return 0;
}
1341
/* Walker state for loop detection when grafting a qdisc: @p is the
 * qdisc being grafted (a loop exists if it is reachable from itself),
 * @depth bounds the recursion.
 */
struct check_loop_arg {
	struct qdisc_walker	w;	/* must be first: cast back in callback */
	struct Qdisc *p;
	int depth;
};

static int check_loop_fn(struct Qdisc *q, unsigned long cl,
			 struct qdisc_walker *w);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001350
1351static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
1352{
1353 struct check_loop_arg arg;
1354
1355 if (q->ops->cl_ops == NULL)
1356 return 0;
1357
1358 arg.w.stop = arg.w.skip = arg.w.count = 0;
1359 arg.w.fn = check_loop_fn;
1360 arg.depth = depth;
1361 arg.p = p;
1362 q->ops->cl_ops->walk(q, &arg.w);
1363 return arg.w.stop ? -ELOOP : 0;
1364}
1365
1366static int
1367check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
1368{
1369 struct Qdisc *leaf;
Eric Dumazet20fea082007-11-14 01:44:41 -08001370 const struct Qdisc_class_ops *cops = q->ops->cl_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001371 struct check_loop_arg *arg = (struct check_loop_arg *)w;
1372
1373 leaf = cops->leaf(q, cl);
1374 if (leaf) {
1375 if (leaf == arg->p || arg->depth > 7)
1376 return -ELOOP;
1377 return check_loop(leaf, arg->p, arg->depth + 1);
1378 }
1379 return 0;
1380}
1381
/* Netlink attribute policy shared by the qdisc and class request
 * handlers; attributes not listed here are accepted unvalidated.
 */
const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = {
	[TCA_KIND]		= { .type = NLA_STRING },
	[TCA_RATE]		= { .type = NLA_BINARY,
				    .len = sizeof(struct tc_estimator) },
	[TCA_STAB]		= { .type = NLA_NESTED },
	[TCA_DUMP_INVISIBLE]	= { .type = NLA_FLAG },
	[TCA_CHAIN]		= { .type = NLA_U32 },
	[TCA_INGRESS_BLOCK]	= { .type = NLA_U32 },
	[TCA_EGRESS_BLOCK]	= { .type = NLA_U32 },
};
1392
/*
 * Delete/get qdisc.
 *
 * Handles RTM_DELQDISC and RTM_GETQDISC requests: locates the target
 * qdisc either via its parent classid (tcm_parent) or directly by
 * handle (tcm_handle), then either grafts NULL over it (delete) or
 * sends a notification describing it (get).
 */

static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q = NULL;
	struct Qdisc *p = NULL;
	int err;

	/* Deletion requires CAP_NET_ADMIN; plain get does not. */
	if ((n->nlmsg_type != RTM_GETQDISC) &&
	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy,
			  extack);
	if (err < 0)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	clid = tcm->tcm_parent;
	if (clid) {
		/* Resolve the qdisc through its parent. */
		if (clid != TC_H_ROOT) {
			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p) {
					NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified classid");
					return -ENOENT;
				}
				q = qdisc_leaf(p, clid);
			} else if (dev_ingress_queue(dev)) {
				q = dev_ingress_queue(dev)->qdisc_sleeping;
			}
		} else {
			q = dev->qdisc;
		}
		if (!q) {
			NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device");
			return -ENOENT;
		}

		/* If a handle was also given it must match the lookup. */
		if (tcm->tcm_handle && q->handle != tcm->tcm_handle) {
			NL_SET_ERR_MSG(extack, "Invalid handle");
			return -EINVAL;
		}
	} else {
		q = qdisc_lookup(dev, tcm->tcm_handle);
		if (!q) {
			NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified handle");
			return -ENOENT;
		}
	}

	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
		NL_SET_ERR_MSG(extack, "Invalid qdisc name");
		return -EINVAL;
	}

	if (n->nlmsg_type == RTM_DELQDISC) {
		if (!clid) {
			NL_SET_ERR_MSG(extack, "Classid cannot be zero");
			return -EINVAL;
		}
		if (q->handle == 0) {
			NL_SET_ERR_MSG(extack, "Cannot delete qdisc with handle of zero");
			return -ENOENT;
		}
		/* Grafting NULL over q removes and destroys it. */
		err = qdisc_graft(dev, p, skb, n, clid, NULL, q, extack);
		if (err != 0)
			return err;
	} else {
		qdisc_notify(net, skb, n, clid, NULL, q);
	}
	return 0;
}
1477
1478/*
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001479 * Create/change qdisc.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001480 */
1481
/* RTM_NEWQDISC handler: create, replace or change a qdisc.
 *
 * Resolves the parent given by tcm_parent (TC_H_ROOT, TC_H_INGRESS, or a
 * parent class handle) and the target given by tcm_handle, then either
 * modifies the existing qdisc in place (qdisc_change()) or creates a new
 * one and grafts it (qdisc_create() + qdisc_graft()).  Runs under RTNL.
 *
 * Returns 0 on success or a negative errno; error details go to @extack.
 */
static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
			   struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm;
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q, *p;	/* q: target qdisc, p: its parent (if any) */
	int err;

	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

replay:
	/* Reinit, just in case something touches this.
	 * We can reach this label again after qdisc_create() returned
	 * -EAGAIN (module autoload), so re-parse from scratch.
	 */
	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy,
			  extack);
	if (err < 0)
		return err;

	tcm = nlmsg_data(n);
	clid = tcm->tcm_parent;
	q = p = NULL;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;


	if (clid) {
		if (clid != TC_H_ROOT) {
			if (clid != TC_H_INGRESS) {
				/* Parent is a class: look up its qdisc and
				 * the leaf qdisc currently attached there.
				 */
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p) {
					NL_SET_ERR_MSG(extack, "Failed to find specified qdisc");
					return -ENOENT;
				}
				q = qdisc_leaf(p, clid);
			} else if (dev_ingress_queue_create(dev)) {
				q = dev_ingress_queue(dev)->qdisc_sleeping;
			}
		} else {
			q = dev->qdisc;
		}

		/* It may be default qdisc, ignore it */
		if (q && q->handle == 0)
			q = NULL;

		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
			if (tcm->tcm_handle) {
				/* Caller named a specific handle: replace the
				 * existing qdisc or graft a freshly created one.
				 */
				if (q && !(n->nlmsg_flags & NLM_F_REPLACE)) {
					NL_SET_ERR_MSG(extack, "NLM_F_REPLACE needed to override");
					return -EEXIST;
				}
				if (TC_H_MIN(tcm->tcm_handle)) {
					NL_SET_ERR_MSG(extack, "Invalid minor handle");
					return -EINVAL;
				}
				q = qdisc_lookup(dev, tcm->tcm_handle);
				if (!q)
					goto create_n_graft;
				if (n->nlmsg_flags & NLM_F_EXCL) {
					NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot override");
					return -EEXIST;
				}
				if (tca[TCA_KIND] &&
				    nla_strcmp(tca[TCA_KIND], q->ops->id)) {
					NL_SET_ERR_MSG(extack, "Invalid qdisc name");
					return -EINVAL;
				}
				/* Grafting q under p must not create a cycle. */
				if (q == p ||
				    (p && check_loop(q, p, 0))) {
					NL_SET_ERR_MSG(extack, "Qdisc parent/child loop detected");
					return -ELOOP;
				}
				/* Hold a reference for the graft; dropped via
				 * qdisc_put() if the graft fails below.
				 */
				qdisc_refcount_inc(q);
				goto graft;
			} else {
				if (!q)
					goto create_n_graft;

				/* This magic test requires explanation.
				 *
				 * We know, that some child q is already
				 * attached to this parent and have choice:
				 * either to change it or to create/graft new one.
				 *
				 * 1. We are allowed to create/graft only
				 * if CREATE and REPLACE flags are set.
				 *
				 * 2. If EXCL is set, requestor wanted to say,
				 * that qdisc tcm_handle is not expected
				 * to exist, so that we choose create/graft too.
				 *
				 * 3. The last case is when no flags are set.
				 * Alas, it is sort of hole in API, we
				 * cannot decide what to do unambiguously.
				 * For now we select create/graft, if
				 * user gave KIND, which does not match existing.
				 */
				if ((n->nlmsg_flags & NLM_F_CREATE) &&
				    (n->nlmsg_flags & NLM_F_REPLACE) &&
				    ((n->nlmsg_flags & NLM_F_EXCL) ||
				     (tca[TCA_KIND] &&
				      nla_strcmp(tca[TCA_KIND], q->ops->id))))
					goto create_n_graft;
			}
		}
	} else {
		/* No parent given: address the qdisc by handle alone. */
		if (!tcm->tcm_handle) {
			NL_SET_ERR_MSG(extack, "Handle cannot be zero");
			return -EINVAL;
		}
		q = qdisc_lookup(dev, tcm->tcm_handle);
	}

	/* Change qdisc parameters */
	if (!q) {
		NL_SET_ERR_MSG(extack, "Specified qdisc not found");
		return -ENOENT;
	}
	if (n->nlmsg_flags & NLM_F_EXCL) {
		NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot modify");
		return -EEXIST;
	}
	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
		NL_SET_ERR_MSG(extack, "Invalid qdisc name");
		return -EINVAL;
	}
	err = qdisc_change(q, tca, extack);
	if (err == 0)
		qdisc_notify(net, skb, n, clid, NULL, q);
	return err;

create_n_graft:
	if (!(n->nlmsg_flags & NLM_F_CREATE)) {
		NL_SET_ERR_MSG(extack, "Qdisc not found. To create specify NLM_F_CREATE flag");
		return -ENOENT;
	}
	if (clid == TC_H_INGRESS) {
		if (dev_ingress_queue(dev)) {
			q = qdisc_create(dev, dev_ingress_queue(dev), p,
					 tcm->tcm_parent, tcm->tcm_parent,
					 tca, &err, extack);
		} else {
			NL_SET_ERR_MSG(extack, "Cannot find ingress queue for specified device");
			err = -ENOENT;
		}
	} else {
		struct netdev_queue *dev_queue;

		/* Let a classful parent pick the tx queue, else inherit the
		 * parent's queue, else default to tx queue 0.
		 */
		if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
			dev_queue = p->ops->cl_ops->select_queue(p, tcm);
		else if (p)
			dev_queue = p->dev_queue;
		else
			dev_queue = netdev_get_tx_queue(dev, 0);

		q = qdisc_create(dev, dev_queue, p,
				 tcm->tcm_parent, tcm->tcm_handle,
				 tca, &err, extack);
	}
	if (q == NULL) {
		/* -EAGAIN means a qdisc module was just loaded; retry. */
		if (err == -EAGAIN)
			goto replay;
		return err;
	}

graft:
	err = qdisc_graft(dev, p, skb, n, clid, q, NULL, extack);
	if (err) {
		if (q)
			qdisc_put(q);
		return err;
	}

	return 0;
}
1662
/* Dump @root and, when @recur is set, every qdisc hashed on its device.
 *
 * @q_idx_p: in/out cursor counting qdiscs visited so far for this device;
 *           entries below @s_q_idx were already emitted in a previous
 *           dump round and are skipped.
 * @recur:   false for singleton roots (e.g. the ingress qdisc), whose
 *           children are not in the device hash or were dumped already.
 * @dump_invisible: also include qdiscs normally hidden from dumps.
 *
 * Returns 0 on success, -1 if the skb filled up (dump will resume later).
 */
static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
			      struct netlink_callback *cb,
			      int *q_idx_p, int s_q_idx, bool recur,
			      bool dump_invisible)
{
	int ret = 0, q_idx = *q_idx_p;
	struct Qdisc *q;
	int b;

	if (!root)
		return 0;

	/* The root itself is entry 0 of this device's sequence. */
	q = root;
	if (q_idx < s_q_idx) {
		q_idx++;
	} else {
		if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
				  RTM_NEWQDISC) <= 0)
			goto done;
		q_idx++;
	}

	/* If dumping singletons, there is no qdisc_dev(root) and the singleton
	 * itself has already been dumped.
	 *
	 * If we've already dumped the top-level (ingress) qdisc above and the global
	 * qdisc hashtable, we don't want to hit it again
	 */
	if (!qdisc_dev(root) || !recur)
		goto out;

	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
		if (q_idx < s_q_idx) {
			q_idx++;
			continue;
		}
		if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
				  RTM_NEWQDISC) <= 0)
			goto done;
		q_idx++;
	}

out:
	*q_idx_p = q_idx;
	return ret;
done:
	/* skb is full; report -1 so the dump restarts from the cursor. */
	ret = -1;
	goto out;
}
1716
/* RTM_GETQDISC dump handler: walk all netdevices in the caller's netns and
 * dump each device's tx qdiscs plus its ingress qdisc (if any).
 *
 * Resume state lives in cb->args: args[0] is the device index reached,
 * args[1] the per-device qdisc index.  Runs under RTNL (ASSERT_RTNL).
 */
static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int idx, q_idx;
	int s_idx, s_q_idx;
	struct net_device *dev;
	const struct nlmsghdr *nlh = cb->nlh;
	struct nlattr *tca[TCA_MAX + 1];
	int err;

	/* Pick up where the previous dump round stopped. */
	s_idx = cb->args[0];
	s_q_idx = q_idx = cb->args[1];

	idx = 0;
	ASSERT_RTNL();

	/* Parse request attributes (e.g. TCA_DUMP_INVISIBLE). */
	err = nlmsg_parse(nlh, sizeof(struct tcmsg), tca, TCA_MAX,
			  rtm_tca_policy, cb->extack);
	if (err < 0)
		return err;

	for_each_netdev(net, dev) {
		struct netdev_queue *dev_queue;

		if (idx < s_idx)
			goto cont;
		if (idx > s_idx)
			s_q_idx = 0;	/* fresh device: dump all its qdiscs */
		q_idx = 0;

		if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx,
				       true, tca[TCA_DUMP_INVISIBLE]) < 0)
			goto done;

		/* Ingress qdisc is a singleton root, dumped without recursion. */
		dev_queue = dev_ingress_queue(dev);
		if (dev_queue &&
		    tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
				       &q_idx, s_q_idx, false,
				       tca[TCA_DUMP_INVISIBLE]) < 0)
			goto done;

cont:
		idx++;
	}

done:
	/* Record the cursor for the next dump round. */
	cb->args[0] = idx;
	cb->args[1] = q_idx;

	return skb->len;
}
1768
1769
1770
1771/************************************************
1772 * Traffic classes manipulation. *
1773 ************************************************/
1774
/* Build one RTM_*TCLASS netlink message for class @cl of qdisc @q into @skb.
 *
 * Emits the tcmsg header, TCA_KIND, the class ops' own dump attributes and
 * its statistics.  On any failure the skb is trimmed back to its state on
 * entry and -1 is returned; on success returns skb->len.
 */
static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
			  unsigned long cl,
			  u32 portid, u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);	/* rollback point */
	struct gnet_dump d;
	const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;

	/* Class dumps can be long-running; give the scheduler a chance. */
	cond_resched();
	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = q->handle;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = 0;
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	/* Let the qdisc's class ops fill class-specific attributes
	 * (including tcm_handle/tcm_parent for the actual class).
	 */
	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 NULL, &d, TCA_PAD) < 0)
		goto nla_put_failure;

	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	/* Undo any partial message so the skb stays consistent. */
	nlmsg_trim(skb, b);
	return -1;
}
1820
1821static int tclass_notify(struct net *net, struct sk_buff *oskb,
1822 struct nlmsghdr *n, struct Qdisc *q,
1823 unsigned long cl, int event)
1824{
1825 struct sk_buff *skb;
1826 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
Zhike Wang5b5f99b2019-03-11 03:15:54 -07001827 int err = 0;
WANG Cong27d7f072017-08-24 16:51:27 -07001828
1829 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1830 if (!skb)
1831 return -ENOBUFS;
1832
1833 if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
1834 kfree_skb(skb);
1835 return -EINVAL;
1836 }
1837
Zhike Wang5b5f99b2019-03-11 03:15:54 -07001838 err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1839 n->nlmsg_flags & NLM_F_ECHO);
1840 if (err > 0)
1841 err = 0;
1842 return err;
WANG Cong27d7f072017-08-24 16:51:27 -07001843}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001844
WANG Cong14546ba2017-08-24 16:51:28 -07001845static int tclass_del_notify(struct net *net,
1846 const struct Qdisc_class_ops *cops,
1847 struct sk_buff *oskb, struct nlmsghdr *n,
1848 struct Qdisc *q, unsigned long cl)
1849{
1850 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1851 struct sk_buff *skb;
1852 int err = 0;
1853
1854 if (!cops->delete)
1855 return -EOPNOTSUPP;
1856
1857 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1858 if (!skb)
1859 return -ENOBUFS;
1860
1861 if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
1862 RTM_DELTCLASS) < 0) {
1863 kfree_skb(skb);
1864 return -EINVAL;
1865 }
1866
1867 err = cops->delete(q, cl);
1868 if (err) {
1869 kfree_skb(skb);
1870 return err;
1871 }
1872
Zhike Wang5b5f99b2019-03-11 03:15:54 -07001873 err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1874 n->nlmsg_flags & NLM_F_ECHO);
1875 if (err > 0)
1876 err = 0;
1877 return err;
WANG Cong14546ba2017-08-24 16:51:28 -07001878}
1879
Cong Wang07d79fc2017-08-30 14:30:36 -07001880#ifdef CONFIG_NET_CLS
1881
/* Walker payload for tcf_node_bind(): carries the class id being rebound
 * and the new internal class handle, embedded in a struct tcf_walker so it
 * can be passed through tp->ops->walk().
 */
struct tcf_bind_args {
	struct tcf_walker w;	/* must be first: cast back in tcf_node_bind() */
	u32 classid;		/* user-visible class id filters point at */
	unsigned long cl;	/* new internal class handle (0 = unbind) */
};
1887
/* Per-filter-node walker callback: rebind one filter node to the class
 * handle carried in the tcf_bind_args.  The qdisc tree lock is taken
 * around ->bind_class() to serialize against the datapath.
 * Always returns 0 so the walk continues over all nodes.
 */
static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
{
	struct tcf_bind_args *a = (void *)arg;

	if (tp->ops->bind_class) {
		struct Qdisc *q = tcf_block_q(tp->chain->block);

		sch_tree_lock(q);
		tp->ops->bind_class(n, a->classid, a->cl);
		sch_tree_unlock(q);
	}
	return 0;
}
1901
/* Walk every filter on the tcf_block attached to class @portid of @q and
 * rebind nodes pointing at @clid to the internal handle @new_cl
 * (new_cl == 0 unbinds, used when the class is deleted).
 * Silently returns if the class or its block cannot be found.
 */
static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
			   unsigned long new_cl)
{
	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
	struct tcf_block *block;
	struct tcf_chain *chain;
	unsigned long cl;

	cl = cops->find(q, portid);
	if (!cl)
		return;
	block = cops->tcf_block(q, cl, NULL);
	if (!block)
		return;
	/* Iterate all chains, and within each chain all classifiers. */
	for (chain = tcf_get_next_chain(block, NULL);
	     chain;
	     chain = tcf_get_next_chain(block, chain)) {
		struct tcf_proto *tp;

		for (tp = tcf_get_next_proto(chain, NULL, true);
		     tp; tp = tcf_get_next_proto(chain, tp, true)) {
			struct tcf_bind_args arg = {};

			arg.w.fn = tcf_node_bind;
			arg.classid = clid;
			arg.cl = new_cl;
			tp->ops->walk(tp, &arg.w, true);
		}
	}
}
1932
1933#else
1934
/* No-op stub: without CONFIG_NET_CLS there are no filters to rebind. */
static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
			   unsigned long new_cl)
{
}
1939
1940#endif
1941
/* RTM_{NEW,DEL,GET}TCLASS handler: create, change, delete or query a
 * traffic class.  Resolves the owning qdisc from tcm_parent/tcm_handle
 * (see the handle table below), then dispatches on the message type.
 * Runs under RTNL.  Returns 0 or a negative errno.
 */
static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
			 struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	struct Qdisc *q = NULL;
	const struct Qdisc_class_ops *cops;
	unsigned long cl = 0;
	unsigned long new_cl;
	u32 portid;
	u32 clid;
	u32 qid;
	int err;

	/* GET is unprivileged; NEW/DEL need CAP_NET_ADMIN in the netns. */
	if ((n->nlmsg_type != RTM_GETTCLASS) &&
	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy,
			  extack);
	if (err < 0)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	/*
	   parent == TC_H_UNSPEC - unspecified parent.
	   parent == TC_H_ROOT   - class is root, which has no parent.
	   parent == X:0	 - parent is root class.
	   parent == X:Y	 - parent is a node in hierarchy.
	   parent == 0:Y	 - parent is X:Y, where X:0 is qdisc.

	   handle == 0:0	 - generate handle from kernel pool.
	   handle == 0:Y	 - class is X:Y, where X:0 is qdisc.
	   handle == X:Y	 - clear.
	   handle == X:0	 - root class.
	 */

	/* Step 1. Determine qdisc handle X:0 */

	portid = tcm->tcm_parent;
	clid = tcm->tcm_handle;
	qid = TC_H_MAJ(clid);

	if (portid != TC_H_ROOT) {
		u32 qid1 = TC_H_MAJ(portid);

		if (qid && qid1) {
			/* If both majors are known, they must be identical. */
			if (qid != qid1)
				return -EINVAL;
		} else if (qid1) {
			qid = qid1;
		} else if (qid == 0)
			qid = dev->qdisc->handle;

		/* Now qid is genuine qdisc handle consistent
		 * both with parent and child.
		 *
		 * TC_H_MAJ(portid) still may be unspecified, complete it now.
		 */
		if (portid)
			portid = TC_H_MAKE(qid, portid);
	} else {
		if (qid == 0)
			qid = dev->qdisc->handle;
	}

	/* OK. Locate qdisc */
	q = qdisc_lookup(dev, qid);
	if (!q)
		return -ENOENT;

	/* And check that the qdisc supports classes. */
	cops = q->ops->cl_ops;
	if (cops == NULL)
		return -EINVAL;

	/* Now try to get class */
	if (clid == 0) {
		if (portid == TC_H_ROOT)
			clid = qid;
	} else
		clid = TC_H_MAKE(qid, clid);

	if (clid)
		cl = cops->find(q, clid);

	if (cl == 0) {
		/* Class doesn't exist: only RTM_NEWTCLASS + NLM_F_CREATE
		 * may proceed (to create it below).
		 */
		err = -ENOENT;
		if (n->nlmsg_type != RTM_NEWTCLASS ||
		    !(n->nlmsg_flags & NLM_F_CREATE))
			goto out;
	} else {
		switch (n->nlmsg_type) {
		case RTM_NEWTCLASS:
			err = -EEXIST;
			if (n->nlmsg_flags & NLM_F_EXCL)
				goto out;
			break;
		case RTM_DELTCLASS:
			err = tclass_del_notify(net, cops, skb, n, q, cl);
			/* Unbind filters from the class (bind to handle 0). */
			tc_bind_tclass(q, portid, clid, 0);
			goto out;
		case RTM_GETTCLASS:
			err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
			goto out;
		default:
			err = -EINVAL;
			goto out;
		}
	}

	if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
		NL_SET_ERR_MSG(extack, "Shared blocks are not supported for classes");
		return -EOPNOTSUPP;
	}

	/* Create or change the class via the qdisc's class ops. */
	new_cl = cl;
	err = -EOPNOTSUPP;
	if (cops->change)
		err = cops->change(q, clid, portid, tca, &new_cl, extack);
	if (err == 0) {
		tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
		/* We just created a new class, need to do reverse binding. */
		if (cl != new_cl)
			tc_bind_tclass(q, portid, clid, new_cl);
	}
out:
	return err;
}
2078
/* Walker payload for qdisc_class_dump(): carries the dump skb and the
 * netlink callback through q->ops->cl_ops->walk().
 */
struct qdisc_dump_args {
	struct qdisc_walker w;	/* must be first: cast back in the callback */
	struct sk_buff *skb;
	struct netlink_callback *cb;
};
2084
Jamal Hadi Salim5a7a5552016-09-18 08:45:33 -04002085static int qdisc_class_dump(struct Qdisc *q, unsigned long cl,
2086 struct qdisc_walker *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002087{
2088 struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
2089
Eric W. Biederman15e47302012-09-07 20:12:54 +00002090 return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
Jamal Hadi Salim5a7a5552016-09-18 08:45:33 -04002091 a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
2092 RTM_NEWTCLASS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002093}
2094
/* Dump all classes of one qdisc @q into @skb.
 *
 * @t_p: in/out qdisc counter for this device; qdiscs below @s_t were
 *       fully dumped in a previous round and are skipped.
 * cb->args[1] holds the class-walk skip count within the current qdisc.
 *
 * Returns 0 to continue with the next qdisc, -1 if the skb filled up.
 */
static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
				struct tcmsg *tcm, struct netlink_callback *cb,
				int *t_p, int s_t)
{
	struct qdisc_dump_args arg;

	/* Skip: hidden qdiscs, already-dumped qdiscs, classless qdiscs,
	 * and qdiscs not matching a requested parent major.
	 */
	if (tc_qdisc_dump_ignore(q, false) ||
	    *t_p < s_t || !q->ops->cl_ops ||
	    (tcm->tcm_parent &&
	     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
		(*t_p)++;
		return 0;
	}
	/* First qdisc past the resume point: reset the class-walk cursor. */
	if (*t_p > s_t)
		memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
	arg.w.fn = qdisc_class_dump;
	arg.skb = skb;
	arg.cb = cb;
	arg.w.stop = 0;
	arg.w.skip = cb->args[1];
	arg.w.count = 0;
	q->ops->cl_ops->walk(q, &arg.w);
	cb->args[1] = arg.w.count;
	if (arg.w.stop)
		return -1;
	(*t_p)++;
	return 0;
}
2123
/* Dump the classes of @root and of every qdisc hashed on its device.
 * If the request names a parent (tcm->tcm_parent), only that one qdisc's
 * classes are dumped.  Returns 0 on success, -1 when the skb filled up.
 */
static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
			       struct tcmsg *tcm, struct netlink_callback *cb,
			       int *t_p, int s_t)
{
	struct Qdisc *q;
	int b;

	if (!root)
		return 0;

	if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
		return -1;

	/* Singleton roots (e.g. ingress) have no device hash to walk. */
	if (!qdisc_dev(root))
		return 0;

	if (tcm->tcm_parent) {
		q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent));
		/* q == root was already dumped above; don't repeat it. */
		if (q && q != root &&
		    tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
			return -1;
		return 0;
	}
	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
		if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
			return -1;
	}

	return 0;
}
2154
/* RTM_GETTCLASS dump handler: dump classes of the device named by
 * tcm_ifindex, covering its tx qdisc tree and its ingress qdisc.
 * cb->args[0] holds the qdisc-counter resume point between rounds.
 */
static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	struct net *net = sock_net(skb->sk);
	struct netdev_queue *dev_queue;
	struct net_device *dev;
	int t, s_t;

	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
		return 0;
	/* Takes a device reference; released via dev_put() below. */
	dev = dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return 0;

	s_t = cb->args[0];
	t = 0;

	if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0)
		goto done;

	dev_queue = dev_ingress_queue(dev);
	if (dev_queue &&
	    tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
				&t, s_t) < 0)
		goto done;

done:
	cb->args[0] = t;

	dev_put(dev);
	return skb->len;
}
2187
Linus Torvalds1da177e2005-04-16 15:20:36 -07002188#ifdef CONFIG_PROC_FS
2189static int psched_show(struct seq_file *seq, void *v)
2190{
2191 seq_printf(seq, "%08x %08x %08x %08x\n",
Jarek Poplawskica44d6e2009-06-15 02:31:47 -07002192 (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
Patrick McHardy514bca32007-03-16 12:34:52 -07002193 1000000,
Thomas Gleixner1e317682015-04-14 21:08:28 +00002194 (u32)NSEC_PER_SEC / hrtimer_resolution);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002195
2196 return 0;
2197}
2198
Tom Goff7316ae82010-03-19 15:40:13 +00002199static int __net_init psched_net_init(struct net *net)
2200{
2201 struct proc_dir_entry *e;
2202
Christoph Hellwig3f3942a2018-05-15 15:57:23 +02002203 e = proc_create_single("psched", 0, net->proc_net, psched_show);
Tom Goff7316ae82010-03-19 15:40:13 +00002204 if (e == NULL)
2205 return -ENOMEM;
2206
2207 return 0;
2208}
2209
/* Per-netns teardown: remove the /proc/net/psched entry created in
 * psched_net_init().
 */
static void __net_exit psched_net_exit(struct net *net)
{
	remove_proc_entry("psched", net->proc_net);
}
2214#else
/* Stub when CONFIG_PROC_FS is disabled: nothing to create. */
static int __net_init psched_net_init(struct net *net)
{
	return 0;
}
2219
/* Stub when CONFIG_PROC_FS is disabled: nothing to remove. */
static void __net_exit psched_net_exit(struct net *net)
{
}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002223#endif
2224
/* Per-network-namespace lifecycle hooks for the /proc/net/psched entry. */
static struct pernet_operations psched_net_ops = {
	.init = psched_net_init,
	.exit = psched_net_exit,
};
2229
/* Packet scheduler core init: register per-netns proc hooks, the built-in
 * qdisc types, and the rtnetlink message handlers for qdiscs and classes.
 * Returns 0 on success or the pernet registration error.
 */
static int __init pktsched_init(void)
{
	int err;

	err = register_pernet_subsys(&psched_net_ops);
	if (err) {
		pr_err("pktsched_init: "
		       "cannot initialize per netns operations\n");
		return err;
	}

	/* Built-in qdiscs that must be available without module loading. */
	register_qdisc(&pfifo_fast_ops);
	register_qdisc(&pfifo_qdisc_ops);
	register_qdisc(&bfifo_qdisc_ops);
	register_qdisc(&pfifo_head_drop_qdisc_ops);
	register_qdisc(&mq_qdisc_ops);
	register_qdisc(&noqueue_qdisc_ops);

	/* Netlink handlers: doit for NEW/DEL/GET, dumpit for GET dumps. */
	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
		      0);
	rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
		      0);

	return 0;
}
2259
/* Run at subsystem-initcall level during boot. */
subsys_initcall(pktsched_init);