// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/sch_api.c	Packet scheduler API.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Fixes:
 *
 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
 */
13
Linus Torvalds1da177e2005-04-16 15:20:36 -070014#include <linux/module.h>
15#include <linux/types.h>
16#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070017#include <linux/string.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <linux/errno.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070019#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070020#include <linux/init.h>
21#include <linux/proc_fs.h>
22#include <linux/seq_file.h>
23#include <linux/kmod.h>
24#include <linux/list.h>
Patrick McHardy41794772007-03-16 01:19:15 -070025#include <linux/hrtimer.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090026#include <linux/slab.h>
Jiri Kosina59cc1f62016-08-10 11:05:15 +020027#include <linux/hashtable.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020029#include <net/net_namespace.h>
Denis V. Lunevb8542722007-12-01 00:21:31 +110030#include <net/sock.h>
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -070031#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070032#include <net/pkt_sched.h>
Cong Wang07d79fc2017-08-30 14:30:36 -070033#include <net/pkt_cls.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070034
Cong Wangf5a78332020-05-26 21:35:25 -070035#include <trace/events/qdisc.h>
36
/*

   Short review.
   -------------

   This file consists of two interrelated parts:

   1. queueing disciplines manager frontend.
   2. traffic classes manager frontend.

   Generally, queueing discipline ("qdisc") is a black box,
   which is able to enqueue packets and to dequeue them (when
   device is ready to send something) in order and at times
   determined by algorithm hidden in it.

   qdisc's are divided to two categories:
   - "queues", which have no internal structure visible from outside.
   - "schedulers", which split all the packets to "traffic classes",
     using "packet classifiers" (look at cls_api.c)

   In turn, classes may have child qdiscs (as rule, queues)
   attached to them etc. etc. etc.

   The goal of the routines in this file is to translate
   information supplied by user in the form of handles
   to more intelligible for kernel form, to make some sanity
   checks and part of work, which is common to all qdiscs
   and to provide rtnetlink notifications.

   All real intelligent work is done inside qdisc modules.


   Every discipline has two major routines: enqueue and dequeue.

   ---dequeue

   dequeue usually returns a skb to send. It is allowed to return NULL,
   but it does not mean that queue is empty, it just means that
   discipline does not want to send anything this time.
   Queue is really empty if q->q.qlen == 0.
   For complicated disciplines with multiple queues q->q is not
   real packet queue, but however q->q.qlen must be valid.

   ---enqueue

   enqueue returns 0, if packet was enqueued successfully.
   If packet (this one or another one) was dropped, it returns
   not zero error code.
   NET_XMIT_DROP - this packet dropped
     Expected action: do not backoff, but wait until queue will clear.
   NET_XMIT_CN - probably this packet enqueued, but another one dropped.
     Expected action: backoff or ignore

   Auxiliary routines:

   ---peek

   like dequeue but without removing a packet from the queue

   ---reset

   returns qdisc to initial state: purge all buffers, clear all
   timers, counters (except for statistics) etc.

   ---init

   initializes newly created qdisc.

   ---destroy

   destroys resources allocated by init and during lifetime of qdisc.

   ---change

   changes qdisc parameters.
 */
114
/* Protects list of registered TC modules. It is pure SMP lock.
 * Writers (register/unregister/set-default) take it exclusively;
 * readers only traverse the qdisc_base list below.
 */
static DEFINE_RWLOCK(qdisc_mod_lock);


/************************************************
 *	Queueing disciplines manipulation.	*
 ************************************************/


/* The list of all installed queueing disciplines,
 * singly linked through Qdisc_ops::next, guarded by qdisc_mod_lock.
 */
static struct Qdisc_ops *qdisc_base;

/* Register/unregister queueing discipline */
/* Register a queueing discipline's ops with the global list.
 * Returns 0 on success, -EEXIST if an ops with the same id is already
 * registered, or -EINVAL if the ops are internally inconsistent.
 */
int register_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int rc = -EEXIST;

	write_lock(&qdisc_mod_lock);
	/* Walk to the tail, bailing out if the id is already taken. */
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (!strcmp(qops->id, q->id))
			goto out;

	/* Fill in missing callbacks with the no-op defaults.  A peek
	 * implementation only makes sense together with a dequeue one,
	 * so peek==NULL with dequeue!=NULL is rejected.
	 * NOTE(review): on the -EINVAL paths below, enqueue/peek may
	 * already have been replaced with the noop defaults; the caller's
	 * ops struct is left modified — confirm this is intended.
	 */
	if (qops->enqueue == NULL)
		qops->enqueue = noop_qdisc_ops.enqueue;
	if (qops->peek == NULL) {
		if (qops->dequeue == NULL)
			qops->peek = noop_qdisc_ops.peek;
		else
			goto out_einval;
	}
	if (qops->dequeue == NULL)
		qops->dequeue = noop_qdisc_ops.dequeue;

	if (qops->cl_ops) {
		const struct Qdisc_class_ops *cops = qops->cl_ops;

		/* Classful qdiscs must provide the core class callbacks. */
		if (!(cops->find && cops->walk && cops->leaf))
			goto out_einval;

		/* A classifier block requires both bind and unbind hooks. */
		if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf))
			goto out_einval;
	}

	/* Splice onto the tail of qdisc_base. */
	qops->next = NULL;
	*qp = qops;
	rc = 0;
out:
	write_unlock(&qdisc_mod_lock);
	return rc;

out_einval:
	rc = -EINVAL;
	goto out;
}
EXPORT_SYMBOL(register_qdisc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700173
174int unregister_qdisc(struct Qdisc_ops *qops)
175{
176 struct Qdisc_ops *q, **qp;
177 int err = -ENOENT;
178
179 write_lock(&qdisc_mod_lock);
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000180 for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700181 if (q == qops)
182 break;
183 if (q) {
184 *qp = q->next;
185 q->next = NULL;
186 err = 0;
187 }
188 write_unlock(&qdisc_mod_lock);
189 return err;
190}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800191EXPORT_SYMBOL(unregister_qdisc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700192
/* Get default qdisc if not otherwise specified.
 * Copies the id of default_qdisc_ops into @name (at most @len bytes),
 * under the read side of qdisc_mod_lock.
 */
void qdisc_get_default(char *name, size_t len)
{
	read_lock(&qdisc_mod_lock);
	strlcpy(name, default_qdisc_ops->id, len);
	read_unlock(&qdisc_mod_lock);
}
200
201static struct Qdisc_ops *qdisc_lookup_default(const char *name)
202{
203 struct Qdisc_ops *q = NULL;
204
205 for (q = qdisc_base; q; q = q->next) {
206 if (!strcmp(name, q->id)) {
207 if (!try_module_get(q->owner))
208 q = NULL;
209 break;
210 }
211 }
212
213 return q;
214}
215
/* Set new default qdisc to use.
 * Requires CAP_NET_ADMIN.  If the named ops are not registered yet,
 * drops the lock, tries to load module "sch_<name>", and looks again.
 * On success the old default's module reference is dropped.
 * Returns 0 on success, -EPERM or -ENOENT on failure.
 */
int qdisc_set_default(const char *name)
{
	const struct Qdisc_ops *ops;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	write_lock(&qdisc_mod_lock);
	ops = qdisc_lookup_default(name);
	if (!ops) {
		/* Not found, drop lock and try to load module */
		write_unlock(&qdisc_mod_lock);
		request_module("sch_%s", name);
		write_lock(&qdisc_mod_lock);

		ops = qdisc_lookup_default(name);
	}

	if (ops) {
		/* Set new default; release the reference held on the old one.
		 * qdisc_lookup_default() already took a ref on @ops.
		 */
		module_put(default_qdisc_ops->owner);
		default_qdisc_ops = ops;
	}
	write_unlock(&qdisc_mod_lock);

	return ops ? 0 : -ENOENT;
}
244
#ifdef CONFIG_NET_SCH_DEFAULT
/* Set default value from kernel config at late init, once all
 * built-in schedulers have had a chance to register.
 */
static int __init sch_default_qdisc(void)
{
	return qdisc_set_default(CONFIG_DEFAULT_NET_SCH);
}
late_initcall(sch_default_qdisc);
#endif
253
/* We know handle. Find qdisc among all qdisc's attached to device
 * (root qdisc, all its children, children of children etc.)
 * Note: caller either uses rtnl or rcu_read_lock()
 */

static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
{
	struct Qdisc *q;

	/* No backing device (e.g. noop root): only the root itself
	 * can match.
	 */
	if (!qdisc_dev(root))
		return (root->handle == handle ? root : NULL);

	if (!(root->flags & TCQ_F_BUILTIN) &&
	    root->handle == handle)
		return root;

	/* Scan the device-wide qdisc hash bucket for this handle. */
	hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle,
				   lockdep_rtnl_is_held()) {
		if (q->handle == handle)
			return q;
	}
	return NULL;
}
277
/* Insert @q into its device's qdisc hash (keyed by handle) so it can
 * be found by qdisc_lookup().  Root and ingress qdiscs are not hashed.
 * @invisible hides the qdisc from default dumps via TCQ_F_INVISIBLE.
 * Must be called under RTNL.
 */
void qdisc_hash_add(struct Qdisc *q, bool invisible)
{
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
		ASSERT_RTNL();
		hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
		if (invisible)
			q->flags |= TCQ_F_INVISIBLE;
	}
}
EXPORT_SYMBOL(qdisc_hash_add);
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700288
/* Remove @q from its device's qdisc hash; mirror of qdisc_hash_add().
 * Root and ingress qdiscs were never hashed, so they are skipped.
 * Must be called under RTNL.
 */
void qdisc_hash_del(struct Qdisc *q)
{
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
		ASSERT_RTNL();
		hash_del_rcu(&q->hash);
	}
}
EXPORT_SYMBOL(qdisc_hash_del);
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700297
/* Find a qdisc on @dev by handle: first under the root qdisc tree,
 * then under the ingress queue's sleeping qdisc.  Returns NULL when
 * @handle is 0 or no match exists.
 */
struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
{
	struct Qdisc *q;

	if (!handle)
		return NULL;
	q = qdisc_match_from_root(dev->qdisc, handle);
	if (q)
		goto out;

	if (dev_ingress_queue(dev))
		q = qdisc_match_from_root(
			dev_ingress_queue(dev)->qdisc_sleeping,
			handle);
out:
	return q;
}
315
/* RCU-safe variant of qdisc_lookup(): uses dev_ingress_queue_rcu()
 * so callers may search under rcu_read_lock() instead of RTNL.
 */
struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle)
{
	struct netdev_queue *nq;
	struct Qdisc *q;

	if (!handle)
		return NULL;
	q = qdisc_match_from_root(dev->qdisc, handle);
	if (q)
		goto out;

	nq = dev_ingress_queue_rcu(dev);
	if (nq)
		q = qdisc_match_from_root(nq->qdisc_sleeping, handle);
out:
	return q;
}
333
Linus Torvalds1da177e2005-04-16 15:20:36 -0700334static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
335{
336 unsigned long cl;
Eric Dumazet20fea082007-11-14 01:44:41 -0800337 const struct Qdisc_class_ops *cops = p->ops->cl_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700338
339 if (cops == NULL)
340 return NULL;
WANG Cong143976c2017-08-24 16:51:29 -0700341 cl = cops->find(p, classid);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700342
343 if (cl == 0)
344 return NULL;
Tonghao Zhang2561f972018-12-13 00:43:23 -0800345 return cops->leaf(p, cl);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700346}
347
348/* Find queueing discipline by name */
349
Patrick McHardy1e904742008-01-22 22:11:17 -0800350static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700351{
352 struct Qdisc_ops *q = NULL;
353
354 if (kind) {
355 read_lock(&qdisc_mod_lock);
356 for (q = qdisc_base; q; q = q->next) {
Patrick McHardy1e904742008-01-22 22:11:17 -0800357 if (nla_strcmp(kind, q->id) == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700358 if (!try_module_get(q->owner))
359 q = NULL;
360 break;
361 }
362 }
363 read_unlock(&qdisc_mod_lock);
364 }
365 return q;
366}
367
/* The linklayer setting were not transferred from iproute2, in older
 * versions, and the rate tables lookup systems have been dropped in
 * the kernel. To keep backward compatible with older iproute2 tc
 * utils, we detect the linklayer setting by detecting if the rate
 * table were modified.
 *
 * For linklayer ATM table entries, the rate table will be aligned to
 * 48 bytes, thus some table entries will contain the same value. The
 * mpu (min packet unit) is also encoded into the old rate table, thus
 * starting from the mpu, we find low and high table entries for
 * mapping this cell. If these entries contain the same value, when
 * the rate tables have been modified for linklayer ATM.
 *
 * This is done by rounding mpu to the nearest 48 bytes cell/entry,
 * and then roundup to the next cell, calc the table entry one below,
 * and compare.
 */
static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
{
	int low       = roundup(r->mpu, 48);
	int high      = roundup(low+1, 48);
	int cell_low  = low >> r->cell_log;
	int cell_high = (high >> r->cell_log) - 1;

	/* rtab is too inaccurate at rates > 100Mbit/s */
	if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
		pr_debug("TC linklayer: Giving up ATM detection\n");
		return TC_LINKLAYER_ETHERNET;
	}

	/* Two adjacent cells mapping to the same value implies the
	 * 48-byte ATM cell alignment described above.
	 */
	if ((cell_high > cell_low) && (cell_high < 256)
	    && (rtab[cell_low] == rtab[cell_high])) {
		pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
			 cell_low, cell_high, rtab[cell_high]);
		return TC_LINKLAYER_ATM;
	}
	return TC_LINKLAYER_ETHERNET;
}
406
/* Global list of reference-counted rate tables, shared between qdiscs
 * with identical ratespec + table data.
 */
static struct qdisc_rate_table *qdisc_rtab_list;

/* Get (or create) a rate table matching @r and the netlink blob @tab.
 * An existing entry with identical ratespec and data is reused with
 * its refcount bumped; otherwise a new one is allocated and linked in.
 * Returns NULL on invalid input or allocation failure (with @extack set).
 */
struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
					struct nlattr *tab,
					struct netlink_ext_ack *extack)
{
	struct qdisc_rate_table *rtab;

	if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
	    nla_len(tab) != TC_RTAB_SIZE) {
		NL_SET_ERR_MSG(extack, "Invalid rate table parameters for searching");
		return NULL;
	}

	/* Reuse an existing table when both spec and data match. */
	for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
		if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
		    !memcmp(&rtab->data, nla_data(tab), 1024)) {
			rtab->refcnt++;
			return rtab;
		}
	}

	rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
	if (rtab) {
		rtab->rate = *r;
		rtab->refcnt = 1;
		memcpy(rtab->data, nla_data(tab), 1024);
		/* Old userspace doesn't pass linklayer; infer it from the
		 * table contents (see __detect_linklayer above).
		 */
		if (r->linklayer == TC_LINKLAYER_UNAWARE)
			r->linklayer = __detect_linklayer(r, rtab->data);
		rtab->next = qdisc_rtab_list;
		qdisc_rtab_list = rtab;
	} else {
		NL_SET_ERR_MSG(extack, "Failed to allocate new qdisc rate table");
	}
	return rtab;
}
EXPORT_SYMBOL(qdisc_get_rtab);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700444
445void qdisc_put_rtab(struct qdisc_rate_table *tab)
446{
447 struct qdisc_rate_table *rtab, **rtabp;
448
449 if (!tab || --tab->refcnt)
450 return;
451
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000452 for (rtabp = &qdisc_rtab_list;
453 (rtab = *rtabp) != NULL;
454 rtabp = &rtab->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700455 if (rtab == tab) {
456 *rtabp = rtab->next;
457 kfree(rtab);
458 return;
459 }
460 }
461}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800462EXPORT_SYMBOL(qdisc_put_rtab);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700463
/* Global list of reference-counted size tables. */
static LIST_HEAD(qdisc_stab_list);

/* Netlink policy for TCA_STAB nested attributes. */
static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
	[TCA_STAB_BASE]	= { .len = sizeof(struct tc_sizespec) },
	[TCA_STAB_DATA] = { .type = NLA_BINARY },
};

/* Parse a TCA_STAB nested attribute and return a size table.
 * Existing tables with identical sizespec and data are reused with a
 * refcount bump; otherwise a new table is allocated and added to
 * qdisc_stab_list.  Returns ERR_PTR() on parse/validation/allocation
 * failure (with @extack set).
 */
static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt,
					       struct netlink_ext_ack *extack)
{
	struct nlattr *tb[TCA_STAB_MAX + 1];
	struct qdisc_size_table *stab;
	struct tc_sizespec *s;
	unsigned int tsize = 0;
	u16 *tab = NULL;
	int err;

	err = nla_parse_nested_deprecated(tb, TCA_STAB_MAX, opt, stab_policy,
					  extack);
	if (err < 0)
		return ERR_PTR(err);
	if (!tb[TCA_STAB_BASE]) {
		NL_SET_ERR_MSG(extack, "Size table base attribute is missing");
		return ERR_PTR(-EINVAL);
	}

	s = nla_data(tb[TCA_STAB_BASE]);

	if (s->tsize > 0) {
		if (!tb[TCA_STAB_DATA]) {
			NL_SET_ERR_MSG(extack, "Size table data attribute is missing");
			return ERR_PTR(-EINVAL);
		}
		tab = nla_data(tb[TCA_STAB_DATA]);
		tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
	}

	/* The advertised tsize must match the data actually supplied. */
	if (tsize != s->tsize || (!tab && tsize > 0)) {
		NL_SET_ERR_MSG(extack, "Invalid size of size table");
		return ERR_PTR(-EINVAL);
	}

	/* Share an existing identical table when possible. */
	list_for_each_entry(stab, &qdisc_stab_list, list) {
		if (memcmp(&stab->szopts, s, sizeof(*s)))
			continue;
		if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
			continue;
		stab->refcnt++;
		return stab;
	}

	/* Trailing flexible data follows the struct. */
	stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
	if (!stab)
		return ERR_PTR(-ENOMEM);

	stab->refcnt = 1;
	stab->szopts = *s;
	if (tsize > 0)
		memcpy(stab->data, tab, tsize * sizeof(u16));

	list_add_tail(&stab->list, &qdisc_stab_list);

	return stab;
}
528
529void qdisc_put_stab(struct qdisc_size_table *tab)
530{
531 if (!tab)
532 return;
533
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700534 if (--tab->refcnt == 0) {
535 list_del(&tab->list);
Wei Yongjun6e079022019-02-16 08:19:55 +0000536 kfree_rcu(tab, rcu);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700537 }
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700538}
539EXPORT_SYMBOL(qdisc_put_stab);
540
541static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
542{
543 struct nlattr *nest;
544
Michal Kubecekae0be8d2019-04-26 11:13:06 +0200545 nest = nla_nest_start_noflag(skb, TCA_STAB);
Patrick McHardy3aa46142008-11-20 04:07:14 -0800546 if (nest == NULL)
547 goto nla_put_failure;
David S. Miller1b34ec42012-03-29 05:11:39 -0400548 if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
549 goto nla_put_failure;
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700550 nla_nest_end(skb, nest);
551
552 return skb->len;
553
554nla_put_failure:
555 return -1;
556}
557
/* Compute the effective packet length of @skb according to size table
 * @stab (overhead, cell alignment, per-slot table lookup) and store it
 * in qdisc_skb_cb(skb)->pkt_len.  The result is clamped to >= 1.
 */
void __qdisc_calculate_pkt_len(struct sk_buff *skb,
			       const struct qdisc_size_table *stab)
{
	int pkt_len, slot;

	pkt_len = skb->len + stab->szopts.overhead;
	/* Empty table: only overhead applies. */
	if (unlikely(!stab->szopts.tsize))
		goto out;

	slot = pkt_len + stab->szopts.cell_align;
	if (unlikely(slot < 0))
		slot = 0;

	slot >>= stab->szopts.cell_log;
	if (likely(slot < stab->szopts.tsize))
		pkt_len = stab->data[slot];
	else
		/* Past the end of the table: extrapolate by wrapping
		 * around, scaling the last entry by the overflow factor.
		 */
		pkt_len = stab->data[stab->szopts.tsize - 1] *
				(slot / stab->szopts.tsize) +
				stab->data[slot % stab->szopts.tsize];

	pkt_len <<= stab->szopts.size_log;
out:
	if (unlikely(pkt_len < 1))
		pkt_len = 1;
	qdisc_skb_cb(skb)->pkt_len = pkt_len;
}
EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700586
Florian Westphal6e765a02014-06-11 20:35:18 +0200587void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
Jarek Poplawskib00355d2009-02-01 01:12:42 -0800588{
589 if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000590 pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
591 txt, qdisc->ops->id, qdisc->handle >> 16);
Jarek Poplawskib00355d2009-02-01 01:12:42 -0800592 qdisc->flags |= TCQ_F_WARN_NONWC;
593 }
594}
595EXPORT_SYMBOL(qdisc_warn_nonwc);
596
/* hrtimer callback: reschedule the watchdog's root qdisc for
 * transmission.  Runs in hrtimer (softirq) context; the RCU read lock
 * protects the qdisc_root() dereference.
 */
static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
{
	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
						 timer);

	rcu_read_lock();
	__netif_schedule(qdisc_root(wd->qdisc));
	rcu_read_unlock();

	return HRTIMER_NORESTART;
}
608
/* Initialize watchdog @wd for @qdisc using the given clock id; the
 * timer fires qdisc_watchdog() in absolute, CPU-pinned mode.
 */
void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc,
				 clockid_t clockid)
{
	hrtimer_init(&wd->timer, clockid, HRTIMER_MODE_ABS_PINNED);
	wd->timer.function = qdisc_watchdog;
	wd->qdisc = qdisc;
}
EXPORT_SYMBOL(qdisc_watchdog_init_clockid);
617
/* Convenience wrapper: watchdog on CLOCK_MONOTONIC. */
void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
{
	qdisc_watchdog_init_clockid(wd, qdisc, CLOCK_MONOTONIC);
}
EXPORT_SYMBOL(qdisc_watchdog_init);
623
/* Arm the watchdog to fire at @expires (ns), with @delta_ns slack.
 * Does nothing if the root qdisc is deactivated, and avoids
 * reprogramming a queued timer that already expires within the
 * requested window.
 */
void qdisc_watchdog_schedule_range_ns(struct qdisc_watchdog *wd, u64 expires,
				      u64 delta_ns)
{
	if (test_bit(__QDISC_STATE_DEACTIVATED,
		     &qdisc_root_sleeping(wd->qdisc)->state))
		return;

	if (hrtimer_is_queued(&wd->timer)) {
		/* If timer is already set in [expires, expires + delta_ns],
		 * do not reprogram it.
		 * (Unsigned subtraction: last_expires < expires wraps to a
		 * huge value and correctly fails the test.)
		 */
		if (wd->last_expires - expires <= delta_ns)
			return;
	}

	wd->last_expires = expires;
	hrtimer_start_range_ns(&wd->timer,
			       ns_to_ktime(expires),
			       delta_ns,
			       HRTIMER_MODE_ABS_PINNED);
}
EXPORT_SYMBOL(qdisc_watchdog_schedule_range_ns);
Patrick McHardy41794772007-03-16 01:19:15 -0700646
/* Cancel a pending watchdog timer, waiting for a running callback. */
void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
{
	hrtimer_cancel(&wd->timer);
}
EXPORT_SYMBOL(qdisc_watchdog_cancel);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700652
Adrian Bunka94f7792008-07-22 14:20:11 -0700653static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
Patrick McHardy6fe1c7a2008-07-05 23:21:31 -0700654{
Patrick McHardy6fe1c7a2008-07-05 23:21:31 -0700655 struct hlist_head *h;
Eric Dumazet9695fe62017-08-22 12:26:46 -0700656 unsigned int i;
Patrick McHardy6fe1c7a2008-07-05 23:21:31 -0700657
Eric Dumazet9695fe62017-08-22 12:26:46 -0700658 h = kvmalloc_array(n, sizeof(struct hlist_head), GFP_KERNEL);
Patrick McHardy6fe1c7a2008-07-05 23:21:31 -0700659
660 if (h != NULL) {
661 for (i = 0; i < n; i++)
662 INIT_HLIST_HEAD(&h[i]);
663 }
664 return h;
665}
666
/* Double the class hash table of @sch when its load factor exceeds
 * 0.75.  The new table is allocated outside the qdisc tree lock; the
 * rehash and pointer swap happen under sch_tree_lock().  Allocation
 * failure simply leaves the old table in place.
 */
void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
{
	struct Qdisc_class_common *cl;
	struct hlist_node *next;
	struct hlist_head *nhash, *ohash;
	unsigned int nsize, nmask, osize;
	unsigned int i, h;

	/* Rehash when load factor exceeds 0.75 */
	if (clhash->hashelems * 4 <= clhash->hashsize * 3)
		return;
	nsize = clhash->hashsize * 2;
	nmask = nsize - 1;
	nhash = qdisc_class_hash_alloc(nsize);
	if (nhash == NULL)
		return;

	ohash = clhash->hash;
	osize = clhash->hashsize;

	sch_tree_lock(sch);
	/* Move every class into its bucket under the new mask. */
	for (i = 0; i < osize; i++) {
		hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
			h = qdisc_class_hash(cl->classid, nmask);
			hlist_add_head(&cl->hnode, &nhash[h]);
		}
	}
	clhash->hash = nhash;
	clhash->hashsize = nsize;
	clhash->hashmask = nmask;
	sch_tree_unlock(sch);

	kvfree(ohash);
}
EXPORT_SYMBOL(qdisc_class_hash_grow);
702
703int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
704{
705 unsigned int size = 4;
706
707 clhash->hash = qdisc_class_hash_alloc(size);
Alexander Aringac8ef4a2017-12-20 12:35:11 -0500708 if (!clhash->hash)
Patrick McHardy6fe1c7a2008-07-05 23:21:31 -0700709 return -ENOMEM;
710 clhash->hashsize = size;
711 clhash->hashmask = size - 1;
712 clhash->hashelems = 0;
713 return 0;
714}
715EXPORT_SYMBOL(qdisc_class_hash_init);
716
/* Free the bucket array; entries themselves are owned by the qdisc. */
void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
{
	kvfree(clhash->hash);
}
EXPORT_SYMBOL(qdisc_class_hash_destroy);
722
723void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
724 struct Qdisc_class_common *cl)
725{
726 unsigned int h;
727
728 INIT_HLIST_NODE(&cl->hnode);
729 h = qdisc_class_hash(cl->classid, clhash->hashmask);
730 hlist_add_head(&cl->hnode, &clhash->hash[h]);
731 clhash->hashelems++;
732}
733EXPORT_SYMBOL(qdisc_class_hash_insert);
734
/* Remove class @cl from @clhash and update the element count. */
void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	hlist_del(&cl->hnode);
	clhash->hashelems--;
}
EXPORT_SYMBOL(qdisc_class_hash_remove);
742
/* Allocate an unique handle from space managed by kernel
 * Possible range is [8000-FFFF]:0000 (0x8000 values)
 */
static u32 qdisc_alloc_handle(struct net_device *dev)
{
	/* Try at most the full 0x8000-value space before giving up. */
	int i = 0x8000;
	static u32 autohandle = TC_H_MAKE(0x80000000U, 0);

	do {
		autohandle += TC_H_MAKE(0x10000U, 0);
		/* Wrap back to 0x8000:0000 before hitting TC_H_ROOT. */
		if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
			autohandle = TC_H_MAKE(0x80000000U, 0);
		if (!qdisc_lookup(dev, autohandle))
			return autohandle;
		cond_resched();
	} while	(--i > 0);

	/* Exhausted: 0 is never a valid handle. */
	return 0;
}
762
/* Subtract @n packets and @len bytes from every ancestor qdisc of
 * @sch, notifying parent classes (qlen_notify) when a child becomes
 * empty so they can deactivate it.  Walks up via sch->parent under
 * rcu_read_lock(); stops at the root, at ingress, or at TCQ_F_NOPARENT.
 */
void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len)
{
	bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
	const struct Qdisc_class_ops *cops;
	unsigned long cl;
	u32 parentid;
	bool notify;
	int drops;

	if (n == 0 && len == 0)
		return;
	/* Negative n means packets were added, not dropped. */
	drops = max_t(int, n, 0);
	rcu_read_lock();
	while ((parentid = sch->parent)) {
		if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
			break;

		if (sch->flags & TCQ_F_NOPARENT)
			break;
		/* Notify parent qdisc only if child qdisc becomes empty.
		 *
		 * If child was empty even before update then backlog
		 * counter is screwed and we skip notification because
		 * parent class is already passive.
		 *
		 * If the original child was offloaded then it is allowed
		 * to be seem as empty, so the parent is notified anyway.
		 */
		notify = !sch->q.qlen && !WARN_ON_ONCE(!n &&
						       !qdisc_is_offloaded);
		/* TODO: perform the search on a per txq basis */
		sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
		if (sch == NULL) {
			WARN_ON_ONCE(parentid != TC_H_ROOT);
			break;
		}
		cops = sch->ops->cl_ops;
		if (notify && cops->qlen_notify) {
			cl = cops->find(sch, parentid);
			cops->qlen_notify(sch, cl);
		}
		sch->q.qlen -= n;
		sch->qstats.backlog -= len;
		__qdisc_qstats_drop(sch, drops);
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700811
Jakub Kicinskib5928432018-11-07 17:33:34 -0800812int qdisc_offload_dump_helper(struct Qdisc *sch, enum tc_setup_type type,
813 void *type_data)
814{
815 struct net_device *dev = qdisc_dev(sch);
816 int err;
817
818 sch->flags &= ~TCQ_F_OFFLOADED;
819 if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
820 return 0;
821
822 err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);
823 if (err == -EOPNOTSUPP)
824 return 0;
825
826 if (!err)
827 sch->flags |= TCQ_F_OFFLOADED;
828
829 return err;
830}
831EXPORT_SYMBOL(qdisc_offload_dump_helper);
832
Jakub Kicinskibfaee912018-11-07 17:33:37 -0800833void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch,
834 struct Qdisc *new, struct Qdisc *old,
835 enum tc_setup_type type, void *type_data,
836 struct netlink_ext_ack *extack)
837{
838 bool any_qdisc_is_offloaded;
839 int err;
840
841 if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
842 return;
843
844 err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);
845
846 /* Don't report error if the graft is part of destroy operation. */
847 if (!err || !new || new == &noop_qdisc)
848 return;
849
850 /* Don't report error if the parent, the old child and the new
851 * one are not offloaded.
852 */
853 any_qdisc_is_offloaded = new->flags & TCQ_F_OFFLOADED;
854 any_qdisc_is_offloaded |= sch && sch->flags & TCQ_F_OFFLOADED;
855 any_qdisc_is_offloaded |= old && old->flags & TCQ_F_OFFLOADED;
856
857 if (any_qdisc_is_offloaded)
858 NL_SET_ERR_MSG(extack, "Offloading graft operation failed.");
859}
860EXPORT_SYMBOL(qdisc_offload_graft_helper);
861
Jakub Kicinski98b0e5f2018-11-12 14:58:10 -0800862static void qdisc_offload_graft_root(struct net_device *dev,
863 struct Qdisc *new, struct Qdisc *old,
864 struct netlink_ext_ack *extack)
865{
866 struct tc_root_qopt_offload graft_offload = {
867 .command = TC_ROOT_GRAFT,
868 .handle = new ? new->handle : 0,
869 .ingress = (new && new->flags & TCQ_F_INGRESS) ||
870 (old && old->flags & TCQ_F_INGRESS),
871 };
872
873 qdisc_offload_graft_helper(dev, NULL, new, old,
874 TC_SETUP_ROOT_QDISC, &graft_offload, extack);
875}
876
/* tc_fill_qdisc - serialize one qdisc into a netlink RTM_*QDISC message
 * @skb:    destination message buffer
 * @q:      qdisc to dump
 * @clid:   parent classid to report in tcm_parent
 * @portid: netlink port id of the requester (0 for kernel notifications)
 * @seq:    netlink sequence number to echo
 * @flags:  nlmsg flags (e.g. NLM_F_REPLACE)
 * @event:  message type (RTM_NEWQDISC / RTM_DELQDISC)
 *
 * Emits the tcmsg header, TCA_KIND, optional ingress/egress block indices,
 * the qdisc's own options (ops->dump), TCA_HW_OFFLOAD, the size table, and
 * the statistics blocks.  Returns skb->len on success; on any failure the
 * partially written message is trimmed back and -1 is returned so the
 * caller can treat the skb as untouched.
 */
static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
			 u32 portid, u32 seq, u16 flags, int event)
{
	struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
	struct gnet_stats_queue __percpu *cpu_qstats = NULL;
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	/* Remember the tail so we can trim everything we add on failure. */
	unsigned char *b = skb_tail_pointer(skb);
	struct gnet_dump d;
	struct qdisc_size_table *stab;
	u32 block_index;
	__u32 qlen;

	/* Dump loops can be long; give the scheduler a chance. */
	cond_resched();
	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = clid;
	tcm->tcm_handle = q->handle;
	/* tcm_info carries the refcount for qdisc dumps. */
	tcm->tcm_info = refcount_read(&q->refcnt);
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	/* Shared-block indices are only reported when non-zero. */
	if (q->ops->ingress_block_get) {
		block_index = q->ops->ingress_block_get(q);
		if (block_index &&
		    nla_put_u32(skb, TCA_INGRESS_BLOCK, block_index))
			goto nla_put_failure;
	}
	if (q->ops->egress_block_get) {
		block_index = q->ops->egress_block_get(q);
		if (block_index &&
		    nla_put_u32(skb, TCA_EGRESS_BLOCK, block_index))
			goto nla_put_failure;
	}
	if (q->ops->dump && q->ops->dump(q, skb) < 0)
		goto nla_put_failure;
	if (nla_put_u8(skb, TCA_HW_OFFLOAD, !!(q->flags & TCQ_F_OFFLOADED)))
		goto nla_put_failure;
	/* Sums per-cpu queue lengths when the qdisc uses per-cpu stats. */
	qlen = qdisc_qlen_sum(q);

	stab = rtnl_dereference(q->stab);
	if (stab && qdisc_dump_stab(skb, stab) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 NULL, &d, TCA_PAD) < 0)
		goto nla_put_failure;

	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
		goto nla_put_failure;

	if (qdisc_is_percpu_stats(q)) {
		cpu_bstats = q->cpu_bstats;
		cpu_qstats = q->cpu_qstats;
	}

	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
				  &d, cpu_bstats, &q->bstats) < 0 ||
	    gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
	    gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	/* Patch the final message length now that all attributes are in. */
	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
955
956static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
957{
958 if (q->flags & TCQ_F_BUILTIN)
959 return true;
960 if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible)
961 return true;
962
963 return false;
964}
965
966static int qdisc_notify(struct net *net, struct sk_buff *oskb,
967 struct nlmsghdr *n, u32 clid,
968 struct Qdisc *old, struct Qdisc *new)
969{
970 struct sk_buff *skb;
971 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
972
973 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
974 if (!skb)
975 return -ENOBUFS;
976
977 if (old && !tc_qdisc_dump_ignore(old, false)) {
978 if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
979 0, RTM_DELQDISC) < 0)
980 goto err_out;
981 }
982 if (new && !tc_qdisc_dump_ignore(new, false)) {
983 if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
984 old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
985 goto err_out;
986 }
987
988 if (skb->len)
989 return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
990 n->nlmsg_flags & NLM_F_ECHO);
991
992err_out:
993 kfree_skb(skb);
994 return -EINVAL;
995}
996
Tom Goff7316ae82010-03-19 15:40:13 +0000997static void notify_and_destroy(struct net *net, struct sk_buff *skb,
998 struct nlmsghdr *n, u32 clid,
David S. Miller99194cf2008-07-17 04:54:10 -0700999 struct Qdisc *old, struct Qdisc *new)
1000{
1001 if (new || old)
Tom Goff7316ae82010-03-19 15:40:13 +00001002 qdisc_notify(net, skb, n, clid, old, new);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001003
David S. Miller4d8863a2008-08-18 21:03:15 -07001004 if (old)
Vlad Buslov86bd4462018-09-24 19:22:50 +03001005 qdisc_put(old);
David S. Miller99194cf2008-07-17 04:54:10 -07001006}
1007
Paolo Abeni8a53e612019-04-10 14:32:40 +02001008static void qdisc_clear_nolock(struct Qdisc *sch)
1009{
1010 sch->flags &= ~TCQ_F_NOLOCK;
1011 if (!(sch->flags & TCQ_F_CPUSTATS))
1012 return;
1013
1014 free_percpu(sch->cpu_bstats);
1015 free_percpu(sch->cpu_qstats);
1016 sch->cpu_bstats = NULL;
1017 sch->cpu_qstats = NULL;
1018 sch->flags &= ~TCQ_F_CPUSTATS;
1019}
1020
/* Graft qdisc "new" to class "classid" of qdisc "parent" or
 * to device "dev".
 *
 * When appropriate send a netlink notification using 'skb'
 * and "n".
 *
 * On success, destroy old qdisc.
 */

static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
		       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
		       struct Qdisc *new, struct Qdisc *old,
		       struct netlink_ext_ack *extack)
{
	struct Qdisc *q = old;
	struct net *net = dev_net(dev);

	if (parent == NULL) {
		/* Root (or ingress) graft: attach directly to the device. */
		unsigned int i, num_q, ingress;

		ingress = 0;
		num_q = dev->num_tx_queues;
		/* Ingress qdiscs live on the single ingress queue. */
		if ((q && q->flags & TCQ_F_INGRESS) ||
		    (new && new->flags & TCQ_F_INGRESS)) {
			num_q = 1;
			ingress = 1;
			if (!dev_ingress_queue(dev)) {
				NL_SET_ERR_MSG(extack, "Device does not have an ingress queue");
				return -ENOENT;
			}
		}

		/* Quiesce the device while swapping qdiscs. */
		if (dev->flags & IFF_UP)
			dev_deactivate(dev);

		qdisc_offload_graft_root(dev, new, old, extack);

		/* Qdiscs with ->attach (e.g. mq-style) distribute themselves
		 * across tx queues; skip the per-queue graft loop.
		 */
		if (new && new->ops->attach)
			goto skip;

		for (i = 0; i < num_q; i++) {
			struct netdev_queue *dev_queue = dev_ingress_queue(dev);

			if (!ingress)
				dev_queue = netdev_get_tx_queue(dev, i);

			old = dev_graft_qdisc(dev_queue, new);
			/* One reference per tx queue beyond the first. */
			if (new && i > 0)
				qdisc_refcount_inc(new);

			if (!ingress)
				qdisc_put(old);
		}

skip:
		if (!ingress) {
			/* Notify against dev->qdisc (the old root), then
			 * install the new root; NULL means noop_qdisc.
			 */
			notify_and_destroy(net, skb, n, classid,
					   dev->qdisc, new);
			if (new && !new->ops->attach)
				qdisc_refcount_inc(new);
			dev->qdisc = new ? : &noop_qdisc;

			if (new && new->ops->attach)
				new->ops->attach(new);
		} else {
			notify_and_destroy(net, skb, n, classid, old, new);
		}

		if (dev->flags & IFF_UP)
			dev_activate(dev);
	} else {
		/* Graft into a class of an existing parent qdisc. */
		const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
		unsigned long cl;
		int err;

		/* Only support running class lockless if parent is lockless */
		if (new && (new->flags & TCQ_F_NOLOCK) && !(parent->flags & TCQ_F_NOLOCK))
			qdisc_clear_nolock(new);

		if (!cops || !cops->graft)
			return -EOPNOTSUPP;

		cl = cops->find(parent, classid);
		if (!cl) {
			NL_SET_ERR_MSG(extack, "Specified class not found");
			return -ENOENT;
		}

		/* cops->graft returns the displaced child in &old. */
		err = cops->graft(parent, cl, new, &old, extack);
		if (err)
			return err;
		notify_and_destroy(net, skb, n, classid, old, new);
	}
	return 0;
}
1116
Jiri Pirkod47a6b02018-01-17 11:46:52 +01001117static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca,
1118 struct netlink_ext_ack *extack)
1119{
1120 u32 block_index;
1121
1122 if (tca[TCA_INGRESS_BLOCK]) {
1123 block_index = nla_get_u32(tca[TCA_INGRESS_BLOCK]);
1124
1125 if (!block_index) {
1126 NL_SET_ERR_MSG(extack, "Ingress block index cannot be 0");
1127 return -EINVAL;
1128 }
1129 if (!sch->ops->ingress_block_set) {
1130 NL_SET_ERR_MSG(extack, "Ingress block sharing is not supported");
1131 return -EOPNOTSUPP;
1132 }
1133 sch->ops->ingress_block_set(sch, block_index);
1134 }
1135 if (tca[TCA_EGRESS_BLOCK]) {
1136 block_index = nla_get_u32(tca[TCA_EGRESS_BLOCK]);
1137
1138 if (!block_index) {
1139 NL_SET_ERR_MSG(extack, "Egress block index cannot be 0");
1140 return -EINVAL;
1141 }
1142 if (!sch->ops->egress_block_set) {
1143 NL_SET_ERR_MSG(extack, "Egress block sharing is not supported");
1144 return -EOPNOTSUPP;
1145 }
1146 sch->ops->egress_block_set(sch, block_index);
1147 }
1148 return 0;
1149}
1150
/*
   Allocate and initialize new qdisc.

   Parameters are passed via opt.
 */

/* qdisc_create - allocate and initialize a new qdisc of kind tca[TCA_KIND]
 * @dev:       device the qdisc is attached to
 * @dev_queue: tx queue the qdisc serves
 * @p:         parent qdisc (NULL for root/ingress)
 * @parent:    parent handle stored in sch->parent
 * @handle:    requested handle; TC_H_INGRESS or 0 to auto-allocate
 * @tca:       parsed TCA_* attributes (kind, options, stab, rate estimator)
 * @errp:      out-parameter for the error code on failure
 * @extack:    extended ack for error reporting
 *
 * Returns the new qdisc, or NULL with *errp set.  May drop and re-take
 * RTNL to autoload the qdisc module, returning -EAGAIN so the caller
 * replays the request.
 *
 * Error-unwind labels (note err_out4 sits after the success return and
 * jumps back into the common chain):
 *   err_out5: ops->init() failed       -> destroy, then fall into err_out3
 *   err_out4: stab/estimator failed    -> drop stab, destroy, goto err_out3
 *   err_out3: pre-init failure         -> dev_put + qdisc_free
 *   err_out2: qdisc_alloc failed       -> module_put
 *   err_out:  no ops / module retry    -> just set *errp
 */
static struct Qdisc *qdisc_create(struct net_device *dev,
				  struct netdev_queue *dev_queue,
				  struct Qdisc *p, u32 parent, u32 handle,
				  struct nlattr **tca, int *errp,
				  struct netlink_ext_ack *extack)
{
	int err;
	struct nlattr *kind = tca[TCA_KIND];
	struct Qdisc *sch;
	struct Qdisc_ops *ops;
	struct qdisc_size_table *stab;

	ops = qdisc_lookup_ops(kind);
#ifdef CONFIG_MODULES
	if (ops == NULL && kind != NULL) {
		char name[IFNAMSIZ];
		if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
			/* We dropped the RTNL semaphore in order to
			 * perform the module load. So, even if we
			 * succeeded in loading the module we have to
			 * tell the caller to replay the request. We
			 * indicate this using -EAGAIN.
			 * We replay the request because the device may
			 * go away in the mean time.
			 */
			rtnl_unlock();
			request_module("sch_%s", name);
			rtnl_lock();
			ops = qdisc_lookup_ops(kind);
			if (ops != NULL) {
				/* We will try again qdisc_lookup_ops,
				 * so don't keep a reference.
				 */
				module_put(ops->owner);
				err = -EAGAIN;
				goto err_out;
			}
		}
	}
#endif

	err = -ENOENT;
	if (!ops) {
		NL_SET_ERR_MSG(extack, "Specified qdisc not found");
		goto err_out;
	}

	sch = qdisc_alloc(dev_queue, ops, extack);
	if (IS_ERR(sch)) {
		err = PTR_ERR(sch);
		goto err_out2;
	}

	sch->parent = parent;

	if (handle == TC_H_INGRESS) {
		sch->flags |= TCQ_F_INGRESS;
		/* Ingress always uses the fixed handle ffff:0000. */
		handle = TC_H_MAKE(TC_H_INGRESS, 0);
	} else {
		if (handle == 0) {
			handle = qdisc_alloc_handle(dev);
			if (handle == 0) {
				NL_SET_ERR_MSG(extack, "Maximum number of qdisc handles was exceeded");
				err = -ENOSPC;
				goto err_out3;
			}
		}
		if (!netif_is_multiqueue(dev))
			sch->flags |= TCQ_F_ONETXQUEUE;
	}

	sch->handle = handle;

	/* This exist to keep backward compatible with a userspace
	 * loophole, what allowed userspace to get IFF_NO_QUEUE
	 * facility on older kernels by setting tx_queue_len=0 (prior
	 * to qdisc init), and then forgot to reinit tx_queue_len
	 * before again attaching a qdisc.
	 */
	if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
		dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
		netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
	}

	err = qdisc_block_indexes_set(sch, tca, extack);
	if (err)
		goto err_out3;

	if (ops->init) {
		err = ops->init(sch, tca[TCA_OPTIONS], extack);
		if (err != 0)
			goto err_out5;
	}

	if (tca[TCA_STAB]) {
		stab = qdisc_get_stab(tca[TCA_STAB], extack);
		if (IS_ERR(stab)) {
			err = PTR_ERR(stab);
			goto err_out4;
		}
		rcu_assign_pointer(sch->stab, stab);
	}
	if (tca[TCA_RATE]) {
		seqcount_t *running;

		err = -EOPNOTSUPP;
		if (sch->flags & TCQ_F_MQROOT) {
			NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc");
			goto err_out4;
		}

		/* Pick the seqcount the estimator samples against: the root
		 * qdisc's unless this is the root, ingress, or under mq.
		 */
		if (sch->parent != TC_H_ROOT &&
		    !(sch->flags & TCQ_F_INGRESS) &&
		    (!p || !(p->flags & TCQ_F_MQROOT)))
			running = qdisc_root_sleeping_running(sch);
		else
			running = &sch->running;

		err = gen_new_estimator(&sch->bstats,
					sch->cpu_bstats,
					&sch->rate_est,
					NULL,
					running,
					tca[TCA_RATE]);
		if (err) {
			NL_SET_ERR_MSG(extack, "Failed to generate new estimator");
			goto err_out4;
		}
	}

	qdisc_hash_add(sch, false);
	trace_qdisc_create(ops, dev, parent);

	return sch;

err_out5:
	/* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */
	if (ops->destroy)
		ops->destroy(sch);
err_out3:
	dev_put(dev);
	qdisc_free(sch);
err_out2:
	module_put(ops->owner);
err_out:
	*errp = err;
	return NULL;

err_out4:
	/*
	 * Any broken qdiscs that would require a ops->reset() here?
	 * The qdisc was never in action so it shouldn't be necessary.
	 */
	qdisc_put_stab(rtnl_dereference(sch->stab));
	if (ops->destroy)
		ops->destroy(sch);
	goto err_out3;
}
1315
/* qdisc_change - apply a change request (RTM_NEWQDISC on existing qdisc)
 * @sch:    qdisc being reconfigured
 * @tca:    parsed TCA_* attributes
 * @extack: extended ack for error reporting
 *
 * Applies TCA_OPTIONS via ops->change, then unconditionally swaps the size
 * table (note: an absent TCA_STAB replaces any existing stab with NULL),
 * and finally replaces the rate estimator.  Returns 0 on success or a
 * negative errno; estimator errors are deliberately ignored because the
 * earlier changes cannot be undone.
 */
static int qdisc_change(struct Qdisc *sch, struct nlattr **tca,
			struct netlink_ext_ack *extack)
{
	struct qdisc_size_table *ostab, *stab = NULL;
	int err = 0;

	if (tca[TCA_OPTIONS]) {
		if (!sch->ops->change) {
			NL_SET_ERR_MSG(extack, "Change operation not supported by specified qdisc");
			return -EINVAL;
		}
		/* Block indices can only be set at creation time. */
		if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
			NL_SET_ERR_MSG(extack, "Change of blocks is not supported");
			return -EOPNOTSUPP;
		}
		err = sch->ops->change(sch, tca[TCA_OPTIONS], extack);
		if (err)
			return err;
	}

	if (tca[TCA_STAB]) {
		stab = qdisc_get_stab(tca[TCA_STAB], extack);
		if (IS_ERR(stab))
			return PTR_ERR(stab);
	}

	/* Publish the new stab (possibly NULL) before dropping the old one. */
	ostab = rtnl_dereference(sch->stab);
	rcu_assign_pointer(sch->stab, stab);
	qdisc_put_stab(ostab);

	if (tca[TCA_RATE]) {
		/* NB: ignores errors from replace_estimator
		   because change can't be undone. */
		if (sch->flags & TCQ_F_MQROOT)
			goto out;
		gen_replace_estimator(&sch->bstats,
				      sch->cpu_bstats,
				      &sch->rate_est,
				      NULL,
				      qdisc_root_sleeping_running(sch),
				      tca[TCA_RATE]);
	}
out:
	return 0;
}
1361
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001362struct check_loop_arg {
1363 struct qdisc_walker w;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001364 struct Qdisc *p;
1365 int depth;
1366};
1367
Jamal Hadi Salim5a7a5552016-09-18 08:45:33 -04001368static int check_loop_fn(struct Qdisc *q, unsigned long cl,
1369 struct qdisc_walker *w);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001370
1371static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
1372{
1373 struct check_loop_arg arg;
1374
1375 if (q->ops->cl_ops == NULL)
1376 return 0;
1377
1378 arg.w.stop = arg.w.skip = arg.w.count = 0;
1379 arg.w.fn = check_loop_fn;
1380 arg.depth = depth;
1381 arg.p = p;
1382 q->ops->cl_ops->walk(q, &arg.w);
1383 return arg.w.stop ? -ELOOP : 0;
1384}
1385
1386static int
1387check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
1388{
1389 struct Qdisc *leaf;
Eric Dumazet20fea082007-11-14 01:44:41 -08001390 const struct Qdisc_class_ops *cops = q->ops->cl_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001391 struct check_loop_arg *arg = (struct check_loop_arg *)w;
1392
1393 leaf = cops->leaf(q, cl);
1394 if (leaf) {
1395 if (leaf == arg->p || arg->depth > 7)
1396 return -ELOOP;
1397 return check_loop(leaf, arg->p, arg->depth + 1);
1398 }
1399 return 0;
1400}
1401
/* Netlink validation policy for TCA_* attributes on qdisc/class/filter
 * requests.  Shared with other tc request handlers (non-static).
 */
const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = {
	[TCA_KIND]		= { .type = NLA_STRING },
	[TCA_RATE]		= { .type = NLA_BINARY,
				    .len = sizeof(struct tc_estimator) },
	[TCA_STAB]		= { .type = NLA_NESTED },
	[TCA_DUMP_INVISIBLE]	= { .type = NLA_FLAG },
	[TCA_CHAIN]		= { .type = NLA_U32 },
	[TCA_INGRESS_BLOCK]	= { .type = NLA_U32 },
	[TCA_EGRESS_BLOCK]	= { .type = NLA_U32 },
};
1412
/*
 * Delete/get qdisc.
 */

/* tc_get_qdisc - handle RTM_DELQDISC and RTM_GETQDISC requests
 * @skb:    request skb
 * @n:      request netlink header
 * @extack: extended ack for error reporting
 *
 * Locates the target qdisc either via tcm_parent (classid lookup, ingress,
 * or device root) or directly via tcm_handle, cross-checks handle and kind,
 * then either grafts NULL in its place (delete, which notifies and destroys)
 * or just sends a notification (get).  Delete requires CAP_NET_ADMIN in the
 * device's namespace.
 */
static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q = NULL;
	struct Qdisc *p = NULL;
	int err;

	/* Reads (RTM_GETQDISC) are unprivileged; deletes are not. */
	if ((n->nlmsg_type != RTM_GETQDISC) &&
	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
				     rtm_tca_policy, extack);
	if (err < 0)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	clid = tcm->tcm_parent;
	if (clid) {
		if (clid != TC_H_ROOT) {
			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
				/* Parent is a regular qdisc: resolve the
				 * child attached to the given classid.
				 */
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p) {
					NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified classid");
					return -ENOENT;
				}
				q = qdisc_leaf(p, clid);
			} else if (dev_ingress_queue(dev)) {
				q = dev_ingress_queue(dev)->qdisc_sleeping;
			}
		} else {
			q = dev->qdisc;
		}
		if (!q) {
			NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device");
			return -ENOENT;
		}

		/* A non-zero tcm_handle must match the qdisc we found. */
		if (tcm->tcm_handle && q->handle != tcm->tcm_handle) {
			NL_SET_ERR_MSG(extack, "Invalid handle");
			return -EINVAL;
		}
	} else {
		q = qdisc_lookup(dev, tcm->tcm_handle);
		if (!q) {
			NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified handle");
			return -ENOENT;
		}
	}

	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
		NL_SET_ERR_MSG(extack, "Invalid qdisc name");
		return -EINVAL;
	}

	if (n->nlmsg_type == RTM_DELQDISC) {
		if (!clid) {
			NL_SET_ERR_MSG(extack, "Classid cannot be zero");
			return -EINVAL;
		}
		if (q->handle == 0) {
			NL_SET_ERR_MSG(extack, "Cannot delete qdisc with handle of zero");
			return -ENOENT;
		}
		/* Grafting NULL removes q; notification happens inside. */
		err = qdisc_graft(dev, p, skb, n, clid, NULL, q, extack);
		if (err != 0)
			return err;
	} else {
		qdisc_notify(net, skb, n, clid, NULL, q);
	}
	return 0;
}
1497
1498/*
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001499 * Create/change qdisc.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001500 */
1501
David Ahernc21ef3e2017-04-16 09:48:24 -07001502static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
1503 struct netlink_ext_ack *extack)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001504{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001505 struct net *net = sock_net(skb->sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001506 struct tcmsg *tcm;
Patrick McHardy1e904742008-01-22 22:11:17 -08001507 struct nlattr *tca[TCA_MAX + 1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001508 struct net_device *dev;
1509 u32 clid;
1510 struct Qdisc *q, *p;
1511 int err;
1512
David S. Miller5f013c9b2014-05-12 13:19:14 -04001513 if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
Eric W. Biedermandfc47ef2012-11-16 03:03:00 +00001514 return -EPERM;
1515
Linus Torvalds1da177e2005-04-16 15:20:36 -07001516replay:
1517 /* Reinit, just in case something touches this. */
Johannes Berg8cb08172019-04-26 14:07:28 +02001518 err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
1519 rtm_tca_policy, extack);
Hong zhi guode179c82013-03-25 17:36:33 +00001520 if (err < 0)
1521 return err;
1522
David S. Miller02ef22c2012-06-26 21:50:05 -07001523 tcm = nlmsg_data(n);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001524 clid = tcm->tcm_parent;
1525 q = p = NULL;
1526
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001527 dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1528 if (!dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001529 return -ENODEV;
1530
Patrick McHardy1e904742008-01-22 22:11:17 -08001531
Linus Torvalds1da177e2005-04-16 15:20:36 -07001532 if (clid) {
1533 if (clid != TC_H_ROOT) {
1534 if (clid != TC_H_INGRESS) {
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001535 p = qdisc_lookup(dev, TC_H_MAJ(clid));
Alexander Aring09215592017-12-20 12:35:12 -05001536 if (!p) {
1537 NL_SET_ERR_MSG(extack, "Failed to find specified qdisc");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001538 return -ENOENT;
Alexander Aring09215592017-12-20 12:35:12 -05001539 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001540 q = qdisc_leaf(p, clid);
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001541 } else if (dev_ingress_queue_create(dev)) {
1542 q = dev_ingress_queue(dev)->qdisc_sleeping;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001543 }
1544 } else {
Patrick McHardyaf356af2009-09-04 06:41:18 +00001545 q = dev->qdisc;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001546 }
1547
1548 /* It may be default qdisc, ignore it */
1549 if (q && q->handle == 0)
1550 q = NULL;
1551
1552 if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
1553 if (tcm->tcm_handle) {
Alexander Aring09215592017-12-20 12:35:12 -05001554 if (q && !(n->nlmsg_flags & NLM_F_REPLACE)) {
1555 NL_SET_ERR_MSG(extack, "NLM_F_REPLACE needed to override");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001556 return -EEXIST;
Alexander Aring09215592017-12-20 12:35:12 -05001557 }
1558 if (TC_H_MIN(tcm->tcm_handle)) {
1559 NL_SET_ERR_MSG(extack, "Invalid minor handle");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001560 return -EINVAL;
Alexander Aring09215592017-12-20 12:35:12 -05001561 }
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001562 q = qdisc_lookup(dev, tcm->tcm_handle);
Jiri Pirko8ec69572017-12-28 16:52:10 +01001563 if (!q)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001564 goto create_n_graft;
Alexander Aring09215592017-12-20 12:35:12 -05001565 if (n->nlmsg_flags & NLM_F_EXCL) {
1566 NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot override");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001567 return -EEXIST;
Alexander Aring09215592017-12-20 12:35:12 -05001568 }
Alexander Aring0ac4bd62017-12-04 18:39:59 -05001569 if (tca[TCA_KIND] &&
Alexander Aring09215592017-12-20 12:35:12 -05001570 nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1571 NL_SET_ERR_MSG(extack, "Invalid qdisc name");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001572 return -EINVAL;
Alexander Aring09215592017-12-20 12:35:12 -05001573 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001574 if (q == p ||
Alexander Aring09215592017-12-20 12:35:12 -05001575 (p && check_loop(q, p, 0))) {
1576 NL_SET_ERR_MSG(extack, "Qdisc parent/child loop detected");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001577 return -ELOOP;
Alexander Aring09215592017-12-20 12:35:12 -05001578 }
Eric Dumazet551143d2017-08-24 21:12:28 -07001579 qdisc_refcount_inc(q);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001580 goto graft;
1581 } else {
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001582 if (!q)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001583 goto create_n_graft;
1584
1585 /* This magic test requires explanation.
1586 *
1587 * We know, that some child q is already
1588 * attached to this parent and have choice:
1589 * either to change it or to create/graft new one.
1590 *
1591 * 1. We are allowed to create/graft only
1592 * if CREATE and REPLACE flags are set.
1593 *
1594 * 2. If EXCL is set, requestor wanted to say,
1595 * that qdisc tcm_handle is not expected
1596 * to exist, so that we choose create/graft too.
1597 *
1598 * 3. The last case is when no flags are set.
1599 * Alas, it is sort of hole in API, we
1600 * cannot decide what to do unambiguously.
1601 * For now we select create/graft, if
1602 * user gave KIND, which does not match existing.
1603 */
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001604 if ((n->nlmsg_flags & NLM_F_CREATE) &&
1605 (n->nlmsg_flags & NLM_F_REPLACE) &&
1606 ((n->nlmsg_flags & NLM_F_EXCL) ||
Patrick McHardy1e904742008-01-22 22:11:17 -08001607 (tca[TCA_KIND] &&
1608 nla_strcmp(tca[TCA_KIND], q->ops->id))))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001609 goto create_n_graft;
1610 }
1611 }
1612 } else {
Alexander Aring09215592017-12-20 12:35:12 -05001613 if (!tcm->tcm_handle) {
1614 NL_SET_ERR_MSG(extack, "Handle cannot be zero");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001615 return -EINVAL;
Alexander Aring09215592017-12-20 12:35:12 -05001616 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001617 q = qdisc_lookup(dev, tcm->tcm_handle);
1618 }
1619
1620 /* Change qdisc parameters */
Alexander Aring09215592017-12-20 12:35:12 -05001621 if (!q) {
1622 NL_SET_ERR_MSG(extack, "Specified qdisc not found");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001623 return -ENOENT;
Alexander Aring09215592017-12-20 12:35:12 -05001624 }
1625 if (n->nlmsg_flags & NLM_F_EXCL) {
1626 NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot modify");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001627 return -EEXIST;
Alexander Aring09215592017-12-20 12:35:12 -05001628 }
1629 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1630 NL_SET_ERR_MSG(extack, "Invalid qdisc name");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001631 return -EINVAL;
Alexander Aring09215592017-12-20 12:35:12 -05001632 }
1633 err = qdisc_change(q, tca, extack);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001634 if (err == 0)
Tom Goff7316ae82010-03-19 15:40:13 +00001635 qdisc_notify(net, skb, n, clid, NULL, q);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001636 return err;
1637
1638create_n_graft:
Alexander Aring09215592017-12-20 12:35:12 -05001639 if (!(n->nlmsg_flags & NLM_F_CREATE)) {
1640 NL_SET_ERR_MSG(extack, "Qdisc not found. To create specify NLM_F_CREATE flag");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001641 return -ENOENT;
Alexander Aring09215592017-12-20 12:35:12 -05001642 }
Eric Dumazet24824a02010-10-02 06:11:55 +00001643 if (clid == TC_H_INGRESS) {
Alexander Aring09215592017-12-20 12:35:12 -05001644 if (dev_ingress_queue(dev)) {
Eric Dumazet24824a02010-10-02 06:11:55 +00001645 q = qdisc_create(dev, dev_ingress_queue(dev), p,
1646 tcm->tcm_parent, tcm->tcm_parent,
Alexander Aring09215592017-12-20 12:35:12 -05001647 tca, &err, extack);
1648 } else {
1649 NL_SET_ERR_MSG(extack, "Cannot find ingress queue for specified device");
Eric Dumazet24824a02010-10-02 06:11:55 +00001650 err = -ENOENT;
Alexander Aring09215592017-12-20 12:35:12 -05001651 }
Eric Dumazet24824a02010-10-02 06:11:55 +00001652 } else {
Jarek Poplawski926e61b2009-09-15 02:53:07 -07001653 struct netdev_queue *dev_queue;
David S. Miller6ec1c692009-09-06 01:58:51 -07001654
1655 if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
Jarek Poplawski926e61b2009-09-15 02:53:07 -07001656 dev_queue = p->ops->cl_ops->select_queue(p, tcm);
1657 else if (p)
1658 dev_queue = p->dev_queue;
1659 else
1660 dev_queue = netdev_get_tx_queue(dev, 0);
David S. Miller6ec1c692009-09-06 01:58:51 -07001661
Jarek Poplawski926e61b2009-09-15 02:53:07 -07001662 q = qdisc_create(dev, dev_queue, p,
David S. Millerbb949fb2008-07-08 16:55:56 -07001663 tcm->tcm_parent, tcm->tcm_handle,
Alexander Aring09215592017-12-20 12:35:12 -05001664 tca, &err, extack);
David S. Miller6ec1c692009-09-06 01:58:51 -07001665 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001666 if (q == NULL) {
1667 if (err == -EAGAIN)
1668 goto replay;
1669 return err;
1670 }
1671
1672graft:
Alexander Aring09215592017-12-20 12:35:12 -05001673 err = qdisc_graft(dev, p, skb, n, clid, q, NULL, extack);
Ilpo Järvinene5befbd2008-08-18 22:30:01 -07001674 if (err) {
1675 if (q)
Vlad Buslov86bd4462018-09-24 19:22:50 +03001676 qdisc_put(q);
Ilpo Järvinene5befbd2008-08-18 22:30:01 -07001677 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001678 }
Ilpo Järvinene5befbd2008-08-18 22:30:01 -07001679
Linus Torvalds1da177e2005-04-16 15:20:36 -07001680 return 0;
1681}
1682
David S. Miller30723672008-07-18 22:50:15 -07001683static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
1684 struct netlink_callback *cb,
Jiri Kosina49b49972017-03-08 16:03:32 +01001685 int *q_idx_p, int s_q_idx, bool recur,
1686 bool dump_invisible)
David S. Miller30723672008-07-18 22:50:15 -07001687{
1688 int ret = 0, q_idx = *q_idx_p;
1689 struct Qdisc *q;
Jiri Kosina59cc1f62016-08-10 11:05:15 +02001690 int b;
David S. Miller30723672008-07-18 22:50:15 -07001691
1692 if (!root)
1693 return 0;
1694
1695 q = root;
1696 if (q_idx < s_q_idx) {
1697 q_idx++;
1698 } else {
Jiri Kosina49b49972017-03-08 16:03:32 +01001699 if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
Eric W. Biederman15e47302012-09-07 20:12:54 +00001700 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
Jamal Hadi Salim5a7a5552016-09-18 08:45:33 -04001701 cb->nlh->nlmsg_seq, NLM_F_MULTI,
1702 RTM_NEWQDISC) <= 0)
David S. Miller30723672008-07-18 22:50:15 -07001703 goto done;
1704 q_idx++;
1705 }
Jiri Kosina69012ae2016-08-16 23:52:58 +02001706
Jiri Kosinaea327462016-08-16 23:53:46 +02001707 /* If dumping singletons, there is no qdisc_dev(root) and the singleton
1708 * itself has already been dumped.
1709 *
1710 * If we've already dumped the top-level (ingress) qdisc above and the global
1711 * qdisc hashtable, we don't want to hit it again
1712 */
1713 if (!qdisc_dev(root) || !recur)
Jiri Kosina69012ae2016-08-16 23:52:58 +02001714 goto out;
1715
Jiri Kosina59cc1f62016-08-10 11:05:15 +02001716 hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
David S. Miller30723672008-07-18 22:50:15 -07001717 if (q_idx < s_q_idx) {
1718 q_idx++;
1719 continue;
1720 }
Jiri Kosina49b49972017-03-08 16:03:32 +01001721 if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
Eric W. Biederman15e47302012-09-07 20:12:54 +00001722 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
Jamal Hadi Salim5a7a5552016-09-18 08:45:33 -04001723 cb->nlh->nlmsg_seq, NLM_F_MULTI,
1724 RTM_NEWQDISC) <= 0)
David S. Miller30723672008-07-18 22:50:15 -07001725 goto done;
1726 q_idx++;
1727 }
1728
1729out:
1730 *q_idx_p = q_idx;
1731 return ret;
1732done:
1733 ret = -1;
1734 goto out;
1735}
1736
Linus Torvalds1da177e2005-04-16 15:20:36 -07001737static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
1738{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001739 struct net *net = sock_net(skb->sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001740 int idx, q_idx;
1741 int s_idx, s_q_idx;
1742 struct net_device *dev;
Jiri Kosina49b49972017-03-08 16:03:32 +01001743 const struct nlmsghdr *nlh = cb->nlh;
Jiri Kosina49b49972017-03-08 16:03:32 +01001744 struct nlattr *tca[TCA_MAX + 1];
1745 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001746
1747 s_idx = cb->args[0];
1748 s_q_idx = q_idx = cb->args[1];
stephen hemmingerf1e90162009-11-10 07:54:49 +00001749
Pavel Emelianov7562f872007-05-03 15:13:45 -07001750 idx = 0;
Eric Dumazet15dc36e2014-03-10 17:11:42 -07001751 ASSERT_RTNL();
Jiri Kosina49b49972017-03-08 16:03:32 +01001752
Johannes Berg8cb08172019-04-26 14:07:28 +02001753 err = nlmsg_parse_deprecated(nlh, sizeof(struct tcmsg), tca, TCA_MAX,
1754 rtm_tca_policy, cb->extack);
Jiri Kosina49b49972017-03-08 16:03:32 +01001755 if (err < 0)
1756 return err;
1757
Eric Dumazet15dc36e2014-03-10 17:11:42 -07001758 for_each_netdev(net, dev) {
David S. Miller30723672008-07-18 22:50:15 -07001759 struct netdev_queue *dev_queue;
1760
Linus Torvalds1da177e2005-04-16 15:20:36 -07001761 if (idx < s_idx)
Pavel Emelianov7562f872007-05-03 15:13:45 -07001762 goto cont;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001763 if (idx > s_idx)
1764 s_q_idx = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001765 q_idx = 0;
David S. Miller30723672008-07-18 22:50:15 -07001766
Jamal Hadi Salim5a7a5552016-09-18 08:45:33 -04001767 if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx,
Jiri Kosina49b49972017-03-08 16:03:32 +01001768 true, tca[TCA_DUMP_INVISIBLE]) < 0)
David S. Miller30723672008-07-18 22:50:15 -07001769 goto done;
1770
Eric Dumazet24824a02010-10-02 06:11:55 +00001771 dev_queue = dev_ingress_queue(dev);
1772 if (dev_queue &&
1773 tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
Jiri Kosina49b49972017-03-08 16:03:32 +01001774 &q_idx, s_q_idx, false,
1775 tca[TCA_DUMP_INVISIBLE]) < 0)
David S. Miller30723672008-07-18 22:50:15 -07001776 goto done;
1777
Pavel Emelianov7562f872007-05-03 15:13:45 -07001778cont:
1779 idx++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001780 }
1781
1782done:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001783 cb->args[0] = idx;
1784 cb->args[1] = q_idx;
1785
1786 return skb->len;
1787}
1788
1789
1790
1791/************************************************
1792 * Traffic classes manipulation. *
1793 ************************************************/
1794
WANG Cong27d7f072017-08-24 16:51:27 -07001795static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
1796 unsigned long cl,
1797 u32 portid, u32 seq, u16 flags, int event)
1798{
1799 struct tcmsg *tcm;
1800 struct nlmsghdr *nlh;
1801 unsigned char *b = skb_tail_pointer(skb);
1802 struct gnet_dump d;
1803 const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001804
WANG Cong27d7f072017-08-24 16:51:27 -07001805 cond_resched();
1806 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
1807 if (!nlh)
1808 goto out_nlmsg_trim;
1809 tcm = nlmsg_data(nlh);
1810 tcm->tcm_family = AF_UNSPEC;
1811 tcm->tcm__pad1 = 0;
1812 tcm->tcm__pad2 = 0;
1813 tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
1814 tcm->tcm_parent = q->handle;
1815 tcm->tcm_handle = q->handle;
1816 tcm->tcm_info = 0;
1817 if (nla_put_string(skb, TCA_KIND, q->ops->id))
1818 goto nla_put_failure;
1819 if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
1820 goto nla_put_failure;
1821
1822 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
1823 NULL, &d, TCA_PAD) < 0)
1824 goto nla_put_failure;
1825
1826 if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
1827 goto nla_put_failure;
1828
1829 if (gnet_stats_finish_copy(&d) < 0)
1830 goto nla_put_failure;
1831
1832 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1833 return skb->len;
1834
1835out_nlmsg_trim:
1836nla_put_failure:
1837 nlmsg_trim(skb, b);
1838 return -1;
1839}
1840
1841static int tclass_notify(struct net *net, struct sk_buff *oskb,
1842 struct nlmsghdr *n, struct Qdisc *q,
1843 unsigned long cl, int event)
1844{
1845 struct sk_buff *skb;
1846 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
Zhike Wang5b5f99b2019-03-11 03:15:54 -07001847 int err = 0;
WANG Cong27d7f072017-08-24 16:51:27 -07001848
1849 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1850 if (!skb)
1851 return -ENOBUFS;
1852
1853 if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
1854 kfree_skb(skb);
1855 return -EINVAL;
1856 }
1857
Zhike Wang5b5f99b2019-03-11 03:15:54 -07001858 err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1859 n->nlmsg_flags & NLM_F_ECHO);
1860 if (err > 0)
1861 err = 0;
1862 return err;
WANG Cong27d7f072017-08-24 16:51:27 -07001863}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001864
WANG Cong14546ba2017-08-24 16:51:28 -07001865static int tclass_del_notify(struct net *net,
1866 const struct Qdisc_class_ops *cops,
1867 struct sk_buff *oskb, struct nlmsghdr *n,
1868 struct Qdisc *q, unsigned long cl)
1869{
1870 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1871 struct sk_buff *skb;
1872 int err = 0;
1873
1874 if (!cops->delete)
1875 return -EOPNOTSUPP;
1876
1877 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1878 if (!skb)
1879 return -ENOBUFS;
1880
1881 if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
1882 RTM_DELTCLASS) < 0) {
1883 kfree_skb(skb);
1884 return -EINVAL;
1885 }
1886
1887 err = cops->delete(q, cl);
1888 if (err) {
1889 kfree_skb(skb);
1890 return err;
1891 }
1892
Zhike Wang5b5f99b2019-03-11 03:15:54 -07001893 err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1894 n->nlmsg_flags & NLM_F_ECHO);
1895 if (err > 0)
1896 err = 0;
1897 return err;
WANG Cong14546ba2017-08-24 16:51:28 -07001898}
1899
Cong Wang07d79fc2017-08-30 14:30:36 -07001900#ifdef CONFIG_NET_CLS
1901
/* Arguments threaded through tcf_node_bind() via the tcf_walker. */
struct tcf_bind_args {
	struct tcf_walker w;	/* must be first: cast back from walker */
	unsigned long base;	/* base class the filters hang off */
	unsigned long cl;	/* class to (re)bind filters to */
	u32 classid;		/* classid the filters reference */
};
1908
1909static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
1910{
1911 struct tcf_bind_args *a = (void *)arg;
1912
1913 if (tp->ops->bind_class) {
Jiri Pirko74e3be62017-10-13 14:01:04 +02001914 struct Qdisc *q = tcf_block_q(tp->chain->block);
1915
1916 sch_tree_lock(q);
Cong Wang2e24cd72020-01-23 16:26:18 -08001917 tp->ops->bind_class(n, a->classid, a->cl, q, a->base);
Jiri Pirko74e3be62017-10-13 14:01:04 +02001918 sch_tree_unlock(q);
Cong Wang07d79fc2017-08-30 14:30:36 -07001919 }
1920 return 0;
1921}
1922
Cong Wang760d2282020-01-23 17:27:08 -08001923struct tc_bind_class_args {
1924 struct qdisc_walker w;
1925 unsigned long new_cl;
1926 u32 portid;
1927 u32 clid;
1928};
1929
/* qdisc_walker callback: for one class @cl, walk every filter chain and
 * proto attached to it and rebind filters referencing a->clid to
 * a->new_cl (0 to unbind). Always returns 0 so the class walk continues.
 */
static int tc_bind_class_walker(struct Qdisc *q, unsigned long cl,
				struct qdisc_walker *w)
{
	struct tc_bind_class_args *a = (struct tc_bind_class_args *)w;
	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
	struct tcf_block *block;
	struct tcf_chain *chain;

	/* Classes without a filter block have nothing to rebind. */
	block = cops->tcf_block(q, cl, NULL);
	if (!block)
		return 0;
	for (chain = tcf_get_next_chain(block, NULL);
	     chain;
	     chain = tcf_get_next_chain(block, chain)) {
		struct tcf_proto *tp;

		for (tp = tcf_get_next_proto(chain, NULL, true);
		     tp; tp = tcf_get_next_proto(chain, tp, true)) {
			struct tcf_bind_args arg = {};

			arg.w.fn = tcf_node_bind;
			arg.classid = a->clid;
			arg.base = cl;
			arg.cl = a->new_cl;
			tp->ops->walk(tp, &arg.w, true);
		}
	}

	return 0;
}
1960
1961static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
1962 unsigned long new_cl)
1963{
1964 const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1965 struct tc_bind_class_args args = {};
1966
1967 if (!cops->tcf_block)
1968 return;
1969 args.portid = portid;
1970 args.clid = clid;
1971 args.new_cl = new_cl;
1972 args.w.fn = tc_bind_class_walker;
1973 q->ops->cl_ops->walk(q, &args.w);
Cong Wang07d79fc2017-08-30 14:30:36 -07001974}
1975
1976#else
1977
/* Without CONFIG_NET_CLS there are no filters to rebind: no-op stub. */
static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
			   unsigned long new_cl)
{
}
1982
1983#endif
1984
David Ahernc21ef3e2017-04-16 09:48:24 -07001985static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
1986 struct netlink_ext_ack *extack)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001987{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001988 struct net *net = sock_net(skb->sk);
David S. Miller02ef22c2012-06-26 21:50:05 -07001989 struct tcmsg *tcm = nlmsg_data(n);
Patrick McHardy1e904742008-01-22 22:11:17 -08001990 struct nlattr *tca[TCA_MAX + 1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07001991 struct net_device *dev;
1992 struct Qdisc *q = NULL;
Eric Dumazet20fea082007-11-14 01:44:41 -08001993 const struct Qdisc_class_ops *cops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001994 unsigned long cl = 0;
1995 unsigned long new_cl;
Hong zhi guode179c82013-03-25 17:36:33 +00001996 u32 portid;
1997 u32 clid;
1998 u32 qid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001999 int err;
2000
Stéphane Graber4e8bbb82014-04-30 11:25:43 -04002001 if ((n->nlmsg_type != RTM_GETTCLASS) &&
David S. Miller5f013c9b2014-05-12 13:19:14 -04002002 !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
Eric W. Biedermandfc47ef2012-11-16 03:03:00 +00002003 return -EPERM;
2004
Johannes Berg8cb08172019-04-26 14:07:28 +02002005 err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
2006 rtm_tca_policy, extack);
Patrick McHardy1e904742008-01-22 22:11:17 -08002007 if (err < 0)
2008 return err;
2009
Hong zhi guode179c82013-03-25 17:36:33 +00002010 dev = __dev_get_by_index(net, tcm->tcm_ifindex);
2011 if (!dev)
2012 return -ENODEV;
2013
Linus Torvalds1da177e2005-04-16 15:20:36 -07002014 /*
2015 parent == TC_H_UNSPEC - unspecified parent.
2016 parent == TC_H_ROOT - class is root, which has no parent.
2017 parent == X:0 - parent is root class.
2018 parent == X:Y - parent is a node in hierarchy.
2019 parent == 0:Y - parent is X:Y, where X:0 is qdisc.
2020
2021 handle == 0:0 - generate handle from kernel pool.
2022 handle == 0:Y - class is X:Y, where X:0 is qdisc.
2023 handle == X:Y - clear.
2024 handle == X:0 - root class.
2025 */
2026
2027 /* Step 1. Determine qdisc handle X:0 */
2028
Hong zhi guode179c82013-03-25 17:36:33 +00002029 portid = tcm->tcm_parent;
2030 clid = tcm->tcm_handle;
2031 qid = TC_H_MAJ(clid);
2032
Eric W. Biederman15e47302012-09-07 20:12:54 +00002033 if (portid != TC_H_ROOT) {
2034 u32 qid1 = TC_H_MAJ(portid);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002035
2036 if (qid && qid1) {
2037 /* If both majors are known, they must be identical. */
2038 if (qid != qid1)
2039 return -EINVAL;
2040 } else if (qid1) {
2041 qid = qid1;
2042 } else if (qid == 0)
Patrick McHardyaf356af2009-09-04 06:41:18 +00002043 qid = dev->qdisc->handle;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002044
2045 /* Now qid is genuine qdisc handle consistent
Eric Dumazetcc7ec452011-01-19 19:26:56 +00002046 * both with parent and child.
2047 *
Eric W. Biederman15e47302012-09-07 20:12:54 +00002048 * TC_H_MAJ(portid) still may be unspecified, complete it now.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002049 */
Eric W. Biederman15e47302012-09-07 20:12:54 +00002050 if (portid)
2051 portid = TC_H_MAKE(qid, portid);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002052 } else {
2053 if (qid == 0)
Patrick McHardyaf356af2009-09-04 06:41:18 +00002054 qid = dev->qdisc->handle;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002055 }
2056
2057 /* OK. Locate qdisc */
Eric Dumazetcc7ec452011-01-19 19:26:56 +00002058 q = qdisc_lookup(dev, qid);
2059 if (!q)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002060 return -ENOENT;
2061
2062 /* An check that it supports classes */
2063 cops = q->ops->cl_ops;
2064 if (cops == NULL)
2065 return -EINVAL;
2066
2067 /* Now try to get class */
2068 if (clid == 0) {
Eric W. Biederman15e47302012-09-07 20:12:54 +00002069 if (portid == TC_H_ROOT)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002070 clid = qid;
2071 } else
2072 clid = TC_H_MAKE(qid, clid);
2073
2074 if (clid)
WANG Cong143976c2017-08-24 16:51:29 -07002075 cl = cops->find(q, clid);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002076
2077 if (cl == 0) {
2078 err = -ENOENT;
Eric Dumazetcc7ec452011-01-19 19:26:56 +00002079 if (n->nlmsg_type != RTM_NEWTCLASS ||
2080 !(n->nlmsg_flags & NLM_F_CREATE))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002081 goto out;
2082 } else {
2083 switch (n->nlmsg_type) {
YOSHIFUJI Hideaki10297b92007-02-09 23:25:16 +09002084 case RTM_NEWTCLASS:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002085 err = -EEXIST;
Eric Dumazetcc7ec452011-01-19 19:26:56 +00002086 if (n->nlmsg_flags & NLM_F_EXCL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002087 goto out;
2088 break;
2089 case RTM_DELTCLASS:
WANG Cong14546ba2017-08-24 16:51:28 -07002090 err = tclass_del_notify(net, cops, skb, n, q, cl);
Cong Wang07d79fc2017-08-30 14:30:36 -07002091 /* Unbind the class with flilters with 0 */
2092 tc_bind_tclass(q, portid, clid, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002093 goto out;
2094 case RTM_GETTCLASS:
Tom Goff7316ae82010-03-19 15:40:13 +00002095 err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002096 goto out;
2097 default:
2098 err = -EINVAL;
2099 goto out;
2100 }
2101 }
2102
Jiri Pirkod47a6b02018-01-17 11:46:52 +01002103 if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
2104 NL_SET_ERR_MSG(extack, "Shared blocks are not supported for classes");
2105 return -EOPNOTSUPP;
2106 }
2107
Linus Torvalds1da177e2005-04-16 15:20:36 -07002108 new_cl = cl;
Patrick McHardyde6d5cd2009-09-04 06:41:16 +00002109 err = -EOPNOTSUPP;
2110 if (cops->change)
Alexander Aring793d81d2017-12-20 12:35:15 -05002111 err = cops->change(q, clid, portid, tca, &new_cl, extack);
Cong Wang07d79fc2017-08-30 14:30:36 -07002112 if (err == 0) {
Tom Goff7316ae82010-03-19 15:40:13 +00002113 tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
Cong Wang07d79fc2017-08-30 14:30:36 -07002114 /* We just create a new class, need to do reverse binding. */
2115 if (cl != new_cl)
2116 tc_bind_tclass(q, portid, clid, new_cl);
2117 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002118out:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002119 return err;
2120}
2121
Eric Dumazetcc7ec452011-01-19 19:26:56 +00002122struct qdisc_dump_args {
2123 struct qdisc_walker w;
2124 struct sk_buff *skb;
2125 struct netlink_callback *cb;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002126};
2127
Jamal Hadi Salim5a7a5552016-09-18 08:45:33 -04002128static int qdisc_class_dump(struct Qdisc *q, unsigned long cl,
2129 struct qdisc_walker *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002130{
2131 struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
2132
Eric W. Biederman15e47302012-09-07 20:12:54 +00002133 return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
Jamal Hadi Salim5a7a5552016-09-18 08:45:33 -04002134 a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
2135 RTM_NEWTCLASS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002136}
2137
David S. Miller30723672008-07-18 22:50:15 -07002138static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
2139 struct tcmsg *tcm, struct netlink_callback *cb,
2140 int *t_p, int s_t)
2141{
2142 struct qdisc_dump_args arg;
2143
Jiri Kosina49b49972017-03-08 16:03:32 +01002144 if (tc_qdisc_dump_ignore(q, false) ||
David S. Miller30723672008-07-18 22:50:15 -07002145 *t_p < s_t || !q->ops->cl_ops ||
2146 (tcm->tcm_parent &&
2147 TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
2148 (*t_p)++;
2149 return 0;
2150 }
2151 if (*t_p > s_t)
2152 memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
2153 arg.w.fn = qdisc_class_dump;
2154 arg.skb = skb;
2155 arg.cb = cb;
2156 arg.w.stop = 0;
2157 arg.w.skip = cb->args[1];
2158 arg.w.count = 0;
2159 q->ops->cl_ops->walk(q, &arg.w);
2160 cb->args[1] = arg.w.count;
2161 if (arg.w.stop)
2162 return -1;
2163 (*t_p)++;
2164 return 0;
2165}
2166
/* Dump the classes of @root and of every qdisc hashed on its device
 * (or only the one matching tcm->tcm_parent when a parent was given).
 *
 * Returns 0 to continue, -1 when the skb filled and the dump must stop.
 */
static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
			       struct tcmsg *tcm, struct netlink_callback *cb,
			       int *t_p, int s_t)
{
	struct Qdisc *q;
	int b;

	if (!root)
		return 0;

	if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
		return -1;

	/* Singleton qdiscs have no device hash table to walk. */
	if (!qdisc_dev(root))
		return 0;

	if (tcm->tcm_parent) {
		/* Parent named: dump only that qdisc (root already done). */
		q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent));
		if (q && q != root &&
		    tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
			return -1;
		return 0;
	}
	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
		if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
			return -1;
	}

	return 0;
}
2197
Linus Torvalds1da177e2005-04-16 15:20:36 -07002198static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
2199{
David S. Miller02ef22c2012-06-26 21:50:05 -07002200 struct tcmsg *tcm = nlmsg_data(cb->nlh);
David S. Miller30723672008-07-18 22:50:15 -07002201 struct net *net = sock_net(skb->sk);
2202 struct netdev_queue *dev_queue;
2203 struct net_device *dev;
2204 int t, s_t;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002205
Hong zhi guo573ce262013-03-27 06:47:04 +00002206 if (nlmsg_len(cb->nlh) < sizeof(*tcm))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002207 return 0;
Eric Dumazetcc7ec452011-01-19 19:26:56 +00002208 dev = dev_get_by_index(net, tcm->tcm_ifindex);
2209 if (!dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002210 return 0;
2211
2212 s_t = cb->args[0];
2213 t = 0;
2214
Patrick McHardyaf356af2009-09-04 06:41:18 +00002215 if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0)
David S. Miller30723672008-07-18 22:50:15 -07002216 goto done;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002217
Eric Dumazet24824a02010-10-02 06:11:55 +00002218 dev_queue = dev_ingress_queue(dev);
2219 if (dev_queue &&
2220 tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
2221 &t, s_t) < 0)
David S. Miller30723672008-07-18 22:50:15 -07002222 goto done;
2223
2224done:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002225 cb->args[0] = t;
2226
2227 dev_put(dev);
2228 return skb->len;
2229}
2230
Linus Torvalds1da177e2005-04-16 15:20:36 -07002231#ifdef CONFIG_PROC_FS
/* /proc/net/psched: four hex words describing the scheduler clock —
 * ns per tick, PSCHED tick length in ns, a legacy constant (1000000),
 * and the hrtimer resolution expressed in Hz.
 */
static int psched_show(struct seq_file *seq, void *v)
{
	seq_printf(seq, "%08x %08x %08x %08x\n",
		   (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
		   1000000,
		   (u32)NSEC_PER_SEC / hrtimer_resolution);

	return 0;
}
2241
Tom Goff7316ae82010-03-19 15:40:13 +00002242static int __net_init psched_net_init(struct net *net)
2243{
2244 struct proc_dir_entry *e;
2245
Christoph Hellwig3f3942a2018-05-15 15:57:23 +02002246 e = proc_create_single("psched", 0, net->proc_net, psched_show);
Tom Goff7316ae82010-03-19 15:40:13 +00002247 if (e == NULL)
2248 return -ENOMEM;
2249
2250 return 0;
2251}
2252
/* Per-netns teardown: remove this namespace's /proc/net/psched entry. */
static void __net_exit psched_net_exit(struct net *net)
{
	remove_proc_entry("psched", net->proc_net);
}
2257#else
/* Without CONFIG_PROC_FS there is no psched proc entry: no-op stub. */
static int __net_init psched_net_init(struct net *net)
{
	return 0;
}
2262
/* Without CONFIG_PROC_FS there is nothing to remove: no-op stub. */
static void __net_exit psched_net_exit(struct net *net)
{
}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002266#endif
2267
Tom Goff7316ae82010-03-19 15:40:13 +00002268static struct pernet_operations psched_net_ops = {
2269 .init = psched_net_init,
2270 .exit = psched_net_exit,
2271};
2272
Linus Torvalds1da177e2005-04-16 15:20:36 -07002273static int __init pktsched_init(void)
2274{
Tom Goff7316ae82010-03-19 15:40:13 +00002275 int err;
2276
2277 err = register_pernet_subsys(&psched_net_ops);
2278 if (err) {
Eric Dumazetcc7ec452011-01-19 19:26:56 +00002279 pr_err("pktsched_init: "
Tom Goff7316ae82010-03-19 15:40:13 +00002280 "cannot initialize per netns operations\n");
2281 return err;
2282 }
2283
stephen hemminger6da7c8f2013-08-27 16:19:08 -07002284 register_qdisc(&pfifo_fast_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002285 register_qdisc(&pfifo_qdisc_ops);
2286 register_qdisc(&bfifo_qdisc_ops);
Hagen Paul Pfeifer57dbb2d2010-01-24 12:30:59 +00002287 register_qdisc(&pfifo_head_drop_qdisc_ops);
David S. Miller6ec1c692009-09-06 01:58:51 -07002288 register_qdisc(&mq_qdisc_ops);
Phil Sutterd66d6c32015-08-27 21:21:38 +02002289 register_qdisc(&noqueue_qdisc_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002290
Florian Westphalb97bac62017-08-09 20:41:48 +02002291 rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, 0);
2292 rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, 0);
Jamal Hadi Salim5a7a5552016-09-18 08:45:33 -04002293 rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
Florian Westphalb97bac62017-08-09 20:41:48 +02002294 0);
2295 rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, 0);
2296 rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, 0);
Jamal Hadi Salim5a7a5552016-09-18 08:45:33 -04002297 rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
Florian Westphalb97bac62017-08-09 20:41:48 +02002298 0);
Thomas Grafbe577dd2007-03-22 11:55:50 -07002299
Linus Torvalds1da177e2005-04-16 15:20:36 -07002300 return 0;
2301}
2302
/* Run at subsystem initcall level so qdiscs exist before device drivers. */
subsys_initcall(pktsched_init);