blob: 96a5e5d9378e140190409ef96242b542b95b77aa [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * net/sched/sch_api.c Packet scheduler API.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 *
11 * Fixes:
12 *
13 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
14 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
15 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
16 */
17
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <linux/module.h>
19#include <linux/types.h>
20#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070021#include <linux/string.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070022#include <linux/errno.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070023#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070024#include <linux/init.h>
25#include <linux/proc_fs.h>
26#include <linux/seq_file.h>
27#include <linux/kmod.h>
28#include <linux/list.h>
Patrick McHardy41794772007-03-16 01:19:15 -070029#include <linux/hrtimer.h>
Jarek Poplawski25bfcd52008-08-18 20:53:34 -070030#include <linux/lockdep.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090031#include <linux/slab.h>
Jiri Kosina59cc1f62016-08-10 11:05:15 +020032#include <linux/hashtable.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070033
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020034#include <net/net_namespace.h>
Denis V. Lunevb8542722007-12-01 00:21:31 +110035#include <net/sock.h>
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -070036#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070037#include <net/pkt_sched.h>
Cong Wang07d79fc2017-08-30 14:30:36 -070038#include <net/pkt_cls.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039
Linus Torvalds1da177e2005-04-16 15:20:36 -070040/*
41
42 Short review.
43 -------------
44
45 This file consists of two interrelated parts:
46
47 1. queueing disciplines manager frontend.
48 2. traffic classes manager frontend.
49
50 Generally, queueing discipline ("qdisc") is a black box,
51 which is able to enqueue packets and to dequeue them (when
52 device is ready to send something) in order and at times
53 determined by algorithm hidden in it.
54
 55 qdiscs are divided into two categories:
56 - "queues", which have no internal structure visible from outside.
57 - "schedulers", which split all the packets to "traffic classes",
58 using "packet classifiers" (look at cls_api.c)
59
60 In turn, classes may have child qdiscs (as rule, queues)
61 attached to them etc. etc. etc.
62
63 The goal of the routines in this file is to translate
64 information supplied by user in the form of handles
65 to more intelligible for kernel form, to make some sanity
66 checks and part of work, which is common to all qdiscs
67 and to provide rtnetlink notifications.
68
69 All real intelligent work is done inside qdisc modules.
70
71
72
73 Every discipline has two major routines: enqueue and dequeue.
74
75 ---dequeue
76
77 dequeue usually returns a skb to send. It is allowed to return NULL,
78 but it does not mean that queue is empty, it just means that
79 discipline does not want to send anything this time.
80 Queue is really empty if q->q.qlen == 0.
81 For complicated disciplines with multiple queues q->q is not
82 real packet queue, but however q->q.qlen must be valid.
83
84 ---enqueue
85
86 enqueue returns 0, if packet was enqueued successfully.
87 If packet (this one or another one) was dropped, it returns
88 not zero error code.
89 NET_XMIT_DROP - this packet dropped
90 Expected action: do not backoff, but wait until queue will clear.
91 NET_XMIT_CN - probably this packet enqueued, but another one dropped.
92 Expected action: backoff or ignore
Linus Torvalds1da177e2005-04-16 15:20:36 -070093
94 Auxiliary routines:
95
Jarek Poplawski99c0db22008-10-31 00:45:27 -070096 ---peek
97
98 like dequeue but without removing a packet from the queue
99
Linus Torvalds1da177e2005-04-16 15:20:36 -0700100 ---reset
101
102 returns qdisc to initial state: purge all buffers, clear all
103 timers, counters (except for statistics) etc.
104
105 ---init
106
107 initializes newly created qdisc.
108
109 ---destroy
110
111 destroys resources allocated by init and during lifetime of qdisc.
112
113 ---change
114
115 changes qdisc parameters.
116 */
117
118/* Protects list of registered TC modules. It is pure SMP lock. */
119static DEFINE_RWLOCK(qdisc_mod_lock);
120
121
122/************************************************
123 * Queueing disciplines manipulation. *
124 ************************************************/
125
126
127/* The list of all installed queueing disciplines. */
128
129static struct Qdisc_ops *qdisc_base;
130
Zhi Yong Wu21eb2182014-01-01 04:34:51 +0800131/* Register/unregister queueing discipline */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700132
133int register_qdisc(struct Qdisc_ops *qops)
134{
135 struct Qdisc_ops *q, **qp;
136 int rc = -EEXIST;
137
138 write_lock(&qdisc_mod_lock);
139 for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
140 if (!strcmp(qops->id, q->id))
141 goto out;
142
143 if (qops->enqueue == NULL)
144 qops->enqueue = noop_qdisc_ops.enqueue;
Jarek Poplawski99c0db22008-10-31 00:45:27 -0700145 if (qops->peek == NULL) {
Jarek Poplawski68fd26b2010-08-09 12:18:48 +0000146 if (qops->dequeue == NULL)
Jarek Poplawski99c0db22008-10-31 00:45:27 -0700147 qops->peek = noop_qdisc_ops.peek;
Jarek Poplawski68fd26b2010-08-09 12:18:48 +0000148 else
149 goto out_einval;
Jarek Poplawski99c0db22008-10-31 00:45:27 -0700150 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700151 if (qops->dequeue == NULL)
152 qops->dequeue = noop_qdisc_ops.dequeue;
153
Jarek Poplawski68fd26b2010-08-09 12:18:48 +0000154 if (qops->cl_ops) {
155 const struct Qdisc_class_ops *cops = qops->cl_ops;
156
WANG Cong143976c2017-08-24 16:51:29 -0700157 if (!(cops->find && cops->walk && cops->leaf))
Jarek Poplawski68fd26b2010-08-09 12:18:48 +0000158 goto out_einval;
159
Jiri Pirko6529eab2017-05-17 11:07:55 +0200160 if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf))
Jarek Poplawski68fd26b2010-08-09 12:18:48 +0000161 goto out_einval;
162 }
163
Linus Torvalds1da177e2005-04-16 15:20:36 -0700164 qops->next = NULL;
165 *qp = qops;
166 rc = 0;
167out:
168 write_unlock(&qdisc_mod_lock);
169 return rc;
Jarek Poplawski68fd26b2010-08-09 12:18:48 +0000170
171out_einval:
172 rc = -EINVAL;
173 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700174}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800175EXPORT_SYMBOL(register_qdisc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700176
177int unregister_qdisc(struct Qdisc_ops *qops)
178{
179 struct Qdisc_ops *q, **qp;
180 int err = -ENOENT;
181
182 write_lock(&qdisc_mod_lock);
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000183 for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700184 if (q == qops)
185 break;
186 if (q) {
187 *qp = q->next;
188 q->next = NULL;
189 err = 0;
190 }
191 write_unlock(&qdisc_mod_lock);
192 return err;
193}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800194EXPORT_SYMBOL(unregister_qdisc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700195
stephen hemminger6da7c8f2013-08-27 16:19:08 -0700196/* Get default qdisc if not otherwise specified */
197void qdisc_get_default(char *name, size_t len)
198{
199 read_lock(&qdisc_mod_lock);
200 strlcpy(name, default_qdisc_ops->id, len);
201 read_unlock(&qdisc_mod_lock);
202}
203
204static struct Qdisc_ops *qdisc_lookup_default(const char *name)
205{
206 struct Qdisc_ops *q = NULL;
207
208 for (q = qdisc_base; q; q = q->next) {
209 if (!strcmp(name, q->id)) {
210 if (!try_module_get(q->owner))
211 q = NULL;
212 break;
213 }
214 }
215
216 return q;
217}
218
219/* Set new default qdisc to use */
220int qdisc_set_default(const char *name)
221{
222 const struct Qdisc_ops *ops;
223
224 if (!capable(CAP_NET_ADMIN))
225 return -EPERM;
226
227 write_lock(&qdisc_mod_lock);
228 ops = qdisc_lookup_default(name);
229 if (!ops) {
230 /* Not found, drop lock and try to load module */
231 write_unlock(&qdisc_mod_lock);
232 request_module("sch_%s", name);
233 write_lock(&qdisc_mod_lock);
234
235 ops = qdisc_lookup_default(name);
236 }
237
238 if (ops) {
239 /* Set new default */
240 module_put(default_qdisc_ops->owner);
241 default_qdisc_ops = ops;
242 }
243 write_unlock(&qdisc_mod_lock);
244
245 return ops ? 0 : -ENOENT;
246}
247
stephen hemminger8ea3e432017-04-13 08:40:53 -0700248#ifdef CONFIG_NET_SCH_DEFAULT
249/* Set default value from kernel config */
250static int __init sch_default_qdisc(void)
251{
252 return qdisc_set_default(CONFIG_DEFAULT_NET_SCH);
253}
254late_initcall(sch_default_qdisc);
255#endif
256
Linus Torvalds1da177e2005-04-16 15:20:36 -0700257/* We know handle. Find qdisc among all qdisc's attached to device
Eric Dumazet4eaf3b82015-12-01 20:08:51 -0800258 * (root qdisc, all its children, children of children etc.)
259 * Note: caller either uses rtnl or rcu_read_lock()
Linus Torvalds1da177e2005-04-16 15:20:36 -0700260 */
261
Hannes Eder6113b742008-11-28 03:06:46 -0800262static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
David S. Miller8123b422008-08-08 23:23:39 -0700263{
264 struct Qdisc *q;
265
Jiri Kosina69012ae2016-08-16 23:52:58 +0200266 if (!qdisc_dev(root))
267 return (root->handle == handle ? root : NULL);
268
David S. Miller8123b422008-08-08 23:23:39 -0700269 if (!(root->flags & TCQ_F_BUILTIN) &&
270 root->handle == handle)
271 return root;
272
Jiri Kosina59cc1f62016-08-10 11:05:15 +0200273 hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle) {
David S. Miller8123b422008-08-08 23:23:39 -0700274 if (q->handle == handle)
275 return q;
276 }
277 return NULL;
278}
279
Jiri Kosina49b49972017-03-08 16:03:32 +0100280void qdisc_hash_add(struct Qdisc *q, bool invisible)
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700281{
Eric Dumazet37314362014-03-08 08:01:19 -0800282 if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
Eric Dumazet4eaf3b82015-12-01 20:08:51 -0800283 ASSERT_RTNL();
Jiri Kosina59cc1f62016-08-10 11:05:15 +0200284 hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
Jiri Kosina49b49972017-03-08 16:03:32 +0100285 if (invisible)
286 q->flags |= TCQ_F_INVISIBLE;
Eric Dumazet37314362014-03-08 08:01:19 -0800287 }
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700288}
Jiri Kosina59cc1f62016-08-10 11:05:15 +0200289EXPORT_SYMBOL(qdisc_hash_add);
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700290
Jiri Kosina59cc1f62016-08-10 11:05:15 +0200291void qdisc_hash_del(struct Qdisc *q)
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700292{
Eric Dumazet4eaf3b82015-12-01 20:08:51 -0800293 if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
294 ASSERT_RTNL();
Jiri Kosina59cc1f62016-08-10 11:05:15 +0200295 hash_del_rcu(&q->hash);
Eric Dumazet4eaf3b82015-12-01 20:08:51 -0800296 }
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700297}
Jiri Kosina59cc1f62016-08-10 11:05:15 +0200298EXPORT_SYMBOL(qdisc_hash_del);
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700299
David S. Milleread81cc2008-07-17 00:50:32 -0700300struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
Patrick McHardy43effa12006-11-29 17:35:48 -0800301{
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700302 struct Qdisc *q;
303
Cong Wang50317fc2017-10-27 22:08:56 -0700304 if (!handle)
305 return NULL;
Patrick McHardyaf356af2009-09-04 06:41:18 +0000306 q = qdisc_match_from_root(dev->qdisc, handle);
307 if (q)
308 goto out;
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700309
Eric Dumazet24824a02010-10-02 06:11:55 +0000310 if (dev_ingress_queue(dev))
311 q = qdisc_match_from_root(
312 dev_ingress_queue(dev)->qdisc_sleeping,
313 handle);
Jarek Poplawskif6486d42008-11-25 13:56:06 -0800314out:
Jarek Poplawskif6e0b232008-08-22 03:24:05 -0700315 return q;
Patrick McHardy43effa12006-11-29 17:35:48 -0800316}
317
Linus Torvalds1da177e2005-04-16 15:20:36 -0700318static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
319{
320 unsigned long cl;
321 struct Qdisc *leaf;
Eric Dumazet20fea082007-11-14 01:44:41 -0800322 const struct Qdisc_class_ops *cops = p->ops->cl_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700323
324 if (cops == NULL)
325 return NULL;
WANG Cong143976c2017-08-24 16:51:29 -0700326 cl = cops->find(p, classid);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700327
328 if (cl == 0)
329 return NULL;
330 leaf = cops->leaf(p, cl);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700331 return leaf;
332}
333
334/* Find queueing discipline by name */
335
Patrick McHardy1e904742008-01-22 22:11:17 -0800336static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700337{
338 struct Qdisc_ops *q = NULL;
339
340 if (kind) {
341 read_lock(&qdisc_mod_lock);
342 for (q = qdisc_base; q; q = q->next) {
Patrick McHardy1e904742008-01-22 22:11:17 -0800343 if (nla_strcmp(kind, q->id) == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700344 if (!try_module_get(q->owner))
345 q = NULL;
346 break;
347 }
348 }
349 read_unlock(&qdisc_mod_lock);
350 }
351 return q;
352}
353
Jesper Dangaard Brouer8a8e3d82013-08-14 23:47:11 +0200354/* The linklayer setting were not transferred from iproute2, in older
355 * versions, and the rate tables lookup systems have been dropped in
356 * the kernel. To keep backward compatible with older iproute2 tc
357 * utils, we detect the linklayer setting by detecting if the rate
358 * table were modified.
359 *
360 * For linklayer ATM table entries, the rate table will be aligned to
361 * 48 bytes, thus some table entries will contain the same value. The
362 * mpu (min packet unit) is also encoded into the old rate table, thus
363 * starting from the mpu, we find low and high table entries for
364 * mapping this cell. If these entries contain the same value, when
365 * the rate tables have been modified for linklayer ATM.
366 *
367 * This is done by rounding mpu to the nearest 48 bytes cell/entry,
368 * and then roundup to the next cell, calc the table entry one below,
369 * and compare.
370 */
371static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
372{
373 int low = roundup(r->mpu, 48);
374 int high = roundup(low+1, 48);
375 int cell_low = low >> r->cell_log;
376 int cell_high = (high >> r->cell_log) - 1;
377
378 /* rtab is too inaccurate at rates > 100Mbit/s */
379 if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
380 pr_debug("TC linklayer: Giving up ATM detection\n");
381 return TC_LINKLAYER_ETHERNET;
382 }
383
384 if ((cell_high > cell_low) && (cell_high < 256)
385 && (rtab[cell_low] == rtab[cell_high])) {
386 pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
387 cell_low, cell_high, rtab[cell_high]);
388 return TC_LINKLAYER_ATM;
389 }
390 return TC_LINKLAYER_ETHERNET;
391}
392
Linus Torvalds1da177e2005-04-16 15:20:36 -0700393static struct qdisc_rate_table *qdisc_rtab_list;
394
Jamal Hadi Salim5a7a5552016-09-18 08:45:33 -0400395struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
396 struct nlattr *tab)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700397{
398 struct qdisc_rate_table *rtab;
399
Eric Dumazet40edeff2013-06-02 11:15:55 +0000400 if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
401 nla_len(tab) != TC_RTAB_SIZE)
402 return NULL;
403
Linus Torvalds1da177e2005-04-16 15:20:36 -0700404 for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
Eric Dumazet40edeff2013-06-02 11:15:55 +0000405 if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
406 !memcmp(&rtab->data, nla_data(tab), 1024)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700407 rtab->refcnt++;
408 return rtab;
409 }
410 }
411
Linus Torvalds1da177e2005-04-16 15:20:36 -0700412 rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
413 if (rtab) {
414 rtab->rate = *r;
415 rtab->refcnt = 1;
Patrick McHardy1e904742008-01-22 22:11:17 -0800416 memcpy(rtab->data, nla_data(tab), 1024);
Jesper Dangaard Brouer8a8e3d82013-08-14 23:47:11 +0200417 if (r->linklayer == TC_LINKLAYER_UNAWARE)
418 r->linklayer = __detect_linklayer(r, rtab->data);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700419 rtab->next = qdisc_rtab_list;
420 qdisc_rtab_list = rtab;
421 }
422 return rtab;
423}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800424EXPORT_SYMBOL(qdisc_get_rtab);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700425
426void qdisc_put_rtab(struct qdisc_rate_table *tab)
427{
428 struct qdisc_rate_table *rtab, **rtabp;
429
430 if (!tab || --tab->refcnt)
431 return;
432
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000433 for (rtabp = &qdisc_rtab_list;
434 (rtab = *rtabp) != NULL;
435 rtabp = &rtab->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700436 if (rtab == tab) {
437 *rtabp = rtab->next;
438 kfree(rtab);
439 return;
440 }
441 }
442}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800443EXPORT_SYMBOL(qdisc_put_rtab);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700444
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700445static LIST_HEAD(qdisc_stab_list);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700446
447static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
448 [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) },
449 [TCA_STAB_DATA] = { .type = NLA_BINARY },
450};
451
/* Parse a TCA_STAB attribute into a size table, sharing an existing
 * entry from qdisc_stab_list when an identical one is already loaded.
 * Returns the table (refcount incremented) or an ERR_PTR() on
 * malformed input / allocation failure.  Release with qdisc_put_stab().
 */
static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
{
	struct nlattr *tb[TCA_STAB_MAX + 1];
	struct qdisc_size_table *stab;
	struct tc_sizespec *s;
	unsigned int tsize = 0;
	u16 *tab = NULL;
	int err;

	err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy, NULL);
	if (err < 0)
		return ERR_PTR(err);
	if (!tb[TCA_STAB_BASE])
		return ERR_PTR(-EINVAL);

	s = nla_data(tb[TCA_STAB_BASE]);

	/* A non-zero declared size requires the slot data attribute. */
	if (s->tsize > 0) {
		if (!tb[TCA_STAB_DATA])
			return ERR_PTR(-EINVAL);
		tab = nla_data(tb[TCA_STAB_DATA]);
		tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
	}

	/* Declared size must match the data actually supplied. */
	if (tsize != s->tsize || (!tab && tsize > 0))
		return ERR_PTR(-EINVAL);

	/* Share a table whose spec and slot data both match. */
	list_for_each_entry(stab, &qdisc_stab_list, list) {
		if (memcmp(&stab->szopts, s, sizeof(*s)))
			continue;
		if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
			continue;
		stab->refcnt++;
		return stab;
	}

	/* No match: allocate spec plus trailing slot array in one go. */
	stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
	if (!stab)
		return ERR_PTR(-ENOMEM);

	stab->refcnt = 1;
	stab->szopts = *s;
	if (tsize > 0)
		memcpy(stab->data, tab, tsize * sizeof(u16));

	list_add_tail(&stab->list, &qdisc_stab_list);

	return stab;
}
501
Eric Dumazeta2da5702011-01-20 03:48:19 +0000502static void stab_kfree_rcu(struct rcu_head *head)
503{
504 kfree(container_of(head, struct qdisc_size_table, rcu));
505}
506
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700507void qdisc_put_stab(struct qdisc_size_table *tab)
508{
509 if (!tab)
510 return;
511
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700512 if (--tab->refcnt == 0) {
513 list_del(&tab->list);
Eric Dumazeta2da5702011-01-20 03:48:19 +0000514 call_rcu_bh(&tab->rcu, stab_kfree_rcu);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700515 }
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700516}
517EXPORT_SYMBOL(qdisc_put_stab);
518
519static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
520{
521 struct nlattr *nest;
522
523 nest = nla_nest_start(skb, TCA_STAB);
Patrick McHardy3aa46142008-11-20 04:07:14 -0800524 if (nest == NULL)
525 goto nla_put_failure;
David S. Miller1b34ec42012-03-29 05:11:39 -0400526 if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
527 goto nla_put_failure;
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700528 nla_nest_end(skb, nest);
529
530 return skb->len;
531
532nla_put_failure:
533 return -1;
534}
535
Jamal Hadi Salim5a7a5552016-09-18 08:45:33 -0400536void __qdisc_calculate_pkt_len(struct sk_buff *skb,
537 const struct qdisc_size_table *stab)
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700538{
539 int pkt_len, slot;
540
541 pkt_len = skb->len + stab->szopts.overhead;
542 if (unlikely(!stab->szopts.tsize))
543 goto out;
544
545 slot = pkt_len + stab->szopts.cell_align;
546 if (unlikely(slot < 0))
547 slot = 0;
548
549 slot >>= stab->szopts.cell_log;
550 if (likely(slot < stab->szopts.tsize))
551 pkt_len = stab->data[slot];
552 else
553 pkt_len = stab->data[stab->szopts.tsize - 1] *
554 (slot / stab->szopts.tsize) +
555 stab->data[slot % stab->szopts.tsize];
556
557 pkt_len <<= stab->szopts.size_log;
558out:
559 if (unlikely(pkt_len < 1))
560 pkt_len = 1;
561 qdisc_skb_cb(skb)->pkt_len = pkt_len;
562}
Eric Dumazeta2da5702011-01-20 03:48:19 +0000563EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700564
Florian Westphal6e765a02014-06-11 20:35:18 +0200565void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
Jarek Poplawskib00355d2009-02-01 01:12:42 -0800566{
567 if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000568 pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
569 txt, qdisc->ops->id, qdisc->handle >> 16);
Jarek Poplawskib00355d2009-02-01 01:12:42 -0800570 qdisc->flags |= TCQ_F_WARN_NONWC;
571 }
572}
573EXPORT_SYMBOL(qdisc_warn_nonwc);
574
Patrick McHardy41794772007-03-16 01:19:15 -0700575static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
576{
577 struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
David S. Miller2fbd3da2009-09-01 17:59:25 -0700578 timer);
Patrick McHardy41794772007-03-16 01:19:15 -0700579
John Fastabend1e203c12014-10-02 22:43:09 -0700580 rcu_read_lock();
David S. Miller8608db02008-08-18 20:51:18 -0700581 __netif_schedule(qdisc_root(wd->qdisc));
John Fastabend1e203c12014-10-02 22:43:09 -0700582 rcu_read_unlock();
Stephen Hemminger19365022007-03-22 12:18:35 -0700583
Patrick McHardy41794772007-03-16 01:19:15 -0700584 return HRTIMER_NORESTART;
585}
586
/* Prepare watchdog @wd for @qdisc: a CLOCK_MONOTONIC, CPU-pinned,
 * absolute-time hrtimer firing qdisc_watchdog().
 */
void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
{
	hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
	wd->timer.function = qdisc_watchdog;
	wd->qdisc = qdisc;
}
EXPORT_SYMBOL(qdisc_watchdog_init);
594
Eric Dumazet45f50be2016-06-10 16:41:39 -0700595void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires)
Patrick McHardy41794772007-03-16 01:19:15 -0700596{
Jarek Poplawski2540e052008-08-21 05:11:14 -0700597 if (test_bit(__QDISC_STATE_DEACTIVATED,
598 &qdisc_root_sleeping(wd->qdisc)->state))
599 return;
600
Eric Dumazeta9efad82016-05-23 14:24:56 -0700601 if (wd->last_expires == expires)
602 return;
603
604 wd->last_expires = expires;
Eric Dumazet46baac32012-10-20 00:40:51 +0000605 hrtimer_start(&wd->timer,
Jiri Pirko34c5d292013-02-12 00:12:04 +0000606 ns_to_ktime(expires),
Eric Dumazet4a8e3202014-09-20 18:01:30 -0700607 HRTIMER_MODE_ABS_PINNED);
Patrick McHardy41794772007-03-16 01:19:15 -0700608}
Jiri Pirko34c5d292013-02-12 00:12:04 +0000609EXPORT_SYMBOL(qdisc_watchdog_schedule_ns);
Patrick McHardy41794772007-03-16 01:19:15 -0700610
/* Stop the watchdog; waits for a concurrently running callback. */
void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
{
	hrtimer_cancel(&wd->timer);
}
EXPORT_SYMBOL(qdisc_watchdog_cancel);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700616
Adrian Bunka94f7792008-07-22 14:20:11 -0700617static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
Patrick McHardy6fe1c7a2008-07-05 23:21:31 -0700618{
Patrick McHardy6fe1c7a2008-07-05 23:21:31 -0700619 struct hlist_head *h;
Eric Dumazet9695fe62017-08-22 12:26:46 -0700620 unsigned int i;
Patrick McHardy6fe1c7a2008-07-05 23:21:31 -0700621
Eric Dumazet9695fe62017-08-22 12:26:46 -0700622 h = kvmalloc_array(n, sizeof(struct hlist_head), GFP_KERNEL);
Patrick McHardy6fe1c7a2008-07-05 23:21:31 -0700623
624 if (h != NULL) {
625 for (i = 0; i < n; i++)
626 INIT_HLIST_HEAD(&h[i]);
627 }
628 return h;
629}
630
Patrick McHardy6fe1c7a2008-07-05 23:21:31 -0700631void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
632{
633 struct Qdisc_class_common *cl;
Sasha Levinb67bfe02013-02-27 17:06:00 -0800634 struct hlist_node *next;
Patrick McHardy6fe1c7a2008-07-05 23:21:31 -0700635 struct hlist_head *nhash, *ohash;
636 unsigned int nsize, nmask, osize;
637 unsigned int i, h;
638
639 /* Rehash when load factor exceeds 0.75 */
640 if (clhash->hashelems * 4 <= clhash->hashsize * 3)
641 return;
642 nsize = clhash->hashsize * 2;
643 nmask = nsize - 1;
644 nhash = qdisc_class_hash_alloc(nsize);
645 if (nhash == NULL)
646 return;
647
648 ohash = clhash->hash;
649 osize = clhash->hashsize;
650
651 sch_tree_lock(sch);
652 for (i = 0; i < osize; i++) {
Sasha Levinb67bfe02013-02-27 17:06:00 -0800653 hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
Patrick McHardy6fe1c7a2008-07-05 23:21:31 -0700654 h = qdisc_class_hash(cl->classid, nmask);
655 hlist_add_head(&cl->hnode, &nhash[h]);
656 }
657 }
658 clhash->hash = nhash;
659 clhash->hashsize = nsize;
660 clhash->hashmask = nmask;
661 sch_tree_unlock(sch);
662
Eric Dumazet9695fe62017-08-22 12:26:46 -0700663 kvfree(ohash);
Patrick McHardy6fe1c7a2008-07-05 23:21:31 -0700664}
665EXPORT_SYMBOL(qdisc_class_hash_grow);
666
/* Initialize an empty class hash with 4 buckets (grown on demand by
 * qdisc_class_hash_grow()).  Returns 0 or -ENOMEM.
 */
int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
{
	unsigned int size = 4;

	clhash->hash = qdisc_class_hash_alloc(size);
	if (!clhash->hash)
		return -ENOMEM;
	clhash->hashsize = size;
	clhash->hashmask = size - 1;
	clhash->hashelems = 0;
	return 0;
}
EXPORT_SYMBOL(qdisc_class_hash_init);
680
/* Free the bucket array; classes themselves are owned elsewhere. */
void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
{
	kvfree(clhash->hash);
}
EXPORT_SYMBOL(qdisc_class_hash_destroy);
686
687void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
688 struct Qdisc_class_common *cl)
689{
690 unsigned int h;
691
692 INIT_HLIST_NODE(&cl->hnode);
693 h = qdisc_class_hash(cl->classid, clhash->hashmask);
694 hlist_add_head(&cl->hnode, &clhash->hash[h]);
695 clhash->hashelems++;
696}
697EXPORT_SYMBOL(qdisc_class_hash_insert);
698
/* Unlink class @cl from @clhash and drop the element count. */
void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	hlist_del(&cl->hnode);
	clhash->hashelems--;
}
EXPORT_SYMBOL(qdisc_class_hash_remove);
706
Eric Dumazetfa0f5aa2012-01-03 00:00:11 +0000707/* Allocate an unique handle from space managed by kernel
708 * Possible range is [8000-FFFF]:0000 (0x8000 values)
709 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700710static u32 qdisc_alloc_handle(struct net_device *dev)
711{
Eric Dumazetfa0f5aa2012-01-03 00:00:11 +0000712 int i = 0x8000;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700713 static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
714
715 do {
716 autohandle += TC_H_MAKE(0x10000U, 0);
717 if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
718 autohandle = TC_H_MAKE(0x80000000U, 0);
Eric Dumazetfa0f5aa2012-01-03 00:00:11 +0000719 if (!qdisc_lookup(dev, autohandle))
720 return autohandle;
721 cond_resched();
722 } while (--i > 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700723
Eric Dumazetfa0f5aa2012-01-03 00:00:11 +0000724 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700725}
726
WANG Cong2ccccf52016-02-25 14:55:01 -0800727void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
728 unsigned int len)
Patrick McHardy43effa12006-11-29 17:35:48 -0800729{
Eric Dumazet20fea082007-11-14 01:44:41 -0800730 const struct Qdisc_class_ops *cops;
Patrick McHardy43effa12006-11-29 17:35:48 -0800731 unsigned long cl;
732 u32 parentid;
Konstantin Khlebnikov95946652017-08-15 16:39:59 +0300733 bool notify;
Eric Dumazet2c8c8e62013-10-07 08:32:32 -0700734 int drops;
Patrick McHardy43effa12006-11-29 17:35:48 -0800735
WANG Cong2ccccf52016-02-25 14:55:01 -0800736 if (n == 0 && len == 0)
Patrick McHardy43effa12006-11-29 17:35:48 -0800737 return;
Eric Dumazet2c8c8e62013-10-07 08:32:32 -0700738 drops = max_t(int, n, 0);
Eric Dumazet4eaf3b82015-12-01 20:08:51 -0800739 rcu_read_lock();
Patrick McHardy43effa12006-11-29 17:35:48 -0800740 while ((parentid = sch->parent)) {
Jarek Poplawski066a3b52008-04-14 15:10:42 -0700741 if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
Eric Dumazet4eaf3b82015-12-01 20:08:51 -0800742 break;
Jarek Poplawski066a3b52008-04-14 15:10:42 -0700743
Eric Dumazet4eaf3b82015-12-01 20:08:51 -0800744 if (sch->flags & TCQ_F_NOPARENT)
745 break;
Konstantin Khlebnikov95946652017-08-15 16:39:59 +0300746 /* Notify parent qdisc only if child qdisc becomes empty.
747 *
748 * If child was empty even before update then backlog
749 * counter is screwed and we skip notification because
750 * parent class is already passive.
751 */
752 notify = !sch->q.qlen && !WARN_ON_ONCE(!n);
Eric Dumazet4eaf3b82015-12-01 20:08:51 -0800753 /* TODO: perform the search on a per txq basis */
David S. Miller5ce2d482008-07-08 17:06:30 -0700754 sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
Patrick McHardyffc8fef2007-07-30 17:11:50 -0700755 if (sch == NULL) {
Eric Dumazet4eaf3b82015-12-01 20:08:51 -0800756 WARN_ON_ONCE(parentid != TC_H_ROOT);
757 break;
Patrick McHardyffc8fef2007-07-30 17:11:50 -0700758 }
Patrick McHardy43effa12006-11-29 17:35:48 -0800759 cops = sch->ops->cl_ops;
Konstantin Khlebnikov95946652017-08-15 16:39:59 +0300760 if (notify && cops->qlen_notify) {
WANG Cong143976c2017-08-24 16:51:29 -0700761 cl = cops->find(sch, parentid);
Patrick McHardy43effa12006-11-29 17:35:48 -0800762 cops->qlen_notify(sch, cl);
Patrick McHardy43effa12006-11-29 17:35:48 -0800763 }
764 sch->q.qlen -= n;
WANG Cong2ccccf52016-02-25 14:55:01 -0800765 sch->qstats.backlog -= len;
John Fastabend25331d62014-09-28 11:53:29 -0700766 __qdisc_qstats_drop(sch, drops);
Patrick McHardy43effa12006-11-29 17:35:48 -0800767 }
Eric Dumazet4eaf3b82015-12-01 20:08:51 -0800768 rcu_read_unlock();
Patrick McHardy43effa12006-11-29 17:35:48 -0800769}
WANG Cong2ccccf52016-02-25 14:55:01 -0800770EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700771
WANG Cong27d7f072017-08-24 16:51:27 -0700772static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
773 u32 portid, u32 seq, u16 flags, int event)
774{
775 struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
776 struct gnet_stats_queue __percpu *cpu_qstats = NULL;
777 struct tcmsg *tcm;
778 struct nlmsghdr *nlh;
779 unsigned char *b = skb_tail_pointer(skb);
780 struct gnet_dump d;
781 struct qdisc_size_table *stab;
782 __u32 qlen;
783
784 cond_resched();
785 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
786 if (!nlh)
787 goto out_nlmsg_trim;
788 tcm = nlmsg_data(nlh);
789 tcm->tcm_family = AF_UNSPEC;
790 tcm->tcm__pad1 = 0;
791 tcm->tcm__pad2 = 0;
792 tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
793 tcm->tcm_parent = clid;
794 tcm->tcm_handle = q->handle;
795 tcm->tcm_info = refcount_read(&q->refcnt);
796 if (nla_put_string(skb, TCA_KIND, q->ops->id))
797 goto nla_put_failure;
Yuval Mintz7a4fa292017-12-14 15:54:29 +0200798 if (nla_put_u8(skb, TCA_HW_OFFLOAD, !!(q->flags & TCQ_F_OFFLOADED)))
799 goto nla_put_failure;
WANG Cong27d7f072017-08-24 16:51:27 -0700800 if (q->ops->dump && q->ops->dump(q, skb) < 0)
801 goto nla_put_failure;
John Fastabend7e660162017-12-07 09:57:00 -0800802
803 qlen = qdisc_qlen_sum(q);
WANG Cong27d7f072017-08-24 16:51:27 -0700804
805 stab = rtnl_dereference(q->stab);
806 if (stab && qdisc_dump_stab(skb, stab) < 0)
807 goto nla_put_failure;
808
809 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
810 NULL, &d, TCA_PAD) < 0)
811 goto nla_put_failure;
812
813 if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
814 goto nla_put_failure;
815
816 if (qdisc_is_percpu_stats(q)) {
817 cpu_bstats = q->cpu_bstats;
818 cpu_qstats = q->cpu_qstats;
819 }
820
821 if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
822 &d, cpu_bstats, &q->bstats) < 0 ||
823 gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
824 gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
825 goto nla_put_failure;
826
827 if (gnet_stats_finish_copy(&d) < 0)
828 goto nla_put_failure;
829
830 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
831 return skb->len;
832
833out_nlmsg_trim:
834nla_put_failure:
835 nlmsg_trim(skb, b);
836 return -1;
837}
838
839static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
840{
841 if (q->flags & TCQ_F_BUILTIN)
842 return true;
843 if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible)
844 return true;
845
846 return false;
847}
848
/* Send a netlink notification about a qdisc change to RTNLGRP_TC:
 * an RTM_DELQDISC message for "old" (if present and dumpable) and/or
 * an RTM_NEWQDISC for "new" (with NLM_F_REPLACE when it replaces old).
 *
 * Returns the rtnetlink_send() result on success, -ENOBUFS if the skb
 * cannot be allocated, or -EINVAL if nothing was put in the message
 * (both qdiscs absent/ignored, or a fill failed).
 */
static int qdisc_notify(struct net *net, struct sk_buff *oskb,
			struct nlmsghdr *n, u32 clid,
			struct Qdisc *old, struct Qdisc *new)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (old && !tc_qdisc_dump_ignore(old, false)) {
		if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
				  0, RTM_DELQDISC) < 0)
			goto err_out;
	}
	if (new && !tc_qdisc_dump_ignore(new, false)) {
		if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
				  old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
			goto err_out;
	}

	/* Intentional fall-through to err_out when nothing was filled. */
	if (skb->len)
		return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
				      n->nlmsg_flags & NLM_F_ECHO);

err_out:
	kfree_skb(skb);
	return -EINVAL;
}
879
Tom Goff7316ae82010-03-19 15:40:13 +0000880static void notify_and_destroy(struct net *net, struct sk_buff *skb,
881 struct nlmsghdr *n, u32 clid,
David S. Miller99194cf2008-07-17 04:54:10 -0700882 struct Qdisc *old, struct Qdisc *new)
883{
884 if (new || old)
Tom Goff7316ae82010-03-19 15:40:13 +0000885 qdisc_notify(net, skb, n, clid, old, new);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700886
David S. Miller4d8863a2008-08-18 21:03:15 -0700887 if (old)
David S. Miller99194cf2008-07-17 04:54:10 -0700888 qdisc_destroy(old);
David S. Miller99194cf2008-07-17 04:54:10 -0700889}
890
/* Graft qdisc "new" to class "classid" of qdisc "parent" or,
 * when "parent" is NULL, directly to device "dev".
 *
 * When appropriate, send a netlink notification using "skb"
 * and "n".
 *
 * On success, destroy the old qdisc.
 */
899
static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
		       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
		       struct Qdisc *new, struct Qdisc *old)
{
	struct Qdisc *q = old;
	struct net *net = dev_net(dev);
	int err = 0;

	if (parent == NULL) {
		/* Device-level graft: root (all tx queues) or ingress. */
		unsigned int i, num_q, ingress;

		ingress = 0;
		num_q = dev->num_tx_queues;
		if ((q && q->flags & TCQ_F_INGRESS) ||
		    (new && new->flags & TCQ_F_INGRESS)) {
			/* Ingress uses a single dedicated queue. */
			num_q = 1;
			ingress = 1;
			if (!dev_ingress_queue(dev))
				return -ENOENT;
		}

		/* Quiesce the device while its qdiscs are swapped. */
		if (dev->flags & IFF_UP)
			dev_deactivate(dev);

		/* Qdiscs providing ->attach() install themselves onto the
		 * per-queue pointers; skip the manual per-queue grafting.
		 */
		if (new && new->ops->attach)
			goto skip;

		for (i = 0; i < num_q; i++) {
			struct netdev_queue *dev_queue = dev_ingress_queue(dev);

			if (!ingress)
				dev_queue = netdev_get_tx_queue(dev, i);

			old = dev_graft_qdisc(dev_queue, new);
			/* One extra reference per tx queue beyond the first. */
			if (new && i > 0)
				qdisc_refcount_inc(new);

			if (!ingress)
				qdisc_destroy(old);
		}

skip:
		if (!ingress) {
			/* Notify with the previous device root, then make
			 * "new" (or the noop qdisc) the device-wide root.
			 */
			notify_and_destroy(net, skb, n, classid,
					   dev->qdisc, new);
			if (new && !new->ops->attach)
				qdisc_refcount_inc(new);
			dev->qdisc = new ? : &noop_qdisc;

			if (new && new->ops->attach)
				new->ops->attach(new);
		} else {
			notify_and_destroy(net, skb, n, classid, old, new);
		}

		if (dev->flags & IFF_UP)
			dev_activate(dev);
	} else {
		/* Graft under a class of a classful parent qdisc. */
		const struct Qdisc_class_ops *cops = parent->ops->cl_ops;

		/* Only support running class lockless if parent is lockless */
		if (new && (new->flags & TCQ_F_NOLOCK) &&
		    parent && !(parent->flags & TCQ_F_NOLOCK))
			new->flags &= ~TCQ_F_NOLOCK;

		err = -EOPNOTSUPP;
		if (cops && cops->graft) {
			unsigned long cl = cops->find(parent, classid);

			if (cl)
				err = cops->graft(parent, cl, new, &old);
			else
				err = -ENOENT;
		}
		if (!err)
			notify_and_destroy(net, skb, n, classid, old, new);
	}
	return err;
}
979
/* lockdep annotation is needed for ingress; egress gets it only for name.
 * Separate keys keep the rx (ingress) and tx qdisc locks in distinct
 * lockdep classes; they are applied in qdisc_create().
 */
static struct lock_class_key qdisc_tx_lock;
static struct lock_class_key qdisc_rx_lock;
983
/*
   Allocate and initialize a new qdisc.

   Parameters are passed via the tca attribute array.
 */
989
Jamal Hadi Salim5a7a5552016-09-18 08:45:33 -0400990static struct Qdisc *qdisc_create(struct net_device *dev,
991 struct netdev_queue *dev_queue,
992 struct Qdisc *p, u32 parent, u32 handle,
993 struct nlattr **tca, int *errp)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700994{
995 int err;
Patrick McHardy1e904742008-01-22 22:11:17 -0800996 struct nlattr *kind = tca[TCA_KIND];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700997 struct Qdisc *sch;
998 struct Qdisc_ops *ops;
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700999 struct qdisc_size_table *stab;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001000
1001 ops = qdisc_lookup_ops(kind);
Johannes Berg95a5afc2008-10-16 15:24:51 -07001002#ifdef CONFIG_MODULES
Linus Torvalds1da177e2005-04-16 15:20:36 -07001003 if (ops == NULL && kind != NULL) {
1004 char name[IFNAMSIZ];
Patrick McHardy1e904742008-01-22 22:11:17 -08001005 if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001006 /* We dropped the RTNL semaphore in order to
1007 * perform the module load. So, even if we
1008 * succeeded in loading the module we have to
1009 * tell the caller to replay the request. We
1010 * indicate this using -EAGAIN.
1011 * We replay the request because the device may
1012 * go away in the mean time.
1013 */
1014 rtnl_unlock();
1015 request_module("sch_%s", name);
1016 rtnl_lock();
1017 ops = qdisc_lookup_ops(kind);
1018 if (ops != NULL) {
1019 /* We will try again qdisc_lookup_ops,
1020 * so don't keep a reference.
1021 */
1022 module_put(ops->owner);
1023 err = -EAGAIN;
1024 goto err_out;
1025 }
1026 }
1027 }
1028#endif
1029
Jamal Hadi Salimb9e2cc02006-08-03 16:36:51 -07001030 err = -ENOENT;
Alexander Aring0ac4bd62017-12-04 18:39:59 -05001031 if (!ops)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001032 goto err_out;
1033
David S. Miller5ce2d482008-07-08 17:06:30 -07001034 sch = qdisc_alloc(dev_queue, ops);
Thomas Graf3d54b822005-07-05 14:15:09 -07001035 if (IS_ERR(sch)) {
1036 err = PTR_ERR(sch);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001037 goto err_out2;
Thomas Graf3d54b822005-07-05 14:15:09 -07001038 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001039
Patrick McHardyffc8fef2007-07-30 17:11:50 -07001040 sch->parent = parent;
1041
Thomas Graf3d54b822005-07-05 14:15:09 -07001042 if (handle == TC_H_INGRESS) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001043 sch->flags |= TCQ_F_INGRESS;
Thomas Graf3d54b822005-07-05 14:15:09 -07001044 handle = TC_H_MAKE(TC_H_INGRESS, 0);
Jarek Poplawski25bfcd52008-08-18 20:53:34 -07001045 lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock);
Patrick McHardyfd44de72007-04-16 17:07:08 -07001046 } else {
Patrick McHardyfd44de72007-04-16 17:07:08 -07001047 if (handle == 0) {
1048 handle = qdisc_alloc_handle(dev);
1049 err = -ENOMEM;
1050 if (handle == 0)
1051 goto err_out3;
1052 }
Jarek Poplawski25bfcd52008-08-18 20:53:34 -07001053 lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
Eric Dumazet1abbe132012-12-11 15:54:33 +00001054 if (!netif_is_multiqueue(dev))
Eric Dumazet225734d2015-12-15 09:43:12 -08001055 sch->flags |= TCQ_F_ONETXQUEUE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001056 }
1057
Thomas Graf3d54b822005-07-05 14:15:09 -07001058 sch->handle = handle;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001059
Jesper Dangaard Brouer84c46dd2016-11-03 14:56:11 +01001060 /* This exist to keep backward compatible with a userspace
1061 * loophole, what allowed userspace to get IFF_NO_QUEUE
1062 * facility on older kernels by setting tx_queue_len=0 (prior
1063 * to qdisc init), and then forgot to reinit tx_queue_len
1064 * before again attaching a qdisc.
1065 */
1066 if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
1067 dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
1068 netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
1069 }
1070
Alexander Aring54160ef2017-12-04 18:40:00 -05001071 if (ops->init) {
1072 err = ops->init(sch, tca[TCA_OPTIONS]);
1073 if (err != 0)
1074 goto err_out5;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001075 }
Alexander Aring54160ef2017-12-04 18:40:00 -05001076
1077 if (qdisc_is_percpu_stats(sch)) {
1078 sch->cpu_bstats =
1079 netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
1080 if (!sch->cpu_bstats)
1081 goto err_out4;
1082
1083 sch->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
1084 if (!sch->cpu_qstats)
1085 goto err_out4;
1086 }
1087
1088 if (tca[TCA_STAB]) {
1089 stab = qdisc_get_stab(tca[TCA_STAB]);
1090 if (IS_ERR(stab)) {
1091 err = PTR_ERR(stab);
1092 goto err_out4;
1093 }
1094 rcu_assign_pointer(sch->stab, stab);
1095 }
1096 if (tca[TCA_RATE]) {
1097 seqcount_t *running;
1098
1099 err = -EOPNOTSUPP;
1100 if (sch->flags & TCQ_F_MQROOT)
1101 goto err_out4;
1102
1103 if (sch->parent != TC_H_ROOT &&
1104 !(sch->flags & TCQ_F_INGRESS) &&
1105 (!p || !(p->flags & TCQ_F_MQROOT)))
1106 running = qdisc_root_sleeping_running(sch);
1107 else
1108 running = &sch->running;
1109
1110 err = gen_new_estimator(&sch->bstats,
1111 sch->cpu_bstats,
1112 &sch->rate_est,
1113 NULL,
1114 running,
1115 tca[TCA_RATE]);
1116 if (err)
1117 goto err_out4;
1118 }
1119
1120 qdisc_hash_add(sch, false);
1121
1122 return sch;
1123
1124err_out5:
Eric Dumazet87b60cf2017-02-10 10:31:49 -08001125 /* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */
Gao Fengc1a48722017-06-28 12:53:54 +08001126 if (ops->destroy)
1127 ops->destroy(sch);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001128err_out3:
1129 dev_put(dev);
Thomas Graf3d54b822005-07-05 14:15:09 -07001130 kfree((char *) sch - sch->padded);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001131err_out2:
1132 module_put(ops->owner);
1133err_out:
1134 *errp = err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001135 return NULL;
Patrick McHardy23bcf632009-09-09 18:11:23 -07001136
1137err_out4:
John Fastabend22e0f8b2014-09-28 11:52:56 -07001138 free_percpu(sch->cpu_bstats);
John Fastabendb0ab6f92014-09-28 11:54:24 -07001139 free_percpu(sch->cpu_qstats);
Patrick McHardy23bcf632009-09-09 18:11:23 -07001140 /*
1141 * Any broken qdiscs that would require a ops->reset() here?
1142 * The qdisc was never in action so it shouldn't be necessary.
1143 */
Eric Dumazeta2da5702011-01-20 03:48:19 +00001144 qdisc_put_stab(rtnl_dereference(sch->stab));
Patrick McHardy23bcf632009-09-09 18:11:23 -07001145 if (ops->destroy)
1146 ops->destroy(sch);
1147 goto err_out3;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001148}
1149
/* Change an existing qdisc in place: apply new options via the qdisc's
 * ->change() op (TCA_OPTIONS), swap in a new size table (TCA_STAB), and
 * replace the rate estimator (TCA_RATE).
 *
 * Returns 0 on success or a negative errno.  Once ->change() has
 * succeeded, later steps are not rolled back on failure.  Note that
 * when TCA_STAB is absent, any existing size table is removed.
 */
static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
{
	struct qdisc_size_table *ostab, *stab = NULL;
	int err = 0;

	if (tca[TCA_OPTIONS]) {
		if (!sch->ops->change)
			return -EINVAL;
		err = sch->ops->change(sch, tca[TCA_OPTIONS]);
		if (err)
			return err;
	}

	if (tca[TCA_STAB]) {
		stab = qdisc_get_stab(tca[TCA_STAB]);
		if (IS_ERR(stab))
			return PTR_ERR(stab);
	}

	/* Publish the new stab (possibly NULL) and drop the old one. */
	ostab = rtnl_dereference(sch->stab);
	rcu_assign_pointer(sch->stab, stab);
	qdisc_put_stab(ostab);

	if (tca[TCA_RATE]) {
		/* NB: ignores errors from replace_estimator
		   because change can't be undone. */
		if (sch->flags & TCQ_F_MQROOT)
			goto out;
		gen_replace_estimator(&sch->bstats,
				      sch->cpu_bstats,
				      &sch->rate_est,
				      NULL,
				      qdisc_root_sleeping_running(sch),
				      tca[TCA_RATE]);
	}
out:
	return 0;
}
1188
/* State for the loop-detection walk in check_loop()/check_loop_fn().
 * The embedded walker must stay the first member: check_loop_fn()
 * casts the qdisc_walker pointer back to this struct.
 */
struct check_loop_arg {
	struct qdisc_walker	w;	/* generic class walker (first member) */
	struct Qdisc *p;		/* qdisc that must not appear below q */
	int depth;			/* current recursion depth */
};

static int check_loop_fn(struct Qdisc *q, unsigned long cl,
			 struct qdisc_walker *w);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001197
1198static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
1199{
1200 struct check_loop_arg arg;
1201
1202 if (q->ops->cl_ops == NULL)
1203 return 0;
1204
1205 arg.w.stop = arg.w.skip = arg.w.count = 0;
1206 arg.w.fn = check_loop_fn;
1207 arg.depth = depth;
1208 arg.p = p;
1209 q->ops->cl_ops->walk(q, &arg.w);
1210 return arg.w.stop ? -ELOOP : 0;
1211}
1212
1213static int
1214check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
1215{
1216 struct Qdisc *leaf;
Eric Dumazet20fea082007-11-14 01:44:41 -08001217 const struct Qdisc_class_ops *cops = q->ops->cl_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001218 struct check_loop_arg *arg = (struct check_loop_arg *)w;
1219
1220 leaf = cops->leaf(q, cl);
1221 if (leaf) {
1222 if (leaf == arg->p || arg->depth > 7)
1223 return -ELOOP;
1224 return check_loop(leaf, arg->p, arg->depth + 1);
1225 }
1226 return 0;
1227}
1228
1229/*
1230 * Delete/get qdisc.
1231 */
1232
/* Handle RTM_DELQDISC/RTM_GETQDISC: locate the qdisc named by the tcmsg
 * (by parent class id, ingress, device root, or handle), then either
 * graft NULL over it (delete, with notification) or just notify (get).
 *
 * Returns 0 on success or a negative errno.
 */
static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q = NULL;
	struct Qdisc *p = NULL;
	int err;

	/* Deletion requires CAP_NET_ADMIN; plain get does not. */
	if ((n->nlmsg_type != RTM_GETQDISC) &&
	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack);
	if (err < 0)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	clid = tcm->tcm_parent;
	if (clid) {
		if (clid != TC_H_ROOT) {
			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
				/* Child qdisc under class "clid" of parent p. */
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p)
					return -ENOENT;
				q = qdisc_leaf(p, clid);
			} else if (dev_ingress_queue(dev)) {
				q = dev_ingress_queue(dev)->qdisc_sleeping;
			}
		} else {
			q = dev->qdisc;
		}
		if (!q)
			return -ENOENT;

		/* If a handle was also given, it must match. */
		if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
			return -EINVAL;
	} else {
		q = qdisc_lookup(dev, tcm->tcm_handle);
		if (!q)
			return -ENOENT;
	}

	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
		return -EINVAL;

	if (n->nlmsg_type == RTM_DELQDISC) {
		if (!clid)
			return -EINVAL;
		if (q->handle == 0)
			return -ENOENT;
		/* Grafting NULL removes q and notifies via qdisc_graft(). */
		err = qdisc_graft(dev, p, skb, n, clid, NULL, q);
		if (err != 0)
			return err;
	} else {
		qdisc_notify(net, skb, n, clid, NULL, q);
	}
	return 0;
}
1298
1299/*
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001300 * Create/change qdisc.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001301 */
1302
/* Handle RTM_NEWQDISC: create a new qdisc, replace an existing one, or
 * change one in place, according to the tcmsg addressing and the
 * NLM_F_CREATE/REPLACE/EXCL flags.  If qdisc_create() had to drop RTNL
 * to load a module it returns -EAGAIN and the whole request is replayed.
 *
 * Returns 0 on success or a negative errno.
 */
static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
			   struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm;
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q, *p;
	int err;

	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

replay:
	/* Reinit, just in case something touches this. */
	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack);
	if (err < 0)
		return err;

	tcm = nlmsg_data(n);
	clid = tcm->tcm_parent;
	q = p = NULL;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;


	if (clid) {
		if (clid != TC_H_ROOT) {
			if (clid != TC_H_INGRESS) {
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p)
					return -ENOENT;
				q = qdisc_leaf(p, clid);
			} else if (dev_ingress_queue_create(dev)) {
				q = dev_ingress_queue(dev)->qdisc_sleeping;
			}
		} else {
			q = dev->qdisc;
		}

		/* It may be default qdisc, ignore it */
		if (q && q->handle == 0)
			q = NULL;

		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
			if (tcm->tcm_handle) {
				if (q && !(n->nlmsg_flags & NLM_F_REPLACE))
					return -EEXIST;
				if (TC_H_MIN(tcm->tcm_handle))
					return -EINVAL;
				/* Re-grafting an existing qdisc by handle. */
				q = qdisc_lookup(dev, tcm->tcm_handle);
				if (!q)
					goto create_n_graft;
				if (n->nlmsg_flags & NLM_F_EXCL)
					return -EEXIST;
				if (tca[TCA_KIND] &&
				    nla_strcmp(tca[TCA_KIND], q->ops->id))
					return -EINVAL;
				/* Refuse grafts that would create a cycle. */
				if (q == p ||
				    (p && check_loop(q, p, 0)))
					return -ELOOP;
				qdisc_refcount_inc(q);
				goto graft;
			} else {
				if (!q)
					goto create_n_graft;

				/* This magic test requires explanation.
				 *
				 * We know, that some child q is already
				 * attached to this parent and have choice:
				 * either to change it or to create/graft new one.
				 *
				 * 1. We are allowed to create/graft only
				 * if CREATE and REPLACE flags are set.
				 *
				 * 2. If EXCL is set, requestor wanted to say,
				 * that qdisc tcm_handle is not expected
				 * to exist, so that we choose create/graft too.
				 *
				 * 3. The last case is when no flags are set.
				 * Alas, it is sort of hole in API, we
				 * cannot decide what to do unambiguously.
				 * For now we select create/graft, if
				 * user gave KIND, which does not match existing.
				 */
				if ((n->nlmsg_flags & NLM_F_CREATE) &&
				    (n->nlmsg_flags & NLM_F_REPLACE) &&
				    ((n->nlmsg_flags & NLM_F_EXCL) ||
				     (tca[TCA_KIND] &&
				      nla_strcmp(tca[TCA_KIND], q->ops->id))))
					goto create_n_graft;
			}
		}
	} else {
		if (!tcm->tcm_handle)
			return -EINVAL;
		q = qdisc_lookup(dev, tcm->tcm_handle);
	}

	/* Change qdisc parameters */
	if (!q)
		return -ENOENT;
	if (n->nlmsg_flags & NLM_F_EXCL)
		return -EEXIST;
	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
		return -EINVAL;
	err = qdisc_change(q, tca);
	if (err == 0)
		qdisc_notify(net, skb, n, clid, NULL, q);
	return err;

create_n_graft:
	if (!(n->nlmsg_flags & NLM_F_CREATE))
		return -ENOENT;
	if (clid == TC_H_INGRESS) {
		if (dev_ingress_queue(dev))
			q = qdisc_create(dev, dev_ingress_queue(dev), p,
					 tcm->tcm_parent, tcm->tcm_parent,
					 tca, &err);
		else
			err = -ENOENT;
	} else {
		struct netdev_queue *dev_queue;

		/* Let a classful parent pick the tx queue if it can. */
		if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
			dev_queue = p->ops->cl_ops->select_queue(p, tcm);
		else if (p)
			dev_queue = p->dev_queue;
		else
			dev_queue = netdev_get_tx_queue(dev, 0);

		q = qdisc_create(dev, dev_queue, p,
				 tcm->tcm_parent, tcm->tcm_handle,
				 tca, &err);
	}
	if (q == NULL) {
		if (err == -EAGAIN)
			goto replay;
		return err;
	}

graft:
	err = qdisc_graft(dev, p, skb, n, clid, q, NULL);
	if (err) {
		if (q)
			qdisc_destroy(q);
		return err;
	}

	return 0;
}
1458
/* Dump the qdisc "root" and, if "recur" is set, every qdisc hashed on
 * its device, into "skb".  *q_idx_p is the running per-device index
 * used to resume an interrupted dump at s_q_idx; it is advanced past
 * each qdisc considered and written back on exit.
 *
 * Returns 0 when done (including nothing to do) and -1 when the skb
 * filled up and the dump must be continued in a later callback.
 */
static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
			      struct netlink_callback *cb,
			      int *q_idx_p, int s_q_idx, bool recur,
			      bool dump_invisible)
{
	int ret = 0, q_idx = *q_idx_p;
	struct Qdisc *q;
	int b;

	if (!root)
		return 0;

	/* Dump the root itself first (unless skipped for resume). */
	q = root;
	if (q_idx < s_q_idx) {
		q_idx++;
	} else {
		if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
				  RTM_NEWQDISC) <= 0)
			goto done;
		q_idx++;
	}

	/* If dumping singletons, there is no qdisc_dev(root) and the singleton
	 * itself has already been dumped.
	 *
	 * If we've already dumped the top-level (ingress) qdisc above and the global
	 * qdisc hashtable, we don't want to hit it again
	 */
	if (!qdisc_dev(root) || !recur)
		goto out;

	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
		if (q_idx < s_q_idx) {
			q_idx++;
			continue;
		}
		if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
				  RTM_NEWQDISC) <= 0)
			goto done;
		q_idx++;
	}

out:
	*q_idx_p = q_idx;
	return ret;
done:
	ret = -1;
	goto out;
}
1512
/* Netlink dump callback for RTM_GETQDISC: walk every device in the
 * namespace and dump its root/hashed qdiscs and its ingress qdisc.
 * Resume state is kept in cb->args[0] (device index) and cb->args[1]
 * (per-device qdisc index).
 */
static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int idx, q_idx;
	int s_idx, s_q_idx;
	struct net_device *dev;
	const struct nlmsghdr *nlh = cb->nlh;
	struct nlattr *tca[TCA_MAX + 1];
	int err;

	/* Resume positions from the previous dump round. */
	s_idx = cb->args[0];
	s_q_idx = q_idx = cb->args[1];

	idx = 0;
	ASSERT_RTNL();

	err = nlmsg_parse(nlh, sizeof(struct tcmsg), tca, TCA_MAX, NULL, NULL);
	if (err < 0)
		return err;

	for_each_netdev(net, dev) {
		struct netdev_queue *dev_queue;

		if (idx < s_idx)
			goto cont;
		if (idx > s_idx)
			s_q_idx = 0;
		q_idx = 0;

		/* Root qdisc plus the device's hashed qdiscs. */
		if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx,
				       true, tca[TCA_DUMP_INVISIBLE]) < 0)
			goto done;

		/* Ingress qdisc is dumped as a singleton (no recursion). */
		dev_queue = dev_ingress_queue(dev);
		if (dev_queue &&
		    tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
				       &q_idx, s_q_idx, false,
				       tca[TCA_DUMP_INVISIBLE]) < 0)
			goto done;

cont:
		idx++;
	}

done:
	cb->args[0] = idx;
	cb->args[1] = q_idx;

	return skb->len;
}
1563
1564
1565
1566/************************************************
1567 * Traffic classes manipulation. *
1568 ************************************************/
1569
/* Build one RTM_{NEW,DEL}TCLASS message describing class @cl of qdisc @q
 * into @skb.
 *
 * @portid/@seq/@flags/@event populate the netlink header.  Returns the
 * new skb length on success, or -1 if the message did not fit, in which
 * case everything written by this call is trimmed off again so the skb
 * is left exactly as it was on entry.
 */
static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
			  unsigned long cl,
			  u32 portid, u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);	/* rollback point on failure */
	struct gnet_dump d;
	const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;

	/* Called from potentially long dump loops; be scheduler friendly. */
	cond_resched();
	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = q->handle;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = 0;
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	/* Let the qdisc fill in class-specific attributes (and tcm_handle). */
	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 NULL, &d, TCA_PAD) < 0)
		goto nla_put_failure;

	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	/* Undo the partial message so the caller can retry with more room. */
	nlmsg_trim(skb, b);
	return -1;
}
1615
1616static int tclass_notify(struct net *net, struct sk_buff *oskb,
1617 struct nlmsghdr *n, struct Qdisc *q,
1618 unsigned long cl, int event)
1619{
1620 struct sk_buff *skb;
1621 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1622
1623 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1624 if (!skb)
1625 return -ENOBUFS;
1626
1627 if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
1628 kfree_skb(skb);
1629 return -EINVAL;
1630 }
1631
1632 return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1633 n->nlmsg_flags & NLM_F_ECHO);
1634}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001635
WANG Cong14546ba2017-08-24 16:51:28 -07001636static int tclass_del_notify(struct net *net,
1637 const struct Qdisc_class_ops *cops,
1638 struct sk_buff *oskb, struct nlmsghdr *n,
1639 struct Qdisc *q, unsigned long cl)
1640{
1641 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1642 struct sk_buff *skb;
1643 int err = 0;
1644
1645 if (!cops->delete)
1646 return -EOPNOTSUPP;
1647
1648 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1649 if (!skb)
1650 return -ENOBUFS;
1651
1652 if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
1653 RTM_DELTCLASS) < 0) {
1654 kfree_skb(skb);
1655 return -EINVAL;
1656 }
1657
1658 err = cops->delete(q, cl);
1659 if (err) {
1660 kfree_skb(skb);
1661 return err;
1662 }
1663
1664 return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1665 n->nlmsg_flags & NLM_F_ECHO);
1666}
1667
Cong Wang07d79fc2017-08-30 14:30:36 -07001668#ifdef CONFIG_NET_CLS
1669
/* Walker state used to rebind every filter pointing at one class. */
struct tcf_bind_args {
	struct tcf_walker w;	/* must stay first: tcf_node_bind() casts back */
	u32 classid;		/* classid the filters should now resolve */
	unsigned long cl;	/* new internal class handle to bind to */
};
1675
1676static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
1677{
1678 struct tcf_bind_args *a = (void *)arg;
1679
1680 if (tp->ops->bind_class) {
Jiri Pirko74e3be62017-10-13 14:01:04 +02001681 struct Qdisc *q = tcf_block_q(tp->chain->block);
1682
1683 sch_tree_lock(q);
Cong Wang07d79fc2017-08-30 14:30:36 -07001684 tp->ops->bind_class(n, a->classid, a->cl);
Jiri Pirko74e3be62017-10-13 14:01:04 +02001685 sch_tree_unlock(q);
Cong Wang07d79fc2017-08-30 14:30:36 -07001686 }
1687 return 0;
1688}
1689
/* Walk every filter attached (via the class found by @portid) to qdisc
 * @q and rebind those pointing at @clid to the internal class handle
 * @new_cl (0 unbinds, used on class deletion).  Silently does nothing
 * if the class or its tcf_block cannot be found.
 */
static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
			   unsigned long new_cl)
{
	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
	struct tcf_block *block;
	struct tcf_chain *chain;
	unsigned long cl;

	cl = cops->find(q, portid);	/* class owning the filter block */
	if (!cl)
		return;
	block = cops->tcf_block(q, cl);
	if (!block)
		return;
	list_for_each_entry(chain, &block->chain_list, list) {
		struct tcf_proto *tp;

		/* filter_chain is RCU-managed; rtnl_dereference() documents
		 * that we walk it under RTNL.
		 */
		for (tp = rtnl_dereference(chain->filter_chain);
		     tp; tp = rtnl_dereference(tp->next)) {
			struct tcf_bind_args arg = {};

			arg.w.fn = tcf_node_bind;
			arg.classid = clid;
			arg.cl = new_cl;
			tp->ops->walk(tp, &arg.w);
		}
	}
}
1718
1719#else
1720
/* CONFIG_NET_CLS=n stub: no filters exist, so there is nothing to rebind. */
static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
			   unsigned long new_cl)
{
}
1725
1726#endif
1727
/* rtnetlink handler for RTM_{NEW,DEL,GET}TCLASS: locate the qdisc and
 * class addressed by the request, then create/change, delete, or dump
 * the class.  Runs under RTNL.  Returns 0 or a negative errno.
 */
static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
			 struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	struct Qdisc *q = NULL;
	const struct Qdisc_class_ops *cops;
	unsigned long cl = 0;		/* internal handle of addressed class */
	unsigned long new_cl;
	u32 portid;			/* reused for tcm_parent, then netlink portid */
	u32 clid;
	u32 qid;
	int err;

	/* Only GET is unprivileged; NEW/DEL require CAP_NET_ADMIN. */
	if ((n->nlmsg_type != RTM_GETTCLASS) &&
	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack);
	if (err < 0)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	/*
	 * parent == TC_H_UNSPEC - unspecified parent.
	 * parent == TC_H_ROOT   - class is root, which has no parent.
	 * parent == X:0         - parent is root class.
	 * parent == X:Y         - parent is a node in hierarchy.
	 * parent == 0:Y         - parent is X:Y, where X:0 is qdisc.
	 *
	 * handle == 0:0 - generate handle from kernel pool.
	 * handle == 0:Y - class is X:Y, where X:0 is qdisc.
	 * handle == X:Y - clear.
	 * handle == X:0 - root class.
	 */

	/* Step 1. Determine qdisc handle X:0 */

	portid = tcm->tcm_parent;
	clid = tcm->tcm_handle;
	qid = TC_H_MAJ(clid);

	if (portid != TC_H_ROOT) {
		u32 qid1 = TC_H_MAJ(portid);

		if (qid && qid1) {
			/* If both majors are known, they must be identical. */
			if (qid != qid1)
				return -EINVAL;
		} else if (qid1) {
			qid = qid1;
		} else if (qid == 0)
			qid = dev->qdisc->handle;

		/* Now qid is genuine qdisc handle consistent
		 * both with parent and child.
		 *
		 * TC_H_MAJ(portid) still may be unspecified, complete it now.
		 */
		if (portid)
			portid = TC_H_MAKE(qid, portid);
	} else {
		if (qid == 0)
			qid = dev->qdisc->handle;
	}

	/* OK. Locate qdisc */
	q = qdisc_lookup(dev, qid);
	if (!q)
		return -ENOENT;

	/* And check that it supports classes */
	cops = q->ops->cl_ops;
	if (cops == NULL)
		return -EINVAL;

	/* Now try to get class */
	if (clid == 0) {
		if (portid == TC_H_ROOT)
			clid = qid;
	} else
		clid = TC_H_MAKE(qid, clid);

	if (clid)
		cl = cops->find(q, clid);

	if (cl == 0) {
		/* Nonexistent class: only NEW with NLM_F_CREATE may proceed. */
		err = -ENOENT;
		if (n->nlmsg_type != RTM_NEWTCLASS ||
		    !(n->nlmsg_flags & NLM_F_CREATE))
			goto out;
	} else {
		switch (n->nlmsg_type) {
		case RTM_NEWTCLASS:
			err = -EEXIST;
			if (n->nlmsg_flags & NLM_F_EXCL)
				goto out;
			break;
		case RTM_DELTCLASS:
			err = tclass_del_notify(net, cops, skb, n, q, cl);
			/* Unbind the class's filters by rebinding them to 0 */
			tc_bind_tclass(q, portid, clid, 0);
			goto out;
		case RTM_GETTCLASS:
			err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
			goto out;
		default:
			err = -EINVAL;
			goto out;
		}
	}

	/* Create or change: delegate to the qdisc's class ops. */
	new_cl = cl;
	err = -EOPNOTSUPP;
	if (cops->change)
		err = cops->change(q, clid, portid, tca, &new_cl);
	if (err == 0) {
		tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
		/* We just created a new class, need to do reverse binding. */
		if (cl != new_cl)
			tc_bind_tclass(q, portid, clid, new_cl);
	}
out:
	return err;
}
1858
/* State carried through a class walk while dumping classes to netlink. */
struct qdisc_dump_args {
	struct qdisc_walker w;		/* must stay first: qdisc_class_dump() casts back */
	struct sk_buff *skb;		/* dump destination */
	struct netlink_callback *cb;	/* provides portid/seq and resume state */
};
1864
Jamal Hadi Salim5a7a5552016-09-18 08:45:33 -04001865static int qdisc_class_dump(struct Qdisc *q, unsigned long cl,
1866 struct qdisc_walker *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001867{
1868 struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
1869
Eric W. Biederman15e47302012-09-07 20:12:54 +00001870 return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
Jamal Hadi Salim5a7a5552016-09-18 08:45:33 -04001871 a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
1872 RTM_NEWTCLASS);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001873}
1874
/* Dump all classes of a single qdisc @q into @skb.
 *
 * @t_p counts qdiscs visited so far across the whole dump; qdiscs with
 * index below @s_t were completed by a previous dump pass and are
 * skipped.  cb->args[1] carries the per-qdisc class skip count between
 * passes.  Returns 0 to continue the dump, -1 when @skb is full.
 */
static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
				struct tcmsg *tcm, struct netlink_callback *cb,
				int *t_p, int s_t)
{
	struct qdisc_dump_args arg;

	/* Skip invisible qdiscs, classless qdiscs, already-dumped qdiscs,
	 * and — when the request names a parent — non-matching qdiscs.
	 */
	if (tc_qdisc_dump_ignore(q, false) ||
	    *t_p < s_t || !q->ops->cl_ops ||
	    (tcm->tcm_parent &&
	     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
		(*t_p)++;
		return 0;
	}
	/* First qdisc past the resume point: reset the per-qdisc state. */
	if (*t_p > s_t)
		memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
	arg.w.fn = qdisc_class_dump;
	arg.skb = skb;
	arg.cb = cb;
	arg.w.stop = 0;
	arg.w.skip = cb->args[1];	/* classes already dumped for this qdisc */
	arg.w.count = 0;
	q->ops->cl_ops->walk(q, &arg.w);
	cb->args[1] = arg.w.count;	/* remember progress for the next pass */
	if (arg.w.stop)
		return -1;
	(*t_p)++;
	return 0;
}
1903
/* Dump classes of @root and of every qdisc hashed on @root's device.
 * Returns 0 on completion (or nothing to do), -1 when @skb filled up.
 */
static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
			       struct tcmsg *tcm, struct netlink_callback *cb,
			       int *t_p, int s_t)
{
	struct Qdisc *q;
	int b;		/* hashtable bucket cursor for hash_for_each() */

	if (!root)
		return 0;

	if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
		return -1;

	if (!qdisc_dev(root))
		return 0;

	/* Fast path: a specific parent was requested — visit only the
	 * qdisc whose handle matches instead of the whole hash table.
	 */
	if (tcm->tcm_parent) {
		q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent));
		if (q && tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
			return -1;
		return 0;
	}
	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
		if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
			return -1;
	}

	return 0;
}
1933
/* netlink dump handler for RTM_GETTCLASS: dump the classes of the
 * device addressed by the request's tcmsg, resuming from cb->args[0].
 * Returns skb->len (netlink dump convention) or 0 when nothing to dump.
 */
static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	struct net *net = sock_net(skb->sk);
	struct netdev_queue *dev_queue;
	struct net_device *dev;
	int t, s_t;

	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
		return 0;
	dev = dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return 0;

	s_t = cb->args[0];	/* qdisc index to resume from */
	t = 0;

	if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0)
		goto done;

	/* The ingress qdisc (if present) is dumped separately from the root. */
	dev_queue = dev_ingress_queue(dev);
	if (dev_queue &&
	    tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
				&t, s_t) < 0)
		goto done;

done:
	cb->args[0] = t;	/* resume point for the next dump pass */

	dev_put(dev);		/* paired with dev_get_by_index() above */
	return skb->len;
}
1966
Linus Torvalds1da177e2005-04-16 15:20:36 -07001967#ifdef CONFIG_PROC_FS
/* seq_file show callback for /proc/net/psched: four hex words describing
 * the scheduler's time units (presumably consumed by userspace tc for
 * time/rate conversion — NOTE(review): confirm against iproute2).
 */
static int psched_show(struct seq_file *seq, void *v)
{
	seq_printf(seq, "%08x %08x %08x %08x\n",
		   (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
		   1000000,
		   (u32)NSEC_PER_SEC / hrtimer_resolution);

	return 0;
}
1977
/* open() handler for /proc/net/psched; single_open since the content
 * is one fixed record produced by psched_show().
 */
static int psched_open(struct inode *inode, struct file *file)
{
	return single_open(file, psched_show, NULL);
}
1982
/* File operations for the /proc/net/psched entry (single-record seq_file). */
static const struct file_operations psched_fops = {
	.owner = THIS_MODULE,
	.open = psched_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
Tom Goff7316ae82010-03-19 15:40:13 +00001990
1991static int __net_init psched_net_init(struct net *net)
1992{
1993 struct proc_dir_entry *e;
1994
Gao fengd4beaa62013-02-18 01:34:54 +00001995 e = proc_create("psched", 0, net->proc_net, &psched_fops);
Tom Goff7316ae82010-03-19 15:40:13 +00001996 if (e == NULL)
1997 return -ENOMEM;
1998
1999 return 0;
2000}
2001
/* Per-netns teardown: remove /proc/net/psched for namespace @net. */
static void __net_exit psched_net_exit(struct net *net)
{
	remove_proc_entry("psched", net->proc_net);
}
2006#else
/* CONFIG_PROC_FS=n stub: no proc entry to create. */
static int __net_init psched_net_init(struct net *net)
{
	return 0;
}
2011
/* CONFIG_PROC_FS=n stub: no proc entry to remove. */
static void __net_exit psched_net_exit(struct net *net)
{
}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002015#endif
2016
/* Create/remove the psched proc entry as network namespaces come and go. */
static struct pernet_operations psched_net_ops = {
	.init = psched_net_init,
	.exit = psched_net_exit,
};
2021
/* Boot-time initialization of the packet scheduler core: per-netns proc
 * entries, the built-in qdiscs, and the qdisc/class rtnetlink handlers.
 * Returns 0 on success or the per-netns registration error.
 */
static int __init pktsched_init(void)
{
	int err;

	err = register_pernet_subsys(&psched_net_ops);
	if (err) {
		pr_err("pktsched_init: "
		       "cannot initialize per netns operations\n");
		return err;
	}

	/* Built-in qdiscs; return values are intentionally not checked
	 * here (NOTE(review): presumably these static ops cannot clash —
	 * confirm register_qdisc() failure modes).
	 */
	register_qdisc(&pfifo_fast_ops);
	register_qdisc(&pfifo_qdisc_ops);
	register_qdisc(&bfifo_qdisc_ops);
	register_qdisc(&pfifo_head_drop_qdisc_ops);
	register_qdisc(&mq_qdisc_ops);
	register_qdisc(&noqueue_qdisc_ops);

	/* Hook the RTM_*QDISC / RTM_*TCLASS message and dump handlers. */
	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
		      0);
	rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
		      0);

	return 0;
}
2051
/* Run at subsys initcall level, after core networking is initialized. */
subsys_initcall(pktsched_init);