blob: db0c2ba1d1562854826582cca0bafbab24cae243 [file] [log] [blame]
// SPDX-License-Identifier: GPL-2.0

/* net/sched/sch_etf.c  Earliest TxTime First queueing discipline.
 *
 * Authors:	Jesus Sanchez-Palencia <jesus.sanchez-palencia@intel.com>
 *		Vinicius Costa Gomes <vinicius.gomes@intel.com>
 */
8
9#include <linux/module.h>
10#include <linux/types.h>
11#include <linux/kernel.h>
12#include <linux/string.h>
13#include <linux/errno.h>
Jesus Sanchez-Palencia4b15c702018-07-03 15:43:00 -070014#include <linux/errqueue.h>
Vinicius Costa Gomes25db26a2018-07-03 15:42:53 -070015#include <linux/rbtree.h>
16#include <linux/skbuff.h>
17#include <linux/posix-timers.h>
18#include <net/netlink.h>
19#include <net/sch_generic.h>
20#include <net/pkt_sched.h>
21#include <net/sock.h>
22
/* Flag tests on the 'flags' member of the pointed-to options struct (used in
 * this file on struct tc_etf_qopt). Each expands to the masked bit, i.e. it is
 * non-zero when the flag is set rather than a normalized 0/1.
 */
#define DEADLINE_MODE_IS_ON(x)	((x)->flags & TC_ETF_DEADLINE_MODE_ON)
#define OFFLOAD_IS_ON(x)	((x)->flags & TC_ETF_OFFLOAD_ON)
Vinicius Costa Gomes25db26a2018-07-03 15:42:53 -070025
26struct etf_sched_data {
Jesus Sanchez-Palencia88cab772018-07-03 15:42:54 -070027 bool offload;
Vinicius Costa Gomes25db26a2018-07-03 15:42:53 -070028 bool deadline_mode;
29 int clockid;
30 int queue;
31 s32 delta; /* in ns */
32 ktime_t last; /* The txtime of the last skb sent to the netdevice. */
Jesus Sanchez-Palencia09fd4862018-11-14 17:26:33 -080033 struct rb_root_cached head;
Vinicius Costa Gomes25db26a2018-07-03 15:42:53 -070034 struct qdisc_watchdog watchdog;
35 ktime_t (*get_time)(void);
36};
37
38static const struct nla_policy etf_policy[TCA_ETF_MAX + 1] = {
39 [TCA_ETF_PARMS] = { .len = sizeof(struct tc_etf_qopt) },
40};
41
42static inline int validate_input_params(struct tc_etf_qopt *qopt,
43 struct netlink_ext_ack *extack)
44{
45 /* Check if params comply to the following rules:
46 * * Clockid and delta must be valid.
47 *
48 * * Dynamic clockids are not supported.
49 *
50 * * Delta must be a positive integer.
Jesus Sanchez-Palencia88cab772018-07-03 15:42:54 -070051 *
52 * Also note that for the HW offload case, we must
53 * expect that system clocks have been synchronized to PHC.
Vinicius Costa Gomes25db26a2018-07-03 15:42:53 -070054 */
55 if (qopt->clockid < 0) {
56 NL_SET_ERR_MSG(extack, "Dynamic clockids are not supported");
57 return -ENOTSUPP;
58 }
59
60 if (qopt->clockid != CLOCK_TAI) {
61 NL_SET_ERR_MSG(extack, "Invalid clockid. CLOCK_TAI must be used");
62 return -EINVAL;
63 }
64
65 if (qopt->delta < 0) {
66 NL_SET_ERR_MSG(extack, "Delta must be positive");
67 return -EINVAL;
68 }
69
70 return 0;
71}
72
73static bool is_packet_valid(struct Qdisc *sch, struct sk_buff *nskb)
74{
75 struct etf_sched_data *q = qdisc_priv(sch);
76 ktime_t txtime = nskb->tstamp;
77 struct sock *sk = nskb->sk;
78 ktime_t now;
79
80 if (!sk)
81 return false;
82
83 if (!sock_flag(sk, SOCK_TXTIME))
84 return false;
85
86 /* We don't perform crosstimestamping.
87 * Drop if packet's clockid differs from qdisc's.
88 */
89 if (sk->sk_clockid != q->clockid)
90 return false;
91
92 if (sk->sk_txtime_deadline_mode != q->deadline_mode)
93 return false;
94
95 now = q->get_time();
96 if (ktime_before(txtime, now) || ktime_before(txtime, q->last))
97 return false;
98
99 return true;
100}
101
102static struct sk_buff *etf_peek_timesortedlist(struct Qdisc *sch)
103{
104 struct etf_sched_data *q = qdisc_priv(sch);
105 struct rb_node *p;
106
Jesus Sanchez-Palencia09fd4862018-11-14 17:26:33 -0800107 p = rb_first_cached(&q->head);
Vinicius Costa Gomes25db26a2018-07-03 15:42:53 -0700108 if (!p)
109 return NULL;
110
111 return rb_to_skb(p);
112}
113
114static void reset_watchdog(struct Qdisc *sch)
115{
116 struct etf_sched_data *q = qdisc_priv(sch);
117 struct sk_buff *skb = etf_peek_timesortedlist(sch);
118 ktime_t next;
119
Jesus Sanchez-Palencia3fcbdae2018-11-14 17:26:32 -0800120 if (!skb) {
121 qdisc_watchdog_cancel(&q->watchdog);
Vinicius Costa Gomes25db26a2018-07-03 15:42:53 -0700122 return;
Jesus Sanchez-Palencia3fcbdae2018-11-14 17:26:32 -0800123 }
Vinicius Costa Gomes25db26a2018-07-03 15:42:53 -0700124
125 next = ktime_sub_ns(skb->tstamp, q->delta);
126 qdisc_watchdog_schedule_ns(&q->watchdog, ktime_to_ns(next));
127}
128
Jesus Sanchez-Palencia4b15c702018-07-03 15:43:00 -0700129static void report_sock_error(struct sk_buff *skb, u32 err, u8 code)
130{
131 struct sock_exterr_skb *serr;
132 struct sk_buff *clone;
133 ktime_t txtime = skb->tstamp;
134
135 if (!skb->sk || !(skb->sk->sk_txtime_report_errors))
136 return;
137
138 clone = skb_clone(skb, GFP_ATOMIC);
139 if (!clone)
140 return;
141
142 serr = SKB_EXT_ERR(clone);
143 serr->ee.ee_errno = err;
144 serr->ee.ee_origin = SO_EE_ORIGIN_TXTIME;
145 serr->ee.ee_type = 0;
146 serr->ee.ee_code = code;
147 serr->ee.ee_pad = 0;
148 serr->ee.ee_data = (txtime >> 32); /* high part of tstamp */
149 serr->ee.ee_info = txtime; /* low part of tstamp */
150
151 if (sock_queue_err_skb(skb->sk, clone))
152 kfree_skb(clone);
153}
154
Vinicius Costa Gomes25db26a2018-07-03 15:42:53 -0700155static int etf_enqueue_timesortedlist(struct sk_buff *nskb, struct Qdisc *sch,
156 struct sk_buff **to_free)
157{
158 struct etf_sched_data *q = qdisc_priv(sch);
Jesus Sanchez-Palencia09fd4862018-11-14 17:26:33 -0800159 struct rb_node **p = &q->head.rb_root.rb_node, *parent = NULL;
Vinicius Costa Gomes25db26a2018-07-03 15:42:53 -0700160 ktime_t txtime = nskb->tstamp;
Jesus Sanchez-Palencia09fd4862018-11-14 17:26:33 -0800161 bool leftmost = true;
Vinicius Costa Gomes25db26a2018-07-03 15:42:53 -0700162
Jesus Sanchez-Palencia4b15c702018-07-03 15:43:00 -0700163 if (!is_packet_valid(sch, nskb)) {
164 report_sock_error(nskb, EINVAL,
165 SO_EE_CODE_TXTIME_INVALID_PARAM);
Vinicius Costa Gomes25db26a2018-07-03 15:42:53 -0700166 return qdisc_drop(nskb, sch, to_free);
Jesus Sanchez-Palencia4b15c702018-07-03 15:43:00 -0700167 }
Vinicius Costa Gomes25db26a2018-07-03 15:42:53 -0700168
169 while (*p) {
170 struct sk_buff *skb;
171
172 parent = *p;
173 skb = rb_to_skb(parent);
Jesus Sanchez-Palencia09fd4862018-11-14 17:26:33 -0800174 if (ktime_after(txtime, skb->tstamp)) {
Vinicius Costa Gomes25db26a2018-07-03 15:42:53 -0700175 p = &parent->rb_right;
Jesus Sanchez-Palencia09fd4862018-11-14 17:26:33 -0800176 leftmost = false;
177 } else {
Vinicius Costa Gomes25db26a2018-07-03 15:42:53 -0700178 p = &parent->rb_left;
Jesus Sanchez-Palencia09fd4862018-11-14 17:26:33 -0800179 }
Vinicius Costa Gomes25db26a2018-07-03 15:42:53 -0700180 }
181 rb_link_node(&nskb->rbnode, parent, p);
Jesus Sanchez-Palencia09fd4862018-11-14 17:26:33 -0800182 rb_insert_color_cached(&nskb->rbnode, &q->head, leftmost);
Vinicius Costa Gomes25db26a2018-07-03 15:42:53 -0700183
184 qdisc_qstats_backlog_inc(sch, nskb);
185 sch->q.qlen++;
186
187 /* Now we may need to re-arm the qdisc watchdog for the next packet. */
188 reset_watchdog(sch);
189
190 return NET_XMIT_SUCCESS;
191}
192
Jesus Sanchez-Palencia37342bd2018-11-14 17:26:35 -0800193static void timesortedlist_drop(struct Qdisc *sch, struct sk_buff *skb,
194 ktime_t now)
Jesus Sanchez-Palenciacbeeb8e2018-11-14 17:26:34 -0800195{
196 struct etf_sched_data *q = qdisc_priv(sch);
197 struct sk_buff *to_free = NULL;
Jesus Sanchez-Palencia37342bd2018-11-14 17:26:35 -0800198 struct sk_buff *tmp = NULL;
Jesus Sanchez-Palenciacbeeb8e2018-11-14 17:26:34 -0800199
Jesus Sanchez-Palencia37342bd2018-11-14 17:26:35 -0800200 skb_rbtree_walk_from_safe(skb, tmp) {
201 if (ktime_after(skb->tstamp, now))
202 break;
Jesus Sanchez-Palenciacbeeb8e2018-11-14 17:26:34 -0800203
Jesus Sanchez-Palencia37342bd2018-11-14 17:26:35 -0800204 rb_erase_cached(&skb->rbnode, &q->head);
Jesus Sanchez-Palenciacbeeb8e2018-11-14 17:26:34 -0800205
Jesus Sanchez-Palencia37342bd2018-11-14 17:26:35 -0800206 /* The rbnode field in the skb re-uses these fields, now that
207 * we are done with the rbnode, reset them.
208 */
209 skb->next = NULL;
210 skb->prev = NULL;
211 skb->dev = qdisc_dev(sch);
Jesus Sanchez-Palenciacbeeb8e2018-11-14 17:26:34 -0800212
Jesus Sanchez-Palencia37342bd2018-11-14 17:26:35 -0800213 report_sock_error(skb, ECANCELED, SO_EE_CODE_TXTIME_MISSED);
Jesus Sanchez-Palenciacbeeb8e2018-11-14 17:26:34 -0800214
Jesus Sanchez-Palencia37342bd2018-11-14 17:26:35 -0800215 qdisc_qstats_backlog_dec(sch, skb);
216 qdisc_drop(skb, sch, &to_free);
217 qdisc_qstats_overlimit(sch);
218 sch->q.qlen--;
219 }
220
Jesus Sanchez-Palenciacbeeb8e2018-11-14 17:26:34 -0800221 kfree_skb_list(to_free);
Jesus Sanchez-Palenciacbeeb8e2018-11-14 17:26:34 -0800222}
223
224static void timesortedlist_remove(struct Qdisc *sch, struct sk_buff *skb)
Vinicius Costa Gomes25db26a2018-07-03 15:42:53 -0700225{
226 struct etf_sched_data *q = qdisc_priv(sch);
227
Jesus Sanchez-Palencia09fd4862018-11-14 17:26:33 -0800228 rb_erase_cached(&skb->rbnode, &q->head);
Vinicius Costa Gomes25db26a2018-07-03 15:42:53 -0700229
230 /* The rbnode field in the skb re-uses these fields, now that
231 * we are done with the rbnode, reset them.
232 */
233 skb->next = NULL;
234 skb->prev = NULL;
235 skb->dev = qdisc_dev(sch);
236
237 qdisc_qstats_backlog_dec(sch, skb);
238
Jesus Sanchez-Palenciacbeeb8e2018-11-14 17:26:34 -0800239 qdisc_bstats_update(sch, skb);
Vinicius Costa Gomes25db26a2018-07-03 15:42:53 -0700240
Jesus Sanchez-Palenciacbeeb8e2018-11-14 17:26:34 -0800241 q->last = skb->tstamp;
Vinicius Costa Gomes25db26a2018-07-03 15:42:53 -0700242
243 sch->q.qlen--;
244}
245
246static struct sk_buff *etf_dequeue_timesortedlist(struct Qdisc *sch)
247{
248 struct etf_sched_data *q = qdisc_priv(sch);
249 struct sk_buff *skb;
250 ktime_t now, next;
251
252 skb = etf_peek_timesortedlist(sch);
253 if (!skb)
254 return NULL;
255
256 now = q->get_time();
257
258 /* Drop if packet has expired while in queue. */
Vinicius Costa Gomes25db26a2018-07-03 15:42:53 -0700259 if (ktime_before(skb->tstamp, now)) {
Jesus Sanchez-Palencia37342bd2018-11-14 17:26:35 -0800260 timesortedlist_drop(sch, skb, now);
Vinicius Costa Gomes25db26a2018-07-03 15:42:53 -0700261 skb = NULL;
262 goto out;
263 }
264
265 /* When in deadline mode, dequeue as soon as possible and change the
266 * txtime from deadline to (now + delta).
267 */
268 if (q->deadline_mode) {
Jesus Sanchez-Palenciacbeeb8e2018-11-14 17:26:34 -0800269 timesortedlist_remove(sch, skb);
Vinicius Costa Gomes25db26a2018-07-03 15:42:53 -0700270 skb->tstamp = now;
271 goto out;
272 }
273
274 next = ktime_sub_ns(skb->tstamp, q->delta);
275
276 /* Dequeue only if now is within the [txtime - delta, txtime] range. */
277 if (ktime_after(now, next))
Jesus Sanchez-Palenciacbeeb8e2018-11-14 17:26:34 -0800278 timesortedlist_remove(sch, skb);
Vinicius Costa Gomes25db26a2018-07-03 15:42:53 -0700279 else
280 skb = NULL;
281
282out:
283 /* Now we may need to re-arm the qdisc watchdog for the next packet. */
284 reset_watchdog(sch);
285
286 return skb;
287}
288
Jesus Sanchez-Palencia88cab772018-07-03 15:42:54 -0700289static void etf_disable_offload(struct net_device *dev,
290 struct etf_sched_data *q)
291{
292 struct tc_etf_qopt_offload etf = { };
293 const struct net_device_ops *ops;
294 int err;
295
296 if (!q->offload)
297 return;
298
299 ops = dev->netdev_ops;
300 if (!ops->ndo_setup_tc)
301 return;
302
303 etf.queue = q->queue;
304 etf.enable = 0;
305
306 err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETF, &etf);
307 if (err < 0)
308 pr_warn("Couldn't disable ETF offload for queue %d\n",
309 etf.queue);
310}
311
312static int etf_enable_offload(struct net_device *dev, struct etf_sched_data *q,
313 struct netlink_ext_ack *extack)
314{
315 const struct net_device_ops *ops = dev->netdev_ops;
316 struct tc_etf_qopt_offload etf = { };
317 int err;
318
319 if (q->offload)
320 return 0;
321
322 if (!ops->ndo_setup_tc) {
323 NL_SET_ERR_MSG(extack, "Specified device does not support ETF offload");
324 return -EOPNOTSUPP;
325 }
326
327 etf.queue = q->queue;
328 etf.enable = 1;
329
330 err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETF, &etf);
331 if (err < 0) {
332 NL_SET_ERR_MSG(extack, "Specified device failed to setup ETF hardware offload");
333 return err;
334 }
335
336 return 0;
337}
338
Vinicius Costa Gomes25db26a2018-07-03 15:42:53 -0700339static int etf_init(struct Qdisc *sch, struct nlattr *opt,
340 struct netlink_ext_ack *extack)
341{
342 struct etf_sched_data *q = qdisc_priv(sch);
343 struct net_device *dev = qdisc_dev(sch);
344 struct nlattr *tb[TCA_ETF_MAX + 1];
345 struct tc_etf_qopt *qopt;
346 int err;
347
348 if (!opt) {
349 NL_SET_ERR_MSG(extack,
350 "Missing ETF qdisc options which are mandatory");
351 return -EINVAL;
352 }
353
Johannes Berg8cb08172019-04-26 14:07:28 +0200354 err = nla_parse_nested_deprecated(tb, TCA_ETF_MAX, opt, etf_policy,
355 extack);
Vinicius Costa Gomes25db26a2018-07-03 15:42:53 -0700356 if (err < 0)
357 return err;
358
359 if (!tb[TCA_ETF_PARMS]) {
360 NL_SET_ERR_MSG(extack, "Missing mandatory ETF parameters");
361 return -EINVAL;
362 }
363
364 qopt = nla_data(tb[TCA_ETF_PARMS]);
365
Jesus Sanchez-Palencia88cab772018-07-03 15:42:54 -0700366 pr_debug("delta %d clockid %d offload %s deadline %s\n",
Vinicius Costa Gomes25db26a2018-07-03 15:42:53 -0700367 qopt->delta, qopt->clockid,
Jesus Sanchez-Palencia88cab772018-07-03 15:42:54 -0700368 OFFLOAD_IS_ON(qopt) ? "on" : "off",
Vinicius Costa Gomes25db26a2018-07-03 15:42:53 -0700369 DEADLINE_MODE_IS_ON(qopt) ? "on" : "off");
370
371 err = validate_input_params(qopt, extack);
372 if (err < 0)
373 return err;
374
375 q->queue = sch->dev_queue - netdev_get_tx_queue(dev, 0);
376
Jesus Sanchez-Palencia88cab772018-07-03 15:42:54 -0700377 if (OFFLOAD_IS_ON(qopt)) {
378 err = etf_enable_offload(dev, q, extack);
379 if (err < 0)
380 return err;
381 }
382
Vinicius Costa Gomes25db26a2018-07-03 15:42:53 -0700383 /* Everything went OK, save the parameters used. */
384 q->delta = qopt->delta;
385 q->clockid = qopt->clockid;
Jesus Sanchez-Palencia88cab772018-07-03 15:42:54 -0700386 q->offload = OFFLOAD_IS_ON(qopt);
Vinicius Costa Gomes25db26a2018-07-03 15:42:53 -0700387 q->deadline_mode = DEADLINE_MODE_IS_ON(qopt);
388
389 switch (q->clockid) {
390 case CLOCK_REALTIME:
391 q->get_time = ktime_get_real;
392 break;
393 case CLOCK_MONOTONIC:
394 q->get_time = ktime_get;
395 break;
396 case CLOCK_BOOTTIME:
397 q->get_time = ktime_get_boottime;
398 break;
399 case CLOCK_TAI:
400 q->get_time = ktime_get_clocktai;
401 break;
402 default:
403 NL_SET_ERR_MSG(extack, "Clockid is not supported");
404 return -ENOTSUPP;
405 }
406
407 qdisc_watchdog_init_clockid(&q->watchdog, sch, q->clockid);
408
409 return 0;
410}
411
412static void timesortedlist_clear(struct Qdisc *sch)
413{
414 struct etf_sched_data *q = qdisc_priv(sch);
Jesus Sanchez-Palencia09fd4862018-11-14 17:26:33 -0800415 struct rb_node *p = rb_first_cached(&q->head);
Vinicius Costa Gomes25db26a2018-07-03 15:42:53 -0700416
417 while (p) {
418 struct sk_buff *skb = rb_to_skb(p);
419
420 p = rb_next(p);
421
Jesus Sanchez-Palencia09fd4862018-11-14 17:26:33 -0800422 rb_erase_cached(&skb->rbnode, &q->head);
Vinicius Costa Gomes25db26a2018-07-03 15:42:53 -0700423 rtnl_kfree_skbs(skb, skb);
424 sch->q.qlen--;
425 }
426}
427
428static void etf_reset(struct Qdisc *sch)
429{
430 struct etf_sched_data *q = qdisc_priv(sch);
431
432 /* Only cancel watchdog if it's been initialized. */
433 if (q->watchdog.qdisc == sch)
434 qdisc_watchdog_cancel(&q->watchdog);
435
436 /* No matter which mode we are on, it's safe to clear both lists. */
437 timesortedlist_clear(sch);
438 __qdisc_reset_queue(&sch->q);
439
440 sch->qstats.backlog = 0;
441 sch->q.qlen = 0;
442
443 q->last = 0;
444}
445
446static void etf_destroy(struct Qdisc *sch)
447{
448 struct etf_sched_data *q = qdisc_priv(sch);
Jesus Sanchez-Palencia88cab772018-07-03 15:42:54 -0700449 struct net_device *dev = qdisc_dev(sch);
Vinicius Costa Gomes25db26a2018-07-03 15:42:53 -0700450
451 /* Only cancel watchdog if it's been initialized. */
452 if (q->watchdog.qdisc == sch)
453 qdisc_watchdog_cancel(&q->watchdog);
Jesus Sanchez-Palencia88cab772018-07-03 15:42:54 -0700454
455 etf_disable_offload(dev, q);
Vinicius Costa Gomes25db26a2018-07-03 15:42:53 -0700456}
457
458static int etf_dump(struct Qdisc *sch, struct sk_buff *skb)
459{
460 struct etf_sched_data *q = qdisc_priv(sch);
461 struct tc_etf_qopt opt = { };
462 struct nlattr *nest;
463
Michal Kubecekae0be8d2019-04-26 11:13:06 +0200464 nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
Vinicius Costa Gomes25db26a2018-07-03 15:42:53 -0700465 if (!nest)
466 goto nla_put_failure;
467
468 opt.delta = q->delta;
469 opt.clockid = q->clockid;
Jesus Sanchez-Palencia88cab772018-07-03 15:42:54 -0700470 if (q->offload)
471 opt.flags |= TC_ETF_OFFLOAD_ON;
472
Vinicius Costa Gomes25db26a2018-07-03 15:42:53 -0700473 if (q->deadline_mode)
474 opt.flags |= TC_ETF_DEADLINE_MODE_ON;
475
476 if (nla_put(skb, TCA_ETF_PARMS, sizeof(opt), &opt))
477 goto nla_put_failure;
478
479 return nla_nest_end(skb, nest);
480
481nla_put_failure:
482 nla_nest_cancel(skb, nest);
483 return -1;
484}
485
486static struct Qdisc_ops etf_qdisc_ops __read_mostly = {
487 .id = "etf",
488 .priv_size = sizeof(struct etf_sched_data),
489 .enqueue = etf_enqueue_timesortedlist,
490 .dequeue = etf_dequeue_timesortedlist,
491 .peek = etf_peek_timesortedlist,
492 .init = etf_init,
493 .reset = etf_reset,
494 .destroy = etf_destroy,
495 .dump = etf_dump,
496 .owner = THIS_MODULE,
497};
498
499static int __init etf_module_init(void)
500{
501 return register_qdisc(&etf_qdisc_ops);
502}
503
504static void __exit etf_module_exit(void)
505{
506 unregister_qdisc(&etf_qdisc_ops);
507}
508module_init(etf_module_init)
509module_exit(etf_module_exit)
510MODULE_LICENSE("GPL");